mirror of
https://github.com/ArthurIdema/Zoekeend-Phrase-Indexing.git
synced 2026-03-26 22:14:10 +00:00
Tested code on half the queries
This commit is contained in:
@@ -5,22 +5,26 @@ echo -e "RUN_ID\tMODE\tSTOPWORDS\tMIN_FREQ\tMIN_PMI\tMAP\tPOSTINGS_COST\tDICT_SI
|
||||
for dir in ../results_new_postings/*; do
|
||||
[ -d "$dir" ] || continue
|
||||
SETTINGS="$dir/settings.txt"
|
||||
EVAL="$dir/eval.txt"
|
||||
DB=$(grep '^DB:' "$SETTINGS" | awk '{print $2}')
|
||||
DB="$dir/$(basename "$DB")"
|
||||
if [[ -f "$SETTINGS" && -f "$EVAL" && -f "$DB" ]]; then
|
||||
if [[ -f "$SETTINGS" && -f "$DB" ]]; then
|
||||
RUN_ID=$(grep '^RUN_ID:' "$SETTINGS" | awk '{print $2}')
|
||||
MODE=$(grep '^MODE:' "$SETTINGS" | awk '{print $2}')
|
||||
STOPWORDS=$(grep '^STOPWORDS:' "$SETTINGS" | awk '{print $2}')
|
||||
MIN_FREQ=$(grep '^MIN_FREQ:' "$SETTINGS" | awk '{print $2}')
|
||||
MIN_PMI=$(grep '^MIN_PMI:' "$SETTINGS" | awk '{print $2}')
|
||||
MAP=$(grep -E '^map[[:space:]]+all' "$EVAL" | awk '{print $3}')
|
||||
POSTINGS_COST=$(grep '^Average cost in postings:' "$EVAL" | awk '{print $5}')
|
||||
DICT_SIZE=$(duckdb "$DB" -csv -noheader "SELECT COUNT(*) FROM fts_main_documents.dict;")
|
||||
TERMS_SIZE=$(duckdb "$DB" -csv -noheader "SELECT COUNT(*) FROM fts_main_documents.terms;")
|
||||
NGRAMS=$(duckdb "$DB" -csv -noheader "SELECT COUNT(*) FROM fts_main_documents.dict WHERE term LIKE '% %';")
|
||||
AVGDL=$(duckdb "$DB" -csv -noheader "SELECT avgdl FROM fts_main_documents.stats;")
|
||||
SUMDF=$(duckdb "$DB" -csv -noheader "SELECT sumdf FROM fts_main_documents.stats;")
|
||||
echo -e "${RUN_ID}\t${MODE}\t${STOPWORDS}\t${MIN_FREQ}\t${MIN_PMI}\t${MAP}\t${POSTINGS_COST}\t${DICT_SIZE}\t${TERMS_SIZE}\t${NGRAMS}\t${AVGDL}\t${SUMDF}"
|
||||
# Get eval from cranfield_queries_half1 where name matches DB
|
||||
DB_BASENAME=$(basename "$DB" .db)
|
||||
EVAL_HALF1="../results_new_postings/cranfield_queries_half2/${DB_BASENAME}_eval.txt"
|
||||
if [[ -f "$EVAL_HALF1" ]]; then
|
||||
MAP=$(grep -E '^map[[:space:]]+all' "$EVAL_HALF1" | awk '{print $3}')
|
||||
POSTINGS_COST=$(grep '^Average cost in postings:' "$EVAL_HALF1" | awk '{print $5}')
|
||||
echo -e "${RUN_ID}\t${MODE}\t${STOPWORDS}\t${MIN_FREQ}\t${MIN_PMI}\t${MAP}\t${POSTINGS_COST}\t${DICT_SIZE}\t${TERMS_SIZE}\t${NGRAMS}\t${AVGDL}\t${SUMDF}"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
Reference in New Issue
Block a user