diff --git a/phrase_index.py b/phrase_index.py index eec8a3b..6dc3748 100644 --- a/phrase_index.py +++ b/phrase_index.py @@ -2,8 +2,7 @@ import pathlib import sys import duckdb import ir_datasets -import collections -import pandas as pd + from phrases_extractor import extract_phrases_pmi_duckdb from ze_index import normalize diff --git a/phrases_extractor.py b/phrases_extractor.py index 3eacb98..32304c0 100644 --- a/phrases_extractor.py +++ b/phrases_extractor.py @@ -1,5 +1,4 @@ import duckdb -import math from collections import Counter def create_tokenizer_duckdb(con):