diff --git a/.gitignore b/.gitignore
index fb43b6c..5c4f9a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,10 @@ cranfield.qrels
 cranfieldoutput
 /duckdb-fts-main/
 /trec_eval/
+/spreadsheets/
+*.png
+plot*
+*lock*
 *.db
 *.ciff
 output*.txt
diff --git a/phrase_index.py b/phrase_index.py
index cb1a909..d3d52a5 100644
--- a/phrase_index.py
+++ b/phrase_index.py
@@ -159,7 +159,6 @@ def create_tokenizer_duckdb(con):
         );
     """)
 
-
 def create_tokenizer_ciff(con, fts_schema="fts_main_documents"):
     con.sql(f"""
         CREATE TABLE IF NOT EXISTS {fts_schema}.dict (termid BIGINT, term TEXT, df BIGINT);
@@ -190,7 +189,7 @@ def create_tokenizer_ciff(con, fts_schema="fts_main_documents"):
 def create_stopwords_table(con, fts_schema="fts_main_documents", stopwords='none'):
     """
     Create the stopwords table.
-    If stopwords is 'english', it will create a table with English stopwords.
+    If stopwords is 'english', it will create a table with English stopwords. 
     If stopwords is 'none', it will create an empty table.
     """
     con.sql(f"DROP TABLE IF EXISTS {fts_schema}.stopwords;")
@@ -252,15 +251,17 @@ def create_terms_table(con, fts_schema="fts_main_documents", input_schema="main"
     Assumes the table fts_main_documents.dict already exists.
     Adds a fieldid and termid column for compatibility with fielded search macros.
     """
-    # Cleanup input text removing special characters
+    # Clean up input text using the same regex as DuckDB's tokenizer
     con.sql(f"""
         CREATE OR REPLACE TABLE {fts_schema}.cleaned_docs AS
         SELECT
-            did,
-            regexp_replace(content, '[0-9!@#$%^&*()_+={{}}\\[\\]:;<>,.?~\\\\/\\|''''"`-]+', ' ', 'g') AS content
+            {input_id},
+            regexp_replace(lower(strip_accents(CAST({input_val} AS VARCHAR))),
+                '[0-9!@#$%^&*()_+={{}}\\[\\]:;<>,.?~\\\\/\\|''''"`-]+', ' ', 'g') AS content,
         FROM {input_schema}.{input_table}
     """)
 
+    # Use the ciff tokenizer to find bigrams and unigrams
     con.sql(f"""
         CREATE OR REPLACE TABLE {fts_schema}.terms AS (
             SELECT
@@ -270,7 +271,7 @@ def create_terms_table(con, fts_schema="fts_main_documents", input_schema="main"
             FROM (
                 SELECT
                     row_number() OVER (ORDER BY (SELECT NULL)) AS docid,
-                    unnest({fts_schema}.tokenize({input_val})) AS term
+                    unnest({fts_schema}.tokenize(content)) AS term
                 FROM {fts_schema}.cleaned_docs
             ) AS t
             JOIN {fts_schema}.dict d ON t.term = d.term
diff --git a/ze_index.py b/ze_index.py
index 5d69ebc..dddd126 100644
--- a/ze_index.py
+++ b/ze_index.py
@@ -59,7 +59,7 @@ def create_lm(con, stemmer):
            SELECT docs.name AS docname, LN(MAX(doc_len)) + sum(subscore) AS score FROM subscores, fts_main_documents.docs AS docs WHERE subscores.docid = docs.docid GROUP BY docs.name
         ),
         postings_cost AS (
-           SELECT COUNT(DISTINCT docid) AS cost FROM qterms
+           SELECT COUNT(*) AS cost FROM term_tf
         )
         SELECT docname, score, (SELECT cost FROM postings_cost) AS postings_cost FROM scores
         );