From 1f5dec8047af8c58ce3acb5014d82caf7e6766df Mon Sep 17 00:00:00 2001
From: jwansek <eddie.atten.ea29@gmail.com>
Date: Fri, 26 Nov 2021 17:57:07 +0000
Subject: split large texts up into more manageable chunks

---
 database.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'database.py')

diff --git a/database.py b/database.py
index 5c326b4..8fc3584 100644
--- a/database.py
+++ b/database.py
@@ -84,6 +84,11 @@ class Database:
             cursor.execute("SELECT COUNT(*) FROM documents;")
             return cursor.fetchone()[0]
 
+    def get_max_linked_terms(self):
+        with self.__connection.cursor(factory = DatabaseCursor) as cursor:
+            cursor.execute("SELECT MAX(`document_id`) + 2 FROM term_weights;")
+            return cursor.fetchone()[0]
+
     def append_terms(self, terms):
         with self.__connection.cursor(factory = DatabaseCursor) as cursor:
             cursor.executemany("INSERT OR IGNORE INTO vocabulary(term) VALUES (?);", [(term, ) for term in terms])
@@ -211,5 +216,6 @@ if __name__ == "__main__":
         # print(db.test_log(100))
         # print(db.test_log(21))
         # db.get_tf_idf_table()
-        for i, v in db.get_tf_idf_score("enzyme", 1).items():
-            print(i, v)
\ No newline at end of file
+        #for i, v in db.get_tf_idf_score("enzyme", 1).items():
+        #    print(i, v)
+        print(db.get_max_linked_terms())
-- 
cgit v1.2.3