From 1f5dec8047af8c58ce3acb5014d82caf7e6766df Mon Sep 17 00:00:00 2001 From: jwansek Date: Fri, 26 Nov 2021 17:57:07 +0000 Subject: split large texts up into more manageable chunks --- database.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'database.py') diff --git a/database.py b/database.py index 5c326b4..8fc3584 100644 --- a/database.py +++ b/database.py @@ -84,6 +84,11 @@ class Database: cursor.execute("SELECT COUNT(*) FROM documents;") return cursor.fetchone()[0] + def get_max_linked_terms(self): + with self.__connection.cursor(factory = DatabaseCursor) as cursor: + cursor.execute("SELECT MAX(`document_id`) + 2 FROM term_weights;") + return cursor.fetchone()[0] + def append_terms(self, terms): with self.__connection.cursor(factory = DatabaseCursor) as cursor: cursor.executemany("INSERT OR IGNORE INTO vocabulary(term) VALUES (?);", [(term, ) for term in terms]) @@ -211,5 +216,6 @@ if __name__ == "__main__": # print(db.test_log(100)) # print(db.test_log(21)) # db.get_tf_idf_table() - for i, v in db.get_tf_idf_score("enzyme", 1).items(): - print(i, v) \ No newline at end of file + #for i, v in db.get_tf_idf_score("enzyme", 1).items(): + # print(i, v) + print(db.get_max_linked_terms()) -- cgit v1.2.3