# Recovered from a cgit patch view that had been collapsed onto a single line.
# Commit cb4be28b07823639a64afaa34bf3919220a81e12 by jwansek,
# Tue, 23 Nov 2021 13:05:48 +0000:
# "first commit, create parser and tf-idf table" (creates documents.py).
"""Thin helpers for adding documents to, and looking them up in, the database.

Every function opens its own ``database.Database`` context and delegates to
the method of the same name on it.  Run as a script with a directory path to
add every entry of that directory::

    python documents.py DOCUMENTS_PATH
"""
import os
import sys

import database


def add_documents(documents_path):
    """Pass the path of every entry in *documents_path* to the database.

    Each name returned by ``os.listdir`` is joined onto ``documents_path``;
    the resulting list is printed and then handed to
    ``database.Database.append_documents`` in a single call.

    Raises whatever ``os.listdir`` raises for a missing or unreadable
    directory (e.g. ``FileNotFoundError``).
    """
    # os.listdir returns names in arbitrary order; sort so the list handed to
    # the database is the same on every run and every filesystem.
    # NOTE(review): entries are not filtered, so subdirectories are passed
    # through as well -- confirm append_documents copes with that.
    docs = [
        os.path.join(documents_path, entry)
        for entry in sorted(os.listdir(documents_path))
    ]
    print(docs)
    with database.Database() as db:
        db.append_documents(docs)


def get_document_name_by_id(id_):
    """Return ``database.Database.get_document_name_by_id(id_)``."""
    with database.Database() as db:
        return db.get_document_name_by_id(id_)


def get_document_id_by_name(document_name):
    """Return ``database.Database.get_document_id_by_name(document_name)``."""
    with database.Database() as db:
        return db.get_document_id_by_name(document_name)


def get_num_documents():
    """Return ``database.Database.get_num_documents()``."""
    with database.Database() as db:
        return db.get_num_documents()


if __name__ == "__main__":
    # Fail with a usage message rather than a bare IndexError when the
    # directory argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python documents.py DOCUMENTS_PATH")
    add_documents(sys.argv[1])

    # Manual smoke checks for the lookup helpers; uncomment to run:
    # print(get_document_name_by_id(1))
    # print(get_document_id_by_name("../Wikibooks/USMLE Step 1 Review Reproductive.html"))
    # print(get_num_documents())