about summary refs log tree commit diff stats
path: root/documents.py
diff options
context:
space:
mode:
author jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
committer jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
commit cb4be28b07823639a64afaa34bf3919220a81e12 (patch)
tree 21cbcc5597fcf3f86ddb019415c692b61683dcd6 /documents.py
parent 5583c58431d583753fc671b1ae5a93b380fd0e15 (diff)
download searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.tar.gz
searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.zip
first commit, create parser and tf-idf table
Diffstat (limited to 'documents.py')
-rw-r--r-- documents.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/documents.py b/documents.py
new file mode 100644
index 0000000..2053f0f
--- /dev/null
+++ b/documents.py
@@ -0,0 +1,28 @@
+import database
+import sys
+import os
+
+def add_documents(documents_path):
+    """Register the files found directly inside ``documents_path``.
+
+    The directory is scanned one level deep (no recursion). The resulting
+    paths are printed and then handed, in a single call, to
+    ``Database.append_documents``.
+
+    Args:
+        documents_path: Directory whose entries should be added.
+
+    Raises:
+        OSError: If ``documents_path`` cannot be listed (missing, not a
+            directory, permission denied, ...).
+    """
+    candidates = (
+        os.path.join(documents_path, name) for name in os.listdir(documents_path)
+    )
+    # os.listdir() yields entries in arbitrary order and includes
+    # sub-directories. Sort so repeated runs submit the paths in the same
+    # order, and keep regular files only.
+    # NOTE(review): assumes append_documents() expects file paths - confirm.
+    docs = sorted(path for path in candidates if os.path.isfile(path))
+    print(docs)
+    with database.Database() as db:
+        db.append_documents(docs)
+
+def get_document_name_by_id(id_):
+    """Return the result of ``Database.get_document_name_by_id`` for ``id_``.
+
+    A ``database.Database`` context is opened for this single lookup and
+    left again as the function returns.
+    """
+    with database.Database() as connection:
+        document_name = connection.get_document_name_by_id(id_)
+        return document_name
+
+def get_document_id_by_name(document_name):
+    """Return the result of ``Database.get_document_id_by_name`` for ``document_name``.
+
+    A ``database.Database`` context is opened for this single lookup and
+    left again as the function returns.
+    """
+    with database.Database() as connection:
+        document_id = connection.get_document_id_by_name(document_name)
+        return document_id
+
+def get_num_documents():
+    """Return the result of ``Database.get_num_documents``.
+
+    A ``database.Database`` context is opened for this single query and
+    left again as the function returns.
+    """
+    with database.Database() as connection:
+        total = connection.get_num_documents()
+        return total
+
+if __name__ == "__main__":
+    # Script entry point: the first command-line argument is the directory
+    # passed to add_documents().
+    if len(sys.argv) < 2:
+        # Exit with a usage hint (written to stderr, exit status 1) instead
+        # of a bare IndexError traceback when the argument is missing.
+        sys.exit("usage: {} <documents_path>".format(sys.argv[0]))
+    add_documents(sys.argv[1])
+
+    # Manual checks for the lookup helpers; uncomment to try them:
+    # print(get_document_name_by_id(1))
+    # print(get_document_id_by_name("../Wikibooks/USMLE Step 1 Review Reproductive.html"))
+    # print(get_num_documents())
\ No newline at end of file