diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2021-11-23 13:05:48 +0000 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2021-11-23 13:05:48 +0000 |
commit | cb4be28b07823639a64afaa34bf3919220a81e12 (patch) | |
tree | 21cbcc5597fcf3f86ddb019415c692b61683dcd6 /documents.py | |
parent | 5583c58431d583753fc671b1ae5a93b380fd0e15 (diff) | |
download | searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.tar.gz searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.zip |
first commit, create parser and tf-idf table
Diffstat (limited to 'documents.py')
-rw-r--r-- | documents.py | 28 |
1 files changed, 28 insertions, 0 deletions
# Reconstructed from the diff hunk for documents.py
# (new file mode 100644, index 0000000..2053f0f, hunk @@ -0,0 +1,28 @@).
"""Thin helpers around ``database.Database`` for registering and looking up documents.

Run as a script with a directory path as the only argument to register every
entry of that directory.
"""

import database
import sys
import os


def add_documents(documents_path):
    """Register every entry found in *documents_path* with the database.

    Each entry name is joined onto ``documents_path``; the resulting list of
    paths is printed and then handed to ``Database.append_documents`` in a
    single call.

    Entry names are sorted first: ``os.listdir`` returns them in arbitrary
    order, which would make the order of the submitted list differ between
    runs and platforms.

    NOTE(review): sub-directories are not filtered out, so they are passed
    along as well -- confirm ``append_documents`` copes with that.
    """
    docs = [
        os.path.join(documents_path, entry)
        for entry in sorted(os.listdir(documents_path))
    ]
    print(docs)
    with database.Database() as db:
        db.append_documents(docs)


def get_document_name_by_id(id_):
    """Return the result of ``Database.get_document_name_by_id(id_)``."""
    with database.Database() as db:
        return db.get_document_name_by_id(id_)


def get_document_id_by_name(document_name):
    """Return the result of ``Database.get_document_id_by_name(document_name)``."""
    with database.Database() as db:
        return db.get_document_id_by_name(document_name)


def get_num_documents():
    """Return the result of ``Database.get_num_documents()``."""
    with database.Database() as db:
        return db.get_num_documents()


if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError when the
    # directory argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <documents_path>")
    add_documents(sys.argv[1])

    # Examples of the lookup helpers, kept from the original for manual checks:
    # print(get_document_name_by_id(1))
    # print(get_document_id_by_name("../Wikibooks/USMLE Step 1 Review Reproductive.html"))
    # print(get_num_documents())
\ No newline at end of file |