about summary refs log tree commit diff stats
path: root/tf_idf.py
diff options
context:
space:
mode:
author jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
committer jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
commit cb4be28b07823639a64afaa34bf3919220a81e12 (patch)
tree 21cbcc5597fcf3f86ddb019415c692b61683dcd6 /tf_idf.py
parent 5583c58431d583753fc671b1ae5a93b380fd0e15 (diff)
download searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.tar.gz
searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.zip
first commit, create parser and tf-idf table
Diffstat (limited to 'tf_idf.py')
-rw-r--r-- tf_idf.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/tf_idf.py b/tf_idf.py
new file mode 100644
index 0000000..615720d
--- /dev/null
+++ b/tf_idf.py
@@ -0,0 +1,17 @@
+import math as maths
+import database
+
+def main():  # Script entry point: build the tf-idf table, then request it from the database.
+ with database.Database() as db:  # Database (project-local `database` module) is used as a context manager; db is the object it yields
+ db.build_tf_idf_table()  # presumably computes and stores the tf-idf table; implementation lives in database.py, not visible here
+
+ db.get_tf_idf_table()  # return value is discarded. NOTE(review): indentation is flattened in this view, so confirm this call runs inside the with-block
+
+def calc_log_tf(tf):  # Log-scale a raw term frequency: 0 when tf == 0, otherwise log10(1 + tf).
+ if tf == 0:  # zero short-circuits to the int 0 (log10(1 + 0) would give the float 0.0 instead)
+ return 0
+ else:
+ return maths.log10(1 + tf)  # math.log10 raises ValueError when 1 + tf <= 0. NOTE(review): the textbook weighting is 1 + log10(tf); confirm this variant is intended
+
+if __name__ == "__main__":  # run main() only when this file is executed as a script, not when it is imported
+ main() \ No newline at end of file