about | summary | refs | log | tree | commit | diff | stats
path: root/search.py
diff options
context:
space:
mode:
author: jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
committer: jwansek <eddie.atten.ea29@gmail.com> 2021-11-23 13:05:48 +0000
commit: cb4be28b07823639a64afaa34bf3919220a81e12 (patch)
tree: 21cbcc5597fcf3f86ddb019415c692b61683dcd6 /search.py
parent: 5583c58431d583753fc671b1ae5a93b380fd0e15 (diff)
download: searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.tar.gz
searchEngine-cb4be28b07823639a64afaa34bf3919220a81e12.zip
first commit, create parser and tf-idf table
Diffstat (limited to 'search.py')
-rw-r--r-- search.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/search.py b/search.py
new file mode 100644
index 0000000..d41d0a2
--- /dev/null
+++ b/search.py
@@ -0,0 +1,18 @@
+import terms
+import sys
+import re
+
+def main(search_words):
+
+ txt = [re.sub(r"[^a-zA-Z\s]", "", i).rstrip().lower() for i in search_words]
+
+ search_words = []
+ for i in txt:
+ search_words += re.split(r"\s+|\n", i)
+
+ search_words = [terms.LEM.lemmatize(i) for i in search_words if i != "" and i not in terms.STOPWORDS]
+
+ print(search_words)
+
+if __name__ == "__main__":  # script entry: pass the CLI arguments (program name excluded) to main
+ main(sys.argv[1:]) \ No newline at end of file