summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjwansek <eddie.atten.ea29@gmail.com>2022-04-29 18:33:48 +0100
committerjwansek <eddie.atten.ea29@gmail.com>2022-04-29 18:33:48 +0100
commitee1b57ec6197c554f3c011f9a648e2222d845994 (patch)
tree27d8aaa78d451efea3e2c72d81163f7b02602268
parent2565fa419f37eee11fbad1cccbabde5b8baa24cd (diff)
downloadSmarker-ee1b57ec6197c554f3c011f9a648e2222d845994.tar.gz
Smarker-ee1b57ec6197c554f3c011f9a648e2222d845994.zip
Added similarity metric
l---------README.md2
-rw-r--r--Smarker/assessments.py80
-rw-r--r--Smarker/database.py46
-rw-r--r--Smarker/temp.py16
-rw-r--r--docs/source/readme.md2
5 files changed, 111 insertions, 35 deletions
diff --git a/README.md b/README.md
index 185d986..2914835 120000
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ formats.
- Generate plagiarism reports
- Isolate code in a docker container for security
+### Screenshots
+
![Function Analysis](https://smarker.eda.gay/_static/readme_functionanalysis.png)
Function Analysis
diff --git a/Smarker/assessments.py b/Smarker/assessments.py
index 4f32353..b8eb6f0 100644
--- a/Smarker/assessments.py
+++ b/Smarker/assessments.py
@@ -1,28 +1,82 @@
+from dataclasses import dataclass
import misc_classes
import configparser
import jinja_helpers
import pycode_similar
+import subprocess
import operator
import database
import argparse
import tempfile
import yaml
+import json
import os
+import re
-def generate_plagarism_report(codes):
- for file_name, codes in codes.items():
+@dataclass
+class SimilarityMetric:
+ code_text_1:str
+ code_text_2:str
+ id_1:int
+ id_2:int
+
+ def __post_init__(self):
+ with tempfile.TemporaryDirectory() as td:
+ with open(os.path.join(td, "%i.py" % self.id_1), "w") as f:
+ f.write(self.code_text_1)
+
+ with open(os.path.join(td, "%i.py" % self.id_2), "w") as f:
+ f.write(self.code_text_2)
+
+ proc = subprocess.Popen(["pycode_similar", "-p", "0", os.path.join(td, "%i.py" % self.id_1), os.path.join(td, "%i.py" % self.id_2)], stdout = subprocess.PIPE)
+ self.details = ""
+ while True:
+ line = proc.stdout.readline()
+ if not line:
+ break
+ self.details += line.decode()
+
+ def get_similarity(self):
+ return float(re.findall(r"\d+\.\d+\s", self.details)[0])
+
+
+def generate_plagarism_report(assessment_name, db):
+ required_files = db.get_assessments_required_files(assessment_name)
+ submission_ids_to_get = set()
+ assessments = db.get_submissions(assessment_name)
+ un_added_student_nos = {i[0] for i in assessments.keys()}
+ for id_, dt in sorted(assessments.keys(), key=operator.itemgetter(0, 1), reverse=True):
+ if id_ in un_added_student_nos:
+ files = jinja_helpers.flatten_struct(assessments[(id_, dt)][0]["files"])
+
+ for file_name in required_files:
+ if files[file_name]["present"]:
+ if (not files[file_name]["has_exception"]):
+ submission_ids_to_get.add(assessments[(id_, dt)][1])
+
+ un_added_student_nos.remove(id_)
+
+ codes = db.get_submission_codes(submission_ids_to_get)
+ for file_name, submissions in codes.items():
with tempfile.TemporaryDirectory() as td:
- un_added_student_nos = {i[0] for i in codes.keys()}
- # print(un_added_student_nos)
- for k, v in sorted(codes.keys(), key=operator.itemgetter(0, 1), reverse=True):
- if k in un_added_student_nos:
- with open(os.path.join(td, "%i.py" % k), "w") as f:
- f.write(codes[(k, v)])
+ print(file_name, len(submissions))
+ for student_id, code in submissions:
+ with open(os.path.join(td, "%i.py" % student_id), "w") as f:
+ f.write(code)
+
+ cmd = ["pycode_similar"] + [os.path.join(td, f) for f in os.listdir(td)]
+ print(" ".join(cmd))
+ proc = subprocess.Popen(cmd, stdout = subprocess.PIPE)
+ stdout = ""
+ while True:
+ line = proc.stdout.readline()
+ if not line:
+ break
+ stdout += line.decode()
+
+ print(stdout)
+ input("skfhsk")
- # print("Written %s at %s" % (k, v))
- un_added_student_nos.remove(k)
- input("%s..." % td)
- print(pycode_similar.detect(os.listdir(td)))
def getparser():
config = configparser.ConfigParser()
@@ -120,7 +174,7 @@ if __name__ == "__main__":
print("Added student %s" % name)
if args["plagarism_report"] is not None:
- generate_plagarism_report(db.get_submission_codes(args["plagarism_report"]))
+ generate_plagarism_report(args["plagarism_report"], db)
# print(db.get_assessment_yaml("CMP-4009B-2020-A2"))
diff --git a/Smarker/database.py b/Smarker/database.py
index d6a2ea5..37a44db 100644
--- a/Smarker/database.py
+++ b/Smarker/database.py
@@ -204,34 +204,44 @@ class SmarkerDatabase:
))
self.__connection.commit()
- def get_submission_codes(self, assessment_name):
+ def get_submission_codes(self, submission_ids):
out = {}
with self.__connection.cursor() as cursor:
- cursor.execute("SELECT file_id, file_name FROM assessment_file WHERE assessment_name = %s;", (assessment_name, ))
- for file_id, file_name in cursor.fetchall():
- out[file_name] = {}
-
+ for submission_id in submission_ids:
cursor.execute("""
SELECT
submitted_files.file_text,
- submissions.student_no,
- submissions.submission_dt
+ submitted_files.file_id,
+ assessment_file.file_name,
+ submissions.student_no
FROM submitted_files
+ INNER JOIN assessment_file
+ ON submitted_files.file_id = assessment_file.file_id
INNER JOIN submissions
- ON submissions.submission_id = submitted_files.submission_id
- WHERE submitted_files.file_id = %s;
- """, (file_id, ))
-
- for code, student_no, dt in cursor.fetchall():
- out[file_name][(int(student_no), dt)] = code
+ ON submissions.submission_id = submitted_files.submission_id
+ WHERE submitted_files.submission_id = %s;
+ """, (submission_id))
+
+ for file_contents, id_, file_name, student_no in cursor.fetchall():
+ if file_contents is not None:
+ try:
+ out[file_name].append((int(student_no), file_contents))
+ except KeyError:
+ out[file_name] = [(int(student_no), file_contents)]
return out
- def get_most_recent_submission_report(self, assessment_name):
+ def get_submissions(self, assessment_name):
with self.__connection.cursor() as cursor:
- cursor.execute("SELECT MAX(submission_id), student_no FROM submissions WHERE assessment_name = %s GROUP BY student_no;", (assessment_name, ))
- return [(int(i[0]), int(i[1]), yaml.safe_load(i[2])) for i in cursor.fetchall()]
-
+ cursor.execute("SELECT student_no, submission_dt, report_yaml, submission_id FROM submissions WHERE assessment_name = %s;", (assessment_name, ))
+ return {(int(i[0]), i[1]): (yaml.safe_load(i[2]), int(i[3])) for i in cursor.fetchall()}
+
+ def get_assessments_required_files(self, assessment_name):
+ with self.__connection.cursor() as cursor:
+ cursor.execute("SELECT file_name FROM assessment_file WHERE assessment_name = %s;", (assessment_name, ))
+ return [i[0] for i in cursor.fetchall()]
if __name__ == "__main__":
with SmarkerDatabase(host = "vps.eda.gay", user="root", passwd=input("Input password: "), db="Smarker", port=3307) as db:
- print(db.get_most_recent_submission_report("simple_assessment"))
+ # print(db.get_assessments_required_files("example"))
+ import json
+ print(json.dumps(db.get_submission_codes((24, 21)), indent = 4))
diff --git a/Smarker/temp.py b/Smarker/temp.py
index 60b1c18..491729f 100644
--- a/Smarker/temp.py
+++ b/Smarker/temp.py
@@ -1,6 +1,14 @@
-import json
+import assessments
+import sys
+import os
-with open("100301654_report.json", "r") as f:
- tree = json.load(f)["class_tree"]
+if __name__ == "__main__":
+ with open(sys.argv[1], "r") as f:
+ ft1 = f.read()
-print(tree) \ No newline at end of file
+ with open(sys.argv[2], "r") as f:
+ ft2 = f.read()
+
+ similarityMetric = assessments.SimilarityMetric(ft1, ft2, 1, 2)
+ print(similarityMetric.get_similarity())
+ print(similarityMetric.details) \ No newline at end of file
diff --git a/docs/source/readme.md b/docs/source/readme.md
index 185d986..2914835 100644
--- a/docs/source/readme.md
+++ b/docs/source/readme.md
@@ -15,6 +15,8 @@ formats.
- Generate plagiarism reports
- Isolate code in a docker container for security
+### Screenshots
+
![Function Analysis](https://smarker.eda.gay/_static/readme_functionanalysis.png)
Function Analysis