diff options
-rw-r--r-- | .gitignore | 1 | ||||
l--------- | README.md | 4 | ||||
-rw-r--r-- | Smarker/assessments.py | 101 | ||||
-rw-r--r-- | Smarker/requirements.txt | 3 | ||||
-rw-r--r-- | docs/source/_static/QuickStart/simple_assessment.yml | 12 | ||||
-rw-r--r-- | docs/source/_static/QuickStart/simple_submission_1/euclid.py | 22 | ||||
-rw-r--r-- | docs/source/_static/QuickStart/simple_submission_2/euclid.py | 10 | ||||
-rw-r--r-- | docs/source/_static/QuickStart/simple_submission_3/euclid.py | 11 | ||||
-rw-r--r-- | docs/source/_static/QuickStart/simple_submission_4/euclid.py | 16 | ||||
-rw-r--r-- | docs/source/_static/readme_matrix.png | bin | 0 -> 17528 bytes | |||
-rw-r--r-- | docs/source/_static/report.txt | 9 | ||||
-rw-r--r-- | docs/source/_static/simple.json | 60 | ||||
-rw-r--r-- | docs/source/_static/simple.txt | 89 | ||||
-rw-r--r-- | docs/source/assessments.rst | 24 | ||||
-rw-r--r-- | docs/source/docker.rst | 8 | ||||
-rw-r--r-- | docs/source/index.rst | 6 | ||||
-rw-r--r-- | docs/source/quickstart.rst | 97 | ||||
-rw-r--r-- | docs/source/readme.md | 4 | ||||
-rw-r--r-- | docs/source/reflect.rst | 23 |
19 files changed, 478 insertions, 22 deletions
@@ -3,6 +3,7 @@ out/ *.zip smarker.conf *.aux +*.pickle # Byte-compiled / optimized / DLL files __pycache__/ @@ -32,3 +32,7 @@ File with an exception  Using pytest + + + +Plagarism and collusion detection matrix diff --git a/Smarker/assessments.py b/Smarker/assessments.py index b8eb6f0..cdcdcad 100644 --- a/Smarker/assessments.py +++ b/Smarker/assessments.py @@ -1,8 +1,12 @@ from dataclasses import dataclass +from matplotlib import pyplot as plt +import numpy as np import misc_classes import configparser import jinja_helpers import pycode_similar +import pandas as pd +import pickle import subprocess import operator import database @@ -15,6 +19,11 @@ import re @dataclass class SimilarityMetric: + """Abstract class for getting a metric of similariry between two python objects. + By default it uses pycode_similar as a metric, but this can be changed by overriding + ``get_similarity()``. There is also the additional attribute ``details`` for getting + a breakdown of similarity. + """ code_text_1:str code_text_2:str id_1:int @@ -37,10 +46,60 @@ class SimilarityMetric: self.details += line.decode() def get_similarity(self): + """Gets the similarity between the two codes. + + Returns: + float: A percentage similarity metric + """ return float(re.findall(r"\d+\.\d+\s", self.details)[0]) +def visualise_matrix(dataframe:pd.DataFrame, file_name): + """Visualize and draw a similarity matrix. Simply shows the figure, + therefore this doesn't work in docker. + + Args: + dataframe (pandas.DataFrame): Pandas dataframe representing the similarity + file_name (str): The file name that corrisponds to the dataframe. 
Used as the title + """ + print(file_name) + print(dataframe) + + values = dataframe.values + + fig, ax = plt.subplots() + ax.matshow(values, alpha = 0.3, cmap = plt.cm.Reds) + + # axes labels + xaxis = np.arange(len(dataframe.columns)) + ax.set_xticks(xaxis) + ax.set_yticks(xaxis) + ax.set_xticklabels(dataframe.columns) + ax.set_yticklabels(dataframe.index) + + # labelling each point + for i in range(values.shape[0]): + for j in range(values.shape[1]): + if i == j: + ax.text(x = j, y = i, s = "N/A", va = 'center', ha = 'center') + else: + ax.text(x = j, y = i, s = values[i, j], va = 'center', ha = 'center') + + plt.title(file_name) + plt.show() + -def generate_plagarism_report(assessment_name, db): +def generate_plagarism_report(assessment_name, db:database.SmarkerDatabase): + """Generates a plagarism report for the given ``assessment_name``. Only + fetches submissions with present files and without any exceptions. + + Args: + assessment_name (str): The name of the assessment to fetch submissions from + db (database.SmarkerDatabase): An open database object is required + + Returns: + dict: dict of ``pandas.core.frame.DataFrame`` objects indexed by the required file name + """ + # get submissions with files and no exception required_files = db.get_assessments_required_files(assessment_name) submission_ids_to_get = set() assessments = db.get_submissions(assessment_name) @@ -56,27 +115,31 @@ def generate_plagarism_report(assessment_name, db): un_added_student_nos.remove(id_) + # get similarity matrix + report = {} codes = db.get_submission_codes(submission_ids_to_get) for file_name, submissions in codes.items(): + d = {} + d_details = {} with tempfile.TemporaryDirectory() as td: - print(file_name, len(submissions)) for student_id, code in submissions: - with open(os.path.join(td, "%i.py" % student_id), "w") as f: - f.write(code) - - cmd = ["pycode_similar"] + [os.path.join(td, f) for f in os.listdir(td)] - print(" ".join(cmd)) - proc = subprocess.Popen(cmd, stdout = 
subprocess.PIPE) - stdout = "" - while True: - line = proc.stdout.readline() - if not line: - break - stdout += line.decode() - - print(stdout) - input("skfhsk") - + d[student_id] = [] + d_details[student_id] = [] + for student_id_2, code_2 in submissions: + sm = SimilarityMetric(code, code_2, student_id, student_id_2) + # print("%i and %i = %.3f" % (student_id, student_id_2, SimilarityMetric(code, code_2, student_id, student_id_2).get_similarity())) + d[student_id].append(sm.get_similarity()) + d_details[student_id].append(sm) + index = [i[0] for i in submissions] + visualise_matrix(pd.DataFrame(d, index = index), file_name) + report[file_name] = pd.DataFrame(d_details, index = index) + + out_path = os.path.realpath("plagarism_report_details.pickle") + with open(out_path, "wb") as f: + pickle.dump(report, f) + print("Written report to %s" % out_path) + + return report def getparser(): config = configparser.ConfigParser() @@ -116,7 +179,7 @@ def getparser(): "-s", "--create_student", action = misc_classes.EnvDefault, envvar = "create_student", - help = "Add a student in the form e.g. 123456789,Eden,Attenborough,E.Attenborough@uea.ac.uk", + help = "Add a student in the form e.g. 123456789,Eden Attenborough,E.Attenborough@uea.ac.uk", required = False ) parser.add_argument( diff --git a/Smarker/requirements.txt b/Smarker/requirements.txt index a8fef17..3be9c36 100644 --- a/Smarker/requirements.txt +++ b/Smarker/requirements.txt @@ -10,3 +10,6 @@ pdfkit lxml
pymysql
pycode_similar
+pandas
+matplotlib
+numpy
diff --git a/docs/source/_static/QuickStart/simple_assessment.yml b/docs/source/_static/QuickStart/simple_assessment.yml new file mode 100644 index 0000000..414f00b --- /dev/null +++ b/docs/source/_static/QuickStart/simple_assessment.yml @@ -0,0 +1,12 @@ +name: simple_assessment +files: + - euclid.py: + functions: + - gcd(2) + tests: + - | + assert euclid.gcd(8,12) == 4 + run: + - python euclid.py: + regexes: + - ^4 diff --git a/docs/source/_static/QuickStart/simple_submission_1/euclid.py b/docs/source/_static/QuickStart/simple_submission_1/euclid.py new file mode 100644 index 0000000..f72707a --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_1/euclid.py @@ -0,0 +1,22 @@ +# the newest! +# assessment 1 + +def gcd(m,n) -> int: + """Calculates the greatest common denominator between two numbers. + + Args: + x (int): Number One + y (int): Number Two + + Returns: + int: The GCD of the two numbers + """ + if m< n: + (m,n) = (n,m) + if(m%n) == 0: + return n + else: + return (gcd(n, m % n)) # recursion taking place + +# gcd +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_2/euclid.py b/docs/source/_static/QuickStart/simple_submission_2/euclid.py new file mode 100644 index 0000000..0819bc5 --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_2/euclid.py @@ -0,0 +1,10 @@ +def gcd(m,n): + if m< n: + (m,n) = (n,m) + if(m%n) == 0: + return n + else: + return (gcd(n, m % n)) # recursion taking place + +# calling function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_3/euclid.py b/docs/source/_static/QuickStart/simple_submission_3/euclid.py new file mode 100644 index 0000000..73e7d9c --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_3/euclid.py @@ -0,0 +1,11 @@ +def gcd(p,q): + """Docstring gcd""" + if p < q: + (p,q) = (q,p) + if(p%q) == 0: + return q + else: + return (gcd(q, p % q)) # recursion taking place + +# calling 
function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_4/euclid.py b/docs/source/_static/QuickStart/simple_submission_4/euclid.py new file mode 100644 index 0000000..064d1e5 --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_4/euclid.py @@ -0,0 +1,16 @@ +# assessment A +# student id: 4 + +def gcd(x,y): + if x > y: + small = y + else: + small = x + for i in range(1, small+1): + if((x % i == 0) and (y % i == 0)): + g = i + + return g + +# calling function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/readme_matrix.png b/docs/source/_static/readme_matrix.png Binary files differnew file mode 100644 index 0000000..e91358b --- /dev/null +++ b/docs/source/_static/readme_matrix.png diff --git a/docs/source/_static/report.txt b/docs/source/_static/report.txt new file mode 100644 index 0000000..b78e20b --- /dev/null +++ b/docs/source/_static/report.txt @@ -0,0 +1,9 @@ +euclid.py
+ 2 ... 1
+2 100.00 ... 94.74
+3 100.00 ... 94.74
+4 63.16 ... 57.89
+1 94.74 ... 100.00
+
+[4 rows x 4 columns]
+Written report to /Smarker/plagarism_report_details.pickle
diff --git a/docs/source/_static/simple.json b/docs/source/_static/simple.json new file mode 100644 index 0000000..40accc7 --- /dev/null +++ b/docs/source/_static/simple.json @@ -0,0 +1,60 @@ +{ + "files": [ + { + "euclid.py": { + "functions": [ + { + "gcd(2)": { + "present": true, + "documentation": { + "comments": "None", + "doc": "Docstring gcd" + }, + "arguments": "(p, q)", + "minimum_arguments": 2, + "source_code": "def gcd(p,q):\n \"\"\"Docstring gcd\"\"\"\n if p < q:\n (p,q) = (q,p)\n if(p%q) == 0:\n return q\n else:\n return (gcd(q, p % q)) # recursion taking place" + } + } + ], + "run": [ + { + "python euclid.py": { + "regexes": { + "^4": [ + "4" + ] + }, + "full_output": "4\n" + } + } + ], + "tests": [ + "assert euclid.gcd(8,12) == 4\n" + ], + "present": true, + "has_exception": false, + "documentation": { + "comments": "None", + "doc": "None" + } + } + } + ], + "name": "simple_assessment", + "student_no": "123456790", + "test_results": { + "pytest_report": "============================= test session starts ==============================\nplatform linux -- Python 3.10.4, pytest-7.1.1, pluggy-1.0.0 -- /usr/bin/python3\ncachedir: .pytest_cache\nrootdir: /tmp/tmpjzy020i4/simple_submission_3\ncollecting ... 
collected 1 item\n\n../../../../../../tmp/tmpjzy020i4/simple_submission_3/test_euclid.py::test_1 PASSED [100%]\n\n--------------- generated xml file: /tmp/tmpyu0qypji/report.xml ----------------\n============================== 1 passed in 0.01s ===============================\n", + "junitxml": "<?xml version=\"1.0\" encoding=\"utf-8\"?><testsuites><testsuite name=\"pytest\" errors=\"0\" failures=\"0\" skipped=\"0\" tests=\"1\" time=\"0.019\" timestamp=\"2022-05-01T15:03:57.143881\" hostname=\"thonkpad2\"><testcase classname=\"test_euclid\" name=\"test_1\" time=\"0.001\" /></testsuite></testsuites>", + "meta": { + "name": "pytest", + "errors": "0", + "failures": "0", + "skipped": "0", + "tests": "1", + "time": "0.019", + "timestamp": "2022-05-01T15:03:57.143881", + "hostname": "thonkpad2" + } + }, + "class_tree": {} +} diff --git a/docs/source/_static/simple.txt b/docs/source/_static/simple.txt new file mode 100644 index 0000000..b6dcc16 --- /dev/null +++ b/docs/source/_static/simple.txt @@ -0,0 +1,89 @@ +============================= test session starts ============================== +platform linux -- Python 3.10.4, pytest-7.1.2, pluggy-1.0.0 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /tmp/tmp398_c3x6/simple_submission_1 +collecting ... collected 1 item + +../tmp/tmp398_c3x6/simple_submission_1/test_euclid.py::test_1 PASSED [100%] + +--------------- generated xml file: /tmp/tmpceag5_nn/report.xml ---------------- +============================== 1 passed in 0.01s =============================== +4 +=== simple_assessment - Student ID: 1 Automatic marking report === +Report generated at 2022-05-01 15:49:15.701124 + +== Class Tree: == + +{} + + +== File Analysis == + + = euclid.py = + Documentation: + 28 characters long + Comments: + ``` + # the newest! + # assessment 1 + ``` + Docstring: + *** No docstring present *** + Functions: + gcd(2): + Arguments: + (m, n) -> int + Enough? 
YES + Documentation: + 164 characters long + Comments: + *** No comments present *** + Docstring: + ``` + Calculates the greatest common denominator between two numbers. + + Args: + x (int): Number One + y (int): Number Two + + Returns: + int: The GCD of the two numbers + ``` + Source: + 15 lines (356 characters) + Code: + ``` + def gcd(m,n) -> int: + """Calculates the greatest common denominator between two numbers. + + Args: + x (int): Number One + y (int): Number Two + + Returns: + int: The GCD of the two numbers + """ + if m< n: + (m,n) = (n,m) + if(m%n) == 0: + return n + else: + return (gcd(n, m % n)) # recursion taking place + ``` + Runtime Analysis: + Command `python euclid.py`: + Monitor: + stdout + Regexes: + `^4`: + Found occurrences: 1 + Occurrences list: + 4 + Full runtime output: + ``` + 4 + + ``` + + + diff --git a/docs/source/assessments.rst b/docs/source/assessments.rst new file mode 100644 index 0000000..a8d7311 --- /dev/null +++ b/docs/source/assessments.rst @@ -0,0 +1,24 @@ +.. _assessments: + +``assessments.py`` +================== + +``assessments.py`` contains many useful arguments for interacting with the database: + +.. argparse:: + :module: assessments + :func: getparser + :prog: python Smarker/assessments.py + +Classes +******* + +.. autoclass:: assessments.SimilarityMetric + :members: + +Functions +********* + +.. autofunction:: assessments.visualise_matrix + +.. autofunction:: assessments.generate_plagarism_report
\ No newline at end of file diff --git a/docs/source/docker.rst b/docs/source/docker.rst index 7c3237a..232c7f4 100644 --- a/docs/source/docker.rst +++ b/docs/source/docker.rst @@ -41,4 +41,10 @@ To list assessments in the database using docker: .. code-block:: bash - sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes
\ No newline at end of file + sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes + +.. code-block:: bash + + touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report example + +If a file doesn't exist before it's passed through as a volume in docker, it will be created automatically as a *directory*. This causes issues if the docker image produces a file, so we make a blank file first.
\ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index e36cc86..f2d7426 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,7 @@ .. mdinclude:: readme.md +Read the :ref:`quickstart`. + Setting up ---------- @@ -26,6 +28,8 @@ Please note that the ``-o`` flag is required for rendering to PDFs. ``assessments.py`` contains many useful arguments for interacting with the database: +Also see :ref:`assessments` + .. argparse:: :module: assessments :func: getparser @@ -37,11 +41,13 @@ Please note that the ``-o`` flag is required for rendering to PDFs. reflect.rst database.rst + assessments.rst .. toctree:: :maxdepth: 2 :caption: Other Pages: + quickstart.rst configfile.rst docker.rst assessmentyaml.rst diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst new file mode 100644 index 0000000..08f3cec --- /dev/null +++ b/docs/source/quickstart.rst @@ -0,0 +1,97 @@ +.. _quickstart: + +Quick start guide +================= + +This guide implements a simple assessment to make a *greatest common divisor* function. + +First make an assessment yaml file: + +.. literalinclude:: _static/QuickStart/simple_assessment.yml + :linenos: + :language: yaml + +This expects a single function called ``gcd()`` in a file called ``euclid.py`` with no fewer +than two arguments. It expects it to print ``4`` to stdout when executed. It also runs pytest +on the function. + +Then add it to the database: + +.. code-block:: bash + + docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -c /tmp/assessment.yml + +If using windows, I recommend using the mingw shell since powershell is bad at dealing with relative file paths in docker. + +Then add some students: + +.. 
code-block:: bash + + docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "1,Alice,a.bar@uea.ac.uk" + docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "2,Bob,b.bar@uea.ac.uk" + docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "3,Christina,c.bar@uea.ac.uk" + docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "4,Dan,d.bar@uea.ac.uk" + +Now we are ready to make some reports! The submissions are zip files with the student's id as the name. First let's just use the default parameters: + +.. code-block:: bash + + docker run -v "$(pwd)/docs/source/_static/QuickStart/1.zip":/tmp/1.zip -e submission=/tmp/1.zip -e assessment=simple_assessment --rm smarker + +This prints out the result as text to stdout: + +.. literalinclude:: _static/simple.txt + +Smarker can render to text, markdown, json, yaml and PDF, and produce less information, but for now we'll only use the defaults. +Do the same for the other three submissions. + +We can now generate a plagiarism report. But first, let's look at the actual submitted files. Here's the submission from student 1: + +.. literalinclude:: _static/QuickStart/simple_submission_1/euclid.py + :linenos: + :language: python + +Student 2: + +.. literalinclude:: _static/QuickStart/simple_submission_2/euclid.py + :linenos: + :language: python + +Student 3: + +.. literalinclude:: _static/QuickStart/simple_submission_3/euclid.py + :linenos: + :language: python + +Student 4: + +.. 
literalinclude:: _static/QuickStart/simple_submission_4/euclid.py + :linenos: + :language: python + +From this we can tell that student 2 has copied from student 1 (or the other way around), changing only the comments, docstring and return annotation. +Student 3 has also copied from student 1, but has changed the variable names in an attempt to hide it. Submission 4 is completely different. + +Now we can generate a plagiarism report: + +.. code-block:: bash + + touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report simple_assessment + +Which produces a pickled report matrix, and prints out to stdout: + +.. code-block:: text + + 2 3 4 1 + 2 100.00 100.00 42.86 94.74 + 3 100.00 100.00 42.86 94.74 + 4 63.16 63.16 100.00 57.89 + 1 94.74 94.74 39.29 100.00 + Written report to /Smarker/plagarism_report_details.pickle + +If we run it outside of docker, we can also get it rendered nicely in matplotlib: + +.. image:: _static/readme_matrix.png + +The matrix isn't symmetrical, which is intentional, since it considers the difference in complexity between submissions. This can be useful for +finding the culprit in copying.
\ No newline at end of file diff --git a/docs/source/readme.md b/docs/source/readme.md index 2914835..3b61499 100644 --- a/docs/source/readme.md +++ b/docs/source/readme.md @@ -32,3 +32,7 @@ File with an exception  Using pytest + + + +Plagarism and collusion detection matrix diff --git a/docs/source/reflect.rst b/docs/source/reflect.rst index c059206..6c0767a 100644 --- a/docs/source/reflect.rst +++ b/docs/source/reflect.rst @@ -1,5 +1,24 @@ ``reflect.py``: Getting information about code ============================================== -.. automodule:: reflect - :members:
\ No newline at end of file +Classes +******* + +.. autoclass:: reflect.Reflect + :members: + +.. autoexception:: reflect.MonitoredFileNotInProducedFilesException + +Thrown if the user has tried to monitor a file that isn't in the list of produced files in the :ref:`assessmentyaml`. + +Functions +********* + +.. autofunction:: reflect.gen_reflection_report + +Generates a JSON report. It is quite a complex structure, but it is made so users can add other rendering templates +later on. For example, the report for a submission from the :ref:`quickstart` looks like this: + +.. literalinclude:: _static/simple.json + :linenos: + :language: yaml
\ No newline at end of file |