2023-02-28 22:48:34 +00:00
import os , sys
import pandas as pd
from datetime import datetime
from utils . inspector import hash_submissions , suspicious_by_hash
2023-02-28 23:26:23 +00:00
# Directory the suspicious-hash report is written to; resolved against the
# working directory at the moment this module is loaded.
# NOTE(review): the directory-name literal carries a leading and a trailing
# space — looks like an extraction artifact; confirm the intended name.
CSV_DIR = os.path.join(os.getcwd(), ' csv ')
2023-02-28 22:48:34 +00:00
def main():
    """Hash a submissions directory and export files with suspicious hashes.

    The submissions directory name is taken from the command-line arguments
    (all arguments after the script name, joined with a space) and looked up
    under the submissions root directory. The files are hashed through
    ``hash_submissions``, the resulting CSV is loaded, filtered through
    ``suspicious_by_hash`` and written to a timestamped CSV in ``CSV_DIR``.

    Raises:
        SystemExit: with a usage message when no directory name is given,
            or with an error message when the directory does not exist.
    """
    # NOTE(review): apart from the strftime pattern (repaired below because
    # it contained invalid directives), every string literal is kept exactly
    # as found, including leading/trailing spaces that look like extraction
    # artifacts — confirm the intended text and path names.

    # Guard clause instead of calling exit() inside a conditional expression.
    if len(sys.argv) <= 1:
        sys.exit(f' \n No submissions dir name given. Provide the name as an argument. \n \n Usage: python {sys.argv[0]} [submissions dir name] \n Example: python {sys.argv[0]} AssignmentX \n ')

    # Joining all arguments lets an unquoted multi-word name be passed.
    submissions_dir_name = ' '.join(sys.argv[1:])
    submissions_dir_path = os.path.join(' BB_submissions ', submissions_dir_name)
    if not os.path.isdir(submissions_dir_path):
        sys.exit(f' Directory {submissions_dir_path} does not exist. \n Make sure " {submissions_dir_name} " exists in " BB_submissions " . ')

    # generate hashes for all files and return output csv file to load & find duplicate/suspicious hashes
    hashes_csv_file_path = hash_submissions(submissions_dir_path)

    # df with all files and their hashes (read_csv already returns a DataFrame)
    df = pd.read_csv(hashes_csv_file_path)

    # df with all files with duplicate/suspicious hash, excludes files from the same student id
    df_suspicious = suspicious_by_hash(df)

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    csv_name = f' {submissions_dir_name} _suspicious_ {timestamp} .csv '
    csv_out = os.path.join(CSV_DIR, csv_name)

    # to_csv does not create missing parent directories, so ensure it exists.
    os.makedirs(CSV_DIR, exist_ok=True)
    df_suspicious.to_csv(csv_out, index=False)
    print(f' [INFO] Created CSV file with duplicate/suspicious hashes in {submissions_dir_name} \n CSV file: {csv_out} ')
2023-02-28 22:48:34 +00:00
# Run the command-line entry point only when this file is executed as a
# script. The comparison literal must be exactly '__main__' (no padding),
# otherwise the condition is never true and main() is never called.
if __name__ == '__main__':
    main()