diff --git a/inspect_gradebook.py b/inspect_gradebook.py
index 07311c9..bf841df 100644
--- a/inspect_gradebook.py
+++ b/inspect_gradebook.py
@@ -1,20 +1,20 @@
 import os, sys
+
 from utils.inspector import generate_hashes_gradebook, generate_duplicate_hashes_gradebook
+from utils.settings import BB_GRADEBOOKS_DIR
 
 def main():
     gradebook_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    gradebook_dir_path = os.path.join('BB_gradebooks', gradebook_dir_name)
+    gradebook_dir_path = os.path.join(BB_GRADEBOOKS_DIR, gradebook_dir_name)
 
     if not os.path.exists(gradebook_dir_path):
         exit('[Info] Gradebook directory does not exist - nothing to inspect')
     if not os.listdir(gradebook_dir_path): # if no files in gradebook dir
         exit(f'[Info] No files found in this gradebook - nothing to inspect')
 
-    # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_gradebook(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path) # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_gradebook(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/inspect_submissions.py b/inspect_submissions.py
index badaf0d..9f1e0fb 100644
--- a/inspect_submissions.py
+++ b/inspect_submissions.py
@@ -1,19 +1,19 @@
 import os, sys
+
 from utils.inspector import generate_hashes_submissions, generate_duplicate_hashes_submissions
+from utils.settings import BB_SUBMISSIONS_DIR
 
 def main():
     submissions_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo submissions directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [submissions dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    submissions_dir_path = os.path.join('BB_submissions', submissions_dir_name)
+    submissions_dir_path = os.path.join(BB_SUBMISSIONS_DIR, submissions_dir_name)
 
     if not os.path.exists(submissions_dir_path):
         exit('[Info] Directory does not exist - nothing to inspect')
     if not os.listdir(submissions_dir_path): # if no files in dir
         exit(f'[Info] No files found in this submissions directory - nothing to inspect')
 
-    # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_submissions(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path) # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_submissions(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 
 
 if __name__ == '__main__':
diff --git a/organise_gradebook.py b/organise_gradebook.py
index b59034e..3bb49f7 100644
--- a/organise_gradebook.py
+++ b/organise_gradebook.py
@@ -1,11 +1,13 @@
 import os, sys
+
 from utils.organiser import organise_gradebook, check_submissions_dir_for_compressed
+from utils.settings import BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 
 
 def main():
     gradebook_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\n')
-    gradebook_dir = os.path.join('BB_gradebooks', gradebook_name) # gradebook from Blackboard with all submissions
-    submissions_dir = os.path.join('BB_submissions', gradebook_name) # target dir for extracted submissions
+    gradebook_dir = os.path.join(BB_GRADEBOOKS_DIR, gradebook_name) # gradebook from Blackboard with all submissions
+    submissions_dir = os.path.join(BB_SUBMISSIONS_DIR, gradebook_name) # target dir for extracted submissions
     abs_path = os.getcwd() # absolute path of main/this script
 
     print(f'\nGradebook directory to organise:\n{os.path.join(abs_path, gradebook_dir)}', flush=True)
@@ -16,4 +18,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/utils/extractor.py b/utils/extractor.py
index f255f14..449bfa7 100644
--- a/utils/extractor.py
+++ b/utils/extractor.py
@@ -4,6 +4,7 @@ from py7zr import SevenZipFile, exceptions
 
 from utils.settings import BAD_DIR_NAME
 
+
 def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
     try:
         filename = os.path.basename(file)
@@ -73,4 +74,4 @@ def extract_file_to_dir(file_path: str, student_dir: str) -> None | Exception:
     elif file_path.lower().endswith('.7z'):
         extract_7z(file_path, student_dir)
     else:
-        print(f"\n[ERROR] unknown file type: {file_path}\n", flush=True)
+        print(f'\n[ERROR] unknown file type: {file_path}\n', flush=True)
diff --git a/utils/inspector.py b/utils/inspector.py
index 13951a2..2b66f3e 100644
--- a/utils/inspector.py
+++ b/utils/inspector.py
@@ -5,7 +5,7 @@ import hashlib
 import pandas as pd
 from functools import partial
 
-from utils.settings import CSV_DIR
+from utils.settings import CSV_DIR, BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 
 
 def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper function for hashing all files
@@ -25,7 +25,6 @@ def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper f
         print(f'[INFO] Error message: {e}', flush=True)
         return []
 
-
 def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: # helper function for hashing all files
     hash_list = []
     for subdir, dirs, files in os.walk(dir_path): # loop through all files in the directory and generate hashes
@@ -38,11 +37,10 @@ def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: #
             hash_list.append({ 'filepath': filepath, 'filename': filename, 'sha256 hash': filehash})
     return hash_list
 
-
 def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function for hashing all files in gradebook
     gradebook_dir_name = os.path.abspath(gradebook_dir_path).split(os.path.sep)[-1] # get name of gradebook by separating path and use rightmost part
     if not os.path.isdir(gradebook_dir_path):
-        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "BB_gradebooks".\n')
+        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "{BB_GRADEBOOKS_DIR}".\n')
 
     dicts_with_hashes_list = get_hashes_in_dir(gradebook_dir_path)
     for hash_dict in dicts_with_hashes_list:
@@ -64,11 +62,10 @@ def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function
     print(f'[INFO] Created CSV file with all files & hashes in gradebook: {gradebook_dir_name}\nCSV file: {csv_file_path}', flush=True)
     return csv_file_path
 
-
 def generate_hashes_submissions(submissions_dir_path: str) -> str: # main function for hashing all files in submissions
     submissions_dir_name = os.path.abspath(submissions_dir_path).split(os.path.sep)[-1] # get name of submission/assignment by separating path and use rightmost part
     if not os.path.isdir(submissions_dir_path):
-        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "BB_submissions".\n')
+        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "{BB_SUBMISSIONS_DIR}".\n')
 
     excluded_filenames = load_excluded_filenames(submissions_dir_name)
     dicts_with_hashes_list = []
@@ -96,8 +93,7 @@ def generate_hashes_submissions(submissions_dir_path: str) -> str: # main funct
         for student_dict in dicts_with_hashes_list:
             writer.writerows(student_dict)
     print(f'[INFO] Created CSV file with all files & hashes for submissions in: {submissions_dir_name}\nCSV file: {csv_file_path}', flush=True)
-    return csv_file_path
-
+    return csv_file_path
 
 def generate_duplicate_hashes_generic(hashes_csv_file_path: str, drop_columns: list[str]):
     csv = pd.read_csv(hashes_csv_file_path)
diff --git a/utils/organiser.py b/utils/organiser.py
index 121a5d1..2ccac31 100644
--- a/utils/organiser.py
+++ b/utils/organiser.py
@@ -1,10 +1,12 @@
 import os, shutil, re
+
 from utils.extractor import extract_file_to_dir
-from utils.settings import BAD_DIR_NAME
+from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR
+
 
 def validate_gradebook_dir_name(src_dir: str) -> None:
     if not os.path.isdir(src_dir): # check if it exists and is a directory
-        print(f"\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in 'BB_gradebooks'")
+        print(f'\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in "{BB_GRADEBOOKS_DIR}"')
         exit()
     if not os.listdir(src_dir): # check if there are any files in the directory
         print(f'\n[Info] No files found in this gradebook - nothing to organise')
@@ -13,7 +15,6 @@ def validate_gradebook_dir_name(src_dir: str) -> None:
         print(f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise')
         exit()
 
-
 def get_comment_from_submission_txt(file_path: str) -> str | None:
     no_comment_text = f'Comments:\nThere are no student comments for this assignment.'
     no_comment_text_regex = no_comment_text
@@ -30,7 +31,6 @@ def get_comment_from_submission_txt(file_path: str) -> str | None:
             return comment
     return None
 
-
 def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     all_files = [ os.path.join(src_dir, f) for f in os.listdir(src_dir) if BAD_DIR_NAME not in f ]
     dirs = [ f for f in all_files if os.path.isdir(f) and BAD_DIR_NAME not in f ]
@@ -55,7 +55,6 @@ def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     print(msg, flush=True)
     return files_counter
 
-
 def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, student_no: str) -> None:
     student_dir = os.path.join(dest_dir, student_no)
     os.makedirs(student_dir, exist_ok=True) # create student directory if it doesn't exist
@@ -79,7 +78,6 @@ def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, stude
     new_file_path = os.path.join(student_dir, os.path.basename(file_name))
     shutil.move(file_path, new_file_path) # move the file to student directory
 
-
 def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     """1) extracts .zip, .rar, .7z files, organises contents into directories per student number, and deletes compressed files after successful extraction
     2) organises all other files in gradebook into directories per student number
@@ -107,7 +105,6 @@ def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     print(f'[Info] Comments in file: {dest_dir}_comments.txt', flush=True)
     print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction', flush=True)
 
-
 def check_submissions_dir_for_compressed(submissions_dir: str) -> None:
     """checks if any submitted compressed files contain more compressed files inside
     (they are not recursively extracted)
diff --git a/utils/settings.py b/utils/settings.py
index 6bc0d7b..2fe215d 100644
--- a/utils/settings.py
+++ b/utils/settings.py
@@ -1,4 +1,7 @@
 import os
+
+BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downloaded from Blackboard
+BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
 
 BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
 CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files