From 7577148f838354f8a8881dae9b1030fd06d043c7 Mon Sep 17 00:00:00 2001 From: vangef Date: Fri, 1 Mar 2024 15:45:34 +0000 Subject: [PATCH] Add IGNORE_DIRS to settings.py and allow multiple dir names to be ignored during extraction --- utils/extractor.py | 10 +++++----- utils/settings.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/extractor.py b/utils/extractor.py index 449bfa7..84bde41 100644 --- a/utils/extractor.py +++ b/utils/extractor.py @@ -2,7 +2,7 @@ import os, shutil, platform import zipfile, rarfile from py7zr import SevenZipFile, exceptions -from utils.settings import BAD_DIR_NAME +from utils.settings import BAD_DIR_NAME, IGNORE_DIRS def mark_file_as_BAD(file: str, bad_exception: Exception) -> None: @@ -19,8 +19,8 @@ def mark_file_as_BAD(file: str, bad_exception: Exception) -> None: def extract_zip(zip_file: str, target_dir: str) -> None | Exception: try: with zipfile.ZipFile(zip_file, 'r') as zip_ref: - members = [ m for m in zip_ref.infolist() if "__MACOSX" not in m.filename ] - zip_ref.extractall(target_dir, members=members) # extract all files, ignoring those with the "__MACOSX" string in the name + members = [ m for m in zip_ref.infolist() if not any(dir_name in m.filename for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS + zip_ref.extractall(target_dir, members=members) # extract remaining files zip_ref.close() except zipfile.BadZipfile as e: mark_file_as_BAD(zip_file, e) @@ -36,7 +36,7 @@ def extract_rar(rar_file: str, target_dir: str) -> None: else: # if Linux or Mac rarfile.UNRAR_TOOL = 'unrar' files = rar_ref.namelist() - files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name + files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS rar_ref.extractall(target_dir, files) # extract the remaining files rar_ref.close() except OSError as e: @@ -56,7 +56,7 @@ def 
extract_7z(seven_zip_file: str, target_dir: str) -> None: if not seven_zip.getnames(): raise exceptions.Bad7zFile files = seven_zip.getnames() - files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name + files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS seven_zip.extract(target_dir, targets=files) # extract the remaining files seven_zip.close() except exceptions.Bad7zFile as e: diff --git a/utils/settings.py b/utils/settings.py index 2fe215d..bb8c40b 100644 --- a/utils/settings.py +++ b/utils/settings.py @@ -5,3 +5,4 @@ BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downl BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files +IGNORE_DIRS = [ '__MACOSX', 'vendor', 'node_modules' ] # list of dir names to ignore from extracting