Added IGNORE_DIRS to settings.py so that multiple directory names can be excluded from extraction

This commit is contained in:
2024-03-01 15:45:34 +00:00
parent 8a4dee8e73
commit 7577148f83
2 changed files with 6 additions and 5 deletions

View File

@@ -2,7 +2,7 @@ import os, shutil, platform
import zipfile, rarfile import zipfile, rarfile
from py7zr import SevenZipFile, exceptions from py7zr import SevenZipFile, exceptions
from utils.settings import BAD_DIR_NAME from utils.settings import BAD_DIR_NAME, IGNORE_DIRS
def mark_file_as_BAD(file: str, bad_exception: Exception) -> None: def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
@@ -19,8 +19,8 @@ def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
def extract_zip(zip_file: str, target_dir: str) -> None | Exception: def extract_zip(zip_file: str, target_dir: str) -> None | Exception:
try: try:
with zipfile.ZipFile(zip_file, 'r') as zip_ref: with zipfile.ZipFile(zip_file, 'r') as zip_ref:
members = [ m for m in zip_ref.infolist() if "__MACOSX" not in m.filename ] members = [ m for m in zip_ref.infolist() if not any(dir_name in m.filename for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
zip_ref.extractall(target_dir, members=members) # extract all files, ignoring those with the "__MACOSX" string in the name zip_ref.extractall(target_dir, members=members) # extract remaining files
zip_ref.close() zip_ref.close()
except zipfile.BadZipfile as e: except zipfile.BadZipfile as e:
mark_file_as_BAD(zip_file, e) mark_file_as_BAD(zip_file, e)
@@ -36,7 +36,7 @@ def extract_rar(rar_file: str, target_dir: str) -> None:
else: # if Linux or Mac else: # if Linux or Mac
rarfile.UNRAR_TOOL = 'unrar' rarfile.UNRAR_TOOL = 'unrar'
files = rar_ref.namelist() files = rar_ref.namelist()
files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
rar_ref.extractall(target_dir, files) # extract the remaining files rar_ref.extractall(target_dir, files) # extract the remaining files
rar_ref.close() rar_ref.close()
except OSError as e: except OSError as e:
@@ -56,7 +56,7 @@ def extract_7z(seven_zip_file: str, target_dir: str) -> None:
if not seven_zip.getnames(): if not seven_zip.getnames():
raise exceptions.Bad7zFile raise exceptions.Bad7zFile
files = seven_zip.getnames() files = seven_zip.getnames()
files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
seven_zip.extract(target_dir, targets=files) # extract the remaining files seven_zip.extract(target_dir, targets=files) # extract the remaining files
seven_zip.close() seven_zip.close()
except exceptions.Bad7zFile as e: except exceptions.Bad7zFile as e:

View File

@@ -5,3 +5,4 @@ BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downl
BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files
IGNORE_DIRS = [ '__MACOSX', 'vendor', 'node_modules' ] # archive members whose path contains any of these strings are skipped during extraction