Skip to content

Commit

Permalink
Merge pull request #120 from nulib/newNoMD5QCTOOLs
Browse files Browse the repository at this point in the history
Fixed
  • Loading branch information
SoFrans authored Aug 20, 2024
2 parents 62824ae + ebf4317 commit d374dd2
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
31 changes: 21 additions & 10 deletions nulrdcscripts/ingest/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import posixpath
import csv


def init_io(input_path: str, output_path: str):
"""
Sets up input directory and output csv file.
Expand All @@ -26,24 +27,23 @@ def init_io(input_path: str, output_path: str):

if not output_path:
base_folder_name = os.path.basename(input_path)
output_path = os.path.join(
input_path,
base_folder_name + '_ingest.csv'
)
output_path = os.path.join(input_path, base_folder_name + "_ingest.csv")
output_check(output_path)

return input_path, output_path


def input_check(indir: str):
    """
    Checks given input is valid. Quits if not.

    :param str indir: fullpath to input directory to be checked
    :raises SystemExit: with exit status 1 if indir is not an existing directory
    """
    if not os.path.isdir(indir):
        print("\n--- ERROR: Input must be a directory ---\n")
        # quit() is injected by the site module for interactive use and may be
        # unavailable (e.g. under `python -S`); called bare it also exits with
        # status 0. Raise SystemExit directly so the failure is reported.
        raise SystemExit(1)


def output_check(outfile: str):
"""
Checks that output is a valid csv file. Quits if not.
Expand All @@ -60,10 +60,11 @@ def output_check(outfile: str):
print("\n--- ERROR: Unable to create output file", outfile + " ---\n")
quit()


def write_csv(outfile: str, csv_fields: list[str], csv_data: list[dict[str, str]]):
"""
Writes ingest sheet data to a csv.
:param str outfile: fullpath to output file including extension
:param list csv_fields: fieldnames(headers) for csv file
:param list csv_data: data to be written to csv
Expand All @@ -75,6 +76,7 @@ def write_csv(outfile: str, csv_fields: list[str], csv_data: list[dict[str, str]
for file_info in csv_data[item]:
writer.writerow(file_info)


def clean_subdir(subdir: str, indir: str):
"""
Cleans up subdir for easier use when analyzing files.
Expand All @@ -87,6 +89,7 @@ def clean_subdir(subdir: str, indir: str):
subdir = subdir.strip("/")
return subdir


def clean_dirs(dirs: list[str]):
"""
Reorganized dirs.
Expand All @@ -100,18 +103,23 @@ def clean_dirs(dirs: list[str]):
dirs[:] = [d for d in dirs if not d[0] == "."]
return dirs


def clean_files(files: list[str], skip: list[str]):
"""
Removes files to be ignored when making ingest sheet.
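By default skips names starting with ".", "Thumbs.db", and names ending in ".qctools.mkv", ".qctools.xmlpoetry", ".qctools.xml.gz", ".framemd5", ".md5", ".csv", ".py"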
:param list files: list of files in a directory
:param list skip: list of files to skip in addition to the defaults
:returns: cleaned and sorted list of files
:rtype: list of str
"""
files = [f for f in files if not f[0] == "."]
files = [f for f in files if not f == "Thumbs.db"]
files = [f for f in files if not f.endswith(".qctools.mkv")]
files = [f for f in files if not f.endswith(".qctools.xmlpoetry")]
files = [f for f in files if not f.endswith(".qctools.xml.gz")]
files = [f for f in files if not f.endswith(".framemd5")]
files = [f for f in files if not f.endswith(".md5")]
files = [f for f in files if not f.endswith(".csv")]
files = [f for f in files if not f.endswith(".py")]
Expand All @@ -123,11 +131,12 @@ def clean_files(files: list[str], skip: list[str]):
sorted_files: list[str] = sorted(files)
return sorted_files


def get_unix_fullpath(file: str, subdir: str):
"""
Creates fullpath filename for file.
Uses unix style path without leading slash.
:param str file: input filename
:param str subdir: fullpath to directory that file is in
:returns: unix style path for file
Expand All @@ -138,7 +147,8 @@ def get_unix_fullpath(file: str, subdir: str):
filename = filename.strip("/")
return filename

def yn_check(message = ""):

def yn_check(message=""):
"""
Gets yes or no response from user.
Expand All @@ -158,7 +168,8 @@ def yn_check(message = ""):
else:
print("Please respond with 'yes' or 'no'")


if __name__ == "__main__":
import doctest

doctest.testmod()
doctest.testmod()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "nul-rdc-scripts"
version = "0.3.0"
version = "0.3.1"
description = "Scripts for NUL RDC Digitization Team"
authors = [
"Northwestern University Libraries <[email protected]>",
Expand Down

0 comments on commit d374dd2

Please sign in to comment.