Merge pull request #42 from JPC-AV/feature/mediatrace

Feature/mediatrace
JPC-AV · Jun 24, 2024 · 51a7754 · 51a7754
2 parents cd1611a + c1b91e6
commit 51a7754
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 34 deletions.
diff --git a/config/command_config.yaml b/config/command_config.yaml
@@ -23,7 +23,7 @@ tools:
 ##  mediaconch policy file name from any xml file in the config directory
     mediaconch_policy: JPC_AV_NTSC_MKV_2023-11-21.xml
 ##  'yes' or 'no'
-    run_mediaconch: 'yes'
+    run_mediaconch: 'no'
   mediainfo:
 ##  'yes' or 'no'
     check_mediainfo: 'yes'

diff --git a/config/config.yaml b/config/config.yaml
@@ -41,12 +41,6 @@ mediainfo_values:
     Sampling rate: 48.0 kHz
     Bit depth: 24 bits
     Compression mode: Lossless
-  expected_custom_fields:
-    Title:
-    Encoded by:
-    Description:
-    Encoding settings:
-    ORIGINAL MEDIA TYPE:
 exiftool_values:
   File Type: MKV
   File Type Extension: mkv
@@ -139,6 +133,19 @@ ffmpeg_values:
       DESCRIPTION:
       ORIGINAL MEDIA TYPE:
       ENCODED_BY:
+mediatrace:
+  COLLECTION:
+  TITLE:
+  CATALOG_NUMBER:
+  DESCRIPTION:
+  DATE_DIGITIZED:
+  ENCODING_SETTINGS:
+  ENCODED_BY:
+  ORIGINAL_MEDIA_TYPE:
+  DATE_TAGGED:
+  TERMS_OF_USE:
+  _TECHNICAL_NOTES:
+  _ORIGINAL_FPS:
 qct-parse:
   content:
     silence:

diff --git a/src/AV_Spex/av_spex_the_file.py b/src/AV_Spex/av_spex_the_file.py
@@ -23,6 +23,7 @@
 from .checks.fixity_check import check_fixity, output_fixity
 from .checks.filename_check import check_filenames
 from .checks.mediainfo_check import parse_mediainfo
+from .checks.mediatrace_check import parse_mediatrace
 from .checks.exiftool_check import parse_exiftool
 from .checks.ffprobe_check import parse_ffprobe
 from .checks.embed_fixity import extract_tags, extract_hashes, embed_fixity, validate_embedded_md5
@@ -67,6 +68,22 @@ def run_command(command, input_path, output_type, output_path):
     logger.debug(f'\nRunning command: {full_command}')
     subprocess.run(full_command, shell=True, env=env)
 
+def run_mediatrace_command(command, input_path):
+    '''
+    Run a shell command against a single input file and capture what it prints.
+
+    command: the command name plus any flags, as one string
+    input_path: path to the input file; it is appended to the command wrapped in double quotes
+
+    Returns the subprocess.CompletedProcess produced by subprocess.run.
+    Because capture_output=True is used without text mode, its stdout and stderr are bytes.
+    '''
+
+    # Copy the current environment and put /usr/local/bin at the front of PATH
+    env = os.environ.copy()
+    env['PATH'] = '/usr/local/bin:' + env.get('PATH', '')
+
+    full_command = f"{command} \"{input_path}\" "
+
+    logger.debug(f'\nRunning mediainfo to generate MediaTrace XML: {full_command}')
+    # Hand the modified environment to the child process (run_command does the same);
+    # without env=env the PATH adjustment above would never reach the command
+    output = subprocess.run(full_command, shell=True, env=env, capture_output=True)
+
+    return output
+
 # Mediaconch needs its own function, because the command's flags and multiple inputs don't conform to the simple 3 part structure of the other commands
 def run_mediaconch_command(command, input_path, output_type, output_path):
     '''
@@ -392,6 +409,14 @@ def main():
             # If check_mediainfo is set to 'yes' in command_config.yaml then
             parse_mediainfo(mediainfo_output_path)
             # Run parse functions defined in the '_check.py' scripts
+
+        mediatrace_output_path = os.path.join(destination_directory, f'{video_id}_mediatrace_output.xml')
+        if command_config.command_dict['tools']['mediainfo']['check_mediainfo'] == 'yes':
+            logger.info(f"\nCreating MediaTrace XML file to check custom MKV Tag metadata fields:")
+            # If check_mediainfo is set to 'yes' in command_config.yaml then
+            run_command("mediainfo --Details=1 --Output=XML", video_path, '>', mediatrace_output_path)
+            parse_mediatrace(mediatrace_output_path)
+            # Run parse functions defined in the '_check.py' scripts
 
         ffprobe_output_path = os.path.join(destination_directory, f'{video_id}_ffprobe_output.txt')
         if command_config.command_dict['tools']['ffprobe']['run_ffprobe'] == 'yes':

diff --git a/src/AV_Spex/checks/mediainfo_check.py b/src/AV_Spex/checks/mediainfo_check.py
@@ -13,7 +13,6 @@ def parse_mediainfo(file_path):
     expected_general = config_path.config_dict['mediainfo_values']['expected_general']
     expected_video = config_path.config_dict['mediainfo_values']['expected_video']
     expected_audio = config_path.config_dict['mediainfo_values']['expected_audio']
-    expected_custom_fields = config_path.config_dict['mediainfo_values']['expected_custom_fields']
 
     section_data = {}
     # creates empty dictionary "section_data"
@@ -114,32 +113,15 @@ def parse_mediainfo(file_path):
             if actual_value not in expected_value:
                 mediainfo_differences.append(f"Metadata field in Audio: {expected_key} has a value of {actual_value}\nThe expected value is: {expected_value}")
                 # append this string to the list "mediainfo_differences"
-
-    custom_mediainfo_differences = []
-    for expected_key, expected_value in expected_custom_fields.items():
-    # defines variables "expected_key" and "expected_value" to the dictionary "expected_audio"
-        if expected_key not in (section_data["General"]):
-            custom_mediainfo_differences.append(f"metadata field in General: {expected_key} does not exist") 
-        elif len(section_data["General"][expected_key]) == 0:
-        # count the values in the nested dictionary "General" with 'len', if the values are zero, then:
-            custom_mediainfo_differences.append(f"General: {expected_key} is empty")
-            # append this string to the list "mediainfo_differences"
 
-    if not mediainfo_differences and not custom_mediainfo_differences:
+    if not mediainfo_differences:
     # if the list "mediainfo_differences" is empty, then
         logger.info("\nAll specified fields and values found in the MediaInfo output.")
-    elif not mediainfo_differences:
-        logger.info("\nAll specified metadata fields and values found in the MediaInfo output, but some custom embedded fields are missing or don't match.")
     else:
     # if the list "mediainfo_differences" is not empty, then
         logger.critical(f"\nSome specified MediaInfo fields or values are missing or don't match:")
         for diff in mediainfo_differences:
             logger.critical(f'{diff}')
-
-    if custom_mediainfo_differences:
-        logger.critical("\nThe specified MediaInfo fields or values for embedded metadata are below:")
-        for custom_diff in custom_mediainfo_differences:
-            logger.critical(f'{custom_diff}')
 
 # Only execute if this file is run directly, not imported)
 if __name__ == "__main__":

diff --git a/src/AV_Spex/checks/mediatrace_check.py b/src/AV_Spex/checks/mediatrace_check.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+
+import subprocess
+import xml.etree.ElementTree as ET
+
+from ..utils.log_setup import logger
+from ..utils.find_config import config_path
+
+def parse_mediatrace(xml_file):
+    '''
+    Check a MediaTrace XML file for the tag names listed under 'mediatrace' in the config.
+
+    xml_file: path to the MediaTrace XML file to parse
+
+    For each expected tag name, the 'SimpleTag' blocks are searched for one whose 'TagName'
+    data equals that name and which also has a 'TagString' data element. The text of the
+    first such 'TagString' is recorded. Tag names that are never found, or whose recorded
+    text is empty, are reported with logger.critical; otherwise a single logger.info
+    message is written. Only presence and non-emptiness are checked; the values stored
+    in the config are not compared. Nothing is returned.
+
+    ET.parse raises if the file is missing or is not well-formed XML.
+    '''
+    expected_mediatrace = config_path.config_dict['mediatrace']
+
+    # Parse the XML file
+    tree = ET.parse(xml_file)
+    root = tree.getroot()
+
+    # Define the namespace
+    ns = {'mt': 'https://mediaarea.net/mediatrace'}
+
+    # Find all 'block' elements with the name attribute matching 'SimpleTag'.
+    # The result does not depend on the key being looked up, so collect it once.
+    simple_tags = root.findall(".//mt:block[@name='SimpleTag']", ns)
+
+    mediatrace_output = {}
+    for mt_key in expected_mediatrace:
+        for simple_tag in simple_tags:
+            # Find the 'TagName' block whose data is exactly the expected tag name
+            tag_name_block = simple_tag.find(f".//mt:block[@name='TagName']/mt:data[.='{mt_key}']", ns)
+            if tag_name_block is None:
+                continue
+            # Find the corresponding 'TagString' block
+            tag_string_block = simple_tag.find(".//mt:block[@name='TagString']/mt:data", ns)
+            if tag_string_block is not None:
+                # .text is None (not '') when the element has no text content
+                mediatrace_output[mt_key] = tag_string_block.text
+                break
+
+    mediatrace_differences = []
+    for expected_key in expected_mediatrace:
+        if expected_key not in mediatrace_output:
+            mediatrace_differences.append(f"MediaTrace metadata field {expected_key} does not exist") 
+        elif not mediatrace_output[expected_key]:
+            # covers both an empty string and None; len() on None would raise TypeError
+            mediatrace_differences.append(f"MediaTrace: {expected_key} is empty")
+
+    if mediatrace_differences:
+        logger.critical("\nSome specified MediaTrace fields or values are missing or don't match:")
+        for diff in mediatrace_differences:
+            logger.critical(f"{diff}")
+    else:
+        # the list "mediatrace_differences" is empty, so every expected field was found with a value
+        logger.info("\nAll specified mediatrace fields and values found in  output.")
diff --git a/src/AV_Spex/checks/qct_parse.py b/src/AV_Spex/checks/qct_parse.py
@@ -631,11 +631,12 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output):
 	# set the path for the thumbnail export
 	metadata_dir = os.path.dirname(qctools_output_path)
 	thumbPath = os.path.join(metadata_dir, "ThumbExports")
-	if not os.path.exists(thumbPath):
-		os.makedirs(thumbPath)
-	else:
-		thumbPath = uniquify(thumbPath) 
-		os.makedirs(thumbPath)
+	if qct_parse['thumbExport']:
+		if not os.path.exists(thumbPath):
+			os.makedirs(thumbPath)
+		else:
+			thumbPath = uniquify(thumbPath) 
+			os.makedirs(thumbPath)
 
 	profile = {} # init a dictionary where we'll store reference values from config.yaml file
 
@@ -654,7 +655,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output):
 					break
 
 	######## Iterate Through the XML for content detection ########
-	if qct_parse['contentFilter'] != None:
+	if qct_parse['contentFilter']:
 		logger.debug(f"Checking for segments of {os.path.basename(video_path)} that match the content filter {qct_parse['contentFilter']}\n")
 		duration_str = get_duration(video_path)
 		contentFilter_name = qct_parse['contentFilter']
@@ -696,7 +697,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output):
 		if durationStart == "" and durationEnd == "":
 			logger.error("No color bars detected\n")
 		if barsStartString and barsEndString:
-			print_bars_durations(qctools_check_output,thumbPath,barsEndString)
+			print_bars_durations(qctools_check_output,barsStartString,barsEndString)
 			if qct_parse['thumbExport']:
 				barsStampString = dts2ts(durationStart)
 				printThumb(video_path,"color_bars",startObj,thumbPath,"first_frame",barsStampString)
@@ -710,7 +711,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output):
 			evalBars(startObj,pkt,durationStart,durationEnd,framesList)
 			# Define the keys for which you want to calculate the average
 			keys_to_average = ['YMAX', 'YMIN', 'UMIN', 'UMAX', 'VMIN', 'VMAX', 'SATMIN', 'SATMAX']
-			# Initialize a dictionary to store the average values
+			# Create a dictionary of the median values of each of the keys from the frameDict created in the evalBars function
 			average_dict = {key: median([float(frameDict[key]) for frameDict in framesList if key in frameDict]) for key in keys_to_average}
 			if average_dict is None:
 				logger.critical(f"\nSomething went wrong - Cannot run evaluate color bars\n")