add dis_enc_bitdepth to CambiFeatureExtractor (#1171)

* Add dis_enc_bitdepth as a property of Asset, require dis_enc_bitdepth in CambiFeatureExtractor, and add tests.
* Add the encode bitdepth to the dis_str property of Asset.

---------

Co-authored-by: Lukas Krasula <[email protected]>
Netflix · Feb 28, 2023 · c2608d9 · c2608d9
1 parent b0e3fa2
commit c2608d9
Show file tree

Hide file tree

Showing 6 changed files with 235 additions and 4 deletions.
diff --git a/python/test/asset_test.py b/python/test/asset_test.py
@@ -77,6 +77,83 @@ def test_dis_encode_width_height(self):
                       asset_dict={'width': 720, 'height': 480})
         self.assertEqual(asset.dis_encode_width_height, (720, 480))
 
+    def test_dis_encode_bitdepth(self):
+        # Checks Asset.dis_encode_bitdepth for three situations: an explicit
+        # dis_enc_bitdepth, a value derived from dis_yuv_type, and both given.
+
+        def make_asset(asset_dict):
+            # all cases share the same dummy identifiers and empty paths
+            return Asset(dataset="test", content_id=0, asset_id=0,
+                         ref_path="", dis_path="",
+                         asset_dict=asset_dict)
+
+        # an explicitly supplied, supported value is returned as is
+        asset = make_asset({'dis_enc_bitdepth': 8})
+        self.assertEqual(asset.dis_encode_bitdepth, 8)
+
+        # 24 is not one of 8, 10, 12, 16, so reading the property must fail
+        asset = make_asset({'dis_enc_bitdepth': 24})
+        with self.assertRaises(AssertionError):
+            print(asset.dis_encode_bitdepth)
+
+        # without dis_enc_bitdepth, the bitdepth follows dis_yuv_type
+        expected_by_yuv_type = [
+            ('yuv420p', 8),
+            ('yuv422p', 8),
+            ('yuv444p', 8),
+            ('yuv420p10le', 10),
+            ('yuv422p10le', 10),
+            ('yuv444p10le', 10),
+            ('yuv420p12le', 12),
+            ('yuv422p12le', 12),
+            ('yuv444p12le', 12),
+            ('yuv420p16le', 16),
+            ('yuv422p16le', 16),
+            ('yuv444p16le', 16),
+        ]
+        for yuv_type, expected_bitdepth in expected_by_yuv_type:
+            asset = make_asset({'dis_yuv_type': yuv_type})
+            self.assertEqual(asset.dis_encode_bitdepth, expected_bitdepth)
+
+        # when both are present, dis_enc_bitdepth wins over dis_yuv_type
+        asset = make_asset({'dis_yuv_type': 'yuv420p10le', 'dis_enc_bitdepth': 8})
+        self.assertEqual(asset.dis_encode_bitdepth, 8)
+
     def test_quality_width_height(self):
         asset = Asset(dataset="test", content_id=0, asset_id=0,
                       ref_path="", dis_path="",
@@ -345,6 +422,24 @@ def test_str(self):
             "test_0_1_refvideo_720x480_vs_disvideo_720x480_e_480x320_q_720x480"
         )
 
+        asset = Asset(dataset="test", content_id=0, asset_id=1,
+                      ref_path="dir/refvideo.yuv", dis_path="dir/disvideo.yuv",
+                      asset_dict={'width': 720, 'height': 480,
+                                  'dis_yuv_type': 'yuv420p10le', 'dis_enc_bitdepth': 10})
+        self.assertEqual(
+            str(asset),
+            "test_0_1_refvideo_720x480_vs_disvideo_720x480_yuv420p10le_q_720x480"
+        )
+
+        asset = Asset(dataset="test", content_id=0, asset_id=1,
+                      ref_path="dir/refvideo.yuv", dis_path="dir/disvideo.yuv",
+                      asset_dict={'width': 720, 'height': 480,
+                                  'dis_yuv_type': 'yuv420p10le', 'dis_enc_bitdepth': 8})
+        self.assertEqual(
+            str(asset),
+            "test_0_1_refvideo_720x480_vs_disvideo_720x480_ebd_8_yuv420p10le_q_720x480"
+        )
+
     def test_hash_equal(self):
         asset1 = Asset(dataset="test", content_id=0, asset_id=2,
                       ref_path="dir/refvideo.yuv", dis_path="dir/disvideo.yuv",

diff --git a/python/test/cambi_test.py b/python/test/cambi_test.py
@@ -82,6 +82,65 @@ def test_run_cambi_fextractor_10b(self):
         self.assertAlmostEqual(results[0]['Cambi_feature_cambi_score'],
                                0.0013863333333333334, places=4)
 
+    def test_run_cambi_fextractor_incorrect_enc_bitdepth(self):
+        # Declares an encode bitdepth of 10 on the 10-bit test asset. The
+        # expected score is the same literal asserted at the end of
+        # test_run_cambi_fextractor_10b.
+        # NOTE(review): "incorrect" presumably means the clip was really
+        # encoded at 8 bits before being converted to 10 bits -- confirm.
+        _, _, asset, asset_original = set_default_cambi_video_for_testing_10b()
+        asset.asset_dict['dis_enc_bitdepth'] = 10
+
+        assets = [asset, asset_original]
+        self.fextractor = CambiFeatureExtractor(
+            assets, None,
+            fifo_mode=False, result_store=None, optional_dict={}
+        )
+        self.fextractor.run(parallelize=False)
+
+        # score: arithmetic mean score over all frames
+        cambi_score = self.fextractor.results[0]['Cambi_feature_cambi_score']
+        self.assertAlmostEqual(cambi_score, 0.0013863333333333334, places=4)
+
+    def test_run_cambi_fextractor_correct_enc_bitdepth(self):
+        # Declares an encode bitdepth of 8 on the 10-bit test asset and pins
+        # the resulting Cambi score.
+        # NOTE(review): "correct" presumably means the clip was encoded at
+        # 8 bits before being converted to 10 bits -- confirm.
+        _, _, asset, asset_original = set_default_cambi_video_for_testing_10b()
+        asset.asset_dict['dis_enc_bitdepth'] = 8
+
+        assets = [asset, asset_original]
+        self.fextractor = CambiFeatureExtractor(
+            assets, None,
+            fifo_mode=False, result_store=None, optional_dict={}
+        )
+        self.fextractor.run(parallelize=False)
+
+        # score: arithmetic mean score over all frames
+        cambi_score = self.fextractor.results[0]['Cambi_feature_cambi_score']
+        self.assertAlmostEqual(cambi_score, 0.00020733333333333332, places=4)
+
+    def test_run_cambi_fextractor_notyuv_correct_enc_bitdepth_8(self):
+        # Runs Cambi on the default 576x324 assets without touching
+        # asset_dict, and checks the score of the second result.
+        # NOTE(review): despite "notyuv" in the name, nothing visible here
+        # sets dis_yuv_type to 'notyuv' or supplies dis_enc_bitdepth, and the
+        # expected value is the same literal asserted in
+        # test_run_cambi_fextractor_correct_enc_bitdepth -- confirm that the
+        # fixture and the expected score are the intended ones.
+        _, _, asset, asset_original = set_default_576_324_videos_for_testing()
+
+        assets = [asset, asset_original]
+        self.fextractor = CambiFeatureExtractor(
+            assets, None,
+            fifo_mode=False, result_store=None, optional_dict={}
+        )
+        self.fextractor.run(parallelize=False)
+
+        # score: arithmetic mean score over all frames
+        cambi_score = self.fextractor.results[1]['Cambi_feature_cambi_score']
+        self.assertAlmostEqual(cambi_score, 0.00020733333333333332, places=4)
+
+    def test_run_cambi_fextractor_enc_bitdepth_none(self):
+        # An explicit dis_enc_bitdepth of None is not one of the supported
+        # values (8, 10, 12, 16), so running the extractor is expected to
+        # end in an AssertionError.
+        _, _, asset, asset_original = set_default_cambi_video_for_testing_10b()
+        asset.asset_dict['dis_enc_bitdepth'] = None
+
+        assets = [asset, asset_original]
+        self.fextractor = CambiFeatureExtractor(
+            assets, None,
+            fifo_mode=False, result_store=None, optional_dict={}
+        )
+        with self.assertRaises(AssertionError):
+            self.fextractor.run(parallelize=False)
+
     def test_run_cambi_fextractor_max_log_contrast(self):
         _, _, asset, asset_original = set_default_576_324_videos_for_testing()
         self.fextractor = CambiFeatureExtractor(

diff --git a/python/vmaf/core/asset.py b/python/vmaf/core/asset.py
@@ -12,7 +12,8 @@
 
 from vmaf.core.mixin import WorkdirEnabled
 from vmaf.tools.misc import get_file_name_without_extension, \
-    get_file_name_with_extension, get_unique_str_from_recursive_dict
+    get_file_name_with_extension, get_unique_str_from_recursive_dict, \
+    map_yuv_type_to_bitdepth
 from vmaf.config import VmafConfig
 from vmaf.core.proc_func import proc_func_dict
 
@@ -220,6 +221,20 @@ def dis_encode_width_height(self):
         else:
             return self.dis_width_height
 
+    @property
+    def dis_encode_bitdepth(self):
+        """
+        Bitdepth of the distorted encode, before any conversions were applied.
+        :return: asset_dict['dis_enc_bitdepth'] when that key is present (the
+        value must be one of 8, 10, 12, 16, otherwise an AssertionError is
+        raised); when the key is absent, the bitdepth that
+        map_yuv_type_to_bitdepth gives for dis_yuv_type (e.g. 8 for yuv420p).
+        """
+        if 'dis_enc_bitdepth' not in self.asset_dict:
+            # nothing specified explicitly: derive it from the yuv type
+            return map_yuv_type_to_bitdepth(self.dis_yuv_type)
+
+        enc_bitdepth = self.asset_dict['dis_enc_bitdepth']
+        assert enc_bitdepth in [8, 10, 12, 16], \
+            "Supported encoding bitdepths are 8, 10, 12, and 16."
+        return enc_bitdepth
+
     def clear_up_width_height(self):
         if 'width' in self.asset_dict:
             del self.asset_dict['width']
@@ -466,6 +481,13 @@ def dis_str(self):
             w, h = self.dis_encode_width_height
             s += "_e_{w}x{h}".format(w=w, h=h)
 
+        if self.dis_encode_bitdepth is not None and \
+                map_yuv_type_to_bitdepth(self.dis_yuv_type) != self.dis_encode_bitdepth:
+            # only add dis_encode_bitdepth to the string if it is not None and it is different from the bitdepth
+            # of dis_yuv_type
+            ebd = self.dis_encode_bitdepth
+            s += "_ebd_{ebd}".format(ebd=ebd)
+
         if self.dis_yuv_type != self.DEFAULT_YUV_TYPE:
             s += "_{}".format(self.dis_yuv_type)
 

diff --git a/python/vmaf/core/cambi_feature_extractor.py b/python/vmaf/core/cambi_feature_extractor.py
@@ -5,7 +5,8 @@
 class CambiFeatureExtractor(VmafexecFeatureExtractorMixin, FeatureExtractor):
 
     TYPE = "Cambi_feature"
-    VERSION = "0.4" # Supporting scaled encodes and minor change to the spatial mask
+    # VERSION = "0.4" # Supporting scaled encodes and minor change to the spatial mask
+    VERSION = "0.5"  # Supporting bitdepth converted encodes
 
     ATOM_FEATURES = ['cambi']
 
@@ -24,9 +25,15 @@ def _generate_result(self, asset):
             'dis_height, or 3) width and height.'
         encode_width, encode_height = asset.dis_encode_width_height
 
-        additional_params = dict()
+        assert asset.dis_encode_bitdepth is not None, \
+            'For Cambi, dis_encode_bitdepth cannot be None. One can specify dis_encode_bitdepth by adding ' \
+            'dis_enc_bitdepth field to asset_dict. The supported values are 8, 10, 12, or 16.'
+        encode_bitdepth = asset.dis_encode_bitdepth
+
+        additional_params = {'enc_bitdepth': encode_bitdepth}
         if encode_width != quality_width or encode_height != quality_height:
-            additional_params = {'enc_width': encode_width, 'enc_height': encode_height}
+            additional_params['enc_width'] = encode_width
+            additional_params['enc_height'] = encode_height
 
         log_file_path = self._get_log_file_path(asset)
 

diff --git a/python/vmaf/routine.py b/python/vmaf/routine.py
@@ -255,6 +255,13 @@ def read_dataset(dataset, **kwargs):
         if 'dis_end_frame' in dis_video:
             asset_dict['dis_end_frame'] = dis_video['dis_end_frame']
 
+        if 'enc_width' in dis_video:
+            asset_dict['dis_enc_width'] = dis_video['enc_width']
+        if 'enc_height' in dis_video:
+            asset_dict['dis_enc_height'] = dis_video['enc_height']
+        if 'enc_bitdepth' in dis_video:
+            asset_dict['dis_enc_bitdepth'] = dis_video['enc_bitdepth']
+
         if groundtruth is None and skip_asset_with_none_groundtruth:
             pass
         else:

diff --git a/python/vmaf/tools/misc.py b/python/vmaf/tools/misc.py
@@ -800,6 +800,47 @@ def linear_fit(x, y):
     return scipy.optimize.curve_fit(linear_func, x, y, [1.0, 0.0])
 
 
+def map_yuv_type_to_bitdepth(yuv_type):
+    """
+    Map a yuv type name to its bitdepth.
+
+    The names without a suffix ('yuv420p', 'yuv422p', 'yuv444p') map to 8;
+    the same names followed by '10le', '12le' or '16le' map to 10, 12 or 16.
+    Any other value (e.g. 'notyuv') maps to None.
+
+    >>> map_yuv_type_to_bitdepth('yuv420p')
+    8
+    >>> map_yuv_type_to_bitdepth('yuv422p')
+    8
+    >>> map_yuv_type_to_bitdepth('yuv444p')
+    8
+    >>> map_yuv_type_to_bitdepth('yuv420p10le')
+    10
+    >>> map_yuv_type_to_bitdepth('yuv422p10le')
+    10
+    >>> map_yuv_type_to_bitdepth('yuv444p10le')
+    10
+    >>> map_yuv_type_to_bitdepth('yuv420p12le')
+    12
+    >>> map_yuv_type_to_bitdepth('yuv422p12le')
+    12
+    >>> map_yuv_type_to_bitdepth('yuv444p12le')
+    12
+    >>> map_yuv_type_to_bitdepth('yuv420p16le')
+    16
+    >>> map_yuv_type_to_bitdepth('yuv422p16le')
+    16
+    >>> map_yuv_type_to_bitdepth('yuv444p16le')
+    16
+    >>> map_yuv_type_to_bitdepth('notyuv') is None
+    True
+    """
+    # (bitdepth, yuv type names) pairs, scanned in order; membership is
+    # tested with `in` so that unhashable inputs simply fall through to None
+    bitdepth_to_yuv_types = (
+        (8, ('yuv420p', 'yuv422p', 'yuv444p')),
+        (10, ('yuv420p10le', 'yuv422p10le', 'yuv444p10le')),
+        (12, ('yuv420p12le', 'yuv422p12le', 'yuv444p12le')),
+        (16, ('yuv420p16le', 'yuv422p16le', 'yuv444p16le')),
+    )
+    for bitdepth, yuv_types in bitdepth_to_yuv_types:
+        if yuv_type in yuv_types:
+            return bitdepth
+    return None
+
+
 if __name__ == '__main__':
     import doctest
     doctest.testmod()