From 907e87d0566ca2356f4671dc99c42a08fb12a439 Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 15:05:01 +0200
Subject: [PATCH 1/8] doc: Change quality.py to scores.py and adjust README.md

---
 README.md                        | 13 +++++--
 syndat/__init__.py               |  4 +-
 syndat/{quality.py => scores.py} | 63 +++++++++++++++++++++-----------
 tests/test_correlation.py        |  2 +-
 4 files changed, 55 insertions(+), 27 deletions(-)
 rename syndat/{quality.py => scores.py} (83%)

diff --git a/README.md b/README.md
index c594952..297a708 100644
--- a/README.md
+++ b/README.md
@@ -25,11 +25,18 @@ import syndat
 real = pd.read_csv("real.csv")
 synthetic = pd.read_csv("synthetic.csv")
 
-jsd = syndat.quality.jsd(real, synthetic)
-auc = syndat.quality.auc(real, synthetic)
-norm = syndat.quality.correlation(real, synthetic)
+# How similar are the statistical distributions of real and synthetic features 
+distribution_similarity_score = syndat.scores.distribution(real, synthetic)
+
+# How hard is it for a classifier to discriminate real and synthetic data
+discrimination_score = syndat.scores.discrimination(real, synthetic)
+
+# How well are pairwise feature correlations preserved
+correlation_score = syndat.scores.correlation(real, synthetic)
 ```
 
+Scores are defined in a range of 0-100, with a higher score corresponding to better data fidelity.
+
 ## Visualization
 
 Visualize real vs. synthetic data distributions and summary statistics for each feature:
diff --git a/syndat/__init__.py b/syndat/__init__.py
index 5ec6e93..a4778ef 100644
--- a/syndat/__init__.py
+++ b/syndat/__init__.py
@@ -1,3 +1,3 @@
 from syndat import domain
-from syndat import quality
-from syndat import visualization
\ No newline at end of file
+from syndat import scores
+from syndat import visualization
diff --git a/syndat/quality.py b/syndat/scores.py
similarity index 83%
rename from syndat/quality.py
rename to syndat/scores.py
index 8641487..2171640 100644
--- a/syndat/quality.py
+++ b/syndat/scores.py
@@ -13,11 +13,13 @@
 
 from syndat.domain import AggregationMethod
 
+logger = logging.getLogger(__name__)
+
 
 def auc(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_folds=5,
         drop_na_threshold=0.9, score: bool = True) -> float:
     """
-    Computes the Differentiation Complexity Score / ROC AUC score of a classifier trained to differentiate between real
+    Computes the Discrimination Complexity Score / ROC AUC score of a classifier trained to differentiate between real
     and synthetic data.
 
     :param real: The real data.
@@ -27,18 +29,43 @@ def auc(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_folds=5,
     :param score: Return result in a normalized score in [0,100]. Default is True.
     :return: Differentiation Complexity Score / AUC ROC Score
     """
+
     warnings.warn(
-        "old_function is deprecated and will be removed in a future version. Please use discrimination_score instead.",
+        "auc is deprecated and will be removed in a future version. Please use discrimination instead.",
         DeprecationWarning,
         stacklevel=2
     )
-    return discrimination_score(real, synthetic, n_folds=n_folds, drop_na_threshold=drop_na_threshold, score=score)
+    return discrimination(real, synthetic, n_folds=n_folds, drop_na_threshold=drop_na_threshold, score=score)
 
 
-def discrimination_score(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_folds=5,
-                         drop_na_threshold=0.9, score: bool = True) -> float:
+def jsd(real: pd.DataFrame, synthetic: pd.DataFrame, aggregate_results: bool = True,
+        aggregation_method: AggregationMethod = AggregationMethod.AVERAGE, score: bool = True,
+        n_unique_threshold=10) -> Union[List[float], float]:
     """
-    Computes the Differentiation Complexity Score / ROC AUC score of a classifier trained to differentiate between real
+    Computes the feature distribution similarity using the Jensen-Shannon distance of real and synthetic data.
+
+    :param real: The real data.
+    :param synthetic: The synthetic data.
+    :param aggregate_results: Compute a single aggregated score for all features. Default is True.
+    :param aggregation_method: How the scores are aggregated. Default is the average of all feature scores.
+    :param score: Return result in a normalized score in [0,100]. Default is True.
+    :param n_unique_threshold: Threshold to determine at which number of unique values bins will span over several
+    values.
+    :return: Distribution Similarity / JSD
+    """
+
+    warnings.warn(
+        "jsd is deprecated and will be removed in a future version. Please use distribution instead.",
+        DeprecationWarning,
+        stacklevel=2
+    )
+    return distribution(real, synthetic, aggregate_results, aggregation_method, score, n_unique_threshold)
+
+
+def discrimination(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_folds=5,
+                   drop_na_threshold=0.9, score: bool = True) -> float:
+    """
+    Computes the Discrimination Complexity Score / ROC AUC score of a classifier trained to differentiate between real
     and synthetic data.
 
     :param real: The real data.
@@ -52,10 +79,10 @@ def discrimination_score(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_
     real_filtered, synthetic_filtered = __filter_rows_with_common_categories(real, synthetic)
     # check for missing values in real data
     real_clean = real_filtered.dropna(thresh=int(drop_na_threshold * len(real_filtered)), axis=1)
-    logging.info(f'Dropped {real_clean.shape[1] - real_clean.shape[1]} '
+    logger.info(f'Dropped {real_clean.shape[1] - real_clean.shape[1]} '
                  f'due to high missingness (threshold is {drop_na_threshold}).')
     real_clean = real_clean.dropna()
-    logging.info(f'Removed {len(real) - len(real_clean)} entries due to missing values.')
+    logger.info(f'Removed {len(real) - len(real_clean)} entries due to missing values.')
     # assert that both real and synthetic have same columns
     synthetic_clean = synthetic_filtered[real_clean.columns]
     # one-hot-encode categorical columns
@@ -74,9 +101,9 @@ def discrimination_score(real: pandas.DataFrame, synthetic: pandas.DataFrame, n_
         return auc_score
 
 
-def jsd(real: pd.DataFrame, synthetic: pd.DataFrame, aggregate_results: bool = True,
-        aggregation_method: AggregationMethod = AggregationMethod.AVERAGE, score: bool = True,
-        n_unique_threshold=10) -> Union[List[float], float]:
+def distribution(real: pd.DataFrame, synthetic: pd.DataFrame, aggregate_results: bool = True,
+                 aggregation_method: AggregationMethod = AggregationMethod.AVERAGE, score: bool = True,
+                 n_unique_threshold=10) -> Union[List[float], float]:
     """
     Computes the feature distribution similarity using the Jensen-Shannon distance of real and synthetic data.
 
@@ -100,7 +127,7 @@ def jsd(real: pd.DataFrame, synthetic: pd.DataFrame, aggregate_results: bool = T
         col_dtype_real = real[col].dtype
         col_dtype_synthetic = synthetic[col].dtype
         if col_dtype_real != col_dtype_synthetic:
-            logging.warning(f'Real data at col {col} is dtype {col_dtype_real} but synthetic is {col_dtype_synthetic}. '
+            logger.warning(f'Real data at col {col} is dtype {col_dtype_real} but synthetic is {col_dtype_synthetic}. '
                             f'Evaluation will be done based on the assumed data type of the real data.')
             synthetic[col] = synthetic[col].astype(col_dtype_real)
         # categorical column
@@ -216,21 +243,15 @@ def __filter_rows_with_common_categories(real: pd.DataFrame, synthetic: pd.DataF
     synthetic_categorical_cols = synthetic.select_dtypes(include=['object', 'category']).columns
     # Identify common categorical columns
     common_categorical_cols = set(real_categorical_cols) & set(synthetic_categorical_cols)
-    if not common_categorical_cols:
-        logging.warning("No common categorical columns found. Correlation will be computed on numeric data only.")
     # Filter rows with common categories in each column
     for col in common_categorical_cols:
         real_categories = set(real[col].unique())
         synthetic_categories = set(synthetic[col].unique())
         common_categories = real_categories & synthetic_categories
         if len(real_categories - common_categories) > 0:
-            logging.warning(
-                f"Categories {real_categories - common_categories} in column '{col}' "
-                f"are in real data but not in synthetic data and will be excluded.")
-        if len(synthetic_categories - common_categories) > 0:
-            logging.warning(
-                f"Categories {synthetic_categories - common_categories} in column '{col}' "
-                f"are in synthetic data but not in real data and will be excluded.")
+            logger.warning(
+                f"Categories {real_categories - common_categories} in column '{col}' are in real data but not in "
+                f"synthetic data. They will not be considered in the score computation.")
         # Filter rows to keep only common categories
         real = real[real[col].isin(common_categories)]
         synthetic = synthetic[synthetic[col].isin(common_categories)]
diff --git a/tests/test_correlation.py b/tests/test_correlation.py
index 965656e..ef4b55f 100644
--- a/tests/test_correlation.py
+++ b/tests/test_correlation.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 
-from syndat.quality import correlation
+from syndat.scores import correlation
 
 
 class TestCorrelation(unittest.TestCase):

From e172c87a4bc24a6f9633941fe5e832356c699bed Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 15:11:33 +0200
Subject: [PATCH 2/8] build: Move requirements to setup file

---
 requirements.txt |  7 -------
 setup.py         | 27 +++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 7 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index f18fe5f..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-pandas~=2.1.4
-numpy~=1.26.2
-scipy~=1.11.4
-scikit-learn~=1.3.2
-matplotlib~=3.8.2
-seaborn~=0.13.0
-setuptools==69.0.2
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 13edc7a..673b954 100644
--- a/setup.py
+++ b/setup.py
@@ -23,4 +23,31 @@
     author_email='tim.adams@scai.fraunhofer.de',
     description=DESCRIPTION,
     long_description=DESCRIPTION,
+    long_description_content_type='text/markdown',
+    install_requires=[
+        'pandas~=2.1.4',
+        'numpy~=1.26.2',
+        'scipy~=1.11.4',
+        'scikit-learn~=1.3.2',
+        'matplotlib~=3.8.2',
+        'seaborn~=0.13.0',
+        'setuptools==69.0.2'
+    ],
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+    ],
+    include_package_data=True,  # Ensure non-Python files are included
+    python_requires='>=3.9',  # Specify minimum Python version
+    keywords='synthetic-data',
+    project_urls={
+        'Documentation': 'https://github.com/SCAI-BIO/syndat#readme',
+        'Source': 'https://github.com/SCAI-BIO/syndat',
+        'Tracker': 'https://github.com/SCAI-BIO/syndat/issues',
+    },
 )

From b31222192ad922b4c6c1fc77867c26e44dbceaaf Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 15:15:12 +0200
Subject: [PATCH 3/8] build: Add keywords

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 673b954..040f0f5 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@
     ],
     include_package_data=True,  # Ensure non-Python files are included
     python_requires='>=3.9',  # Specify minimum Python version
-    keywords='synthetic-data',
+    keywords='synthetic-data, data-quality, data-visualization',
     project_urls={
         'Documentation': 'https://github.com/SCAI-BIO/syndat#readme',
         'Source': 'https://github.com/SCAI-BIO/syndat',

From 7ff573f283d0f2d452d7977e20cf4eef974edf1c Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 16:07:53 +0200
Subject: [PATCH 4/8] fix: Re-add requirements.txt to resolve conflicts

---
  requirements.txt  | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644  requirements.txt 

diff --git a/ requirements.txt  b/ requirements.txt 
new file mode 100644
index 0000000..3327fc2
--- /dev/null
+++ b/ requirements.txt 	
@@ -0,0 +1,7 @@
+pandas~=2.1.4
+numpy~=1.26.2
+scipy~=1.11.4
+scikit-learn~=1.5.1
+matplotlib~=3.8.2
+seaborn~=0.13.0
+setuptools==70.0.0
\ No newline at end of file

From e81e62963626bfec1fa926b1b3b2493ff2d33f09 Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 16:09:35 +0200
Subject: [PATCH 5/8] fix: Re-add requirements.txt to resolve conflicts

---
  requirements.txt  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ requirements.txt  b/ requirements.txt 
index 3327fc2..f18fe5f 100644
--- a/ requirements.txt 	
+++ b/ requirements.txt 	
@@ -1,7 +1,7 @@
 pandas~=2.1.4
 numpy~=1.26.2
 scipy~=1.11.4
-scikit-learn~=1.5.1
+scikit-learn~=1.3.2
 matplotlib~=3.8.2
 seaborn~=0.13.0
-setuptools==70.0.0
\ No newline at end of file
+setuptools==69.0.2
\ No newline at end of file

From 3a4d71363727a80c0e1bfe770be4898c4d3c1925 Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 16:15:11 +0200
Subject: [PATCH 6/8] chore: Update setup dependencies

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 040f0f5..a51e185 100644
--- a/setup.py
+++ b/setup.py
@@ -28,10 +28,10 @@
         'pandas~=2.1.4',
         'numpy~=1.26.2',
         'scipy~=1.11.4',
-        'scikit-learn~=1.3.2',
+        'scikit-learn~=1.5.1',
         'matplotlib~=3.8.2',
         'seaborn~=0.13.0',
-        'setuptools==69.0.2'
+        'setuptools==70.0.0'
     ],
     classifiers=[
         'Development Status :: 3 - Alpha',

From b3d47cfa635337816ffc9ef65e924ee3ef8974c9 Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 16:18:35 +0200
Subject: [PATCH 7/8] fix: tests

---
 tests/test_auc.py | 12 ++++++------
 tests/test_jsd.py | 48 +++++++++++++++++++++++------------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/tests/test_auc.py b/tests/test_auc.py
index 9ffd9ae..d6d6ee3 100644
--- a/tests/test_auc.py
+++ b/tests/test_auc.py
@@ -43,13 +43,13 @@ def preprocess_categorical_data(self, real_data, synthetic_data):
         return real_data_encoded, synthetic_data_encoded
 
     def test_auc_score(self):
-        auc_score = syndat.quality.auc(self.real_data, self.synthetic_data)
+        auc_score = syndat.scores.discrimination(self.real_data, self.synthetic_data)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
 
     def test_auc_score_normalized(self):
-        auc_score = syndat.quality.auc(self.real_data, self.synthetic_data)
+        auc_score = syndat.scores.discrimination(self.real_data, self.synthetic_data)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
@@ -57,7 +57,7 @@ def test_auc_score_normalized(self):
     def test_auc_score_with_missing_values(self):
         # Introduce missing values in real data
         self.real_data.iloc[::10, 0] = np.nan  # 10% missing data
-        auc_score = syndat.quality.auc(self.real_data, self.synthetic_data)
+        auc_score = syndat.scores.discrimination(self.real_data, self.synthetic_data)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
@@ -65,19 +65,19 @@ def test_auc_score_with_missing_values(self):
     def test_auc_score_with_missing_values_drop_col(self):
         # Introduce missing values in real data
         self.real_data.iloc[::2, 0] = np.nan  # 50% missing data -> col drop
-        auc_score = syndat.quality.auc(self.real_data, self.synthetic_data)
+        auc_score = syndat.scores.discrimination(self.real_data, self.synthetic_data)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
 
     def test_auc_score_with_custom_folds(self):
-        auc_score = syndat.quality.auc(self.real_data, self.synthetic_data, n_folds=5)
+        auc_score = syndat.scores.discrimination(self.real_data, self.synthetic_data, n_folds=5)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
 
     def test_auc_score_with_categorical_data(self):
-        auc_score = syndat.quality.auc(self.real_data_cat, self.synthetic_data_cat)
+        auc_score = syndat.scores.discrimination(self.real_data_cat, self.synthetic_data_cat)
         self.assertTrue(isinstance(auc_score, float))
         self.assertGreaterEqual(auc_score, 0.0)
         self.assertLessEqual(auc_score, 100.0)
diff --git a/tests/test_jsd.py b/tests/test_jsd.py
index 60fbc0e..aa1de0e 100644
--- a/tests/test_jsd.py
+++ b/tests/test_jsd.py
@@ -17,8 +17,8 @@ def test_jsd_zero_int64(self):
             'feature1': [6, 7, 8, 9, 10],
             'feature2': [15, 16, 17, 18, 19]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(0, jsd)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(0, distribution)
 
     def test_jsd_zero_int64_float64(self):
         synthetic = pd.DataFrame({
@@ -30,8 +30,8 @@ def test_jsd_zero_int64_float64(self):
             'feature1': [6, 7, 8, 9, 10],
             'feature2': [0.6, 0.7, 0.8, 0.9, 1.0]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(jsd, 0)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(distribution, 0)
 
     def test_jsd_perfect_int64(self):
         synthetic = pd.DataFrame({
@@ -43,8 +43,8 @@ def test_jsd_perfect_int64(self):
             'feature1': [1, 2, 1, 2, 3],
             'feature2': [11, 12, 13, 14, 15]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(jsd, 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(distribution, 100)
 
     def test_jsd_perfect_int64_and_float64(self):
         synthetic = pd.DataFrame({
@@ -56,8 +56,8 @@ def test_jsd_perfect_int64_and_float64(self):
             'feature1': [1, 2, 1, 2, 3],
             'feature2': [0.1, 0.2, 0.3, 0.4, 0.5]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(jsd, 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(distribution, 100)
 
     def test_jsd_different_col_types(self):
         synthetic = pd.DataFrame({
@@ -69,7 +69,7 @@ def test_jsd_different_col_types(self):
             'feature1': [1.2, 2.1, 1.1, 2.1, 3.1],
             'feature2': [1, 2, 3, 4, 5]
         })
-        jsd = syndat.quality.jsd(real, synthetic, score=False)
+        distribution = syndat.scores.distribution(real, synthetic, score=False)
 
     def test_jsd_negative_int64(self):
         synthetic = pd.DataFrame({
@@ -81,7 +81,7 @@ def test_jsd_negative_int64(self):
             'feature1': [-1, 2, 3, 4, 5],
             'feature2': [1, 2, 3, 4, 5]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
+        distribution = syndat.scores.distribution(real, synthetic)
 
     def test_jsd_single_outlier(self):
         synthetic = pd.DataFrame({
@@ -93,8 +93,8 @@ def test_jsd_single_outlier(self):
             'feature1': [1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9],
             'feature2': [1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 100],
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertTrue(jsd < 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertTrue(distribution < 100)
 
     def test_jsd_categorical_equal(self):
         synthetic = pd.DataFrame({
@@ -106,8 +106,8 @@ def test_jsd_categorical_equal(self):
             'feature1': ['A', 'B', 'A', 'B', 'C'],
             'feature2': ['X', 'Y', 'Y', 'X', 'Z']
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(jsd, 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(distribution, 100)
 
     def test_jsd_categorical_different(self):
         synthetic = pd.DataFrame({
@@ -119,8 +119,8 @@ def test_jsd_categorical_different(self):
             'feature1': ['A', 'B', 'A', 'B', 'D'],
             'feature2': ['X', 'Y', 'Z', 'X', 'W']
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertTrue(jsd < 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertTrue(distribution < 100)
 
     def test_jsd_categorical_mixed(self):
         synthetic = pd.DataFrame({
@@ -132,8 +132,8 @@ def test_jsd_categorical_mixed(self):
             'feature1': ['A', 'B', 'C', 'F', 'G'],
             'feature2': [1.0, 2.0, 3.0, 6.0, 7.0]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertTrue(jsd < 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertTrue(distribution < 100)
 
     def test_jsd_categorical_with_numerical(self):
         synthetic = pd.DataFrame({
@@ -145,8 +145,8 @@ def test_jsd_categorical_with_numerical(self):
             'feature1': ['A', 'B', 'C', 'A', 'D'],
             'feature2': [1.0, 2.0, 3.0, 4.0, 6.0]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertTrue(jsd < 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertTrue(distribution < 100)
 
     def test_jsd_categorical_with_nan(self):
         synthetic = pd.DataFrame({
@@ -158,8 +158,8 @@ def test_jsd_categorical_with_nan(self):
             'feature1': ['A', 'B', 'C', 'D', None],
             'feature2': [1.0, 2.0, None, 4.0, 5.0]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertTrue(jsd < 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertTrue(distribution < 100)
 
     def test_jsd_categorical_all_nan(self):
         synthetic = pd.DataFrame({
@@ -171,5 +171,5 @@ def test_jsd_categorical_all_nan(self):
             'feature1': [None, None, None, None, None],
             'feature2': [None, None, None, None, None]
         })
-        jsd = syndat.quality.jsd(real, synthetic)
-        self.assertEqual(jsd, 100)
+        distribution = syndat.scores.distribution(real, synthetic)
+        self.assertEqual(distribution, 100)

From 178c152edfc87fce7d10323f0ee2dcb9ee941e1e Mon Sep 17 00:00:00 2001
From: TimAdams84 <tim-adams@gmx.net>
Date: Mon, 12 Aug 2024 16:22:56 +0200
Subject: [PATCH 8/8] fix: rename requirements

---
  requirements.txt  => requirements.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename  requirements.txt  => requirements.txt (100%)

diff --git a/ requirements.txt  b/requirements.txt
similarity index 100%
rename from  requirements.txt 
rename to requirements.txt