Skip to content

Commit

Permalink
Fix: Handle constant columns
Browse files Browse the repository at this point in the history
  • Loading branch information
tiadams committed Dec 16, 2024
1 parent ca19d1a commit 091c218
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion syndat/scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ def correlation(real: pd.DataFrame, synthetic: pd.DataFrame, score=True) -> floa
# Compute numerical correlation only
real_numerical = real_encoded.select_dtypes(include=[np.number])
synthetic_numerical = synthetic_encoded.select_dtypes(include=[np.number])
# Remove constant columns (zero variance)
constant_columns = real_numerical.columns[real_numerical.nunique() <= 1]
if len(constant_columns) > 0:
logger.warning(f'Removing constant columns {constant_columns} for correlation computation.')
real_numerical = real_numerical.drop(columns=constant_columns, errors="ignore")
synthetic_numerical = synthetic_numerical.drop(columns=constant_columns, errors="ignore")
# Compute correlation matrices
corr_real = real_numerical.corr()
corr_synthetic = synthetic_numerical.corr()
Expand All @@ -214,8 +220,11 @@ def correlation(real: pd.DataFrame, synthetic: pd.DataFrame, score=True) -> floa
if not corr_real.drop(columns=one_hot_encoded_columns).empty:
corr_real = corr_real.drop(columns=one_hot_encoded_columns)
corr_synthetic = corr_synthetic.drop(columns=one_hot_encoded_columns)
# ensure both matrices have the same columns
corr_synthetic = corr_synthetic[corr_real]
# now compute the norm of the difference between the correlation matrices
norm_diff = np.linalg.norm(corr_real - corr_synthetic)
corr_diff = corr_real - corr_synthetic
norm_diff = np.linalg.norm(corr_diff)
norm_real = np.linalg.norm(corr_real)
norm_quotient = norm_diff / norm_real
if score:
Expand Down

0 comments on commit 091c218

Please sign in to comment.