From d542a2beafa82819468e92a06c51f40d9a28a2b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 12:44:38 +0100 Subject: [PATCH 1/6] Implemented numpy backend for Multi Macenko normalizer --- torchstain/numpy/normalizers/multitarget.py | 117 ++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 torchstain/numpy/normalizers/multitarget.py diff --git a/torchstain/numpy/normalizers/multitarget.py b/torchstain/numpy/normalizers/multitarget.py new file mode 100644 index 0000000..c2add09 --- /dev/null +++ b/torchstain/numpy/normalizers/multitarget.py @@ -0,0 +1,117 @@ +import numpy as np + +class NumpyMultiMacenkoNormalizer: + def __init__(self, norm_mode="avg-post"): + self.norm_mode = norm_mode + self.HERef = np.array([[0.5626, 0.2159], + [0.7201, 0.8012], + [0.4062, 0.5581]]) + self.maxCRef = np.array([1.9705, 1.0308]) + + def __convert_rgb2od(self, I, Io, beta): + I = np.transpose(I, (1, 2, 0)) + OD = -np.log((I.reshape(-1, I.shape[-1]).astype(float) + 1) / Io) + ODhat = OD[~np.any(OD < beta, axis=1)] + return OD, ODhat + + def __find_phi_bounds(self, ODhat, eigvecs, alpha): + That = np.dot(ODhat, eigvecs) + phi = np.arctan2(That[:, 1], That[:, 0]) + + minPhi = np.percentile(phi, alpha) + maxPhi = np.percentile(phi, 100 - alpha) + + return minPhi, maxPhi + + def __find_HE_from_bounds(self, eigvecs, minPhi, maxPhi): + vMin = np.dot(eigvecs, [np.cos(minPhi), np.sin(minPhi)]).reshape(-1, 1) + vMax = np.dot(eigvecs, [np.cos(maxPhi), np.sin(maxPhi)]).reshape(-1, 1) + + HE = np.concatenate([vMin, vMax], axis=1) if vMin[0] > vMax[0] else np.concatenate([vMax, vMin], axis=1) + return HE + + def __find_HE(self, ODhat, eigvecs, alpha): + minPhi, maxPhi = self.__find_phi_bounds(ODhat, eigvecs, alpha) + return self.__find_HE_from_bounds(eigvecs, minPhi, maxPhi) + + def __find_concentration(self, OD, HE): + Y = OD.T + C, _, _, _ = np.linalg.lstsq(HE, Y, rcond=None) + return C + + def __compute_matrices_single(self, I, Io, alpha, beta): + OD, ODhat = self.__convert_rgb2od(I, Io, beta) + + cov_matrix = np.cov(ODhat.T) + eigvals, eigvecs = np.linalg.eigh(cov_matrix) + eigvecs = eigvecs[:, [1, 2]] + + HE = self.__find_HE(ODhat, eigvecs, alpha) + C = self.__find_concentration(OD, HE) + maxC = np.array([np.percentile(C[0, :], 99), np.percentile(C[1, :], 99)]) + + return HE, C, maxC + + def fit(self, Is, Io=240, alpha=1, beta=0.15): + if self.norm_mode == "avg-post": + HEs, _, maxCs = zip(*[self.__compute_matrices_single(I, Io, alpha, beta) for I in Is]) + + self.HERef = np.mean(HEs, axis=0) + self.maxCRef = np.mean(maxCs, axis=0) + elif self.norm_mode == "concat": + ODs, ODhats = zip(*[self.__convert_rgb2od(I, Io, beta) for I in Is]) + OD = np.vstack(ODs) + ODhat = np.vstack(ODhats) + + cov_matrix = np.cov(ODhat.T) + eigvals, eigvecs = np.linalg.eigh(cov_matrix) + eigvecs = eigvecs[:, [1, 2]] + + HE = self.__find_HE(ODhat, eigvecs, alpha) + C = self.__find_concentration(OD, HE) + maxCs = np.array([np.percentile(C[0, :], 99), np.percentile(C[1, :], 99)]) + + self.HERef = HE + self.maxCRef = maxCs + elif self.norm_mode == "avg-pre": + ODs, ODhats = zip(*[self.__convert_rgb2od(I, Io, beta) for I in Is]) + + covs = [np.cov(ODhat.T) for ODhat in ODhats] + eigvecs = np.mean([np.linalg.eigh(cov)[1][:, [1, 2]] for cov in covs], axis=0) + + OD = np.vstack(ODs) + ODhat = np.vstack(ODhats) + + HE = self.__find_HE(ODhat, eigvecs, alpha) + C = self.__find_concentration(OD, HE) + maxCs = np.array([np.percentile(C[0, :], 99), np.percentile(C[1, :], 99)]) + + self.HERef = HE + self.maxCRef = maxCs + elif self.norm_mode in ["fixed-single", "stochastic-single"]: + self.HERef, _, self.maxCRef = self.__compute_matrices_single(Is[0], Io, alpha, beta) + else: + raise ValueError("Unknown norm mode") + + def normalize(self, I, Io=240, alpha=1, beta=0.15, stains=True): + c, h, w = I.shape + + HE, C, maxC = self.__compute_matrices_single(I, Io, alpha, beta) + C = (self.maxCRef / maxC).reshape(-1, 1) * C + + Inorm = Io * np.exp(-np.dot(self.HERef, C)) + Inorm[Inorm > 255] = 255 + Inorm = np.transpose(Inorm, (1, 0)).reshape(h, w, c).astype(np.int32) + + H, E = None, None + + if stains: + H = Io * np.exp(-np.dot(self.HERef[:, 0].reshape(-1, 1), C[0, :].reshape(1, -1))) + H[H > 255] = 255 + H = np.transpose(H, (1, 0)).reshape(h, w, c).astype(np.int32) + + E = Io * np.exp(-np.dot(self.HERef[:, 1].reshape(-1, 1), C[1, :].reshape(1, -1))) + E[E > 255] = 255 + E = np.transpose(E, (1, 0)).reshape(h, w, c).astype(np.int32) + + return Inorm, H, E From c049cd762af6099b246a651e3ed7585740d5bc6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 12:44:56 +0100 Subject: [PATCH 2/6] Implemented working test for numpy vs torch multimacenko --- tests/test_torch.py | 37 ++++++++++++++++++++++++ torchstain/numpy/normalizers/__init__.py | 3 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/test_torch.py b/tests/test_torch.py index 68ea0c6..1b53909 100644 --- a/tests/test_torch.py +++ b/tests/test_torch.py @@ -122,3 +122,40 @@ def test_reinhard_torch(): # assess whether the normalized images are identical across backends np.testing.assert_almost_equal(result_numpy.flatten(), result_torch.flatten(), decimal=2, verbose=True) + + +def test_macenko_torch(): + size = 1024 + curr_file_path = os.path.dirname(os.path.realpath(__file__)) + target = cv2.resize(cv2.cvtColor(cv2.imread(os.path.join(curr_file_path, "../data/target.png")), cv2.COLOR_BGR2RGB), (size, size)) + to_transform = cv2.resize(cv2.cvtColor(cv2.imread(os.path.join(curr_file_path, "../data/source.png")), cv2.COLOR_BGR2RGB), (size, size)) + + # setup preprocessing and preprocess image to be normalized + T = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x * 255) + ]) + t_to_transform = T(to_transform) + target_transformed = T(target) + + # move channel to first + target_numpy = np.moveaxis(target, -1, 0) + to_transform_numpy = np.moveaxis(to_transform, -1, 0) + + # initialize normalizers for each backend and fit to target image + normalizer = torchstain.normalizers.MultiMacenkoNormalizer(backend='numpy') + normalizer.fit([target_numpy, target_numpy, target_numpy]) + + torch_normalizer = torchstain.normalizers.MultiMacenkoNormalizer(backend='torch') + torch_normalizer.fit([target_transformed, target_transformed, target_transformed]) + + # transform + result_numpy, _, _ = normalizer.normalize(I=to_transform_numpy, stains=True) + result_torch, _, _ = torch_normalizer.normalize(I=t_to_transform, stains=True) + + # convert to numpy and set dtype + result_numpy = result_numpy.astype("float32") / 255. + result_torch = result_torch.numpy().astype("float32") / 255. + + # assess whether the normalized images are identical across backends + np.testing.assert_almost_equal(result_numpy.flatten(), result_torch.flatten(), decimal=2, verbose=True) diff --git a/torchstain/numpy/normalizers/__init__.py b/torchstain/numpy/normalizers/__init__.py index d453cf1..accd103 100644 --- a/torchstain/numpy/normalizers/__init__.py +++ b/torchstain/numpy/normalizers/__init__.py @@ -1,2 +1,3 @@ from .macenko import NumpyMacenkoNormalizer -from .reinhard import NumpyReinhardNormalizer \ No newline at end of file +from .reinhard import NumpyReinhardNormalizer +from .multitarget import NumpyMultiMacenkoNormalizer From 141e4b98d36dcfba9451112a05c2440a32df0bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 13:15:18 +0100 Subject: [PATCH 3/6] Refactored --- tests/test_torch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_torch.py b/tests/test_torch.py index 1b53909..71c6b7f 100644 --- a/tests/test_torch.py +++ b/tests/test_torch.py @@ -11,6 +11,7 @@ def setup_function(fn): print("torch version:", torch.__version__, "torchvision version:", torchvision.__version__) + def test_cov(): x = np.random.randn(10, 10) cov_np = np.cov(x) @@ -18,6 +19,7 @@ def test_cov(): np.testing.assert_almost_equal(cov_np, cov_t.numpy()) + def test_percentile(): x = np.random.randn(10, 10) p = 20 @@ -26,6 +28,7 @@ def test_percentile(): np.testing.assert_almost_equal(p_np, p_t) + def test_macenko_torch(): size = 1024 curr_file_path = os.path.dirname(os.path.realpath(__file__)) @@ -57,6 +60,7 @@ def test_macenko_torch(): # assess whether the normalized images are identical across backends np.testing.assert_almost_equal(result_numpy.flatten(), result_torch.flatten(), decimal=2, verbose=True) + def test_multitarget_macenko_torch(): size = 1024 curr_file_path = os.path.dirname(os.path.realpath(__file__)) From 17127a50bb2da70e7f8c468463f0c4e65c5d7fab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 13:15:30 +0100 Subject: [PATCH 4/6] Implemented TensorFlowMultiMacenkoNormalizer --- torchstain/base/normalizers/multitarget.py | 6 +- torchstain/tf/normalizers/__init__.py | 1 + torchstain/tf/normalizers/multitarget.py | 124 +++++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 torchstain/tf/normalizers/multitarget.py diff --git a/torchstain/base/normalizers/multitarget.py b/torchstain/base/normalizers/multitarget.py index 495569b..7c5d03d 100644 --- a/torchstain/base/normalizers/multitarget.py +++ b/torchstain/base/normalizers/multitarget.py @@ -1,10 +1,12 @@ def MultiMacenkoNormalizer(backend="torch", **kwargs): if backend == "numpy": - raise NotImplementedError("MultiMacenkoNormalizer is not implemented for NumPy backend") + from torchstain.numpy.normalizers import NumpyMultiMacenkoNormalizer + return NumpyMultiMacenkoNormalizer(**kwargs) elif backend == "torch": from torchstain.torch.normalizers import TorchMultiMacenkoNormalizer return TorchMultiMacenkoNormalizer(**kwargs) elif backend == "tensorflow": - raise NotImplementedError("MultiMacenkoNormalizer is not implemented for TensorFlow backend") + from torchstain.tf.normalizers import TensorFlowMultiMacenkoNormalizer + return TensorFlowMultiMacenkoNormalizer(**kwargs) else: raise Exception(f"Unsupported backend {backend}") diff --git a/torchstain/tf/normalizers/__init__.py b/torchstain/tf/normalizers/__init__.py index fb0718f..be51267 100644 --- a/torchstain/tf/normalizers/__init__.py +++ b/torchstain/tf/normalizers/__init__.py @@ -1,2 +1,3 @@ from torchstain.tf.normalizers.macenko import TensorFlowMacenkoNormalizer from torchstain.tf.normalizers.reinhard import TensorFlowReinhardNormalizer +from torchstain.tf.normalizers.multitarget import TensorFlowMultiMacenkoNormalizer diff --git a/torchstain/tf/normalizers/multitarget.py b/torchstain/tf/normalizers/multitarget.py new file mode 100644 index 0000000..747a7ff --- /dev/null +++ b/torchstain/tf/normalizers/multitarget.py @@ -0,0 +1,124 @@ +import tensorflow as tf +from torchstain.tf.utils import cov, percentile, solveLS + +class TensorFlowMultiMacenkoNormalizer: + def __init__(self, norm_mode="avg-post"): + self.norm_mode = norm_mode + self.HERef = tf.constant([[0.5626, 0.2159], + [0.7201, 0.8012], + [0.4062, 0.5581]], dtype=tf.float32) + self.maxCRef = tf.constant([1.9705, 1.0308], dtype=tf.float32) + + def __convert_rgb2od(self, I, Io, beta): + I = tf.transpose(I, perm=[1, 2, 0]) # Shape: (height, width, 3) + OD = -tf.math.log((tf.reshape(I, [-1, tf.shape(I)[-1]]) + 1) / Io) + ODhat = tf.boolean_mask(OD, ~tf.reduce_any(OD < beta, axis=1)) + + if tf.size(ODhat) == 0: + raise ValueError("ODhat is empty. Check image values and beta threshold.") + + return OD, ODhat + + def __find_phi_bounds(self, ODhat, eigvecs, alpha): + That = tf.matmul(ODhat, eigvecs) + phi = tf.math.atan2(That[:, 1], That[:, 0]) + + minPhi = percentile(phi, alpha) + maxPhi = percentile(phi, 100 - alpha) + return minPhi, maxPhi + + def __find_HE_from_bounds(self, eigvecs, minPhi, maxPhi): + # Expand minPhi and maxPhi to have a second dimension + vMin = tf.matmul(eigvecs, tf.stack([tf.cos(minPhi), tf.sin(minPhi)])[:, tf.newaxis]) + vMax = tf.matmul(eigvecs, tf.stack([tf.cos(maxPhi), tf.sin(maxPhi)])[:, tf.newaxis]) + + # Concatenate along the last dimension and return + HE = tf.where(vMin[0] > vMax[0], + tf.concat([vMin, vMax], axis=1), + tf.concat([vMax, vMin], axis=1)) + return HE + + def __find_HE(self, ODhat, eigvecs, alpha): + minPhi, maxPhi = self.__find_phi_bounds(ODhat, eigvecs, alpha) + return self.__find_HE_from_bounds(eigvecs, minPhi, maxPhi) + + def __find_concentration(self, OD, HE): + # Solve linear system using the provided solveLS function + Y = tf.transpose(OD) + C = solveLS(HE, Y) + return C + + def __compute_matrices_single(self, I, Io, alpha, beta): + OD, ODhat = self.__convert_rgb2od(I, Io, beta) + + cov_matrix = cov(tf.transpose(ODhat)) # cov expects shape (dims, samples) + eigvals, eigvecs = tf.linalg.eigh(cov_matrix) + eigvecs = tf.gather(eigvecs, [1, 2], axis=1) + + HE = self.__find_HE(ODhat, eigvecs, alpha) + C = self.__find_concentration(OD, HE) + maxC = tf.stack([percentile(C[0, :], 99), percentile(C[1, :], 99)]) + return HE, C, maxC + + def fit(self, Is, Io=240, alpha=1, beta=0.15): + if not isinstance(Is, list) or len(Is) == 0: + raise ValueError("Input images should be a non-empty list of tensors.") + + for i, I in enumerate(Is): + if not isinstance(I, tf.Tensor): + raise ValueError(f"Image at index {i} is not a TensorFlow tensor.") + if I.ndim != 3 or I.shape[0] != 3: + raise ValueError(f"Image at index {i} should have shape (3, height, width).") + + if self.norm_mode == "avg-post": + HEs, _, maxCs = zip(*[self.__compute_matrices_single(I, Io, alpha, beta) for I in Is]) + self.HERef = tf.reduce_mean(tf.stack(HEs), axis=0) + self.maxCRef = tf.reduce_mean(tf.stack(maxCs), axis=0) + elif self.norm_mode == "concat": + ODs, ODhats = zip(*[self.__convert_rgb2od(I, Io, beta) for I in Is]) + OD = tf.concat(ODs, axis=0) + ODhat = tf.concat(ODhats, axis=0) + + cov_matrix = cov(tf.transpose(ODhat)) # cov expects shape (dims, samples) + eigvals, eigvecs = tf.linalg.eigh(cov_matrix) + eigvecs = tf.gather(eigvecs, [1, 2], axis=1) + + HE = self.__find_HE(ODhat, eigvecs, alpha) + C = self.__find_concentration(OD, HE) + maxCs = tf.stack([percentile(C[0, :], 99), percentile(C[1, :], 99)]) + + self.HERef = HE + self.maxCRef = maxCs + else: + raise ValueError("Unsupported normalization mode.") + + def normalize(self, I, Io=240, alpha=1, beta=0.15, stains=True): + c, h, w = I.shape + + HE, C, maxC = self.__compute_matrices_single(I, Io, alpha, beta) + + # Ensure maxCRef and maxC are broadcastable + scaling_factors = (self.maxCRef / maxC)[:, tf.newaxis] # Shape: [2, 1] + C = scaling_factors * C[:2, :] # Use only the first two rows of C + + # Reconstruct the normalized image + Inorm = Io * tf.exp(-tf.linalg.matmul(self.HERef, C)) + Inorm = tf.clip_by_value(Inorm, 0, 255) + Inorm = tf.transpose(tf.reshape(Inorm, [c, h, w]), perm=[1, 2, 0]) # Convert back to HWC format + Inorm = tf.cast(Inorm, tf.int32) + + H, E = None, None + + if stains: + # Extract the H and E components + H = Io * tf.exp(-tf.linalg.matmul(self.HERef[:, 0:1], C[0:1, :])) + H = tf.clip_by_value(H, 0, 255) + H = tf.transpose(tf.reshape(H, [c, h, w]), perm=[1, 2, 0]) + H = tf.cast(H, tf.int32) + + E = Io * tf.exp(-tf.linalg.matmul(self.HERef[:, 1:2], C[1:2, :])) + E = tf.clip_by_value(E, 0, 255) + E = tf.transpose(tf.reshape(E, [c, h, w]), perm=[1, 2, 0]) + E = tf.cast(E, tf.int32) + + return Inorm, H, E \ No newline at end of file From 8fbbe7909d42b16b385a9887dcc424139e5bc4f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 13:15:53 +0100 Subject: [PATCH 5/6] Added test for multimacenko tf vs numpy --- tests/test_tf.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_tf.py b/tests/test_tf.py index 8edb65a..6ad8635 100644 --- a/tests/test_tf.py +++ b/tests/test_tf.py @@ -5,6 +5,7 @@ import tensorflow as tf import numpy as np + def test_cov(): x = np.random.randn(10, 10) cov_np = np.cov(x) @@ -12,6 +13,7 @@ def test_cov(): np.testing.assert_almost_equal(cov_np, cov_t.numpy()) + def test_percentile(): x = np.random.randn(10, 10) p = 20 @@ -20,6 +22,7 @@ def test_percentile(): np.testing.assert_almost_equal(p_np, p_t) + def test_macenko_tf(): size = 1024 curr_file_path = os.path.dirname(os.path.realpath(__file__)) @@ -48,6 +51,7 @@ def test_macenko_tf(): # assess whether the normalized images are identical across backends np.testing.assert_almost_equal(result_numpy.flatten(), result_tf.flatten(), decimal=2, verbose=True) + def test_reinhard_tf(): size = 1024 curr_file_path = os.path.dirname(os.path.realpath(__file__)) @@ -75,3 +79,37 @@ def test_reinhard_tf(): # assess whether the normalized images are identical across backends np.testing.assert_almost_equal(result_numpy.flatten(), result_tf.flatten(), decimal=2, verbose=True) + + +def test_multistain_tf(): + size = 1024 + curr_file_path = os.path.dirname(os.path.realpath(__file__)) + target = cv2.resize(cv2.cvtColor(cv2.imread(os.path.join(curr_file_path, "../data/target.png")), cv2.COLOR_BGR2RGB), (size, size)) + to_transform = cv2.resize(cv2.cvtColor(cv2.imread(os.path.join(curr_file_path, "../data/source.png")), cv2.COLOR_BGR2RGB), (size, size)) + + # setup preprocessing and preprocess image to be normalized + T = lambda x: tf.convert_to_tensor(np.moveaxis(x, -1, 0).astype("float32")) # * 255 + t_to_transform = T(to_transform) + target_transformed = T(target) + + # move channel to first + target_numpy = np.moveaxis(target, -1, 0) + to_transform_numpy = np.moveaxis(to_transform, -1, 0) + + # initialize normalizers for each backend and fit to target image + normalizer = torchstain.normalizers.MultiMacenkoNormalizer(backend='numpy') + normalizer.fit([target_numpy, target_numpy, target_numpy]) + + tf_normalizer = torchstain.normalizers.MultiMacenkoNormalizer(backend='tensorflow') + tf_normalizer.fit([target_transformed, target_transformed, target_transformed]) + + # transform + result_numpy, _, _ = normalizer.normalize(I=to_transform_numpy, stains=True) + result_tf, _, _ = tf_normalizer.normalize(I=t_to_transform, stains=True) + + # convert to numpy and set dtype + result_numpy = result_numpy.astype("float32") / 255. + result_tf = result_tf.numpy().astype("float32") / 255. + + # assess whether the normalized images are identical across backends + np.testing.assert_almost_equal(result_numpy.flatten(), result_tf.flatten(), decimal=2, verbose=True) From fa23e6dce0c385dc5c5f31ff257a5af284520c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Pedersen?= Date: Tue, 14 Jan 2025 13:20:10 +0100 Subject: [PATCH 6/6] Updated README on backend support for MultiMacenko --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3e6dec7..9d5b7d2 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ norm, H, E = normalizer.normalize(I=t_to_transform, stains=True) | Macenko | ✓ | ✓ | ✓ | | Reinhard | ✓ | ✓ | ✓ | | Modified Reinhard | ✓ | ✓ | ✓ | -| Multi-target Macenko | ✗ | ✓ | ✗ | +| Multi-target Macenko | ✓ | ✓ | ✓ | | Macenko-Aug | ✓ | ✓ | ✓ | ## Backend comparison