From 1c2319fd0bda1f4d28f7d7d91f54b1f76625d11b Mon Sep 17 00:00:00 2001
From: Walerian <wsobczak@indeed.com>
Date: Mon, 12 Dec 2022 16:40:01 +0900
Subject: [PATCH] Normalize namespace and metric names.

According to the documentation
(https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics),
metric names may contain only ASCII alphanumerics, underscores, and
periods; any other character is converted to an underscore.

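This commit replaces every invalid character with an underscore in both
namespaces and metric names. As with the existing tag normalization, the
pattern relies on Unicode-aware `\w`, so non-ASCII letters and digits
are left untouched; only other characters (whitespace, punctuation,
symbols, emoji) are replaced.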

The same normalization was already applied to tags in #489 and #517,
so this change follows that example.

Fixes #740.
---
 datadog/dogstatsd/base.py           |  6 +++---
 datadog/util/format.py              | 10 ++++++++--
 tests/unit/dogstatsd/test_statsd.py |  4 ++++
 tests/unit/util/test_format.py      | 18 +++++++++++++++++-
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py
index 1bdb47f5d..aac71d85d 100644
--- a/datadog/dogstatsd/base.py
+++ b/datadog/dogstatsd/base.py
@@ -27,7 +27,7 @@
 from datadog.dogstatsd.route import get_default_route
 from datadog.dogstatsd.container import ContainerID
 from datadog.util.compat import is_p3k, text
-from datadog.util.format import normalize_tags
+from datadog.util.format import normalize_tags, normalize_metric_name
 from datadog.version import __version__
 
 # Logging
@@ -318,7 +318,7 @@ def __init__(
             constant_tags = []
         self.constant_tags = constant_tags + env_tags
         if namespace is not None:
-            namespace = text(namespace)
+            namespace = normalize_metric_name(text(namespace))
         self.namespace = namespace
         self.use_ms = use_ms
         self.default_sample_rate = default_sample_rate
@@ -763,7 +763,7 @@ def _serialize_metric(self, metric, metric_type, value, tags, sample_rate=1):
         # Create/format the metric packet
         return "%s%s:%s|%s%s%s%s" % (
             (self.namespace + ".") if self.namespace else "",
-            metric,
+            normalize_metric_name(metric),
             value,
             metric_type,
             ("|@" + text(sample_rate)) if sample_rate != 1 else "",
diff --git a/datadog/util/format.py b/datadog/util/format.py
index f6b1e96af..554ec2d71 100644
--- a/datadog/util/format.py
+++ b/datadog/util/format.py
@@ -10,7 +10,8 @@
 from datadog.util.compat import conditional_lru_cache
 
 TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]", re.UNICODE)
-TAG_INVALID_CHARS_SUBS = "_"
+METRIC_NAME_INVALID_CHARS_RE = re.compile(r"[^\w\d_\.]", re.UNICODE)
+INVALID_CHARS_SUBS = "_"
 
 
 def pretty_json(obj):
@@ -33,10 +34,15 @@ def force_to_epoch_seconds(epoch_sec_or_dt):
 
 @conditional_lru_cache
 def _normalize_tags_with_cache(tag_list):
-    return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
+    return [TAG_INVALID_CHARS_RE.sub(INVALID_CHARS_SUBS, tag) for tag in tag_list]
 
 
 def normalize_tags(tag_list):
     # We have to turn our input tag list into a non-mutable tuple for it to
     # be hashable (and thus usable) by the @lru_cache decorator.
     return _normalize_tags_with_cache(tuple(tag_list))
+
+
+@conditional_lru_cache
+def normalize_metric_name(metric_name):
+    return METRIC_NAME_INVALID_CHARS_RE.sub(INVALID_CHARS_SUBS, metric_name)
diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py
index 658804a19..48c4e32a1 100644
--- a/tests/unit/dogstatsd/test_statsd.py
+++ b/tests/unit/dogstatsd/test_statsd.py
@@ -294,6 +294,10 @@ def test_gauge(self):
         self.statsd.gauge('gauge', 123.4)
         self.assert_equal_telemetry('gauge:123.4|g\n', self.recv(2))
 
+    def test_gauge_with_unescaped_name(self):
+        self.statsd.gauge('my|new.p3rfect#_gauge', 123.4)
+        self.assert_equal_telemetry('my_new.p3rfect__gauge:123.4|g\n', self.recv(2))
+
     def test_counter(self):
         self.statsd.increment('page.views')
         self.statsd.flush()
diff --git a/tests/unit/util/test_format.py b/tests/unit/util/test_format.py
index dc4d6b62b..9ab5d9a1d 100644
--- a/tests/unit/util/test_format.py
+++ b/tests/unit/util/test_format.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from datadog.util.format import construct_url, normalize_tags
+from datadog.util.format import construct_url, normalize_tags, normalize_metric_name
 
 
 class TestConstructURL:
@@ -52,3 +52,19 @@ class TestNormalizeTags:
     @pytest.mark.parametrize("original_tags,expected_tags", test_data)
     def test_normalize_tags(self, original_tags, expected_tags):
             assert normalize_tags(original_tags) == expected_tags
+
+class TestNormalizeMetricName:
+    """
+    Test of the format's `normalize_metric_name` functionality
+    """
+    test_data = [
+        ('', ''),
+        ('just a metric name', 'just_a_metric_name'),
+        ('xyz.abc!@#$%^&*()0987654321{}}{', 'xyz.abc__________0987654321____'),
+        ('xyz.abc_123', 'xyz.abc_123'),
+        ('абśжż西アطر', 'абśжż西アطر'),
+        ('a😃😃b', 'a__b'),
+    ]
+    @pytest.mark.parametrize("original_metric_name,expected_metric_name", test_data)
+    def test_normalize_metric_name(self, original_metric_name, expected_metric_name):
+            assert normalize_metric_name(original_metric_name) == expected_metric_name