From a5d0de34c6996849d7b1ad511db5bc8543c3e89d Mon Sep 17 00:00:00 2001 From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:52:56 -0800 Subject: [PATCH] Package capture harvest (#1228) * Initial commit for UI debug * Add 2 sec timer for module polling (in fast harvest) * Remove Plugin List tests * Only send loaded modules during shutdown * Add plugin test * Add plugin collection to slow harvest * Fix logic errors for conditional send * Add update_loaded_module to list of methods * Disable flaskmaster-py38 and pin coverage-py38 * Add py37 coverage version * Revert coverage version * Add more info for disabled test * Address reviewer comments * Add ignore flag for linter error * [Mega-Linter] Apply linters fixes * Trigger tests * Remove commented-out import --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: lrafeei --- newrelic/common/agent_http.py | 12 ++- newrelic/core/application.py | 46 +++++++++- newrelic/core/data_collector.py | 8 +- newrelic/core/environment.py | 87 +++++++++---------- .../test_connect_response_fields.py | 1 + tests/agent_unittests/test_environment.py | 58 +++---------- 6 files changed, 111 insertions(+), 101 deletions(-) diff --git a/newrelic/common/agent_http.py b/newrelic/common/agent_http.py index eb5b210d33..33c7a6297c 100644 --- a/newrelic/common/agent_http.py +++ b/newrelic/common/agent_http.py @@ -35,7 +35,7 @@ from ssl import get_default_verify_paths except ImportError: - class _DEFAULT_CERT_PATH(): + class _DEFAULT_CERT_PATH: cafile = None capath = None @@ -73,7 +73,7 @@ def _urllib3_ssl_recursion_workaround(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) -class BaseClient(): +class BaseClient: AUDIT_LOG_ID = 0 def __init__( @@ -122,7 +122,10 @@ def log_request(cls, fp, method, url, params, payload, headers, body=None, compr # Obfuscate license key from headers and URL params if headers: - headers = {k: obfuscate_license_key(v) if k.lower() in HEADER_AUDIT_LOGGING_DENYLIST else v for k, v in headers.items()} + headers = { + k: obfuscate_license_key(v) if k.lower() in HEADER_AUDIT_LOGGING_DENYLIST else v + for k, v in headers.items() + } if params and "license_key" in params: params = params.copy() @@ -517,7 +520,7 @@ def __init__( ) -class SupportabilityMixin(): +class SupportabilityMixin: @staticmethod def _supportability_request(params, payload, body, compression_time): # ********* @@ -606,6 +609,7 @@ class DeveloperModeClient(SupportabilityMixin, BaseClient): "span_event_data": None, "custom_event_data": None, "log_event_data": None, + "update_loaded_modules": ["Jars", [[" ", " ", {}]]], "shutdown": [], } diff --git a/newrelic/core/application.py b/newrelic/core/application.py index 4a5632f807..417e46cad5 100644 --- a/newrelic/core/application.py +++ b/newrelic/core/application.py @@ -31,7 +31,7 @@ from newrelic.core.custom_event import create_custom_event from newrelic.core.data_collector import create_session from newrelic.core.database_utils import SQLConnections -from newrelic.core.environment import environment_settings +from newrelic.core.environment import environment_settings, plugins from newrelic.core.internal_metrics import ( InternalTrace, InternalTraceContext, @@ -52,9 +52,10 @@ _logger = logging.getLogger(__name__) +MAX_PACKAGE_CAPTURE_TIME_PER_SLOW_HARVEST = 2.0 -class Application(): +class Application: """Class which maintains recorded data for a single application.""" def __init__(self, app_name, linked_applications=None): @@ -107,6 +108,8 @@ def __init__(self, app_name, linked_applications=None): self._data_samplers_lock = threading.Lock() self._data_samplers_started = False + self._remaining_plugins = True + # We setup empty rules engines here even though they will be # replaced when application first registered. This is done to # avoid a race condition in setting it later. Otherwise we have @@ -120,6 +123,7 @@ def __init__(self, app_name, linked_applications=None): } self._data_samplers = [] + self.modules = [] # Thread profiler and state of whether active or not. @@ -129,6 +133,8 @@ def __init__(self, app_name, linked_applications=None): self.profile_manager = profile_session_manager() + self.plugins = plugins() # initialize the generator + self._uninstrumented = [] @property @@ -1238,6 +1244,28 @@ def harvest(self, shutdown=False, flexible=False): data_sampler.name, ) + # Send environment plugin list + + stopwatch_start = time.time() + while ( + configuration + and configuration.package_reporting.enabled + and self._remaining_plugins + and ((time.time() - stopwatch_start) < MAX_PACKAGE_CAPTURE_TIME_PER_SLOW_HARVEST) + ): + try: + module_info = next(self.plugins) + self.modules.append(module_info) + except StopIteration: + self._remaining_plugins = False + + # Send the accumulated environment plugin list if not empty + if self.modules: + self._active_session.send_loaded_modules(self.modules) + + # Reset the modules list every harvest cycle + self.modules = [] + # Add a metric we can use to track how many harvest # periods have occurred. @@ -1671,6 +1699,20 @@ def internal_agent_shutdown(self, restart=False): self.stop_data_samplers() + # Finishes collecting environment plugin information + # if this has not been completed during harvest + # lifetime of the application + + while self.configuration and self.configuration.package_reporting.enabled and self._remaining_plugins: + try: + module_info = next(self.plugins) + self.modules.append(module_info) + except StopIteration: + self._remaining_plugins = False + if self.modules: + self._active_session.send_loaded_modules(self.modules) + self.modules = [] + # Now shutdown the actual agent session. try: diff --git a/newrelic/core/data_collector.py b/newrelic/core/data_collector.py index 2d312bc0d5..3b2f8af831 100644 --- a/newrelic/core/data_collector.py +++ b/newrelic/core/data_collector.py @@ -35,7 +35,7 @@ _logger = logging.getLogger(__name__) -class Session(): +class Session: PROTOCOL = AgentProtocol OTLP_PROTOCOL = OtlpProtocol CLIENT = ApplicationModeClient @@ -209,6 +209,12 @@ def send_profile_data(self, profile_data): payload = (self.agent_run_id, profile_data) return self._protocol.send("profile_data", payload) + def send_loaded_modules(self, environment_info): + """Called to submit loaded modules.""" + + payload = ("Jars", environment_info) + return self._protocol.send("update_loaded_modules", payload) + def shutdown_session(self): """Called to perform orderly deregistration of agent run against the data collector, rather than simply dropping the connection and diff --git a/newrelic/core/environment.py b/newrelic/core/environment.py index 1ac78b3ecb..dcf91f5e8f 100644 --- a/newrelic/core/environment.py +++ b/newrelic/core/environment.py @@ -29,7 +29,6 @@ physical_processor_count, total_physical_memory, ) -from newrelic.core.config import global_settings from newrelic.packages.isort import stdlibs as isort_stdlibs try: @@ -198,58 +197,54 @@ def environment_settings(): env.extend(dispatcher) + return env + + +def plugins(): # Module information. stdlib_builtin_module_names = _get_stdlib_builtin_module_names() - plugins = [] - - settings = global_settings() - if settings and settings.package_reporting.enabled: - # Using any iterable to create a snapshot of sys.modules can occassionally - # fail in a rare case when modules are imported in parallel by different - # threads. - # - # TL;DR: Do NOT use an iterable on the original sys.modules to generate the - # list - for name, module in sys.modules.copy().items(): - # Exclude lib.sub_paths as independent modules except for newrelic.hooks. - nr_hook = name.startswith("newrelic.hooks.") - if "." in name and not nr_hook or name.startswith("_"): + # Using any iterable to create a snapshot of sys.modules can occassionally + # fail in a rare case when modules are imported in parallel by different + # threads. + # + # TL;DR: Do NOT use an iterable on the original sys.modules to generate the + # list + for name, module in sys.modules.copy().items(): + # Exclude lib.sub_paths as independent modules except for newrelic.hooks. + nr_hook = name.startswith("newrelic.hooks.") + if "." in name and not nr_hook or name.startswith("_"): + continue + + # If the module isn't actually loaded (such as failed relative imports + # in Python 2.7), the module will be None and should not be reported. + try: + if not module: continue - - # If the module isn't actually loaded (such as failed relative imports - # in Python 2.7), the module will be None and should not be reported. + except Exception: # nosec B112 + # if the application uses generalimport to manage optional depedencies, + # it's possible that generalimport.MissingOptionalDependency is raised. + # In this case, we should not report the module as it is not actually loaded and + # is not a runtime dependency of the application. + # + continue + + # Exclude standard library/built-in modules. + if name in stdlib_builtin_module_names: + continue + + # Don't attempt to look up version information for our hooks + version = None + if not nr_hook: try: - if not module: - continue + version = get_package_version(name) except Exception: - # if the application uses generalimport to manage optional depedencies, - # it's possible that generalimport.MissingOptionalDependency is raised. - # In this case, we should not report the module as it is not actually loaded and - # is not a runtime dependency of the application. - # - continue - - # Exclude standard library/built-in modules. - if name in stdlib_builtin_module_names: - continue - - # Don't attempt to look up version information for our hooks - version = None - if not nr_hook: - try: - version = get_package_version(name) - except Exception: - pass - - # If it has no version it's likely not a real package so don't report it unless - # it's a new relic hook. - if nr_hook or version: - plugins.append(f"{name} ({version})") - - env.append(("Plugin List", plugins)) + pass - return env + # If it has no version it's likely not a real package so don't report it unless + # it's a new relic hook. + if nr_hook or version: + yield [name, version, {}] if version else [name, " ", {}] def _get_stdlib_builtin_module_names(): diff --git a/tests/agent_unittests/test_connect_response_fields.py b/tests/agent_unittests/test_connect_response_fields.py index 452be53f00..bccd433bea 100644 --- a/tests/agent_unittests/test_connect_response_fields.py +++ b/tests/agent_unittests/test_connect_response_fields.py @@ -46,6 +46,7 @@ ("send_agent_command_results", ({0: {}},)), ("agent_settings", ({},)), ("send_span_events", (EMPTY_SAMPLES, ())), + ("update_loaded_modules", ("Jars", [[" ", " ", {}]])), ("shutdown_session", ()), ) diff --git a/tests/agent_unittests/test_environment.py b/tests/agent_unittests/test_environment.py index 22a102cd14..21cfb1c707 100644 --- a/tests/agent_unittests/test_environment.py +++ b/tests/agent_unittests/test_environment.py @@ -15,16 +15,15 @@ import sys import pytest -from testing_support.fixtures import override_generic_settings from newrelic.core.config import global_settings -from newrelic.core.environment import environment_settings +from newrelic.core.environment import environment_settings, plugins settings = global_settings() def module(version): - class Module(): + class Module: pass if version: @@ -35,40 +34,17 @@ class Module(): def test_plugin_list(): # Let's pretend we fired an import hook - import newrelic.hooks.adapter_gunicorn # noqa: F401 + import pytest # noqa: F401 - environment_info = environment_settings() - - for key, plugin_list in environment_info: - if key == "Plugin List": - break - else: - assert False, "'Plugin List' not found" - - # Check that bogus plugins don't get reported - assert "newrelic.hooks.newrelic" not in plugin_list - # Check that plugin that should get reported has version info. - assert f"pytest ({pytest.__version__})" in plugin_list - - -@override_generic_settings(settings, {"package_reporting.enabled": False}) -def test_plugin_list_when_package_reporting_disabled(): - # Let's pretend we fired an import hook - import newrelic.hooks.adapter_gunicorn # noqa: F401 - - environment_info = environment_settings() + for name, version, _ in plugins(): + if name == "newrelic.hooks.newrelic": + assert False, "Bogus plugin found" + if name == "pytest": + # Check that plugin that should get reported has version info. + assert version == pytest.__version__ - for key, plugin_list in environment_info: - if key == "Plugin List": - break - else: - assert False, "'Plugin List' not found" - # Check that bogus plugins don't get reported - assert plugin_list == [] - - -class NoIteratorDict(): +class NoIteratorDict: def __init__(self, d): self.d = d @@ -85,20 +61,6 @@ def __contains__(self, *args, **kwargs): return self.d.__contains__(*args, **kwargs) -def test_plugin_list_uses_no_sys_modules_iterator(monkeypatch): - modules = NoIteratorDict(sys.modules) - monkeypatch.setattr(sys, "modules", modules) - - # If environment_settings iterates over sys.modules, an attribute error will be generated - environment_info = environment_settings() - - for key, plugin_list in environment_info: - if key == "Plugin List": - break - else: - assert False, "'Plugin List' not found" - - @pytest.mark.parametrize( "loaded_modules,dispatcher,dispatcher_version,worker_version", (