From 4adaf7ec80e5d524ea5468e35ef7e1570f5060c7 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 10 Jan 2025 16:17:17 -0500 Subject: [PATCH] Unit test coverage `crucible_svc.py` test coverage is now at 96%. While the remaining 4% is worth some effort later, subsequent ILAB PRs will change some of this code anyway and will require test adjustments so it's good enough for now. --- backend/app/services/crucible_svc.py | 87 +- backend/tests/fake_elastic.py | 91 ++- backend/tests/test_crucible.py | 1107 +++++++++++++++++++++++++- 3 files changed, 1191 insertions(+), 94 deletions(-) diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py index d27d63bd..3d20bd56 100644 --- a/backend/app/services/crucible_svc.py +++ b/backend/app/services/crucible_svc.py @@ -87,7 +87,7 @@ class Point: value: float -colors = [ +COLOR_NAMES = [ "black", "aqua", "blue", @@ -1104,10 +1104,12 @@ async def get_runs( s = self._normalize_date(start) results["startDate"] = datetime.fromtimestamp( s / 1000.0, tz=timezone.utc - ) + ).isoformat() if end: e = self._normalize_date(end) - results["endDate"] = datetime.fromtimestamp(e / 1000.0, tz=timezone.utc) + results["endDate"] = datetime.fromtimestamp( + e / 1000.0, tz=timezone.utc + ).isoformat() if s and e and s > e: raise HTTPException( @@ -1345,7 +1347,6 @@ async def get_samples( sample = s["sample"] sample["iteration"] = s["iteration"]["num"] sample["primary_metric"] = s["iteration"]["primary-metric"] - sample["status"] = s["iteration"]["status"] samples.append(sample) return samples @@ -1400,61 +1401,6 @@ async def get_periods( body.append(period) return body - async def get_timeline(self, run: str, **kwargs) -> dict[str, Any]: - """Report the relative timeline of a run - - With nested object lists, show runs to iterations to samples to - periods. 
- - Args: - run: run ID - kwargs: additional OpenSearch parameters - """ - itr = await self.search( - index="iteration", - filters=[{"term": {"run.id": run}}], - **kwargs, - ignore_unavailable=True, - ) - sam = await self.search( - index="sample", - filters=[{"term": {"run.id": run}}], - **kwargs, - ignore_unavailable=True, - ) - per = await self.search( - index="period", - filters=[{"term": {"run.id": run}}], - **kwargs, - ignore_unavailable=True, - ) - samples = defaultdict(list) - periods = defaultdict(list) - - for s in self._hits(sam): - samples[s["iteration"]["id"]].append(s) - for p in self._hits(per): - periods[p["sample"]["id"]].append(p) - - iterations = [] - robj = {"id": run, "iterations": iterations} - body = {"run": robj} - for i in self._hits(itr): - if "begin" not in robj: - robj["begin"] = self._format_timestamp(i["run"]["begin"]) - robj["end"] = self._format_timestamp(i["run"]["end"]) - iteration = i["iteration"] - iterations.append(iteration) - iteration["samples"] = [] - for s in samples.get(iteration["id"], []): - sample = s["sample"] - sample["periods"] = [] - for pr in periods.get(sample["id"], []): - period = self._format_period(pr["period"]) - sample["periods"].append(period) - iteration["samples"].append(sample) - return body - async def get_metrics_list(self, run: str, **kwargs) -> dict[str, Any]: """Return a list of metrics available for a run @@ -1494,12 +1440,14 @@ async def get_metrics_list(self, run: str, **kwargs) -> dict[str, Any]: if name in met: record = met[name] else: - record = {"periods": [], "breakouts": defaultdict(set)} + record = {"periods": [], "breakouts": defaultdict(list)} met[name] = record if "period" in h: record["periods"].append(h["period"]["id"]) for n, v in desc["names"].items(): - record["breakouts"][n].add(v) + # mimic a set, since the set type doesn't serialize + if v not in record["breakouts"][n]: + record["breakouts"][n].append(v) return met async def get_metric_breakouts( @@ -1555,8 +1503,8 @@ async def get_metric_breakouts( f"Metric name {metric_name} not found for run {run}", ) classes = set() - response = {"label": metric, "class": classes} - breakouts = defaultdict(set) + response = {"label": metric} + breakouts = defaultdict(list) pl = set() for m in self._hits(metrics): desc = m["metric_desc"] @@ -1567,11 +1515,13 @@ async def get_metric_breakouts( if "period" in m: pl.add(m["period"]["id"]) for n, v in desc["names"].items(): - breakouts[n].add(v) + if v not in breakouts[n]: + breakouts[n].append(v) # We want to help filter a consistent summary, so only show those # names with more than one value. if len(pl) > 1: - response["periods"] = pl + response["periods"] = sorted(pl) + response["class"] = sorted(classes) response["breakouts"] = {n: v for n, v in breakouts.items() if len(v) > 1} self.logger.info("Processing took %.3f seconds", time.time() - start) return response @@ -1634,6 +1584,9 @@ async def get_metrics_data( filters.extend(await self._build_timestamp_range_filters(periods)) response = [] + + # NOTE -- _get_metric_ids already failed if we found multiple IDs but + # aggregation wasn't specified. 
if len(ids) > 1: # Find the minimum sample interval of the selected metrics aggdur = await self.search( @@ -1961,9 +1914,9 @@ async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: if g.color: color = g.color else: - color = colors[cindex] + color = COLOR_NAMES[cindex] cindex += 1 - if cindex >= len(colors): + if cindex >= len(COLOR_NAMES): cindex = 0 graphitem = { "x": x, diff --git a/backend/tests/fake_elastic.py b/backend/tests/fake_elastic.py index bc222ef2..1b7425c0 100644 --- a/backend/tests/fake_elastic.py +++ b/backend/tests/fake_elastic.py @@ -1,3 +1,4 @@ +from collections import defaultdict from dataclasses import dataclass from typing import Any, Optional, Union @@ -8,10 +9,21 @@ class Request: index: str body: dict[str, Any] - doc_type: str - params: Any - headers: Any - kwargs: dict[str, Any] + doc_type: Optional[str] = None + params: Optional[Any] = None + headers: Optional[Any] = None + kwargs: Optional[dict[str, Any]] = None + + def __eq__(self, other) -> bool: + iok = self.index == other.index + bok = self.body == other.body + dok = self.doc_type == other.doc_type + pok = self.params == other.params + hok = self.headers == other.headers + + # make empty dict and None match + kok = (not self.kwargs and not other.kwargs) or self.kwargs == other.kwargs + return iok and bok and dok and pok and hok and kok class FakeAsyncElasticsearch(AsyncElasticsearch): @@ -30,7 +42,7 @@ def __init__(self, hosts: Union[str, list[str]], **kwargs): self.hosts = hosts self.args = kwargs self.closed = False - self.data = {} + self.data = defaultdict(list) self.requests = [] # Testing helpers to manage fake searches @@ -38,13 +50,29 @@ def set_query( self, root_index: str, hit_list: Optional[list[dict[str, Any]]] = None, - aggregation_list: Optional[dict[str, Any]] = None, + aggregations: Optional[dict[str, Any]] = None, version: int = 7, + repeat: int = 1, ): + """Add a canned response to an Opensearch query + + The overall response and items in the hit and aggregation lists will be + augmented with the usual boilerplate. + + Multiple returns for a single index can be queued, in order, via + successive calls. To return the same result on multiple calls, specify + a "repeat" value greater than 1. 
+ + Args: + root_index: CDM index name (run, period, etc) + hit_list: list of hit objects to be returned + aggregation_list: list of aggregation objects to return + version: CDM version + repeat: + """ ver = f"v{version:d}dev" index = f"cdm{ver}-{root_index}" hits = [] - aggregations = None if hit_list: for d in hit_list: source = d @@ -57,16 +85,18 @@ def set_query( "_source": source, } ) - if aggregation_list: - aggregations = { - k: { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": v, - } - for k, v in aggregation_list.items() - } - self.data[index] = { + aggregate_response = {} + if aggregations: + for agg, val in aggregations.items(): + if isinstance(val, list): + aggregate_response[agg] = { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": val, + } + else: + aggregate_response[agg] = val + response = { "took": 1, "timed_out": False, "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0}, @@ -76,8 +106,10 @@ def set_query( "hits": hits, }, } - if aggregations: - self.data[index]["aggregations"] = aggregations + if aggregate_response: + response["aggregations"] = aggregate_response + for c in range(repeat): + self.data[index].append(response) # Faked AsyncElasticsearch methods async def close(self): @@ -92,6 +124,22 @@ async def ping(self, **kwargs): async def search( self, body=None, index=None, doc_type=None, params=None, headers=None, **kwargs ): + """Return a canned response to a search query. + + Args: + body: query body + index: Opensearch index name + doc_type: document type (rarely used) + params: Opensearch search parameters (rarely used) + headers: HTTP headers (rarely used) + kwargs: whatever else you might pass to search + + Only the index is used here; to verify the correct Opensearch query + bodies and parameters, the full request is recorded for inspection. 
+ + Return: + A JSON dict with the first canned result for the index, or an error + """ self.requests.append( Request( index=index, @@ -102,9 +150,8 @@ async def search( kwargs=kwargs, ) ) - if index in self.data: - target = self.data[index] - del self.data[index] + if index in self.data and len(self.data[index]) > 0: + target = self.data[index].pop(0) return target return { "error": { diff --git a/backend/tests/test_crucible.py b/backend/tests/test_crucible.py index 01e00172..953f2954 100644 --- a/backend/tests/test_crucible.py +++ b/backend/tests/test_crucible.py @@ -1,4 +1,6 @@ -from datetime import datetime +from collections import defaultdict +from datetime import datetime, timezone +import json from elasticsearch import AsyncElasticsearch from fastapi import HTTPException @@ -6,7 +8,13 @@ from vyper import Vyper import app.config -from app.services.crucible_svc import CommonParams, CrucibleService, Parser +from app.services.crucible_svc import ( + CommonParams, + CrucibleService, + Graph, + GraphList, + Parser, +) from tests.fake_elastic import FakeAsyncElasticsearch, Request @@ -664,7 +672,7 @@ async def test_run_filters(self, fake_crucible): fake_crucible.elastic.set_query( "tag", - aggregation_list={ + aggregations={ "key": [ { "key": "topology", @@ -704,7 +712,7 @@ async def test_run_filters(self, fake_crucible): ) fake_crucible.elastic.set_query( "param", - aggregation_list={ + aggregations={ "key": [ { "key": "bucket", @@ -720,7 +728,7 @@ async def test_run_filters(self, fake_crucible): ) fake_crucible.elastic.set_query( "run", - aggregation_list={ + aggregations={ "begin": [{"key": 123456789, "doc_count": 1}], "benchmark": [{"key": "ilab", "doc_count": 25}], "desc": [], @@ -766,6 +774,162 @@ async def test_get_run_ids(self, fake_crucible: CrucibleService): "period", [{"term": {"period.name": "measurement"}}] ) + async def test_get_runs_none(self, fake_crucible: CrucibleService): + """Test run summary""" + fake_crucible.elastic.set_query("run", []) + fake_crucible.elastic.set_query("iteration", []) + fake_crucible.elastic.set_query("tag", []) + fake_crucible.elastic.set_query("param", []) + assert { + "count": 0, + "offset": 0, + "results": [], + "sort": [], + "total": 0, + } == await fake_crucible.get_runs() + + @pytest.mark.parametrize( + "args", + ( + {}, + {"size": 2, "offset": 1}, + {"start": "2024-01-01"}, + {"end": "2024-02-01"}, + {"start": "2024-01-01", "end": "2025-01-01"}, + {"sort": ["end:desc"]}, + {"filter": ["tag:a=42", "param:z=xyzzy", "run:benchmark=test"]}, + ), + ) + async def test_get_runs_simple(self, args, fake_crucible: CrucibleService): + """Test run summary""" + fake_crucible.elastic.set_query( + "run", + [{"run": {"id": "r1", "begin": "0", "end": "5000", "benchmark": "test"}}], + ) + fake_crucible.elastic.set_query( + "iteration", + [ + { + "run": {"id": "r1"}, + "iteration": { + "id": "i1", + "num": 1, + "primary-period": "tp", + "primary-metric": "src::tst1", + "status": "fail", + }, + }, + { + "run": {"id": "r1"}, + "iteration": { + "id": "i2", + "num": 2, + "primary-period": "tp", + "primary-metric": "src::tst2", + "status": "pass", + }, + }, + ], + ) + fake_crucible.elastic.set_query( + "tag", [{"run": {"id": "r1"}, "tag": {"name": "a", "val": 42}}], repeat=2 + ) + fake_crucible.elastic.set_query( + "param", + [ + { + "run": {"id": "r1"}, + "iteration": {"id": "i1"}, + "param": {"arg": "b", "val": "cde"}, + }, + { + "run": {"id": "r1"}, + "iteration": {"id": "i1"}, + "param": {"arg": "z", "val": "xyzzy"}, + }, + { + "run": {"id": "r1"}, + 
"iteration": {"id": "i2"}, + "param": {"arg": "b", "val": "cde"}, + }, + { + "run": {"id": "r1"}, + "iteration": {"id": "i2"}, + "param": {"arg": "x", "val": "plugh"}, + }, + ], + repeat=2, + ) + expected = { + "count": 1, + "offset": 0, + "results": [ + { + "begin": "0", + "begin_date": "1970-01-01 00:00:00+00:00", + "benchmark": "test", + "end": "5000", + "end_date": "1970-01-01 00:00:05+00:00", + "id": "r1", + "iterations": [ + { + "iteration": 1, + "params": defaultdict( + None, + { + "b": "cde", + "z": "xyzzy", + }, + ), + "primary_metric": "src::tst1", + "primary_period": "tp", + "status": "fail", + }, + { + "iteration": 2, + "params": defaultdict( + None, + { + "b": "cde", + "x": "plugh", + }, + ), + "primary_metric": "src::tst2", + "primary_period": "tp", + "status": "pass", + }, + ], + "params": { + "b": "cde", + }, + "primary_metrics": {"src::tst1", "src::tst2"}, + "status": "fail", + "tags": defaultdict(None, {"a": 42}), + }, + ], + "sort": [], + "total": 1, + } + if "size" in args: + expected["size"] = args["size"] + if args.get("offset"): + expected["offset"] = args["offset"] + if args.get("start"): + expected["startDate"] = ( + datetime.fromisoformat(args["start"]) + .astimezone(tz=timezone.utc) + .isoformat() + ) + if args.get("end"): + expected["endDate"] = ( + datetime.fromisoformat(args["end"]) + .astimezone(tz=timezone.utc) + .isoformat() + ) + if args.get("sort"): + expected["sort"] = args["sort"] + assert expected == await fake_crucible.get_runs(**args) + async def test_get_tags(self, fake_crucible: CrucibleService): """Get tags for a run ID""" fake_crucible.elastic.set_query( @@ -905,3 +1069,936 @@ async def test_get_iterations(self, fake_crucible: CrucibleService): ], ) assert iterations == await fake_crucible.get_iterations("one") + + async def test_get_samples_none(self, fake_crucible: CrucibleService): + """Test error when neither run nor iteration is specified""" + with pytest.raises(HTTPException) as exc: + await fake_crucible.get_samples() + assert 400 == exc.value.status_code + assert ( + "A sample query requires either a run or iteration ID" == exc.value.detail + ) + + @pytest.mark.parametrize("ids", (("one", None), (None, 1))) + async def test_get_samples(self, fake_crucible: CrucibleService, ids): + """Get samples for a run ID""" + samples = [ + { + "num": "1", + "path": None, + "id": "one", + "status": "pass", + "primary_metric": "pm", + "primary_period": "m", + "iteration": 1, + }, + { + "id": "two", + "num": "2", + "path": None, + "status": "pass", + "primary_metric": "pm", + "primary_period": "m", + "iteration": 1, + }, + { + "id": "three", + "num": "3", + "path": None, + "status": "pass", + "primary_metric": "pm", + "primary_period": "m", + "iteration": 1, + }, + ] + fake_crucible.elastic.set_query( + "sample", + [ + { + "run": {"id": "one"}, + "iteration": { + "primary-metric": "pm", + "primary-period": "m", + "num": 1, + }, + "sample": s, + } + for s in samples + ], + ) + assert samples == await fake_crucible.get_samples(*ids) + + async def test_get_periods_none(self, fake_crucible: CrucibleService): + """Test error when neither run, iteration, nor sample is specified""" + with pytest.raises(HTTPException) as exc: + await fake_crucible.get_periods() + assert 400 == exc.value.status_code + assert ( + "A period query requires a run, iteration, or sample ID" == exc.value.detail + ) + + @pytest.mark.parametrize( + "ids", (("one", None, None), (None, 1, None), (None, None, 1)) + ) + async def test_get_periods(self, fake_crucible: CrucibleService, ids): + 
"""Get samples for a run ID""" + periods = [ + { + "begin": "2024-12-05 21:16:31.046000+00:00", + "end": "2024-12-05 21:40:31.166000+00:00", + "id": "306C8A78-B352-11EF-8E37-AD212D0A0B9F", + "name": "measurement", + "iteration": 1, + "sample": 1, + "primary_metric": "ilab::sdg-samples-sec", + "status": "pass", + } + ] + fake_crucible.elastic.set_query( + "period", + [ + { + "run": {"id": "one"}, + "iteration": { + "primary-metric": p["primary_metric"], + "primary-period": "measurement", + "num": 1, + "status": p["status"], + }, + "sample": {"num": 1, "status": p["status"], "path": None}, + "period": { + "id": p["id"], + "name": p["name"], + "begin": str( + int(datetime.fromisoformat(p["begin"]).timestamp() * 1000) + ), + "end": str( + int(datetime.fromisoformat(p["end"]).timestamp() * 1000) + ), + "primary-metric": p["primary_metric"], + "status": p["status"], + }, + } + for p in periods + ], + ) + assert periods == await fake_crucible.get_periods(*ids) + + async def test_get_metrics_list(self, fake_crucible: CrucibleService): + """Get samples for a run ID""" + metrics = { + "source1::type1": { + "periods": [], + "breakouts": {"name1": ["value1", "value2"]}, + }, + "source1::type2": {"periods": ["p1", "p2"], "breakouts": {}}, + } + query = [ + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value1"}, + }, + }, + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value1"}, + }, + }, + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value2"}, + }, + }, + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value2"}, + }, + }, + { + "run": {"id": "one"}, + "period": {"id": "p1"}, + "metric_desc": {"source": "source1", "type": "type2", "names": {}}, + }, + { + "run": {"id": "one"}, + "period": {"id": "p2"}, + "metric_desc": {"source": "source1", "type": "type2", "names": {}}, + }, + ] + fake_crucible.elastic.set_query("metric_desc", query) + result = await fake_crucible.get_metrics_list("one") + + # NOTE: the method returns a defaultdict, which doesn't compare to a + # dict but "in the real world" serializes the same: so we just + # serialize and deserialize to mimic the actual API behavior. 
+ result = json.loads(json.dumps(result)) + assert metrics == result + + async def test_get_metric_breakout_none(self, fake_crucible: CrucibleService): + """Test error when the metric isn't found""" + fake_crucible.elastic.set_query("metric_desc", []) + with pytest.raises(HTTPException) as exc: + await fake_crucible.get_metric_breakouts( + "one", metric="source1::type1", names=[], periods=[] + ) + assert 400 == exc.value.status_code + assert "Metric name source1::type1 not found for run one" == exc.value.detail + + @pytest.mark.parametrize("period", (True, False)) + async def test_get_metric_breakout(self, period, fake_crucible: CrucibleService): + """Get samples for a run ID""" + metrics = { + "label": "source1::type1", + "class": ["classless", "classy"], + "type": "type1", + "source": "source1", + "breakouts": {"name1": ["value1", "value2"]}, + } + md1 = { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "class": "classy", + "names": {"name1": "value1"}, + }, + } + md2 = { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value2"}, + }, + } + if period: + metrics["periods"] = ["p1", "p2"] + md1["period"] = {"id": "p1"} + md2["period"] = {"id": "p2"} + query = [ + md1, + md2, + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "class": "classless", + "names": {"name1": "value1"}, + }, + }, + { + "run": {"id": "one"}, + "metric_desc": { + "source": "source1", + "type": "type1", + "names": {"name1": "value2"}, + }, + }, + ] + fake_crucible.elastic.set_query("metric_desc", query) + result = await fake_crucible.get_metric_breakouts( + "one", metric="source1::type1", names=[], periods=[] + ) + + # NOTE: the method returns a defaultdict, which doesn't compare to a + # dict but "in the real world" serializes the same: so we just + # serialize and deserialize to mimic the actual API behavior. 
+ result = json.loads(json.dumps(result)) + assert metrics == result + + async def test_metrics_data_one_noagg(self, fake_crucible: CrucibleService): + """Return data samples for a single metric""" + + fake_crucible.elastic.set_query( + "metric_desc", + [{"metric_desc": {"id": "one-metric", "names": {}}}], + ) + fake_crucible.elastic.set_query( + "metric_data", + [ + { + "metric_desc": {"id": "one-metric"}, + "metric_data": { + "begin": "1726165775123", + "end": "1726165789213", + "duration": 14100, + "value": 9.35271216694379, + }, + }, + { + "metric_desc": {"id": "one-metric"}, + "metric_data": { + "begin": "1726165790000", + "end": "1726165804022", + "duration": 14022, + "value": 9.405932330557683, + }, + }, + ], + ) + expected = [ + { + "begin": "2024-09-12 18:29:35.123000+00:00", + "duration": 14.1, + "end": "2024-09-12 18:29:49.213000+00:00", + "value": 9.35271216694379, + }, + { + "begin": "2024-09-12 18:29:50+00:00", + "duration": 14.022, + "end": "2024-09-12 18:30:04.022000+00:00", + "value": 9.405932330557683, + }, + ] + assert expected == await fake_crucible.get_metrics_data("runid", "source::type") + assert fake_crucible.elastic.requests == [ + Request( + "cdmv7dev-metric_desc", + { + "query": { + "bool": { + "filter": [ + { + "term": { + "run.id": "runid", + }, + }, + { + "term": { + "metric_desc.source": "source", + }, + }, + { + "term": { + "metric_desc.type": "type", + }, + }, + ], + }, + }, + "size": 262144, + }, + kwargs={"ignore_unavailable": True}, + ), + Request( + "cdmv7dev-metric_data", + { + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + ], + }, + }, + ], + }, + }, + "size": 262144, + }, + ), + ] + + @pytest.mark.parametrize("count", (0, 2)) + async def test_metrics_data_agg(self, count, fake_crucible): + """Return data samples for aggregated metrics""" + + fake_crucible.elastic.set_query( + "metric_desc", + [ + {"metric_desc": {"id": "one-metric", "names": {}}}, + {"metric_desc": {"id": "two-metric", "names": {}}}, + ], + ) + fake_crucible.elastic.set_query( + "metric_data", + aggregations={ + "duration": { + "count": count, + "min": 14022, + "max": 14100, + "avg": 14061, + "sum": 28122, + } + }, + ) + if count: + fake_crucible.elastic.set_query( + "metric_data", + aggregations={ + "interval": [ + {"key": 1726165789213, "value": {"value": 9.35271216694379}}, + {"key": 1726165804022, "value": {"value": 9.405932330557683}}, + ] + }, + ) + expected = [ + { + "begin": "2024-09-12 18:29:35.191000+00:00", + "duration": 14.022, + "end": "2024-09-12 18:29:49.213000+00:00", + "value": 9.35271216694379, + }, + { + "begin": "2024-09-12 18:29:50+00:00", + "duration": 14.022, + "end": "2024-09-12 18:30:04.022000+00:00", + "value": 9.405932330557683, + }, + ] + else: + expected = [] + assert expected == await fake_crucible.get_metrics_data( + "r1", "source::type", aggregate=True + ) + expected_requests = [ + Request( + "cdmv7dev-metric_desc", + { + "query": { + "bool": { + "filter": [ + { + "term": { + "run.id": "r1", + }, + }, + { + "term": { + "metric_desc.source": "source", + }, + }, + { + "term": { + "metric_desc.type": "type", + }, + }, + ], + }, + }, + "size": 262144, + }, + kwargs={"ignore_unavailable": True}, + ), + Request( + "cdmv7dev-metric_data", + { + "aggs": { + "duration": { + "stats": { + "field": "metric_data.duration", + }, + }, + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + "two-metric", + ], + }, + }, + ], + }, + }, + "size": 0, + }, + ), + ] + if count: + 
expected_requests.append( + Request( + "cdmv7dev-metric_data", + { + "aggs": { + "interval": { + "aggs": { + "value": { + "sum": { + "field": "metric_data.value", + }, + }, + }, + "histogram": { + "field": "metric_data.end", + "interval": 14022, + }, + }, + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + "two-metric", + ], + }, + }, + ], + }, + }, + "size": 0, + }, + ), + ) + assert fake_crucible.elastic.requests == expected_requests + + async def test_metrics_summary(self, fake_crucible: CrucibleService): + """Return data summary for a metrics""" + + fake_crucible.elastic.set_query( + "metric_desc", + [ + {"metric_desc": {"id": "one-metric", "names": {"a": "1"}}}, + ], + ) + expected = { + "count": 5, + "min": 9.35271216694379, + "max": 9.405932330557683, + "avg": 9.379322249, + "sum": 18.758644498, + } + fake_crucible.elastic.set_query("metric_data", aggregations={"score": expected}) + assert expected == await fake_crucible.get_metrics_summary( + "runid", "one-metric::type", ["a=1"] + ) + assert fake_crucible.elastic.requests == [ + Request( + "cdmv7dev-metric_desc", + { + "query": { + "bool": { + "filter": [ + { + "term": { + "run.id": "runid", + }, + }, + { + "term": { + "metric_desc.source": "one-metric", + }, + }, + { + "term": { + "metric_desc.type": "type", + }, + }, + {"term": {"metric_desc.names.a": "1"}}, + ], + }, + }, + "size": 262144, + }, + kwargs={"ignore_unavailable": True}, + ), + Request( + "cdmv7dev-metric_data", + { + "aggs": {"score": {"stats": {"field": "metric_data.value"}}}, + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + ], + }, + }, + ], + }, + }, + "size": 0, + }, + ), + ] + + @pytest.mark.parametrize( + "runs,param_idx,periods,period_idx,title", + ( + ([], 0, [], 0, "source::type"), + (["r2", "r1"], 0, [], 0, "source::type {run 2}"), + ([], 0, ["p1"], 0, "source::type (n=42)"), + ([], 1, ["p1"], 1, "source::type"), + ([], 1, ["p1"], 2, "source::type"), + ), + ) + async def test_graph_title_no_query( + self, + runs, + param_idx, + periods, + period_idx, + title, + fake_crucible: CrucibleService, + ): + """Test generation of default metric titles""" + + param_runs = [ + {"r1": {"i1": {"n": "42"}, "i2": {"n": "31"}}}, + {"r1": {"i1": {"n": "42"}, "i2": {"n": "42"}}}, + ][param_idx] + period_runs = [ + {"r1": {"i1": {"p1"}, "i2": {"p2"}}}, + {"r1": {"i1": {"p1"}}}, + {"r1": {"i1": {"p2"}}}, + ][period_idx] + name = await fake_crucible._graph_title( + "r1", + runs, + Graph(metric="source::type", periods=periods), + param_runs, + period_runs, + ) + assert name == title + + async def test_graph_title_query(self, fake_crucible: CrucibleService): + """Test generation of default metric titles""" + + param_runs = {} + period_runs = {} + fake_crucible.elastic.set_query( + "param", + [ + { + "run": {"id": "r1"}, + "iteration": {"id": "i1"}, + "param": {"arg": "a", "val": "1"}, + }, + ], + ) + fake_crucible.elastic.set_query( + "period", + [ + { + "run": {"id": "r1"}, + "iteration": {"id": "i1"}, + "period": {"id": "p1"}, + }, + ], + ) + name = await fake_crucible._graph_title( + "r1", + [], + Graph(metric="source::type"), + param_runs, + period_runs, + ) + assert name == "source::type" + assert fake_crucible.elastic.requests == [ + Request( + "cdmv7dev-param", + { + "query": { + "bool": { + "filter": [ + { + "term": { + "run.id": "r1", + }, + }, + ], + }, + }, + "size": 262144, + }, + ), + Request( + "cdmv7dev-period", + { + "query": { + "bool": { + "filter": [ + { + "term": { 
+ "run.id": "r1", + }, + }, + ], + }, + }, + "size": 262144, + }, + ), + ] + + async def test_metrics_graph_norun(self, fake_crucible: CrucibleService): + with pytest.raises(HTTPException) as exc: + await fake_crucible.get_metrics_graph( + GraphList( + name="graph", + graphs=[Graph(metric="source::type", aggregate=True, title="test")], + ) + ) + assert exc.value.status_code == 400 + assert exc.value.detail == "each graph request must have a run ID" + + @pytest.mark.parametrize("count", (0, 2)) + async def test_metrics_graph(self, count, fake_crucible: CrucibleService): + """Return graph for aggregated metrics""" + + metrics = [{"metric_desc": {"id": "one-metric", "names": {}}}] + if count: + metrics.append({"metric_desc": {"id": "two-metric", "names": {}}}) + fake_crucible.elastic.set_query( + "metric_data", + aggregations={ + "duration": { + "count": count, + "min": 14022, + "max": 14100, + "avg": 14061, + "sum": 28122, + } + }, + ) + fake_crucible.elastic.set_query( + "metric_data", + aggregations={ + "interval": [ + {"key": 1726165789213, "value": {"value": 9.35271216694379}}, + {"key": 1726165804022, "value": {"value": 9.405932330557683}}, + ] + }, + ) + expected = { + "data": [ + { + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + "marker": { + "color": "black", + }, + "mode": "line", + "name": "test", + "type": "scatter", + "x": [ + "2024-09-12 18:29:49.213000+00:00", + "2024-09-12 18:30:03.234000+00:00", + "2024-09-12 18:30:04.022000+00:00", + "2024-09-12 18:30:18.043000+00:00", + ], + "y": [ + 9.35271216694379, + 9.35271216694379, + 9.405932330557683, + 9.405932330557683, + ], + "yaxis": "y", + }, + ], + "layout": { + "width": "1500", + "yaxis": { + "color": "black", + "title": "source::type", + }, + }, + } + else: + expected = { + "data": [ + { + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + "marker": { + "color": "black", + }, + "mode": "line", + "name": "test", + "type": "scatter", + "x": [ + "2024-09-12 18:29:49.213000+00:00", + "2024-09-12 18:29:50.213000+00:00", + "2024-09-12 18:30:04.022000+00:00", + "2024-09-12 18:30:05.022000+00:00", + ], + "y": [ + 9.35271216694379, + 9.35271216694379, + 9.405932330557683, + 9.405932330557683, + ], + "yaxis": "y", + }, + ], + "layout": { + "width": "1500", + "yaxis": { + "color": "black", + "title": "source::type", + }, + }, + } + fake_crucible.elastic.set_query( + "metric_data", + [ + { + "metric_data": { + "begin": "1726165789213", + "end": "1726165790213", + "value": 9.35271216694379, + } + }, + { + "metric_data": { + "begin": "1726165804022", + "end": "1726165805022", + "value": 9.405932330557683, + } + }, + ], + ) + fake_crucible.elastic.set_query("metric_desc", metrics) + + assert expected == await fake_crucible.get_metrics_graph( + GraphList( + run="r1", + name="graph", + graphs=[Graph(metric="source::type", aggregate=True, title="test")], + ) + ) + expected_requests = [ + Request( + "cdmv7dev-metric_desc", + { + "query": { + "bool": { + "filter": [ + { + "term": { + "run.id": "r1", + }, + }, + { + "term": { + "metric_desc.source": "source", + }, + }, + { + "term": { + "metric_desc.type": "type", + }, + }, + ], + }, + }, + "size": 262144, + }, + kwargs={"ignore_unavailable": True}, + ), + ] + if count: + expected_requests.extend( + [ + Request( + "cdmv7dev-metric_data", + { + "aggs": { + "duration": { + "stats": { + "field": "metric_data.duration", + }, + }, + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + "two-metric", + ], + }, 
+ }, + ], + }, + }, + "size": 0, + }, + ), + Request( + "cdmv7dev-metric_data", + { + "aggs": { + "interval": { + "aggs": { + "value": { + "sum": { + "field": "metric_data.value", + }, + }, + }, + "histogram": { + "field": "metric_data.begin", + "interval": 14022, + }, + }, + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": [ + "one-metric", + "two-metric", + ], + }, + }, + ], + }, + }, + "size": 0, + }, + ), + ] + ) + else: + expected_requests.append( + Request( + "cdmv7dev-metric_data", + { + "query": { + "bool": { + "filter": [ + { + "terms": { + "metric_desc.id": ["one-metric"], + }, + }, + ], + }, + }, + "size": 262144, + }, + ), + ) + assert fake_crucible.elastic.requests == expected_requests
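
A minimal stand-alone sketch of how the canned-query queue in backend/tests/fake_elastic.py is meant to be driven; this is illustrative only and not part of the patch. The asyncio.run() harness and the sample "run" document are assumptions, while set_query(), search(), and Request mirror the fake shown above.

import asyncio

# Assumes backend/ is on the import path, as in the existing test suite.
from tests.fake_elastic import FakeAsyncElasticsearch, Request


async def demo() -> None:
    elastic = FakeAsyncElasticsearch("http://localhost:9200")

    # Queue one canned response for the "run" index; the default version=7
    # maps it to the "cdmv7dev-run" index. Passing repeat=2 would return the
    # same response on two successive searches.
    elastic.set_query("run", [{"run": {"id": "r1", "benchmark": "test"}}])

    # The first search pops the queued response and records the request for
    # later inspection by the test.
    first = await elastic.search(index="cdmv7dev-run", body={"query": {}})
    assert [h["_source"]["run"]["id"] for h in first["hits"]["hits"]] == ["r1"]
    assert elastic.requests == [Request(index="cdmv7dev-run", body={"query": {}})]

    # With the queue for that index now empty, the fake reports an error
    # payload instead of raising.
    second = await elastic.search(index="cdmv7dev-run", body={"query": {}})
    assert "error" in second


asyncio.run(demo())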