From 15b3fc1b6a64eba925e0775378a917737fcab169 Mon Sep 17 00:00:00 2001 From: Cody Date: Mon, 16 Sep 2024 22:36:24 -0400 Subject: [PATCH] bug fixes, improved docs etl, docs eda, bump version --- eda.qmd | 103 ++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/ibis_analytics/etl/transform.py | 2 + src/ibis_analytics/metrics.py | 4 +- 4 files changed, 108 insertions(+), 3 deletions(-) diff --git a/eda.qmd b/eda.qmd index 04b2b14..65a69dd 100644 --- a/eda.qmd +++ b/eda.qmd @@ -34,6 +34,109 @@ ibis.options.repr.interactive.max_columns = None px.defaults.template = "plotly_dark" ``` + +```{python} +def lookback(t, days=None): + if days is None: + return t + return t.filter(t["timestamp"] > ibis.now() - ibis.interval(days=days)) +``` + +```{python} +t = docs_t +t +``` + +```{python} +t.filter(t["path"].startswith("/posts/")) +``` + +```{python} +px.bar( + t.filter(t["path"].startswith("/posts/")) + .mutate(path=ibis._["path"].re_extract(r"^/posts/[^/]+", 0)) + # .filter(t["path"].contains("ibis-version-")) + .group_by("path") + .agg(count=ibis._.count()) + .order_by(ibis.desc("count")) + .limit(10), + x="path", + y="count", + log_y=True, +) +``` + +```{python} +c = px.line( + metrics.docs_rolling_by_path( + t.filter(t["path"].startswith("/posts/")) + .filter(~ibis._["path"].contains("ibis-version-")) + .mutate(path=ibis._["path"].re_extract(r"^/posts/[^/]+", 0)), + days=28, + ).pipe(lookback, days=None), + x="timestamp", + y="rolling_docs", + color="path", + log_y=True, +) +# no legend +c.update_layout(showlegend=False) +``` + + +--- + +```{python} +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ```{python} diff --git a/pyproject.toml b/pyproject.toml index 4395293..f1ea548 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ allow-direct-references = true [project] name = "ibis-analytics" -version = "0.10.0" +version = "0.11.0" authors = [{ name = "Cody", email = "cody@dkdc.dev" }] description = "Ibis analytics with Ibis" readme = "readme.md" diff --git a/src/ibis_analytics/etl/transform.py b/src/ibis_analytics/etl/transform.py index aeaa994..493d971 100644 --- a/src/ibis_analytics/etl/transform.py +++ b/src/ibis_analytics/etl/transform.py @@ -167,6 +167,8 @@ def docs(t): def transform(t): t = t.rename({"path": "2_path", "timestamp": "date"}) + t = t.mutate(path=ibis._["path"].replace("/index.html", "")) + t = t.mutate(path=ibis._["path"].re_extract(r"^/posts/[^/]+", 0)) return t docs = t.pipe(preprocess).pipe(transform).pipe(postprocess) diff --git a/src/ibis_analytics/metrics.py b/src/ibis_analytics/metrics.py index d263d1c..b62d8b8 100644 --- a/src/ibis_analytics/metrics.py +++ b/src/ibis_analytics/metrics.py @@ -92,7 +92,7 @@ def docs_rolling(t: ibis.Table, days: int = 28) -> ibis.Table: .over( ibis.window( order_by="timestamp", - preceding=28, + preceding=days, following=0, ) ), @@ -115,7 +115,7 @@ def docs_rolling_by_path(t: ibis.Table, days: int = 28) -> ibis.Table: ibis.window( order_by="timestamp", group_by="path", - preceding=28, + preceding=days, following=0, ) ),