diff --git a/.github/workflows/automatic_semantic_pr.yml b/.github/workflows/automatic_semantic_pr.yml
deleted file mode 100644
index 7c9ccb96..00000000
--- a/.github/workflows/automatic_semantic_pr.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# for config, see here: https://github.com/amannn/action-semantic-pull-requests
-
-name: "Lint PR"
-
-on:
- pull_request_target:
- types:
- - opened
- - edited
- - synchronize
-
-jobs:
- main:
- name: Validate PR title
- runs-on: ubuntu-latest
- steps:
- - uses: amannn/action-semantic-pull-request@v4
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 00000000..887e6e65
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,29 @@
+
+name: Documentation
+on:
+ push:
+ branches:
+ - master
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+        fetch-depth: 0 # otherwise, you will fail to push refs to the dest repo
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ pip install -e .
+ - name: Build and Commit
+ uses: sphinx-notes/pages@v2
+ with:
+ documentation_path: docs
+ install_requirements: "true"
+ - name: Push changes
+ uses: ad-m/github-push-action@v2
+ with:
+ github_token: ${{ secrets.SPHINX_DOCUMENTATION }}
+ branch: gh-pages
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7582c489..f411785a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,37 @@
+default_stages: [commit, push]
+
repos:
-- repo: https://github.com/psf/black
+ - repo: https://github.com/pycqa/isort
+ rev: 5.10.1
+ hooks:
+ - id: isort
+ name: isort (python)
+ args: ["--profile", "black", "--filter-files"]
+
+ - repo: https://github.com/asottile/add-trailing-comma
+ rev: v2.2.3
+ hooks:
+ - id: add-trailing-comma
+
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v2.34.0
+ hooks:
+ - id: pyupgrade
+
+ - repo: https://github.com/myint/docformatter
+ rev: v1.3.1
+ hooks:
+ - id: docformatter
+ args: [--in-place]
+
+ - repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- - id: black
- language_version: python3.8
\ No newline at end of file
+ - id: black
+ language_version: python3.8
+
+ - repo: https://github.com/PyCQA/flake8
+ rev: 4.0.1
+ hooks:
+ - id: flake8
+ args: [--config, .flake8]
diff --git a/README.md b/README.md
index bf651b4f..e756eb73 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,12 @@
+
+# PSYCOP Machine Learning Utilities
+
![python versions](https://img.shields.io/badge/Python-%3E=3.7-blue)
[![Code style: black](https://img.shields.io/badge/Code%20Style-Black-black)](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html)
+[![github actions pytest](https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils/actions/workflows/pytest.yml/badge.svg)](https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils/actions)
+[![github actions docs](https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils/actions/workflows/documentation.yml/badge.svg)](https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/)
+![coverage](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/martbern/d6c40a5b5a3169c079e8b8f778b8e517/raw/badge-psycop-ml-utils-pytest-coverage.json)
![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/martbern/d6c40a5b5a3169c079e8b8f778b8e517/raw/badge-psycop-ml-utils-pytest-coverage.json)
# Installation
@@ -32,106 +39,43 @@ or
# Usage
- [ ] Update examples as API matures
-## Loading data from SQL
-
-Currently only contains one function to load a view from SQL, `sql_load`
-```py
-from psycopmlutils.loaders.sql_load import sql_load
+## 🔧 Installation
+To get started using psycop-ml-utils, simply install it using pip by running the following line in your terminal:
-view = "[FOR_SFI_fritekst_resultat_udfoert_i_psykiatrien_aendret_2011]"
-sql = "SELECT * FROM [fct]." + view
-df = sql_load(sql, chunksize = None)
```
-
-## Flattening time series
-To train baseline models (logistic regression, elastic net, SVM, XGBoost/random forest etc.), we need to represent the longitudinal data in a tabular, flattened way.
-
-In essence, we need to generate a training example for each prediction time, where that example contains "latest_blood_pressure" (float), "X_diagnosis_within_n_hours" (boolean) etc.
-
-To generate this, I propose the time-series flattener class (`TimeSeriesFlattener`). It builds a dataset like described above.
-
-### TimeSeriesFlattener
-```python
-class FlattenedDataset:
- def __init__():
- """Class containing a time-series flattened.
-
- Args:
- prediction_times_df (DataFrame): Dataframe with prediction times.
- prediction_timestamp_colname (str, optional): Colname for timestamps. Defaults to "timestamp".
- id_colname (str, optional): Colname for patients ids. Defaults to "dw_ek_borger".
- """
-
- def add_outcome():
- """Adds an outcome-column to the dataset
-
- Args:
- outcome_df (DataFrame): Cols: dw_ek_borger, datotid, value if relevant.
- lookahead_days (float): How far ahead to look for an outcome in days. If none found, use fallback.
- resolve_multiple (str): What to do with more than one value within the lookahead.
- Suggestions: earliest, latest, mean, max, min.
- fallback (List[str]): What to do if no value within the lookahead.
- Suggestions: latest, mean_of_patient, mean_of_population, hardcode (qualified guess)
- timestamp_colname (str): Column name for timestamps
- values_colname (str): Colname for outcome values in outcome_df
- id_colname (str): Column name for citizen id
- new_col_name (str): Name to use for new col. Automatically generated as '{new_col_name}_within_{lookahead_days}_days'.
- Defaults to using values_colname.
- """
-
- def add_predictor():
- """Adds a predictor-column to the dataset
-
- Args:
- predictor_df (DataFrame): Cols: dw_ek_borger, datotid, value if relevant.
- lookahead_days (float): How far ahead to look for an outcome in days. If none found, use fallback.
- resolve_multiple (str): What to do with more than one value within the lookahead.
- Suggestions: earliest, latest, mean, max, min.
- fallback (List[str]): What to do if no value within the lookahead.
- Suggestions: latest, mean_of_patient, mean_of_population, hardcode (qualified guess)
- outcome_colname (str): What to name the column
- id_colname (str): Column name for citizen id
- timestamp_colname (str): Column name for timestamps
- """
+pip install git+https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git
```
-Inspiration-code can be found in previous commits.
+For more detailed instructions on installation, see the [installation instructions](https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/installation).
-#### Example
-- [ ] Update examples as API matures
-```python
-import FlattenedDataset
-
-dataset = FlattenedDataset(prediction_times_df = prediction_times, prediction_timestamp_colname = "timestamp", id_colname = "dw_ek_borger")
-
-dataset.add_outcome(
- outcome_df=type_2_diabetes_df,
- lookahead_days=730,
- resolve_multiple="max",
- fallback=[0],
- name="t2d",
-)
-
-dataset.add_predictor(
- predictor=hba1c,
- lookback_window=365,
- resolve_multiple="max",
- fallback=["latest", 40],
- name="hba1c",
-)
-```
+## 📖 Documentation
+
+| Documentation | |
+| -------------------------- | --------------------------------------------------------------------------- |
+| 📚 **[Usage Guides]**      | Guides and instructions on how to use the package and its features.          |
+| 📰 **[News and changelog]** | New additions, changes and version history. |
+| 🎛 **[API References]**     | The detailed reference for the psycop-ml-utils API, including function documentation. |
+| 🙋 **[FAQ]**                | Frequently asked questions.                                                   |
+
+[usage guides]: https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/introduction.html
+[api references]: https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/
+[News and changelog]: https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/news.html
+[FAQ]: https://Aarhus-Psychiatry-Research.github.io/psycop-ml-utils/faq.html
-Dataset now looks like this:
+## 💬 Where to ask questions
-| dw_ek_borger | datetime_prediction | outc_t2d_within_next_730_days | pred_max_hba1c_within_prev_365_days |
-|--------------|---------------------|-------------------------------|-------------------------------------|
-| 1 | yyyy-mm-dd hh:mm:ss | 0 | 48 |
-| 2 | yyyy-mm-dd hh:mm:ss | 0 | 40 |
-| 3 | yyyy-mm-dd hh:mm:ss | 1 | 44 |
+| Type | |
+| ------------------------------ | ---------------------- |
+| 🚨 **Bug Reports** | [GitHub Issue Tracker] |
+| 🎁 **Feature Requests & Ideas** | [GitHub Issue Tracker] |
+| 👩‍💻 **Usage Questions**        | [GitHub Discussions]   |
+| 🗯 **General Discussion** | [GitHub Discussions] |
+[github issue tracker]: https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils/issues
+[github discussions]: https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils/discussions
-For binary outcomes, `add_predictor` with `fallback = [0]` would take a df with only the times where the event occurred, and then generate 0's for the rest.
-I propose we create the above functionality on a just-in-time basis, building the features as we need them.
diff --git a/citation.cff b/citation.cff
new file mode 100644
index 00000000..82b7c46b
--- /dev/null
+++ b/citation.cff
@@ -0,0 +1,15 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Martin"
+ given-names: "Bernstorff"
+- family-names: "Lasse"
+ given-names: "Hansen"
+- family-names: "Enevoldsen"
+ given-names: "Kenneth"
+ orcid: "https://orcid.org/0000-0001-8733-0966"
+title: "PSYCOP machine learning utilities"
+version: 0.1.1
+# doi: 10.5281/zenodo.6675315
+date-released: 2022-06-21
+url: "https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils"
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000..d4bb2cbb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico
new file mode 100644
index 00000000..292771a0
Binary files /dev/null and b/docs/_static/favicon.ico differ
diff --git a/docs/_static/icon.png b/docs/_static/icon.png
new file mode 100644
index 00000000..85126960
Binary files /dev/null and b/docs/_static/icon.png differ
diff --git a/docs/_static/icon_with_title.png b/docs/_static/icon_with_title.png
new file mode 100644
index 00000000..9ed41011
Binary files /dev/null and b/docs/_static/icon_with_title.png differ
diff --git a/docs/api.model_performance.rst b/docs/api.model_performance.rst
new file mode 100644
index 00000000..0cf36918
--- /dev/null
+++ b/docs/api.model_performance.rst
@@ -0,0 +1,21 @@
+Model Performance
+--------------------------------------------------
+
+
+model_performance.model_performance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.model_performance.model_performance
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
+
+model_performance.utils
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.model_performance.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
\ No newline at end of file
diff --git a/docs/api.timeseriesflattener.rst b/docs/api.timeseriesflattener.rst
new file mode 100644
index 00000000..255b973f
--- /dev/null
+++ b/docs/api.timeseriesflattener.rst
@@ -0,0 +1,31 @@
+Time Series Flattener
+--------------------------------------------------
+
+
+timeseriesflattener.create_feature_combinations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.timeseriesflattener.create_feature_combinations
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
+
+timeseriesflattener.flattened_dataset
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.timeseriesflattener.flattened_dataset
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
+
+
+timeseriesflattener.resolve_multiple_functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.timeseriesflattener.resolve_multiple_functions
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
\ No newline at end of file
diff --git a/docs/api.writers.rst b/docs/api.writers.rst
new file mode 100644
index 00000000..a87b702c
--- /dev/null
+++ b/docs/api.writers.rst
@@ -0,0 +1,12 @@
+Writers
+--------------------------------------------------
+
+
+writers.sql_writer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: psycopmlutils.writers.sql_writer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members:
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 00000000..9a1b94a5
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,4 @@
+# News and Changelog
+
+- v. 0.1.1 (21 June 2022)
+ - Documentation was added
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..0507d42d
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,93 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+
+from psycopmlutils import __version__
+
+# -- Project information -----------------------------------------------------
+
+project = "psycop-ml-utils"
+author = "Martin Bernstorff, Lasse Hansen, and Kenneth Enevoldsen"
+
+# The full version, including alpha/beta/rc tags
+release = __version__
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ "sphinx.ext.napoleon",
+ "sphinx.ext.viewcode",
+ "sphinxext.opengraph",
+ "sphinx_copybutton",
+ "sphinx.ext.githubpages",
+ "myst_parser",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "furo" # "press", "sphinx_rtd_theme", "furo"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_show_sourcelink = True
+
+html_context = {
+ "display_github": True, # Add 'Edit on Github' link instead of 'View page source'
+ "github_user": "Aarhus-Psychiatry-Research",
+ "github_repo": project,
+ "github_version": "main",
+ "conf_py_path": "/docs/",
+}
+
+
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".md": "markdown",
+}
+
+html_static_path = ["_static"]
+html_favicon = "_static/favicon.ico"
+
+html_theme_options = {
+ "light_logo": "icon_with_title.png",
+ "dark_logo": "icon_with_title.png",
+ "light_css_variables": {
+ "color-brand-primary": "#204279",
+ "color-brand-content": "#204279",
+ },
+ "dark_css_variables": {
+ "color-brand-primary": "#4872b8",
+ "color-brand-content": "#4872b8",
+ },
+ "sidebar_hide_name": True,
+ "navigation_with_keys": True,
+ "top_of_page_button": "edit",
+}
diff --git a/docs/faq.rst b/docs/faq.rst
new file mode 100644
index 00000000..dad9d4ba
--- /dev/null
+++ b/docs/faq.rst
@@ -0,0 +1,62 @@
+FAQ
+-------
+
+
+How do I test the code and run the test suite?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+psycop-ml-utils comes with an extensive test suite. To run the tests, you should clone the repository, then build psycop-ml-utils from source.
+This will also install the required development dependencies and test utilities defined in the requirements.txt.
+
+
+.. code-block::
+
+ # install test dependencies
+ pip install -r requirements.txt
+
+ python -m pytest
+
+
+which will run all the tests in the :code:`tests` folder.
+
+Specific tests can be run using:
+
+.. code-block::
+
+ python -m pytest tests/desired_test.py
+
+
+If you want to check code coverage you can run the following:
+
+.. code-block::
+
+ pip install pytest-cov
+
+ python -m pytest --cov=.
+
+
+How is the documentation generated?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+psycop-ml-utils uses `sphinx <https://www.sphinx-doc.org/>`__ to generate documentation. It uses the `Furo <https://github.com/pradyunsg/furo>`__ theme with custom styling.
+
+To make the documentation you can run:
+
+.. code-block::
+
+ # install sphinx, themes and extensions
+    pip install -r requirements.txt
+
+ # generate html from documentations
+
+ make -C docs html
+
+
+How do I cite this work?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you use this library in your research, please cite:
+
+.. code-block::
+
+ add citation here
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 00000000..6a2d9e71
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,36 @@
+PSYCOP Machine Learning Utilities
+=================================
+
+.. image:: https://img.shields.io/github/stars/Aarhus-Psychiatry-Research/psycop-ml-utils.svg?style=social&label=Star&maxAge=2592000
+ :target: https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils
+
+
+Contents
+---------------------------------
+
+.. toctree::
+ :maxdepth: 3
+ :caption: Getting started
+
+ installation
+ changelog
+ faq
+
+.. toctree::
+ :maxdepth: 3
+ :caption: API references
+
+ api.model_performance
+ api.timeseriesflattener
+ api.writers
+
+
+.. toctree::
+   GitHub Repository <https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils>
+
+
+Indices and search
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 00000000..19ae461c
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,40 @@
+
+# Installation
+To get started using psycop-ml-utils, install it using your preferred package manager.
+
+Using pip, run the following line in your terminal:
+`pip install git+https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git`
+
+or using poetry by running
+
+`poetry add git+https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git`
+
+
+## For development
+We use poetry for dependency management. To install poetry, follow the instructions on their [website](https://python-poetry.org/docs/#osx--linux--bashonwindows-install-instructions).
+
+
+Clone the repo, move into it, then run `poetry install`. I.e.:
+
+```bash
+git clone https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git
+cd psycop-ml-utils
+poetry install
+```
+
+To increase the version:
+`poetry version [patch|minor|major]` according to [semantic versioning](https://semver.org/).
+
+Adding new dependencies:
+`poetry add (--dev) [packagename]`
+
+No need to update a `requirements.txt`. It's replaced by `pyproject.toml`, and `poetry` manages it automatically.
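+
+As a quick reference, here is a minimal sketch of the poetry commands described above (the package names are placeholders, not actual dependencies of this project):
+
+```bash
+# bump the version according to semantic versioning (patch, minor or major)
+poetry version patch
+
+# add a runtime dependency (pandas is only a placeholder example)
+poetry add pandas
+
+# add a development-only dependency (pytest-cov is only a placeholder example)
+poetry add --dev pytest-cov
+```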
+
+
+## When using
+Install using your preferred package manager, e.g.:
+`pip install git+https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git`
+
+or
+
+`poetry add git+https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git`
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 00000000..2119f510
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/example/timeseriesflattener/generate_features_and_write_to_sql.py b/example/timeseriesflattener/generate_features_and_write_to_sql.py
index 677849b8..eda9a9fe 100644
--- a/example/timeseriesflattener/generate_features_and_write_to_sql.py
+++ b/example/timeseriesflattener/generate_features_and_write_to_sql.py
@@ -1,15 +1,15 @@
import time
-from pathlib import Path
import numpy as np
import pandas as pd
+from wasabi import msg
+
import psycopmlutils.loaders # noqa
from psycopmlutils.timeseriesflattener import (
FlattenedDataset,
create_feature_combinations,
)
from psycopmlutils.writers.sql_writer import write_df_to_sql
-from wasabi import msg
if __name__ == "__main__":
RESOLVE_MULTIPLE = ["mean", "max", "min"]
@@ -41,7 +41,7 @@
"resolve_multiple": RESOLVE_MULTIPLE,
"fallback": np.nan,
},
- ]
+ ],
)
event_times = psycopmlutils.loaders.LoadOutcome.t2d()
@@ -87,11 +87,11 @@
# Finish
msg.good(
- f"Finished adding {len(PREDICTOR_LIST)} predictors, took {round((end_time - start_time)/60, 1)} minutes"
+ f"Finished adding {len(PREDICTOR_LIST)} predictors, took {round((end_time - start_time)/60, 1)} minutes",
)
msg.info(
- f"Dataframe size is {flattened_df.df.memory_usage(index=True, deep=True).sum() / 1024 / 1024} MiB"
+ f"Dataframe size is {flattened_df.df.memory_usage(index=True, deep=True).sum() / 1024 / 1024} MiB",
)
msg.good("Done!")
@@ -117,7 +117,7 @@
]
msg.warn(
- f"{dataset_name}: There are {len(ids_in_split_but_not_in_flattened_df)} ({round(len(ids_in_split_but_not_in_flattened_df)/len(split_ids)*100, 2)}%) ids which are in {dataset_name}_ids but not in flattened_df_ids, will get dropped during merge"
+ f"{dataset_name}: There are {len(ids_in_split_but_not_in_flattened_df)} ({round(len(ids_in_split_but_not_in_flattened_df)/len(split_ids)*100, 2)}%) ids which are in {dataset_name}_ids but not in flattened_df_ids, will get dropped during merge",
)
split_df = pd.merge(flattened_df.df, df_split_ids, how="inner")
diff --git a/poetry.lock b/poetry.lock
index dbeb791c..fd640ce0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,3 +1,11 @@
+[[package]]
+name = "alabaster"
+version = "0.7.12"
+description = "A configurable sidebar-enabled Sphinx theme"
+category = "dev"
+optional = false
+python-versions = "*"
+
[[package]]
name = "atomicwrites"
version = "1.4.0"
@@ -20,11 +28,37 @@ docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
+[[package]]
+name = "babel"
+version = "2.10.3"
+description = "Internationalization utilities"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pytz = ">=2015.7"
+
+[[package]]
+name = "beautifulsoup4"
+version = "4.11.1"
+description = "Screen-scraping library"
+category = "dev"
+optional = false
+python-versions = ">=3.6.0"
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
[[package]]
name = "black"
version = "22.3.0"
description = "The uncompromising code formatter."
-category = "main"
+category = "dev"
optional = false
python-versions = ">=3.6.2"
@@ -50,6 +84,14 @@ category = "main"
optional = false
python-versions = ">=3.6"
+[[package]]
+name = "certifi"
+version = "2022.6.15"
+description = "Python package for providing Mozilla's CA Bundle."
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
[[package]]
name = "cfgv"
version = "3.3.1"
@@ -58,11 +100,22 @@ category = "dev"
optional = false
python-versions = ">=3.6.1"
+[[package]]
+name = "charset-normalizer"
+version = "2.0.12"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+category = "dev"
+optional = false
+python-versions = ">=3.5.0"
+
+[package.extras]
+unicode_backport = ["unicodedata2"]
+
[[package]]
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
-category = "main"
+category = "dev"
optional = false
python-versions = ">=3.7"
@@ -73,7 +126,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
name = "colorama"
version = "0.4.5"
description = "Cross-platform colored terminal text."
-category = "main"
+category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
@@ -99,6 +152,25 @@ category = "dev"
optional = false
python-versions = "*"
+[[package]]
+name = "docformatter"
+version = "1.4"
+description = "Formats docstrings to follow PEP 257."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+untokenize = "*"
+
+[[package]]
+name = "docutils"
+version = "0.18.1"
+description = "Docutils -- Python Documentation Utilities"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
[[package]]
name = "filelock"
version = "3.7.1"
@@ -111,6 +183,33 @@ python-versions = ">=3.7"
docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"]
testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"]
+[[package]]
+name = "flake8"
+version = "4.0.1"
+description = "the modular source code checker: pep8 pyflakes and co"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+mccabe = ">=0.6.0,<0.7.0"
+pycodestyle = ">=2.8.0,<2.9.0"
+pyflakes = ">=2.4.0,<2.5.0"
+
+[[package]]
+name = "furo"
+version = "2022.6.21"
+description = "A clean customisable Sphinx documentation theme."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+beautifulsoup4 = "*"
+pygments = "*"
+sphinx = ">=4.0,<6.0"
+sphinx-basic-ng = "*"
+
[[package]]
name = "greenlet"
version = "1.1.2"
@@ -133,6 +232,38 @@ python-versions = ">=3.7"
[package.extras]
license = ["ukkonen"]
+[[package]]
+name = "idna"
+version = "3.3"
+description = "Internationalized Domain Names in Applications (IDNA)"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[[package]]
+name = "imagesize"
+version = "1.3.0"
+description = "Getting image size from png/jpeg/jpeg2000/gif file"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "importlib-metadata"
+version = "4.11.4"
+description = "Read metadata from Python packages"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+zipp = ">=0.5"
+
+[package.extras]
+docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
+perf = ["ipython"]
+testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"]
+
[[package]]
name = "iniconfig"
version = "1.1.1"
@@ -141,6 +272,20 @@ category = "dev"
optional = false
python-versions = "*"
+[[package]]
+name = "jinja2"
+version = "3.1.2"
+description = "A very fast and expressive template engine."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+MarkupSafe = ">=2.0"
+
+[package.extras]
+i18n = ["Babel (>=2.7)"]
+
[[package]]
name = "joblib"
version = "1.1.0"
@@ -149,14 +294,98 @@ category = "main"
optional = false
python-versions = ">=3.6"
+[[package]]
+name = "markdown-it-py"
+version = "2.1.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark (>=3.2,<4.0)"]
+code_style = ["pre-commit (==2.6)"]
+compare = ["commonmark (>=0.9.1,<0.10.0)", "markdown (>=3.3.6,<3.4.0)", "mistletoe (>=0.8.1,<0.9.0)", "mistune (>=2.0.2,<2.1.0)", "panflute (>=2.1.3,<2.2.0)"]
+linkify = ["linkify-it-py (>=1.0,<2.0)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx-book-theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
+[[package]]
+name = "markupsafe"
+version = "2.1.1"
+description = "Safely add untrusted strings to HTML/XML markup."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[[package]]
+name = "mccabe"
+version = "0.6.1"
+description = "McCabe checker, plugin for flake8"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "mdit-py-plugins"
+version = "0.3.0"
+description = "Collection of plugins for markdown-it-py"
+category = "dev"
+optional = false
+python-versions = "~=3.6"
+
+[package.dependencies]
+markdown-it-py = ">=1.0.0,<3.0.0"
+
+[package.extras]
+code_style = ["pre-commit (==2.6)"]
+rtd = ["myst-parser (>=0.14.0,<0.15.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"]
+testing = ["coverage", "pytest (>=3.6,<4)", "pytest-cov", "pytest-regressions"]
+
+[[package]]
+name = "mdurl"
+version = "0.1.1"
+description = "Markdown URL utilities"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
[[package]]
name = "mypy-extensions"
version = "0.4.3"
description = "Experimental type system extensions for programs checked with the mypy typechecker."
-category = "main"
+category = "dev"
optional = false
python-versions = "*"
+[[package]]
+name = "myst-parser"
+version = "0.18.0"
+description = "An extended commonmark compliant parser, with bridges to docutils & sphinx."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+docutils = ">=0.15,<0.19"
+jinja2 = "*"
+markdown-it-py = ">=1.0.0,<3.0.0"
+mdit-py-plugins = ">=0.3.0,<0.4.0"
+pyyaml = "*"
+sphinx = ">=4,<6"
+typing-extensions = "*"
+
+[package.extras]
+code_style = ["pre-commit (>=2.12,<3.0)"]
+linkify = ["linkify-it-py (>=1.0,<2.0)"]
+rtd = ["ipython", "sphinx-book-theme", "sphinx-design", "sphinxext-rediraffe (>=0.2.7,<0.3.0)", "sphinxcontrib.mermaid (>=0.7.1,<0.8.0)", "sphinxext-opengraph (>=0.6.3,<0.7.0)"]
+testing = ["beautifulsoup4", "coverage", "pytest (>=6,<7)", "pytest-cov", "pytest-regressions", "pytest-param-files (>=0.3.4,<0.4.0)", "sphinx-pytest"]
+
[[package]]
name = "nodeenv"
version = "1.6.0"
@@ -209,7 +438,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"]
name = "pathspec"
version = "0.9.0"
description = "Utility library for gitignore style pattern matching of file paths."
-category = "main"
+category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
@@ -217,7 +446,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
name = "platformdirs"
version = "2.5.2"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "main"
+category = "dev"
optional = false
python-versions = ">=3.7"
@@ -261,6 +490,30 @@ category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+[[package]]
+name = "pycodestyle"
+version = "2.8.0"
+description = "Python style guide checker"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "pyflakes"
+version = "2.4.0"
+description = "passive checker of Python programs"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pygments"
+version = "2.12.0"
+description = "Pygments is a syntax highlighting package written in Python."
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
[[package]]
name = "pyodbc"
version = "4.0.32"
@@ -343,6 +596,24 @@ category = "dev"
optional = false
python-versions = ">=3.6"
+[[package]]
+name = "requests"
+version = "2.28.0"
+description = "Python HTTP for Humans."
+category = "dev"
+optional = false
+python-versions = ">=3.7, <4"
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2.0.0,<2.1.0"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<1.27"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
+
[[package]]
name = "scikit-learn"
version = "1.1.1"
@@ -382,6 +653,165 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+[[package]]
+name = "snowballstemmer"
+version = "2.2.0"
+description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "soupsieve"
+version = "2.3.2.post1"
+description = "A modern CSS selector implementation for Beautiful Soup."
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "sphinx"
+version = "5.0.2"
+description = "Python documentation generator"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+alabaster = ">=0.7,<0.8"
+babel = ">=1.3"
+colorama = {version = ">=0.3.5", markers = "sys_platform == \"win32\""}
+docutils = ">=0.14,<0.19"
+imagesize = "*"
+importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""}
+Jinja2 = ">=2.3"
+packaging = "*"
+Pygments = ">=2.0"
+requests = ">=2.5.0"
+snowballstemmer = ">=1.1"
+sphinxcontrib-applehelp = "*"
+sphinxcontrib-devhelp = "*"
+sphinxcontrib-htmlhelp = ">=2.0.0"
+sphinxcontrib-jsmath = "*"
+sphinxcontrib-qthelp = "*"
+sphinxcontrib-serializinghtml = ">=1.1.5"
+
+[package.extras]
+docs = ["sphinxcontrib-websupport"]
+lint = ["flake8 (>=3.5.0)", "isort", "mypy (>=0.950)", "docutils-stubs", "types-typed-ast", "types-requests"]
+test = ["pytest (>=4.6)", "html5lib", "cython", "typed-ast"]
+
+[[package]]
+name = "sphinx-basic-ng"
+version = "0.0.1a11"
+description = "A modern skeleton for Sphinx themes."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+sphinx = ">=4.0,<6.0"
+
+[package.extras]
+docs = ["furo", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs", "ipython"]
+
+[[package]]
+name = "sphinx-copybutton"
+version = "0.5.0"
+description = "Add a copy button to each of your code cells."
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+sphinx = ">=1.8"
+
+[package.extras]
+code_style = ["pre-commit (==2.12.1)"]
+rtd = ["sphinx", "ipython", "myst-nb", "sphinx-book-theme"]
+
+[[package]]
+name = "sphinxcontrib-applehelp"
+version = "1.0.2"
+description = "sphinxcontrib-applehelp is a sphinx extension which outputs Apple help books"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+lint = ["flake8", "mypy", "docutils-stubs"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-devhelp"
+version = "1.0.2"
+description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+lint = ["flake8", "mypy", "docutils-stubs"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-htmlhelp"
+version = "2.0.0"
+description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+lint = ["flake8", "mypy", "docutils-stubs"]
+test = ["pytest", "html5lib"]
+
+[[package]]
+name = "sphinxcontrib-jsmath"
+version = "1.0.1"
+description = "A sphinx extension which renders display math in HTML via JavaScript"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+test = ["pytest", "flake8", "mypy"]
+
+[[package]]
+name = "sphinxcontrib-qthelp"
+version = "1.0.3"
+description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+lint = ["flake8", "mypy", "docutils-stubs"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-serializinghtml"
+version = "1.1.5"
+description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+lint = ["flake8", "mypy", "docutils-stubs"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxext-opengraph"
+version = "0.6.3"
+description = "Sphinx Extension to enable OGP support"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+sphinx = ">=2.0"
+
[[package]]
name = "sqlalchemy"
version = "1.4.37"
@@ -434,7 +864,7 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
-category = "main"
+category = "dev"
optional = false
python-versions = ">=3.7"
@@ -442,10 +872,31 @@ python-versions = ">=3.7"
name = "typing-extensions"
version = "4.2.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
-category = "main"
+category = "dev"
optional = false
python-versions = ">=3.7"
+[[package]]
+name = "untokenize"
+version = "0.1.1"
+description = "Transforms tokens into original source code (while preserving whitespace)."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "urllib3"
+version = "1.26.9"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
+
+[package.extras]
+brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
+secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
+
[[package]]
name = "virtualenv"
version = "20.14.1"
@@ -472,12 +923,28 @@ category = "main"
optional = false
python-versions = "*"
+[[package]]
+name = "zipp"
+version = "3.8.0"
+description = "Backport of pathlib-compatible object wrapper for zip files"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.extras]
+docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
+testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
+
[metadata]
lock-version = "1.1"
python-versions = ">=3.8,<3.11"
-content-hash = "49d06b895f6a4d7c2c5e13c0ba283fa49d80deea3a6b9f6d170d253ffb41472b"
+content-hash = "92f8e7f8c16c1f0d3f5718e62d8dbec5046929c2fbcc9be5b14d550ce0931e83"
[metadata.files]
+alabaster = [
+ {file = "alabaster-0.7.12-py2.py3-none-any.whl", hash = "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359"},
+ {file = "alabaster-0.7.12.tar.gz", hash = "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"},
+]
atomicwrites = [
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
@@ -486,6 +953,14 @@ attrs = [
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
]
+babel = [
+ {file = "Babel-2.10.3-py3-none-any.whl", hash = "sha256:ff56f4892c1c4bf0d814575ea23471c230d544203c7748e8c68f0089478d48eb"},
+ {file = "Babel-2.10.3.tar.gz", hash = "sha256:7614553711ee97490f732126dc077f8d0ae084ebc6a96e23db1482afabdb2c51"},
+]
+beautifulsoup4 = [
+ {file = "beautifulsoup4-4.11.1-py3-none-any.whl", hash = "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30"},
+ {file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"},
+]
black = [
{file = "black-22.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09"},
{file = "black-22.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb"},
@@ -515,10 +990,18 @@ catalogue = [
{file = "catalogue-2.0.7-py3-none-any.whl", hash = "sha256:cab4feda641fe05da1e6a1a9d123b0869d5ca324dcd93d4a5c384408ab62e7fb"},
{file = "catalogue-2.0.7.tar.gz", hash = "sha256:535d33ae79ebd21ca298551d85da186ae8b8e1df36b0fb0246da774163ec2d6b"},
]
+certifi = [
+ {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"},
+ {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"},
+]
cfgv = [
{file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"},
{file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"},
]
+charset-normalizer = [
+ {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
+ {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
+]
click = [
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
@@ -574,10 +1057,25 @@ distlib = [
{file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"},
{file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"},
]
+docformatter = [
+ {file = "docformatter-1.4.tar.gz", hash = "sha256:064e6d81f04ac96bc0d176cbaae953a0332482b22d3ad70d47c8a7f2732eef6f"},
+]
+docutils = [
+ {file = "docutils-0.18.1-py2.py3-none-any.whl", hash = "sha256:23010f129180089fbcd3bc08cfefccb3b890b0050e1ca00c867036e9d161b98c"},
+ {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"},
+]
filelock = [
{file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"},
{file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"},
]
+flake8 = [
+ {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"},
+ {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
+]
+furo = [
+ {file = "furo-2022.6.21-py3-none-any.whl", hash = "sha256:061b68e323345e27fcba024cf33a1e77f3dfd8d9987410be822749a706e2add6"},
+ {file = "furo-2022.6.21.tar.gz", hash = "sha256:9aa983b7488a4601d13113884bfb7254502c8729942e073a0acb87a5512af223"},
+]
greenlet = [
{file = "greenlet-1.1.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6"},
{file = "greenlet-1.1.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a"},
@@ -639,18 +1137,96 @@ identify = [
{file = "identify-2.5.1-py2.py3-none-any.whl", hash = "sha256:0dca2ea3e4381c435ef9c33ba100a78a9b40c0bab11189c7cf121f75815efeaa"},
{file = "identify-2.5.1.tar.gz", hash = "sha256:3d11b16f3fe19f52039fb7e39c9c884b21cb1b586988114fbe42671f03de3e82"},
]
+idna = [
+ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
+ {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
+]
+imagesize = [
+ {file = "imagesize-1.3.0-py2.py3-none-any.whl", hash = "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c"},
+ {file = "imagesize-1.3.0.tar.gz", hash = "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d"},
+]
+importlib-metadata = [
+ {file = "importlib_metadata-4.11.4-py3-none-any.whl", hash = "sha256:c58c8eb8a762858f49e18436ff552e83914778e50e9d2f1660535ffb364552ec"},
+ {file = "importlib_metadata-4.11.4.tar.gz", hash = "sha256:5d26852efe48c0a32b0509ffbc583fda1a2266545a78d104a6f4aff3db17d700"},
+]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
+jinja2 = [
+ {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
+ {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+]
joblib = [
{file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"},
{file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"},
]
+markdown-it-py = [
+ {file = "markdown-it-py-2.1.0.tar.gz", hash = "sha256:cf7e59fed14b5ae17c0006eff14a2d9a00ed5f3a846148153899a0224e2c07da"},
+ {file = "markdown_it_py-2.1.0-py3-none-any.whl", hash = "sha256:93de681e5c021a432c63147656fe21790bc01231e0cd2da73626f1aa3ac0fe27"},
+]
+markupsafe = [
+ {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"},
+ {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"},
+ {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"},
+ {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"},
+ {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"},
+ {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
+]
+mccabe = [
+ {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
+ {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
+]
+mdit-py-plugins = [
+ {file = "mdit-py-plugins-0.3.0.tar.gz", hash = "sha256:ecc24f51eeec6ab7eecc2f9724e8272c2fb191c2e93cf98109120c2cace69750"},
+ {file = "mdit_py_plugins-0.3.0-py3-none-any.whl", hash = "sha256:b1279701cee2dbf50e188d3da5f51fee8d78d038cdf99be57c6b9d1aa93b4073"},
+]
+mdurl = [
+ {file = "mdurl-0.1.1-py3-none-any.whl", hash = "sha256:6a8f6804087b7128040b2fb2ebe242bdc2affaeaa034d5fc9feeed30b443651b"},
+ {file = "mdurl-0.1.1.tar.gz", hash = "sha256:f79c9709944df218a4cdb0fcc0b0c7ead2f44594e3e84dc566606f04ad749c20"},
+]
mypy-extensions = [
{file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
{file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
]
+myst-parser = [
+ {file = "myst-parser-0.18.0.tar.gz", hash = "sha256:739a4d96773a8e55a2cacd3941ce46a446ee23dcd6b37e06f73f551ad7821d86"},
+ {file = "myst_parser-0.18.0-py3-none-any.whl", hash = "sha256:4965e51918837c13bf1c6f6fe2c6bddddf193148360fbdaefe743a4981358f6a"},
+]
nodeenv = [
{file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"},
{file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"},
@@ -726,6 +1302,18 @@ py = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
+pycodestyle = [
+ {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"},
+ {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"},
+]
+pyflakes = [
+ {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"},
+ {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"},
+]
+pygments = [
+ {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"},
+ {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"},
+]
pyodbc = [
{file = "pyodbc-4.0.32-cp27-cp27m-win32.whl", hash = "sha256:2152ce6d5131d769ff5839aa762e12d844c95e9ec4bb2f666e8cd9dfa1ae2240"},
{file = "pyodbc-4.0.32-cp27-cp27m-win_amd64.whl", hash = "sha256:56ec4974096d40d6c62a228799122dbc2ade6c4045cc5d31860212a32cae95b1"},
@@ -797,6 +1385,10 @@ pyyaml = [
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
]
+requests = [
+ {file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"},
+ {file = "requests-2.28.0.tar.gz", hash = "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"},
+]
scikit-learn = [
{file = "scikit-learn-1.1.1.tar.gz", hash = "sha256:3e77b71e8e644f86c8b5be7f1c285ef597de4c384961389ee3e9ca36c445b256"},
{file = "scikit_learn-1.1.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:102f51797cd8944bf44a038d106848ddf2804f2c1edf7aea45fba81a4fdc4d80"},
@@ -846,6 +1438,54 @@ six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
+snowballstemmer = [
+ {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
+ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
+]
+soupsieve = [
+ {file = "soupsieve-2.3.2.post1-py3-none-any.whl", hash = "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759"},
+ {file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"},
+]
+sphinx = [
+ {file = "Sphinx-5.0.2-py3-none-any.whl", hash = "sha256:d3e57663eed1d7c5c50895d191fdeda0b54ded6f44d5621b50709466c338d1e8"},
+ {file = "Sphinx-5.0.2.tar.gz", hash = "sha256:b18e978ea7565720f26019c702cd85c84376e948370f1cd43d60265010e1c7b0"},
+]
+sphinx-basic-ng = [
+ {file = "sphinx_basic_ng-0.0.1a11-py3-none-any.whl", hash = "sha256:9aecb5345816998789ef76658a83e3c0a12aafa14b17d40e28cd4aaeb94d1517"},
+ {file = "sphinx_basic_ng-0.0.1a11.tar.gz", hash = "sha256:bf9a8fda0379c7d2ab51c9543f2b18e014b77fb295b49d64f3c1a910c863b34f"},
+]
+sphinx-copybutton = [
+ {file = "sphinx-copybutton-0.5.0.tar.gz", hash = "sha256:a0c059daadd03c27ba750da534a92a63e7a36a7736dcf684f26ee346199787f6"},
+ {file = "sphinx_copybutton-0.5.0-py3-none-any.whl", hash = "sha256:9684dec7434bd73f0eea58dda93f9bb879d24bff2d8b187b1f2ec08dfe7b5f48"},
+]
+sphinxcontrib-applehelp = [
+ {file = "sphinxcontrib-applehelp-1.0.2.tar.gz", hash = "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"},
+ {file = "sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a"},
+]
+sphinxcontrib-devhelp = [
+ {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"},
+ {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"},
+]
+sphinxcontrib-htmlhelp = [
+ {file = "sphinxcontrib-htmlhelp-2.0.0.tar.gz", hash = "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"},
+ {file = "sphinxcontrib_htmlhelp-2.0.0-py2.py3-none-any.whl", hash = "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07"},
+]
+sphinxcontrib-jsmath = [
+ {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"},
+ {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
+]
+sphinxcontrib-qthelp = [
+ {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"},
+ {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"},
+]
+sphinxcontrib-serializinghtml = [
+ {file = "sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"},
+ {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"},
+]
+sphinxext-opengraph = [
+ {file = "sphinxext-opengraph-0.6.3.tar.gz", hash = "sha256:cd89e13cc7a44739f81b64ee57c1c20ef0c05dda5d1d8201d31ec2f34e4c29db"},
+ {file = "sphinxext_opengraph-0.6.3-py3-none-any.whl", hash = "sha256:bf76017c105856b07edea6caf4942b6ae9bb168585dccfd6dbdb6e4161f6b03a"},
+]
sqlalchemy = [
{file = "SQLAlchemy-1.4.37-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:d9050b0c4a7f5538650c74aaba5c80cd64450e41c206f43ea6d194ae6d060ff9"},
{file = "SQLAlchemy-1.4.37-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b4c92823889cf9846b972ee6db30c0e3a92c0ddfc76c6060a6cda467aa5fb694"},
@@ -900,6 +1540,13 @@ typing-extensions = [
{file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"},
{file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"},
]
+untokenize = [
+ {file = "untokenize-0.1.1.tar.gz", hash = "sha256:3865dbbbb8efb4bb5eaa72f1be7f3e0be00ea8b7f125c69cbd1f5fda926f37a2"},
+]
+urllib3 = [
+ {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
+ {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
+]
virtualenv = [
{file = "virtualenv-20.14.1-py2.py3-none-any.whl", hash = "sha256:e617f16e25b42eb4f6e74096b9c9e37713cf10bf30168fb4a739f3fa8f898a3a"},
{file = "virtualenv-20.14.1.tar.gz", hash = "sha256:ef589a79795589aada0c1c5b319486797c03b67ac3984c48c669c0e4f50df3a5"},
@@ -908,3 +1555,7 @@ wasabi = [
{file = "wasabi-0.9.1-py3-none-any.whl", hash = "sha256:217edcb2850993c7931399e7419afccde13539d589e333bc85f9053cf0bb1772"},
{file = "wasabi-0.9.1.tar.gz", hash = "sha256:ada6f13e9b70ef26bf95fad0febdfdebe2005e29a08ad58f4bbae383a97298cf"},
]
+zipp = [
+ {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
+ {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
+]
diff --git a/pyproject.toml b/pyproject.toml
index 074fefb1..d7897257 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,6 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.8,<3.11"
-black = "^22.3.0"
numpy = "^1.22.4"
scipy = "^1.8.1"
pandas = "^1.4.2"
@@ -30,9 +29,18 @@ wasabi = "^0.9.1"
scikit-learn = "^1.1.1"
[tool.poetry.dev-dependencies]
+black = "^22.3.0"
pre-commit = "^2.19.0"
pytest = "^7.1.2"
pytest-cov = "^3.0.0"
+Sphinx = "^5.0.2"
+furo = "^2022.6.21"
+sphinx-copybutton = "^0.5.0"
+sphinxext-opengraph = "^0.6.3"
+myst-parser = "^0.18.0"
+flake8 = "^4.0.1"
+docformatter = "^1.4"
+
[tool.coverage.run]
omit = [
diff --git a/src/psycopmlutils/__init__.py b/src/psycopmlutils/__init__.py
index e69de29b..bcfcb44d 100644
--- a/src/psycopmlutils/__init__.py
+++ b/src/psycopmlutils/__init__.py
@@ -0,0 +1 @@
+from .about import __download_url__, __title__, __version__ # noqa
diff --git a/src/about.py b/src/psycopmlutils/about.py
similarity index 69%
rename from src/about.py
rename to src/psycopmlutils/about.py
index 1cd3cf55..8e6562a3 100644
--- a/src/about.py
+++ b/src/psycopmlutils/about.py
@@ -1,3 +1,3 @@
-__version__ = "0.1.0" # only source of version ID
+__version__ = "0.1.2" # only source of version ID
__title__ = "psycopmlutils"
__download_url__ = "https://github.com/Aarhus-Psychiatry-Research/psycop-ml-utils.git"
diff --git a/src/psycopmlutils/loaders/__init__.py b/src/psycopmlutils/loaders/__init__.py
index 3ca6ce80..b3e16b24 100644
--- a/src/psycopmlutils/loaders/__init__.py
+++ b/src/psycopmlutils/loaders/__init__.py
@@ -1,8 +1,8 @@
-from .load_demographics import LoadDemographic
-from .load_diagnoses import LoadDiagnoses
-from .load_outcomes import LoadOutcome
-from .load_lab_results import LoadLabResults
-from .load_medications import LoadMedications
-from .load_visits import LoadVisits
-from .sql_load import sql_load
-from .load_ids import LoadIDs
+from .load_demographics import LoadDemographic # noqa
+from .load_diagnoses import LoadDiagnoses # noqa
+from .load_ids import LoadIDs # noqa
+from .load_lab_results import LoadLabResults # noqa
+from .load_medications import LoadMedications # noqa
+from .load_outcomes import LoadOutcome # noqa
+from .load_visits import LoadVisits # noqa
+from .sql_load import sql_load # noqa
diff --git a/src/psycopmlutils/loaders/load_demographics.py b/src/psycopmlutils/loaders/load_demographics.py
index b11baa72..a02e0084 100644
--- a/src/psycopmlutils/loaders/load_demographics.py
+++ b/src/psycopmlutils/loaders/load_demographics.py
@@ -1,14 +1,12 @@
import pandas as pd
+
from psycopmlutils.loaders.sql_load import sql_load
from psycopmlutils.utils import data_loaders
-from wasabi import msg
class LoadDemographic:
@data_loaders.register("birthdays")
def birthdays():
- # msg.info("Loading birthdays")
-
view = "[FOR_kohorte_demografi_inkl_2021_feb2022]"
sql = f"SELECT dw_ek_borger, foedselsdato FROM [fct].{view}"
@@ -25,8 +23,6 @@ def birthdays():
@data_loaders.register("male")
def male():
- # msg.info("Loading sexes")
-
view = "[FOR_kohorte_demografi_inkl_2021_feb2022]"
sql = f"SELECT dw_ek_borger, koennavn FROM [fct].{view}"
@@ -40,5 +36,4 @@ def male():
inplace=True,
)
- # msg.good("Loaded sexes")
return df.reset_index(drop=True)
diff --git a/src/psycopmlutils/loaders/load_diagnoses.py b/src/psycopmlutils/loaders/load_diagnoses.py
index 7d4c3f38..ee076b3e 100644
--- a/src/psycopmlutils/loaders/load_diagnoses.py
+++ b/src/psycopmlutils/loaders/load_diagnoses.py
@@ -1,11 +1,9 @@
-from pathlib import Path
from typing import List, Union
-import catalogue
import pandas as pd
+
from psycopmlutils.loaders.sql_load import sql_load
from psycopmlutils.utils import data_loaders
-from wasabi import msg
class LoadDiagnoses:
@@ -14,7 +12,8 @@ def aggregate_from_physical_visits(
output_col_name: str,
wildcard_icd_10_end: bool = False,
) -> pd.DataFrame:
- """Load all diagnoses matching any icd_code in icd_codes. Create output_col_name and set to 1.
+ """Load all diagnoses matching any icd_code in icd_codes. Create
+ output_col_name and set to 1.
Args:
icd_codes (List[str]): List of icd_codes.
@@ -24,8 +23,6 @@ def aggregate_from_physical_visits(
Returns:
pd.DataFrame
"""
- print_str = f"diagnoses matching any of {icd_codes}"
- # msg.info(f"Loading {print_str}")
diagnoses_source_table_info = {
"lpr3": {
@@ -55,8 +52,6 @@ def aggregate_from_physical_visits(
]
df = pd.concat(dfs)
-
- # msg.good(f"Loaded {print_str}")
return df.reset_index(drop=True)
def from_physical_visits(
@@ -64,7 +59,9 @@ def from_physical_visits(
output_col_name: str = "value",
wildcard_icd_10_end: bool = False,
) -> pd.DataFrame:
- """Load diagnoses from all physical visits. If icd_code is a list, will aggregate as one column (e.g. ["E780", "E785"] into a ypercholesterolemia column).
+ """Load diagnoses from all physical visits. If icd_code is a list, will
+ aggregate as one column (e.g. ["E780", "E785"] into a
+        hypercholesterolemia column).
Args:
icd_code (str): Substring to match diagnoses for. Matches any diagnoses, whether a-diagnosis, b-diagnosis etc.
@@ -74,9 +71,6 @@ def from_physical_visits(
Returns:
pd.DataFrame
"""
- print_str = f"diagnoses matching ICD-code {icd_code}"
- # msg.info(f"Loading {print_str}")
-
diagnoses_source_table_info = {
"lpr3": {
"fct": "FOR_LPR3kontakter_psyk_somatik_inkl_2021",
@@ -104,7 +98,6 @@ def from_physical_visits(
df = pd.concat(dfs)
- # msg.good(f"Loaded {print_str}")
return df.reset_index(drop=True)
def _load(
@@ -114,18 +107,24 @@ def _load(
output_col_name: str = None,
wildcard_icd_10_end: bool = True,
) -> pd.DataFrame:
- """Load the visits that have diagnoses that match icd_code from the beginning of their adiagnosekode string.
- Aggregates all that match.
+ """Load the visits that have diagnoses that match icd_code from the
+ beginning of their adiagnosekode string. Aggregates all that match.
Args:
- icd_code (Union[List[str], str]): Substring(s) to match diagnoses for. Matches any diagnoses, whether a-diagnosis, b-diagnosis etc.
- source_timestamp_col_name (str): Name of the timestamp column in the SQL table.
- view (str): Which view to use, e.g. "FOR_Medicin_ordineret_inkl_2021_feb2022"
- output_col_name (str, optional): Name of new column string. Defaults to None.
- wildcard_icd_10_end (bool, optional): Whether to match on icd_code*. Defaults to true.
+ icd_code (Union[List[str], str]): Substring(s) to match diagnoses for.
+ Matches any diagnoses, whether a-diagnosis, b-diagnosis etc.
+ source_timestamp_col_name (str): Name of the timestamp column in the SQL
+ table.
+ view (str): Which view to use, e.g.
+ "FOR_Medicin_ordineret_inkl_2021_feb2022"
+ output_col_name (str, optional): Name of new column string. Defaults to
+ None.
+ wildcard_icd_10_end (bool, optional): Whether to match on icd_code*.
+ Defaults to true.
Returns:
- pd.DataFrame: A pandas dataframe with dw_ek_borger, timestamp and output_col_name = 1
+ pd.DataFrame: A pandas dataframe with dw_ek_borger, timestamp and
+ output_col_name = 1
"""
fct = f"[{fct}]"
@@ -140,11 +139,14 @@ def _load(
match_col_sql_str = " OR ".join(match_col_sql_strings)
else:
- match_col_sql_str = (
+ match_col_sql_str = ( # noqa
f"lower(diagnosegruppestreng) LIKE lower('%{icd_code}{sql_ending}')"
)
- sql = f"SELECT dw_ek_borger, {source_timestamp_col_name}, diagnosegruppestreng FROM [fct].{fct} WHERE ({match_col_sql_str})"
+ sql = (
+ f"SELECT dw_ek_borger, {source_timestamp_col_name}, diagnosegruppestreng"
+ + " FROM [fct].{fct} WHERE ({match_col_sql_str})"
+ )
df = sql_load(sql, database="USR_PS_FORSK", chunksize=None)
@@ -158,7 +160,7 @@ def _load(
return df.rename(
columns={
source_timestamp_col_name: "timestamp",
- }
+ },
)
@data_loaders.register("essential_hypertension")
diff --git a/src/psycopmlutils/loaders/load_ids.py b/src/psycopmlutils/loaders/load_ids.py
index 4536aa9f..185b1bf4 100644
--- a/src/psycopmlutils/loaders/load_ids.py
+++ b/src/psycopmlutils/loaders/load_ids.py
@@ -1,12 +1,11 @@
import pandas as pd
+
from psycopmlutils.loaders.sql_load import sql_load
-from psycopmlutils.utils import data_loaders
-from wasabi import msg
class LoadIDs:
def load(split: str) -> pd.DataFrame:
- """Loads ids for a given split
+ """Loads ids for a given split.
Args:
split (str): Which split to load IDs from. Takes either "train", "test" or "val".
diff --git a/src/psycopmlutils/loaders/load_lab_results.py b/src/psycopmlutils/loaders/load_lab_results.py
index d5145a75..cab73d0f 100644
--- a/src/psycopmlutils/loaders/load_lab_results.py
+++ b/src/psycopmlutils/loaders/load_lab_results.py
@@ -1,8 +1,7 @@
-import catalogue
import pandas as pd
+
from psycopmlutils.loaders.sql_load import sql_load
from psycopmlutils.utils import data_loaders
-from wasabi import msg
class LoadLabResults:
@@ -15,11 +14,11 @@ def blood_sample(blood_sample_id: str) -> pd.DataFrame:
Returns:
pd.DataFrame
"""
- print_str = f"blood samples matching NPU-code {blood_sample_id}"
- # msg.info(f"Loading {print_str}")
-
view = "[FOR_labka_alle_blodprover_inkl_2021_feb2022]"
- sql = f"SELECT dw_ek_borger, datotid_sidstesvar, numerisksvar FROM [fct].{view} WHERE NPUkode = '{blood_sample_id}'"
+ sql = (
+ f"SELECT dw_ek_borger, datotid_sidstesvar, numerisksvar FROM [fct].{view}"
+ + " WHERE NPUkode = '{blood_sample_id}'"
+ )
df = sql_load(sql, database="USR_PS_FORSK", chunksize=None)
@@ -32,7 +31,8 @@ def blood_sample(blood_sample_id: str) -> pd.DataFrame:
return df.reset_index(drop=True)
def _aggregate_blood_samples(blood_sample_ids: list) -> pd.DataFrame:
- """Aggregate multiple blood_sample_ids (typically NPU-codes) into one column.
+ """Aggregate multiple blood_sample_ids (typically NPU-codes) into one
+ column.
Args:
blood_sample_ids (list): List of blood_sample_id, typically an NPU-codes.
@@ -118,7 +118,7 @@ def unscheduled_p_glc():
blood_sample_ids += [f"DNK{suffix}" for suffix in dnk_suffixes]
return LoadLabResults._aggregate_blood_samples(
- blood_sample_ids=blood_sample_ids
+ blood_sample_ids=blood_sample_ids,
)
@data_loaders.register("triglycerides")
@@ -142,7 +142,7 @@ def ldl():
@data_loaders.register("fasting_ldl")
def fasting_ldl():
return LoadLabResults._aggregate_blood_samples(
- blood_sample_ids=["NPU10171", "AAB00102"]
+ blood_sample_ids=["NPU10171", "AAB00102"],
)
@data_loaders.register("alat")
@@ -168,13 +168,13 @@ def crp():
@data_loaders.register("creatinine")
def creatinine():
return LoadLabResults._aggregate_blood_samples(
- blood_sample_ids=["NPU18016", "ASS00355", "ASS00354"]
+ blood_sample_ids=["NPU18016", "ASS00355", "ASS00354"],
)
@data_loaders.register("egfr")
def egfr():
return LoadLabResults._aggregate_blood_samples(
- blood_sample_ids=["DNK35302", "DNK35131", "AAB00345", "AAB00343"]
+ blood_sample_ids=["DNK35302", "DNK35131", "AAB00345", "AAB00343"],
)
@data_loaders.register("albumine_creatinine_ratio")
diff --git a/src/psycopmlutils/loaders/load_medications.py b/src/psycopmlutils/loaders/load_medications.py
index 5a6383ef..a42082ef 100644
--- a/src/psycopmlutils/loaders/load_medications.py
+++ b/src/psycopmlutils/loaders/load_medications.py
@@ -1,14 +1,16 @@
import pandas as pd
-from psycopmlutils.loaders.sql_load import sql_load
-from psycopmlutils.utils import data_loaders
from wasabi import msg
+from psycopmlutils.loaders.sql_load import sql_load
+
class LoadMedications:
def aggregate_medications(
- output_col_name: str, atc_code_prefixes: list
+ output_col_name: str,
+ atc_code_prefixes: list,
) -> pd.DataFrame:
- """Aggregate multiple blood_sample_ids (typically NPU-codes) into one column.
+ """Aggregate multiple blood_sample_ids (typically NPU-codes) into one
+ column.
Args:
output_col_name (str): Name for new column.
@@ -19,7 +21,8 @@ def aggregate_medications(
"""
dfs = [
LoadMedications.load(
- blood_sample_id=f"{id}", output_col_name=output_col_name
+ blood_sample_id=f"{id}",
+ output_col_name=output_col_name,
)
for id in atc_code_prefixes
]
@@ -33,25 +36,32 @@ def load(
load_administered: bool = True,
wildcard_at_end: bool = True,
) -> pd.DataFrame:
- """Load medications. Aggregates prescribed/administered if both true. If wildcard_atc_at_end, match from atc_code*.
- Aggregates all that match. Beware that data is incomplete prior to sep. 2016 for prescribed medications.
+ """Load medications. Aggregates prescribed/administered if both true.
+ If wildcard_atc_at_end, match from atc_code*. Aggregates all that
+ match. Beware that data is incomplete prior to sep. 2016 for prescribed
+ medications.
Args:
- atc_code (str): ATC-code prefix to load. Matches atc_code_prefix*. Aggregates all.
- output_col_name (str, optional): Name of output_col_name. Contains 1 if atc_code matches atc_code_prefix, 0 if not.Defaults to {atc_code_prefix}_value.
- load_prescribed (bool, optional): Whether to load prescriptions. Defaults to True. Beware incomplete until sep 2016.
- load_administered (bool, optional): Whether to load administrations. Defaults to True.
- wildcard_atc_at_end (bool, optional): Whether to match on atc_code* or atc_code.
+ atc_code (str): ATC-code prefix to load. Matches atc_code_prefix*.
+ Aggregates all.
+ output_col_name (str, optional): Name of output_col_name. Contains 1 if
+                atc_code matches atc_code_prefix, 0 if not. Defaults to
+ {atc_code_prefix}_value.
+ load_prescribed (bool, optional): Whether to load prescriptions. Defaults to
+ True. Beware incomplete until sep 2016.
+ load_administered (bool, optional): Whether to load administrations.
+ Defaults to True.
+ wildcard_atc_at_end (bool, optional): Whether to match on atc_code* or
+ atc_code.
Returns:
pd.DataFrame: Cols: dw_ek_borger, timestamp, {atc_code_prefix}_value = 1
"""
- print_str = f"medications matching NPU-code {atc_code}"
- # msg.info(f"Loading {print_str}")
if load_prescribed:
msg.warn(
- "Beware, there are missing prescriptions until september 2019. Hereafter, data is complete."
+ "Beware, there are missing prescriptions until september 2019. "
+ "Hereafter, data is complete.",
)
df = pd.DataFrame()
@@ -76,17 +86,16 @@ def load(
)
df = pd.concat([df, df_medication_administered])
- if output_col_name == None:
+ if output_col_name is None:
output_col_name = atc_code
df.rename(
columns={
- atc_code: f"value",
+ atc_code: "value",
},
inplace=True,
)
- # msg.good(f"Loaded {print_str}")
return df.reset_index(drop=True)
def _load_one_source(
@@ -96,27 +105,37 @@ def _load_one_source(
output_col_name: str = None,
wildcard_atc_at_end: bool = False,
) -> pd.DataFrame:
- """Load the prescribed medications that match atc. If wildcard_atc_at_end, match from atc_code*.
- Aggregates all that match. Beware that data is incomplete prior to sep. 2016 for prescribed medications.
+ """Load the prescribed medications that match atc. If
+ wildcard_atc_at_end, match from atc_code*. Aggregates all that match.
+ Beware that data is incomplete prior to sep. 2016 for prescribed
+ medications.
Args:
atc_code (str): ATC string to match on.
- source_timestamp_col_name (str): Name of the timestamp column in the SQL table.
- view (str): Which view to use, e.g. "FOR_Medicin_ordineret_inkl_2021_feb2022"
- output_col_name (str, optional): Name of new column string. Defaults to None.
- wildcard_atc_at_end (bool, optional): Whether to match on atc_code* or atc_code.
+ source_timestamp_col_name (str): Name of the timestamp column in the SQL
+ table.
+ view (str): Which view to use, e.g.
+ "FOR_Medicin_ordineret_inkl_2021_feb2022"
+ output_col_name (str, optional): Name of new column string. Defaults to
+ None.
+ wildcard_atc_at_end (bool, optional): Whether to match on atc_code* or
+ atc_code.
Returns:
- pd.DataFrame: A pandas dataframe with dw_ek_borger, timestamp and output_col_name = 1
+ pd.DataFrame: A pandas dataframe with dw_ek_borger, timestamp and
+ output_col_name = 1
"""
if wildcard_atc_at_end:
end_of_sql = "%"
else:
- end_of_sql = ""
+ end_of_sql = "" # noqa
view = f"[{view}]"
- sql = f"SELECT dw_ek_borger, {source_timestamp_col_name}, atc FROM [fct].{view} WHERE (lower(atc)) LIKE lower('{atc_code}{end_of_sql}')"
+ sql = (
+ f"SELECT dw_ek_borger, {source_timestamp_col_name}, atc FROM [fct].{view}"
+ + " WHERE (lower(atc)) LIKE lower('{atc_code}{end_of_sql}')"
+ )
df = sql_load(sql, database="USR_PS_FORSK", chunksize=None)
@@ -130,5 +149,5 @@ def _load_one_source(
return df.rename(
columns={
source_timestamp_col_name: "timestamp",
- }
+ },
)
diff --git a/src/psycopmlutils/loaders/load_outcomes.py b/src/psycopmlutils/loaders/load_outcomes.py
index 806e9ddb..325cb3d2 100644
--- a/src/psycopmlutils/loaders/load_outcomes.py
+++ b/src/psycopmlutils/loaders/load_outcomes.py
@@ -1,7 +1,8 @@
+import pandas as pd
from wasabi import msg
+
from psycopmlutils.loaders.sql_load import sql_load
from psycopmlutils.utils import data_loaders
-import pandas as pd
class LoadOutcome:
diff --git a/src/psycopmlutils/loaders/load_visits.py b/src/psycopmlutils/loaders/load_visits.py
index f477b870..6feca2be 100644
--- a/src/psycopmlutils/loaders/load_visits.py
+++ b/src/psycopmlutils/loaders/load_visits.py
@@ -1,6 +1,7 @@
-from psycopmlutils.loaders.sql_load import sql_load
from wasabi import msg
+from psycopmlutils.loaders.sql_load import sql_load
+
class LoadVisits:
def physical_visits_to_psychiatry():
diff --git a/src/psycopmlutils/loaders/sql_load.py b/src/psycopmlutils/loaders/sql_load.py
index c34f0815..b6cec102 100644
--- a/src/psycopmlutils/loaders/sql_load.py
+++ b/src/psycopmlutils/loaders/sql_load.py
@@ -4,7 +4,6 @@
import pandas as pd
from sqlalchemy import create_engine
-from sqlalchemy.pool import NullPool
def sql_load(
@@ -14,15 +13,17 @@ def sql_load(
chunksize: Optional[int] = None,
format_timestamp_cols_to_datetime: bool = True,
) -> Union[pd.DataFrame, Generator[pd.DataFrame, None, None]]:
- """Function to load a SQL query. If chunksize is None, all data will be loaded into memory.
- Otherwise, will stream the data in chunks of chunksize as a generator
+ """Function to load a SQL query. If chunksize is None, all data will be
+ loaded into memory. Otherwise, will stream the data in chunks of chunksize
+ as a generator.
Args:
query (str): The SQL query
server (str): The BI server
database (str): The BI database
chunksize (int, optional): Defaults to 1000.
- format_timestamp_cols_to_datetime (bool, optional): Whether to format all columns with "datotid" in their name as pandas datetime. Defaults to true.
+ format_timestamp_cols_to_datetime (bool, optional): Whether to format all
+ columns with "datotid" in their name as pandas datetime. Defaults to true.
Returns:
Union[pd.DataFrame, Generator[pd.DataFrame]]: DataFrame or generator of DataFrames
@@ -32,19 +33,21 @@ def sql_load(
>>> view = "[FOR_SFI_fritekst_resultat_udfoert_i_psykiatrien_aendret_2011]"
>>> sql = "SELECT * FROM [fct]." + view
>>> df = sql_load(sql, chunksize = None)
-
"""
driver = "SQL Server"
params = urllib.parse.quote(
- "DRIVER={0};SERVER={1};DATABASE={2};Trusted_Connection=yes".format(
- driver, server, database
- )
+ "DRIVER={};SERVER={};DATABASE={};Trusted_Connection=yes".format(
+ driver,
+ server,
+ database,
+ ),
)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")
conn = engine.connect().execution_options(
- stream_results=True, fast_executemany=True
+ stream_results=True,
+ fast_executemany=True,
)
df = pd.read_sql(query, conn, chunksize=chunksize)
diff --git a/src/psycopmlutils/model_performance/__init__.py b/src/psycopmlutils/model_performance/__init__.py
index 6fd36429..c7de8188 100644
--- a/src/psycopmlutils/model_performance/__init__.py
+++ b/src/psycopmlutils/model_performance/__init__.py
@@ -1 +1 @@
-from .model_performance import ModelPerformance
+from .model_performance import ModelPerformance # noqa
diff --git a/src/psycopmlutils/model_performance/model_performance.py b/src/psycopmlutils/model_performance/model_performance.py
index d6ae048d..95990c1c 100644
--- a/src/psycopmlutils/model_performance/model_performance.py
+++ b/src/psycopmlutils/model_performance/model_performance.py
@@ -5,14 +5,6 @@
import numpy as np
import pandas as pd
-from psycopmlutils.model_performance.utils import (
- add_metadata_cols,
- aggregate_predictions,
- get_metadata_cols,
- idx_to_class,
- labels_to_int,
- scores_to_probs,
-)
from sklearn.metrics import (
accuracy_score,
confusion_matrix,
@@ -22,6 +14,15 @@
roc_auc_score,
)
+from psycopmlutils.model_performance.utils import (
+ add_metadata_cols,
+ aggregate_predictions,
+ get_metadata_cols,
+ idx_to_class,
+ labels_to_int,
+ scores_to_probs,
+)
+
class ModelPerformance:
"""Class to generate model performances."""
@@ -87,7 +88,9 @@ def performance_metrics_from_df(
if metadata_col_names:
# Add metadata if specified
metadata = get_metadata_cols(
- df, metadata_col_names, skip=[prediction_col_name, label_col_name]
+ df,
+ metadata_col_names,
+ skip=[prediction_col_name, label_col_name],
)
performance = add_metadata_cols(performance, metadata)
return performance
@@ -123,7 +126,7 @@ def performance_metrics_from_file(
path = Path(path)
if path.suffix != ".jsonl":
raise ValueError(
- f"Only .jsonl files are supported for import, not {path.suffix}"
+ f"Only .jsonl files are supported for import, not {path.suffix}",
)
df = pd.read_json(path, orient="records", lines=True)
return ModelPerformance.performance_metrics_from_df(
@@ -147,7 +150,8 @@ def performance_metrics_from_folder(
id2label: Optional[Dict[int, str]] = None,
to_wide=False,
) -> pd.DataFrame:
- """Load and calculates performance metrics for all files matching a pattern in a folder.
+ """Load and calculates performance metrics for all files matching a
+ pattern in a folder.
Only supports jsonl for now.
@@ -187,7 +191,8 @@ def _evaluate_single_model(
to_wide: bool,
id2label: Dict[int, str] = None,
) -> pd.DataFrame:
- """Calculate performance metrics from a dataframe. Optionally adds aggregated performance by id.
+ """Calculate performance metrics from a dataframe. Optionally adds
+ aggregated performance by id.
Args:
df (pd.DataFrame): Dataframe with one prediction per row
@@ -205,7 +210,10 @@ def _evaluate_single_model(
"""
if aggregate_by_id:
df = aggregate_predictions(
- df, id_col_name, prediction_col_name, label_col_name
+ df,
+ id_col_name,
+ prediction_col_name,
+ label_col_name,
)
# get predicted labels
@@ -216,7 +224,9 @@ def _evaluate_single_model(
predictions = np.round(df[prediction_col_name])
metrics = ModelPerformance.compute_metrics(
- df[label_col_name], predictions, to_wide
+ df[label_col_name],
+ predictions,
+ to_wide,
)
# calculate roc if binary model
@@ -238,9 +248,12 @@ def _evaluate_single_model(
@staticmethod
def calculate_roc_auc(
- labels: Union[pd.Series, List], predicted: Union[pd.Series, List], to_wide: bool
+ labels: Union[pd.Series, List],
+ predicted: Union[pd.Series, List],
+ to_wide: bool,
) -> pd.DataFrame:
- """Calculate the area under the receiver operating characteristic curve.
+ """Calculate the area under the receiver operating characteristic
+ curve.
Potentially extendable to calculate other metrics that require probabilities
instead of label predictions
@@ -257,7 +270,7 @@ def calculate_roc_auc(
return pd.DataFrame([{"auc-overall": roc_auc}])
else:
return pd.DataFrame(
- [{"class": "overall", "score_type": "auc", "value": roc_auc}]
+ [{"class": "overall", "score_type": "auc", "value": roc_auc}],
)
@staticmethod
@@ -266,7 +279,8 @@ def compute_metrics(
predicted: Union[pd.Series, List[Union[str, int]]],
to_wide: bool,
) -> pd.DataFrame:
- """Compute a whole bunch of performance metrics for both binary and multiclass tasks.
+ """Compute a whole bunch of performance metrics for both binary and
+ multiclass tasks.
Args:
labels (Union[pd.Series, List]): True class
@@ -284,16 +298,24 @@ def compute_metrics(
performance["f1_macro-overall"] = f1_score(labels, predicted, average="macro")
performance["f1_micro-overall"] = f1_score(labels, predicted, average="micro")
performance["precision_macro-overall"] = precision_score(
- labels, predicted, average="macro"
+ labels,
+ predicted,
+ average="macro",
)
performance["precision_micro-overall"] = precision_score(
- labels, predicted, average="micro"
+ labels,
+ predicted,
+ average="micro",
)
performance["recall_macro-overall"] = recall_score(
- labels, predicted, average="macro"
+ labels,
+ predicted,
+ average="macro",
)
performance["recall_micro-overall"] = recall_score(
- labels, predicted, average="micro"
+ labels,
+ predicted,
+ average="micro",
)
performance["confusion_matrix-overall"] = confusion_matrix(labels, predicted)
@@ -315,7 +337,9 @@ def compute_metrics(
performance = pd.melt(performance)
# split score and class into two columns
performance[["score_type", "class"]] = performance["variable"].str.split(
- "-", 1, expand=True
+ "-",
+ 1,
+ expand=True,
)
# drop unused columns and rearrange
performance = performance[["class", "score_type", "value"]]
@@ -343,7 +367,7 @@ def compute_metrics(
],
"label": ["ASD", "ASD", "DEPR", "DEPR", "TD", "TD", "SCHZ", "SCHZ"],
"model_name": ["test"] * 8,
- }
+ },
)
id2label = {0: "ASD", 1: "DEPR", 2: "TD", 3: "SCHZ"}
@@ -364,7 +388,7 @@ def compute_metrics(
"label": ["TD", "TD", "DEPR", "DEPR"],
"optional_grouping1": ["grouping1"] * 4,
"optional_grouping2": ["grouping2"] * 4,
- }
+ },
)
binary_res = ModelPerformance.performance_metrics_from_df(
diff --git a/src/psycopmlutils/model_performance/utils.py b/src/psycopmlutils/model_performance/utils.py
index 50044049..94e7d360 100644
--- a/src/psycopmlutils/model_performance/utils.py
+++ b/src/psycopmlutils/model_performance/utils.py
@@ -1,4 +1,4 @@
-from typing import List, TypeVar, Union, Dict
+from typing import Dict, List, TypeVar, Union
import numpy as np
import pandas as pd
@@ -12,7 +12,8 @@
def scores_to_probs(scores: Union[SeriesListOfFloats, SeriesOfFloats]) -> Series:
- """Converts a series of lists of probabilities for each class or a list of floats for binary classification a list of floats of maximum length 2.
+ """Converts a series of lists of probabilities for each class or a list of
+    floats for binary classification to a list of floats of maximum length 2.
Args:
scores (Union[Series[List[float]], Series[float]]): Series containing probabilities for each class or a list of floats for binary classification.
@@ -28,7 +29,8 @@ def scores_to_probs(scores: Union[SeriesListOfFloats, SeriesOfFloats]) -> Series
def labels_to_int(
- labels: Union[SeriesOfStr, SeriesOfInt], label2id: Dict[str, int]
+ labels: Union[SeriesOfStr, SeriesOfInt],
+ label2id: Dict[str, int],
) -> Series:
"""Converts label to int mapping. Only makes sense for binary models. If
already int will return as is.
@@ -48,7 +50,10 @@ def labels_to_int(
def aggregate_predictions(
- df: pd.DataFrame, id_col: str, predictions_col: str, label_col: str
+ df: pd.DataFrame,
+ id_col: str,
+ predictions_col: str,
+ label_col: str,
):
"""Calculates the mean prediction by a grouping col (id_col).
@@ -67,12 +72,12 @@ def get_first_entry(x: pd.Series):
return x.unique()[0]
return df.groupby(id_col).agg(
- {predictions_col: mean_scores, label_col: get_first_entry}
+ {predictions_col: mean_scores, label_col: get_first_entry},
)
def idx_to_class(idx: List[int], mapping: dict) -> List[str]:
- """Returns the label from an id2label mapping
+ """Returns the label from an id2label mapping.
Args:
idx (List[int]): index
@@ -85,9 +90,11 @@ def idx_to_class(idx: List[int], mapping: dict) -> List[str]:
def get_metadata_cols(
- df: pd.DataFrame, cols: List[str], skip: List[str]
+ df: pd.DataFrame,
+ cols: List[str],
+ skip: List[str],
) -> pd.DataFrame:
- """Extracts model metadata to a 1 row dataframe
+ """Extracts model metadata to a 1 row dataframe.
Args:
df (pd.DataFrame): Dataframe with predictions and metadata.
@@ -125,18 +132,19 @@ def get_metadata_cols(
val = df[col].unique()
if len(val) > 1:
raise ValueError(
- f"The column '{col}' contains more than one unique value."
+ f"The column '{col}' contains more than one unique value.",
)
metadata[col] = val[0]
else:
raise ValueError(
- f"The metadata column '{col}' is not contained in the data"
+ f"The metadata column '{col}' is not contained in the data",
)
return pd.DataFrame.from_records([metadata])
def add_metadata_cols(df: pd.DataFrame, metadata: pd.DataFrame) -> pd.DataFrame:
- """Adds 1 row dataframe with metadata to the long format performance dataframe
+ """Adds 1 row dataframe with metadata to the long format performance
+ dataframe.
Args:
df (pd.DataFrame): Dataframe to add metadata to.
diff --git a/src/psycopmlutils/timeseriesflattener/__init__.py b/src/psycopmlutils/timeseriesflattener/__init__.py
index 496e2ae4..a0bbe874 100644
--- a/src/psycopmlutils/timeseriesflattener/__init__.py
+++ b/src/psycopmlutils/timeseriesflattener/__init__.py
@@ -1,2 +1,2 @@
-from .create_feature_combinations import create_feature_combinations
-from .flattened_dataset import FlattenedDataset
+from .create_feature_combinations import create_feature_combinations # noqa
+from .flattened_dataset import FlattenedDataset # noqa
diff --git a/src/psycopmlutils/timeseriesflattener/create_feature_combinations.py b/src/psycopmlutils/timeseriesflattener/create_feature_combinations.py
index 5528f3c8..59555a4f 100644
--- a/src/psycopmlutils/timeseriesflattener/create_feature_combinations.py
+++ b/src/psycopmlutils/timeseriesflattener/create_feature_combinations.py
@@ -2,7 +2,7 @@
def list_has_dict_with_list_as_value(
- list_of_dicts: List[Dict[str, Union[str, list]]]
+ list_of_dicts: List[Dict[str, Union[str, list]]],
) -> bool:
"""Checks if any dict in a list of dicts has a value that is a list.
@@ -20,9 +20,7 @@ def list_has_dict_with_list_as_value(
def dict_has_list_in_any_value(dict: Dict[str, Union[str, list]]) -> bool:
- """
- Checks if a dict has any values that are lists
- """
+ """Checks if a dict has any values that are lists."""
for value in dict.values():
if type(value) == list:
return True
@@ -32,7 +30,8 @@ def dict_has_list_in_any_value(dict: Dict[str, Union[str, list]]) -> bool:
def create_feature_combinations(
arg_sets: List[Dict[str, Union[str, list]]],
) -> List[Dict[str, Union[str, float, int]]]:
- """Create feature combinations from a dictionary of feature specifications. See example for shape.
+ """Create feature combinations from a dictionary of feature specifications.
+ See example for shape.
Args:
arg_sets (List[Dict[str, Union[str, list]]]): A set of argument sets for .add_predictor. See example for shape.
diff --git a/src/psycopmlutils/timeseriesflattener/flattened_dataset.py b/src/psycopmlutils/timeseriesflattener/flattened_dataset.py
index 9d6d4631..eb606a37 100644
--- a/src/psycopmlutils/timeseriesflattener/flattened_dataset.py
+++ b/src/psycopmlutils/timeseriesflattener/flattened_dataset.py
@@ -6,9 +6,10 @@
import pandas as pd
from catalogue import Registry # noqa
from pandas import DataFrame
+from wasabi import msg
+
from psycopmlutils.timeseriesflattener.resolve_multiple_functions import resolve_fns
from psycopmlutils.utils import data_loaders
-from wasabi import msg
class FlattenedDataset:
@@ -21,7 +22,8 @@ def __init__(
timestamp_col_name: str = "timestamp",
n_workers: int = 60,
):
- """Class containing a time-series, flattened. A 'flattened' version is a tabular representation for each prediction time.
+ """Class containing a time-series, flattened. A 'flattened' version is
+ a tabular representation for each prediction time.
A prediction time is every timestamp where you want your model to issue a prediction.
@@ -60,7 +62,7 @@ def __init__(
for col_name in [self.timestamp_col_name, self.id_col_name]:
if col_name not in self.df.columns:
raise ValueError(
- f"{col_name} does not exist in prediction_times_df, change the df or set another argument"
+ f"{col_name} does not exist in prediction_times_df, change the df or set another argument",
)
# Check timestamp col type
@@ -69,16 +71,16 @@ def __init__(
if timestamp_col_type not in ["Timestamp"]:
try:
self.df[self.timestamp_col_name] = pd.to_datetime(
- self.df[self.timestamp_col_name]
+ self.df[self.timestamp_col_name],
)
- except:
+ except Exception:
raise ValueError(
- f"prediction_times_df: {self.timestamp_col_name} is of type {timestamp_col_type}, and could not be converted to 'Timestamp' from Pandas. Will cause problems. Convert before initialising FlattenedDataset."
+ f"prediction_times_df: {self.timestamp_col_name} is of type {timestamp_col_type}, and could not be converted to 'Timestamp' from Pandas. Will cause problems. Convert before initialising FlattenedDataset.",
)
         # Create pred_time_uuid column
self.df[self.pred_time_uuid_col_name] = self.df[self.id_col_name].astype(
- str
+ str,
) + self.df[self.timestamp_col_name].dt.strftime("-%Y-%m-%d-%H-%M-%S")
self.loaders_catalogue = data_loaders
@@ -141,19 +143,19 @@ def add_temporal_predictors_from_list_of_argument_dictionaries(
if resolve_multiple_fns is not None:
try:
resolved_func = resolve_multiple_fns.get(
- [arg_dict["resolve_multiple"]]
+ [arg_dict["resolve_multiple"]],
)
- except:
+ except Exception:
pass
try:
resolved_func = resolve_fns.get(arg_dict["resolve_multiple"])
- except:
+ except Exception:
pass
if not isinstance(resolved_func, Callable):
raise ValueError(
- "resolve_function neither is nor resolved to a Callable"
+ "resolve_function neither is nor resolved to a Callable",
)
# Rename arguments for create_flattened_df_for_val
@@ -181,7 +183,7 @@ def add_temporal_predictors_from_list_of_argument_dictionaries(
try:
arg_dict["values_df"] = predictor_dfs[arg_dict["values_df"]]
- except:
+ except Exception:
# Error handling in _validate_processed_arg_dicts
# to handle in bulk
pass
@@ -197,7 +199,7 @@ def add_temporal_predictors_from_list_of_argument_dictionaries(
]
processed_arg_dicts.append(
- select_and_assert_keys(dictionary=arg_dict, key_list=required_keys)
+ select_and_assert_keys(dictionary=arg_dict, key_list=required_keys),
)
# Validate dicts before starting pool, saves time if errors!
@@ -206,7 +208,8 @@ def add_temporal_predictors_from_list_of_argument_dictionaries(
pool = Pool(self.n_workers)
flattened_predictor_dfs = pool.map(
- self._flatten_temporal_values_to_df_wrapper, processed_arg_dicts
+ self._flatten_temporal_values_to_df_wrapper,
+ processed_arg_dicts,
)
flattened_predictor_dfs = [
@@ -235,7 +238,7 @@ def _validate_processed_arg_dicts(self, arg_dicts: list):
for d in arg_dicts:
if not isinstance(d["values_df"], (DataFrame, Callable)):
msg.warn(
- f"values_df resolves to neither a Callable nor a DataFrame in {d}"
+ f"values_df resolves to neither a Callable nor a DataFrame in {d}",
)
warn = True
@@ -249,11 +252,12 @@ def _validate_processed_arg_dicts(self, arg_dicts: list):
if warn:
raise ValueError(
- "Errors in argument dictionaries, didn't generate any features."
+ "Errors in argument dictionaries, didn't generate any features.",
)
def _flatten_temporal_values_to_df_wrapper(self, kwargs_dict: Dict) -> DataFrame:
- """Wrap flatten_temporal_values_to_df with kwargs for multithreading pool.
+ """Wrap flatten_temporal_values_to_df with kwargs for multithreading
+ pool.
Args:
kwargs_dict (Dict): Dictionary of kwargs
@@ -290,9 +294,9 @@ def add_age(
id_to_date_of_birth_mapping[date_of_birth_col_name],
format="%Y-%m-%d",
)
- except:
+ except Exception:
raise ValueError(
- f"Conversion of {date_of_birth_col_name} to datetime failed, doesn't match format %Y-%m-%d. Recommend converting to datetime before adding."
+ f"Conversion of {date_of_birth_col_name} to datetime failed, doesn't match format %Y-%m-%d. Recommend converting to datetime before adding.",
)
self.add_static_predictor(id_to_date_of_birth_mapping)
@@ -397,7 +401,8 @@ def add_temporal_predictor(
source_values_col_name: str = "value",
new_col_name: str = None,
):
- """Add a column with predictor values to the flattened dataset (e.g. "average value of bloodsample within n days").
+ """Add a column with predictor values to the flattened dataset (e.g.
+ "average value of bloodsample within n days").
Args:
predictor_df (DataFrame): A table in wide format. Required columns: patient_id, timestamp, value.
@@ -429,7 +434,8 @@ def add_temporal_col_to_flattened_dataset(
is_fallback_prop_warning_threshold: float = 0.9,
keep_val_timestamp: bool = False,
):
- """Add a column to the dataset (either predictor or outcome depending on the value of "direction").
+ """Add a column to the dataset (either predictor or outcome depending
+ on the value of "direction").
Args:
values_df (DataFrame): A table in wide format. Required columns: patient_id, timestamp, value.
@@ -448,7 +454,7 @@ def add_temporal_col_to_flattened_dataset(
if timestamp_col_type not in ["Timestamp"]:
raise ValueError(
- f"{self.timestamp_col_name} is of type {timestamp_col_type}, not 'Timestamp' from Pandas. Will cause problems. Convert before initialising FlattenedDataset."
+ f"{self.timestamp_col_name} is of type {timestamp_col_type}, not 'Timestamp' from Pandas. Will cause problems. Convert before initialising FlattenedDataset.",
)
df = FlattenedDataset.flatten_temporal_values_to_df(
@@ -487,32 +493,42 @@ def flatten_temporal_values_to_df(
keep_val_timestamp: bool = False,
) -> DataFrame:
- """Create a dataframe with flattened values (either predictor or outcome depending on the value of "direction").
+ """Create a dataframe with flattened values (either predictor or
+ outcome depending on the value of "direction").
Args:
- prediction_times_with_uuid_df (DataFrame): Dataframe with id_col and timestamps for each prediction time.
- values_df (Union[Callable, DataFrame]): A dataframe or callable resolving to a dataframe containing id_col, timestamp and value cols.
+ prediction_times_with_uuid_df (DataFrame): Dataframe with id_col and
+ timestamps for each prediction time.
+ values_df (Union[Callable, DataFrame]): A dataframe or callable resolving to
+ a dataframe containing id_col, timestamp and value cols.
direction (str): Whether to look "ahead" or "behind" the prediction time.
interval_days (float): How far to look in each direction.
- resolve_multiple (Union[Callable, str]): How to handle multiple values within interval_days. Takes either
+ resolve_multiple (Union[Callable, str]): How to handle multiple values
+ within interval_days. Takes either
i) a function that takes a list as an argument and returns a float, or
ii) a str mapping to a callable from the resolve_multiple_fn catalogue.
- fallback (Union[float, str]): Which value to put if no value within the lookahead. "NaN" for Pandas NA.
- id_col_name (str): Name of id_column in prediction_times_with_uuid_df and values_df.
- Required because this is a static method.
- timestamp_col_name (str): Name of timestamp column in prediction_times_with_uuid_df and values_df.
- Required because this is a static method.
- pred_time_uuid_col_name (str): Name of uuid column in prediction_times_with_uuid_df.
- Required because this is a static method.
- new_col_name (Optional[str], optional): Name of new column in returned dataframe. .
- source_values_col_name (str, optional): Name of column containing values in values_df. Defaults to "value".
- is_fallback_prop_warning_threshold (float, optional): Triggers a ValueError if proportion of
- prediction_times that receive fallback is larger than threshold.
- Indicates unlikely to be a learnable feature. Defaults to 0.9.
- keep_val_timestamp (bool, optional): Whether to keep the timestamp for the temporal value as a separate column. Defaults to False.
+ fallback (Union[float, str]): Which value to put if no value within the
+ lookahead. "NaN" for Pandas NA.
+ id_col_name (str): Name of id_column in prediction_times_with_uuid_df and
+ values_df. Required because this is a static method.
+ timestamp_col_name (str): Name of timestamp column in
+ prediction_times_with_uuid_df and values_df. Required because this is a
+ static method.
+ pred_time_uuid_col_name (str): Name of uuid column in
+ prediction_times_with_uuid_df. Required because this is a static method.
+ new_col_name (Optional[str], optional): Name of new column in returned
+ dataframe.
+ source_values_col_name (str, optional): Name of column containing values in
+ values_df. Defaults to "value".
+ is_fallback_prop_warning_threshold (float, optional): Triggers a ValueError
+ if proportion of prediction_times that receive fallback is larger than
+ threshold. Indicates unlikely to be a learnable feature. Defaults to
+ 0.9.
+ keep_val_timestamp (bool, optional): Whether to keep the timestamp for the
+ temporal value as a separate column. Defaults to False.
Returns:
- DataFrame:
+ DataFrame
"""
# Resolve values_df if not already a dataframe.
@@ -525,7 +541,7 @@ def flatten_temporal_values_to_df(
for col_name in [timestamp_col_name, id_col_name]:
if col_name not in values_df.columns:
raise ValueError(
- f"{col_name} does not exist in df_prediction_times, change the df or set another argument"
+ f"{col_name} does not exist in df_prediction_times, change the df or set another argument",
)
# Rename column
@@ -584,7 +600,7 @@ def flatten_temporal_values_to_df(
> is_fallback_prop_warning_threshold
):
msg.warn(
- f"""{full_col_str}: Beware, {prop_of_values_that_are_fallback*100}% of rows contain the fallback value, indicating that it is unlikely to be a learnable feature. Consider redefining. You can generate the feature anyway by passing an is_fallback_prop_warning_threshold argument with a higher threshold or None."""
+ f"""{full_col_str}: Beware, {prop_of_values_that_are_fallback*100}% of rows contain the fallback value, indicating that it is unlikely to be a learnable feature. Consider redefining. You can generate the feature anyway by passing an is_fallback_prop_warning_threshold argument with a higher threshold or None.""",
)
if low_variance_threshold is not None:
@@ -593,7 +609,7 @@ def flatten_temporal_values_to_df(
)
if variance_as_fraction_of_mean < low_variance_threshold:
msg.warn(
- f"""{full_col_str}: Beware, variance / mean < low_variance_threshold ({variance_as_fraction_of_mean} < {low_variance_threshold}), indicating high risk of overfitting. Consider redefining. You can generate the feature anyway by passing an low_variance_threshold argument with a lower threshold or None."""
+ f"""{full_col_str}: Beware, variance / mean < low_variance_threshold ({variance_as_fraction_of_mean} < {low_variance_threshold}), indicating high risk of overfitting. Consider redefining. You can generate the feature anyway by passing an low_variance_threshold argument with a lower threshold or None.""",
)
msg.good(f"Returning flattened dataframe with {full_col_str}")
@@ -619,7 +635,8 @@ def add_back_prediction_times_without_value(
pred_times_with_uuid: DataFrame,
pred_time_uuid_colname: str,
) -> DataFrame:
- """Ensure all prediction times are represented in the returned dataframe.
+ """Ensure all prediction times are represented in the returned
+ dataframe.
Args:
df (DataFrame):
@@ -642,7 +659,8 @@ def resolve_multiple_values_within_interval_days(
timestamp_col_name: str,
pred_time_uuid_colname: str,
) -> DataFrame:
- """Apply the resolve_multiple function to prediction_times where there are multiple values within the interval_days lookahead.
+ """Apply the resolve_multiple function to prediction_times where there
+ are multiple values within the interval_days lookahead.
Args:
resolve_multiple (Callable): Takes a grouped df and collapses each group to one record (e.g. sum, count etc.).
@@ -672,7 +690,8 @@ def drop_records_outside_interval_days(
timestamp_pred_colname: str,
timestamp_value_colname: str,
) -> DataFrame:
- """Keep only rows where timestamp_value is within interval_days in direction of timestamp_pred.
+ """Keep only rows where timestamp_value is within interval_days in
+ direction of timestamp_pred.
Args:
direction (str): Whether to look ahead or behind.
@@ -705,13 +724,15 @@ def drop_records_outside_interval_days(
else:
raise ValueError("direction can only be 'ahead' or 'behind'")
- return df[df["is_in_interval"] == True].drop(
- ["is_in_interval", "time_from_pred_to_val_in_days"], axis=1
+ return df[df["is_in_interval"]].drop(
+ ["is_in_interval", "time_from_pred_to_val_in_days"],
+ axis=1,
)
def select_and_assert_keys(dictionary: Dict, key_list: List[str]) -> Dict:
- """Keep only the keys in the dictionary that are in key_order, and orders them as in the lsit.
+ """Keep only the keys in the dictionary that are in key_order, and orders
+ them as in the lsit.
Args:
dict (Dict): Dictionary to process
diff --git a/src/psycopmlutils/writers/sql_writer.py b/src/psycopmlutils/writers/sql_writer.py
index 43e31392..17150c55 100644
--- a/src/psycopmlutils/writers/sql_writer.py
+++ b/src/psycopmlutils/writers/sql_writer.py
@@ -1,12 +1,9 @@
import urllib
import urllib.parse
-from multiprocessing.sharedctypes import Value
import pandas as pd
from sqlalchemy import create_engine
-from sqlalchemy.pool import NullPool
from tqdm import tqdm
-from wasabi import msg
def chunker(seq, size):
@@ -15,7 +12,11 @@ def chunker(seq, size):
def insert_with_progress(
- df: pd.DataFrame, table_name: str, conn, rows_per_chunk: int, if_exists: str
+ df: pd.DataFrame,
+ table_name: str,
+ conn,
+ rows_per_chunk: int,
+ if_exists: str,
):
"""Chunk dataframe and insert each chunk, showing a progress bar.
@@ -62,7 +63,7 @@ def write_df_to_sql(
driver = "SQL Server"
params = urllib.parse.quote(
- f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes"
+ f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes",
)
url = f"mssql+pyodbc:///?odbc_connect={params}"
diff --git a/tests/test_model_performance/test_model_performance.py b/tests/test_model_performance/test_model_performance.py
index 439c849e..b06a2eb4 100644
--- a/tests/test_model_performance/test_model_performance.py
+++ b/tests/test_model_performance/test_model_performance.py
@@ -2,8 +2,8 @@
import pandas as pd
import pytest
+
from psycopmlutils.model_performance import ModelPerformance
-from sklearn.model_selection import PredefinedSplit
@pytest.fixture(scope="function")
@@ -27,7 +27,7 @@ def multiclass_df():
],
"label": ["ASD", "ASD", "DEPR", "DEPR", "TD", "TD", "SCHZ", "SCHZ"],
"model_name": ["test"] * 8,
- }
+ },
)
@@ -40,7 +40,7 @@ def binary_df():
"label": ["TD", "TD", "DEPR", "DEPR"],
"optional_grouping1": ["grouping1"] * 4,
"optional_grouping2": ["grouping2"] * 4,
- }
+ },
)
@@ -97,7 +97,8 @@ def test_binary_transform_from_dataframe(binary_df, binary_score_mapping):
def test_binary_transform_from_dataframe_with_float(
- binary_float_df, binary_score_mapping
+ binary_float_df,
+ binary_score_mapping,
):
res = ModelPerformance.performance_metrics_from_df(
diff --git a/tests/test_timeseriesflattener/test_add_values.py b/tests/test_timeseriesflattener/test_add_values.py
index da524a71..023c4856 100644
--- a/tests/test_timeseriesflattener/test_add_values.py
+++ b/tests/test_timeseriesflattener/test_add_values.py
@@ -1,20 +1,17 @@
import numpy as np
import pandas as pd
import pytest
-from psycopmlutils.timeseriesflattener import (
- FlattenedDataset,
- create_feature_combinations,
-)
-from psycopmlutils.timeseriesflattener.resolve_multiple_functions import (
- get_max_in_group,
-)
-
from utils_for_testing import (
assert_flattened_outcome_as_expected,
assert_flattened_predictor_as_expected,
str_to_df,
)
+from psycopmlutils.timeseriesflattener import (
+ FlattenedDataset,
+ create_feature_combinations,
+)
+
# Predictors
def test_predictor_after_prediction_time():
@@ -124,7 +121,8 @@ def test_raise_error_if_timestamp_col_not_timestamp_type():
"""
df_prediction_times = str_to_df(
- prediction_times_df_str, convert_timestamp_to_datetime=True
+ prediction_times_df_str,
+ convert_timestamp_to_datetime=True,
)
df_event_times = str_to_df(outcome_df_str, convert_timestamp_to_datetime=False)
@@ -136,7 +134,10 @@ def test_raise_error_if_timestamp_col_not_timestamp_type():
with pytest.raises(ValueError):
dataset.add_temporal_outcome(
- df_event_times, lookahead_days=5, resolve_multiple="max", fallback=0
+ df_event_times,
+ lookahead_days=5,
+ resolve_multiple="max",
+ fallback=0,
)
@@ -201,8 +202,8 @@ def test_static_predictor():
"1994-12-31 00:00:01",
"1994-12-31 00:00:01",
"1994-12-31 00:00:01",
- ]
- }
+ ],
+ },
)
pd.testing.assert_series_equal(
@@ -234,8 +235,8 @@ def test_add_age():
0.0,
27.0,
27.0,
- ]
- }
+ ],
+ },
)
pd.testing.assert_series_equal(
@@ -417,11 +418,12 @@ def test_add_temporal_predictors_then_temporal_outcome():
"resolve_multiple": "min",
"fallback": np.nan,
},
- ]
+ ],
)
flattened_dataset.add_temporal_predictors_from_list_of_argument_dictionaries(
- predictors=PREDICTOR_LIST, predictor_dfs={"predictors": predictors_df}
+ predictors=PREDICTOR_LIST,
+ predictor_dfs={"predictors": predictors_df},
)
flattened_dataset.add_temporal_outcome(
diff --git a/tests/test_timeseriesflattener/test_create_feature_combinations.py b/tests/test_timeseriesflattener/test_create_feature_combinations.py
index e619d3ed..18fc3085 100644
--- a/tests/test_timeseriesflattener/test_create_feature_combinations.py
+++ b/tests/test_timeseriesflattener/test_create_feature_combinations.py
@@ -13,7 +13,7 @@ def test_skip_all_if_no_need_to_process():
"lookbehind_days": 1,
"resolve_multiple": "max",
"fallback": 0,
- }
+ },
]
assert create_feature_combinations(input) == input
@@ -64,7 +64,7 @@ def test_list_has_dict_with_list_as_val():
"resolve_multiple": "max",
"fallback": 0,
"source_values_col_name": "val",
- }
+ },
]
assert list_has_dict_with_list_as_value(test_pos_dataset)
@@ -75,7 +75,7 @@ def test_list_has_dict_with_list_as_val():
"resolve_multiple": "max",
"fallback": 0,
"source_values_col_name": "val",
- }
+ },
]
assert not list_has_dict_with_list_as_value(test_neg_dataset)
@@ -109,7 +109,7 @@ def test_create_feature_combinations():
"lookbehind_days": [1, 30],
"resolve_multiple": "max",
"fallback": 0,
- }
+ },
]
expected_output = [
diff --git a/tests/test_timeseriesflattener/test_errors.py b/tests/test_timeseriesflattener/test_errors.py
index 62cc3b66..61723dfb 100644
--- a/tests/test_timeseriesflattener/test_errors.py
+++ b/tests/test_timeseriesflattener/test_errors.py
@@ -1,8 +1,8 @@
import pytest
-from psycopmlutils.timeseriesflattener.flattened_dataset import FlattenedDataset
-
from utils_for_testing import str_to_df
+from psycopmlutils.timeseriesflattener.flattened_dataset import FlattenedDataset
+
def test_col_does_not_exist_in_prediction_times():
prediction_times_str = """dw_ek_borger,
@@ -12,7 +12,7 @@ def test_col_does_not_exist_in_prediction_times():
prediction_times_df = str_to_df(prediction_times_str)
with pytest.raises(ValueError):
- flattened_df = FlattenedDataset(
+ flattened_df = FlattenedDataset( # noqa
prediction_times_df=prediction_times_df,
timestamp_col_name="timestamp",
id_col_name="dw_ek_borger",
diff --git a/tests/test_timeseriesflattener/test_resolve_multiple.py b/tests/test_timeseriesflattener/test_resolve_multiple.py
index 5161f1b8..c2530b5c 100644
--- a/tests/test_timeseriesflattener/test_resolve_multiple.py
+++ b/tests/test_timeseriesflattener/test_resolve_multiple.py
@@ -1,4 +1,9 @@
-from psycopmlutils.timeseriesflattener.resolve_multiple_functions import (
+from utils_for_testing import (
+ assert_flattened_outcome_as_expected,
+ assert_flattened_predictor_as_expected,
+)
+
+from psycopmlutils.timeseriesflattener.resolve_multiple_functions import ( # noqa
get_earliest_value_in_group,
get_latest_value_in_group,
get_max_in_group,
@@ -6,11 +11,6 @@
get_min_in_group,
)
-from utils_for_testing import (
- assert_flattened_outcome_as_expected,
- assert_flattened_predictor_as_expected,
-)
-
def test_resolve_multiple_catalogue():
prediction_times_str = """dw_ek_borger,timestamp,
diff --git a/tests/test_timeseriesflattener/test_wrapper.py b/tests/test_timeseriesflattener/test_wrapper.py
index cee7a604..3dbce6f6 100644
--- a/tests/test_timeseriesflattener/test_wrapper.py
+++ b/tests/test_timeseriesflattener/test_wrapper.py
@@ -1,13 +1,12 @@
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
+from utils_for_testing import str_to_df
+
from psycopmlutils.timeseriesflattener.create_feature_combinations import (
create_feature_combinations,
)
from psycopmlutils.timeseriesflattener.flattened_dataset import FlattenedDataset
-from psycopmlutils.utils import data_loaders
-
-from utils_for_testing import load_event_times, str_to_df
def test_generate_two_features_from_dict():
@@ -23,7 +22,7 @@ def test_generate_two_features_from_dict():
"""
expected_df_str = """dw_ek_borger,timestamp,event_times_df_within_1_days_max_fallback_0,event_times_df_within_2_days_max_fallback_0,event_times_df_within_3_days_max_fallback_0,event_times_df_within_4_days_max_fallback_0
- 1,2021-12-31 00:00:00,1,2,2,2
+ 1,2021-12-31 00:00:00,1,2,2,2
"""
prediction_times_df = str_to_df(prediction_times_str)
@@ -46,7 +45,7 @@ def test_generate_two_features_from_dict():
"fallback": 0,
"source_values_col_name": "val",
},
- ]
+ ],
)
flattened_dataset.add_temporal_predictors_from_list_of_argument_dictionaries(
@@ -106,7 +105,7 @@ def test_output_independent_of_order_of_input():
"fallback": 0,
"source_values_col_name": "val",
},
- ]
+ ],
)
predictor_list2 = create_feature_combinations(
@@ -118,7 +117,7 @@ def test_output_independent_of_order_of_input():
"fallback": 0,
"source_values_col_name": "val",
},
- ]
+ ],
)
predictor_str = """dw_ek_borger,timestamp,value,
@@ -143,10 +142,10 @@ def test_output_independent_of_order_of_input():
    # We don't care about indices. Sort to match the ordering.
assert_frame_equal(
flattened_dataset1.df.sort_values(["dw_ek_borger", "timestamp"]).reset_index(
- drop=True
+ drop=True,
),
flattened_dataset2.df.sort_values(["dw_ek_borger", "timestamp"]).reset_index(
- drop=True
+ drop=True,
),
check_index_type=False,
check_like=True,
@@ -161,7 +160,7 @@ def test_add_df_from_catalogue():
"""
expected_df_str = """dw_ek_borger,timestamp,load_event_times_within_1_days_max_fallback_0,load_event_times_within_2_days_max_fallback_0,load_event_times_within_3_days_max_fallback_0,load_event_times_within_4_days_max_fallback_0
- 1,2021-12-31 00:00:00,1,2,2,2
+ 1,2021-12-31 00:00:00,1,2,2,2
"""
prediction_times_df = str_to_df(prediction_times_str)
@@ -183,7 +182,7 @@ def test_add_df_from_catalogue():
"fallback": 0,
"source_values_col_name": "val",
},
- ]
+ ],
)
flattened_dataset.add_temporal_predictors_from_list_of_argument_dictionaries(
@@ -206,12 +205,7 @@ def test_wrong_formatting():
1,2021-12-31 00:00:00
"""
- expected_df_str = """dw_ek_borger,timestamp,event_times_df_within_1_days_max_fallback_0,event_times_df_within_2_days_max_fallback_0,event_times_df_within_3_days_max_fallback_0,event_times_df_within_4_days_max_fallback_0
- 1,2021-12-31 00:00:00,1,2,2,2
- """
-
prediction_times_df = str_to_df(prediction_times_str)
- expected_df = str_to_df(expected_df_str)
predictor_str = """dw_ek_borger,timestamp,value,
1,2021-12-30 00:00:01, 1
diff --git a/tests/test_timeseriesflattener/utils_for_testing.py b/tests/test_timeseriesflattener/utils_for_testing.py
index 9314b6a3..820759e4 100644
--- a/tests/test_timeseriesflattener/utils_for_testing.py
+++ b/tests/test_timeseriesflattener/utils_for_testing.py
@@ -2,6 +2,7 @@
import pandas as pd
from pandas import DataFrame
+
from psycopmlutils.timeseriesflattener.flattened_dataset import FlattenedDataset
from psycopmlutils.utils import data_loaders
@@ -19,7 +20,8 @@ def str_to_df(str, convert_timestamp_to_datetime: bool = True) -> DataFrame:
def convert_cols_with_matching_colnames_to_datetime(
- df: DataFrame, colname_substr: str
+ df: DataFrame,
+ colname_substr: str,
) -> DataFrame:
"""Convert columns that contain colname_substr in their name to datetimes
Args:
@@ -143,6 +145,7 @@ def assert_flattened_values_as_expected(
is_fallback_prop_warning_threshold: float = 0.9,
):
"""Run tests from string representations of dataframes.
+
     Args:
prediction_times_df_str (str): A string-representation of prediction-times df
@@ -190,13 +193,13 @@ def assert_flattened_values_as_expected(
flattened_values_colname = f"{values_colname}_within_{interval_days}_days_{resolve_multiple}_fallback_{fallback}"
expected_flattened_values = pd.DataFrame(
- {flattened_values_colname: expected_flattened_values}
+ {flattened_values_colname: expected_flattened_values},
)
pd.testing.assert_series_equal(
left=dataset.df[flattened_values_colname].reset_index(drop=True),
right=expected_flattened_values[flattened_values_colname].reset_index(
- drop=True
+ drop=True,
),
check_dtype=False,
)