Skip to content

Commit

Permalink
added code search (#160)
Browse files Browse the repository at this point in the history
* added code search

* added single code table fetch

* update table fetching for codes

* added codes test

* run pre-commit

* update docs
  • Loading branch information
larsbuntemeyer authored Mar 21, 2024
1 parent ceb3033 commit 5eaa507
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 16 deletions.
8 changes: 8 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ Top-level functions
magic_numbers
file_pattern

Codes
=====

.. autosummary::
:toctree: generated/

codes.get_dict
codes.search

Physics
=======
Expand Down
5 changes: 5 additions & 0 deletions docs/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ What's New
(Unreleased)
------------

New Features
~~~~~~~~~~~~

- Added :py:func:`codes.search` for searching the code table using keyword arguments (:pull:`160`).

Internal Changes
~~~~~~~~~~~~~~~~

Expand Down
73 changes: 62 additions & 11 deletions pyremo/codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,30 @@
"""

# from .tables import codes as code_table # code_table, read_table

import numpy as np
import pandas as pd

from .tables import codes

# Module-level alias for the code tables provided by ``.tables.codes``.
tables = codes.tables

# table = pd.concat([table for name, table in code_table.items()])


def _code_from_varname(varname):
"""Returns a code from a varname.
Used typically for varnames create by a `cdo -f nc copy` command
on IEG files.
"""
if "var" in varname:
import re

return int(re.findall("[0-9]+", varname)[0])
else:
return None


def get_dict(id):
"""Returns a dictionary with variable info.
Expand Down Expand Up @@ -124,15 +138,52 @@ def get_dict_by_code(code):
return dict


def _code_from_varname(varname):
"""Returns a code from a varname.
def _search_df(df, **kwargs):
"""Search dataframe by arbitray conditions
Converts kwargs to pandas search conditions. If kwargs is a list,
pandas isin is used as condition.
Used typically for varnames create by a `cdo -f nc copy` command
on IEG files.
"""
if "var" in varname:
import re
df = df.reset_index()
condition_list = []
for key, item in kwargs.items():
if isinstance(item, (list, tuple)):
cond = "(df['{0}'].isin({1}))".format(key, repr(item))
else:
cond = "(df['{0}'] == {1})".format(key, repr(item))
condition_list.append(cond)
conditions = " & ".join(condition_list)
return df[eval(conditions)]


def search(**kwargs):
    """Return a table with variable meta data.

    Searches the code table by arbitrary attributes. Each search
    parameter may be a single value or a list of values.

    Parameters
    ----------
    code: int
        Variable code.
    variable: str
        Variable name (REMO standard).
    cf_name: str
        CF Variable name (Climate and Forecast convention).
    description: str
        Variable description (REMO standard).
    units: str
        Unit (REMO standard).
    time_cell_method: str
        Time cell method for standard Remo output (point or mean).

    Returns
    -------
    df : pd.DataFrame
        Search result.
    """
    # Stack all available code tables into one frame and filter it.
    return _search_df(pd.concat(codes.tables.values()), **kwargs)
19 changes: 14 additions & 5 deletions pyremo/tables/_resources.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from os import path as op

import pandas as pd
import pooch

Expand Down Expand Up @@ -69,12 +71,19 @@ def read_remo_domain_tables():
}


# def read_remo_code_tables():
# resource = CODE_RESOURCE
# return {
# table.split(".")[0]: read_remote_table(table, resource, index_col="code")
# for table in resource.registry.keys()
# }


def read_remo_code_tables():
    """Fetch the code list CSV and return it as a dictionary of tables.

    The file ``code-list/table.csv`` below ``base_url`` is retrieved with
    ``pooch.retrieve`` (stored under ``cache_url``, no hash verification)
    and read with ``code`` as index column.

    Returns
    -------
    dict
        Dictionary with the single key ``"codes"`` mapping to the code
        table as a ``pd.DataFrame`` indexed by ``code``.
    """
    # URLs always use forward slashes; ``os.path.join`` would insert
    # backslashes on Windows and produce an invalid URL.
    import posixpath

    url = posixpath.join(base_url, "code-list", "table.csv")
    filename = pooch.retrieve(url, known_hash=None, path=cache_url)
    return {"codes": pd.read_csv(filename, index_col="code")}


def read_remo_vc_tables():
Expand Down
33 changes: 33 additions & 0 deletions tests/test_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd

from pyremo import codes


def test_codes():
    """``codes.get_dict`` returns the same record for code 130 and name "T"."""
    temperature = dict(
        variable="T",
        description="temperature",
        units="K",
        layer=110.0,
        time_cell_method=None,
        cf_name="ta",
        code=130,
    )
    # The lookup must work both by numeric code and by variable name.
    for identifier in (130, "T"):
        assert codes.get_dict(identifier) == temperature
assert codes.get_dict("T") == expected


def test_code_search():
    """``codes.search(code=167)`` returns exactly the expected single row."""
    record = {
        "code": 167,
        "variable": "TEMP2",
        "description": "2m temperature",
        "units": "K",
        "layer": 1.0,
        "time_cell_method": "mean",
        "cf_name": "tas",
    }
    expected = pd.DataFrame(record, index=[0])
    # Drop the index of the search result so only the row content is compared.
    found = codes.search(code=167).reset_index(drop=True)
    assert expected.equals(found)

0 comments on commit 5eaa507

Please sign in to comment.