diff --git a/forgebox/__init__.py b/forgebox/__init__.py
index 98a433b..a34b2f6 100644
--- a/forgebox/__init__.py
+++ b/forgebox/__init__.py
@@ -1 +1 @@
-__version__ = "0.4.5"
+__version__ = "0.4.7"
diff --git a/forgebox/_nbdev.py b/forgebox/_nbdev.py
index c11fd92..e8c03ee 100644
--- a/forgebox/_nbdev.py
+++ b/forgebox/_nbdev.py
@@ -2,12 +2,17 @@
__all__ = ["index", "modules", "custom_doc_links", "git_url"]
-index = {"list_vc": "02_imports.ipynb",
- "col_list_vc": "02_imports.ipynb",
+index = {"list_vc": "01_pandas_extra.ipynb",
+ "col_list_vc": "01_pandas_extra.ipynb",
+ "pd.DataFrame.vc": "01_pandas_extra.ipynb",
+ "pd.Series.list_vc": "01_pandas_extra.ipynb",
+ "pd.DataFrame.list_vc": "01_pandas_extra.ipynb",
+ "default_rename_rule": "01_pandas_extra.ipynb",
+ "rename_by_rule": "01_pandas_extra.ipynb",
+ "pd.DataFrame.rename_by_rule": "01_pandas_extra.ipynb",
+ "column_order": "01_pandas_extra.ipynb",
+ "pd.DataFrame.column_order": "01_pandas_extra.ipynb",
"__all__": "02_imports.ipynb",
- "pd.DataFrame.vc": "02_imports.ipynb",
- "pd.Series.list_vc": "02_imports.ipynb",
- "pd.DataFrame.list_vc": "02_imports.ipynb",
"Path.ls": "02_imports.ipynb",
"Async": "03_async.ipynb",
"PandasDisplay": "03_df.ipynb",
@@ -24,6 +29,7 @@
"make_hboxes": "05_inter_widgets.ipynb",
"SingleButton": "05_inter_widgets.ipynb",
"Labeler": "05_inter_widgets.ipynb",
+ "Flatten": "06_flatten.ipynb",
"l2norm": "06_spacy.ipynb",
"normal": "06_spacy.ipynb",
"distance": "06_spacy.ipynb",
@@ -135,11 +141,13 @@
"eng_twt_tk": "dataframe_pipeline.ipynb",
"Opts": "optimizers.ipynb"}
-modules = ["imports.py",
+modules = ["pdenhanced.py",
+ "imports.py",
"asyncing.py",
"df.py",
"html.py",
"widgets.py",
+ "flatten.py",
"spacy.py",
"freemap.py",
"loop.py",
diff --git a/forgebox/flatten.py b/forgebox/flatten.py
new file mode 100644
index 0000000..b7710bb
--- /dev/null
+++ b/forgebox/flatten.py
@@ -0,0 +1,43 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/06_flatten.ipynb (unless otherwise specified).
+
+__all__ = ['Flatten']
+
+# Cell
+
+from typing import List, Callable, Any, Dict
+class Flatten:
+    """
+    Flatten a tree structure dictionary into a single-level dictionary
+
+    Nested dictionaries are walked recursively. Every value that is not a
+    dict is stored under a key made of all the keys on its path from the
+    root, joined by `key_connection`. Lists and other non-dict values are
+    treated as leaves and are not descended into.
+
+    Flatten({"a": {"b": {"c": 1}}, "d": 2})() == {"a_b_c": 1, "d": 2}
+    """
+    def __init__(
+        self, data,
+        key_callback: Callable = None,
+        key_connection: str = "_",
+    ):
+        """
+        data: the (possibly nested) dictionary to flatten
+        key_callback: optional function applied to every key before it is used
+        key_connection: separator inserted between parent and child keys
+        """
+        self.data = data
+        self.key_callback = key_callback
+        self.key_connection = key_connection
+
+    def flattening(
+        self, data,
+        result=None,
+        upper_key=""
+    ) -> Dict[str, Any]:
+        """
+        Recursive flatten function
+
+        data: the dictionary (or sub-dictionary) being walked
+        result: accumulator shared by the whole recursion, created on first call
+        upper_key: prefix built from the ancestor keys, "" at the root
+        returns the `result` dictionary
+        """
+        if result is None:
+            result = {}
+        for key, value in data.items():
+            if self.key_callback is not None:
+                key = self.key_callback(key)
+            if isinstance(value, dict):
+                # extend the prefix with this key instead of replacing it, so
+                # keys nested three or more levels deep keep every ancestor
+                self.flattening(
+                    value, result,
+                    upper_key=f"{upper_key}{key}{self.key_connection}")
+            else:
+                result[f"{upper_key}{key}"] = value
+        return result
+
+    def __call__(self) -> Dict[str, Any]:
+        """
+        Flatten the dictionary given to the constructor
+        """
+        return self.flattening(self.data)
+
diff --git a/forgebox/imports.py b/forgebox/imports.py
index 303bd26..a460cfd 100644
--- a/forgebox/imports.py
+++ b/forgebox/imports.py
@@ -1,37 +1,14 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/02_imports.ipynb (unless otherwise specified).
-__all__ = ['list_vc', 'col_list_vc', '__all__']
+__all__ = ['__all__']
# Cell
-__all__ = ["pd","np","partial","Path","json","Counter","list_vc","col_list_vc",
+__all__ = ["pd","np","partial","Path","json","Counter",
"plt","os","sys","glob","Image",]
-import pandas as pd
-
-def list_vc(
- df, colname: str, value: str
-) -> pd.DataFrame:
- """
- count the values in a column
- that each cell is a list
- """
- return df[colname].list_vc(value)
-
-def col_list_vc(
- col, value: str
-) -> pd.DataFrame:
- """
- count the values in a column
- that each cell is a list
- """
- return pd.DataFrame(
- col.apply(lambda x: value in x).value_counts()
- )
-
-pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())
-pd.Series.list_vc = col_list_vc
-pd.DataFrame.list_vc = list_vc
+# import enhanced version of pandas
+from .pdenhanced import pd
import numpy as np
from pathlib import Path
import json
diff --git a/forgebox/pdenhanced.py b/forgebox/pdenhanced.py
new file mode 100644
index 0000000..a683e96
--- /dev/null
+++ b/forgebox/pdenhanced.py
@@ -0,0 +1,69 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01_pandas_extra.ipynb (unless otherwise specified).
+
+__all__ = ['list_vc', 'col_list_vc', 'default_rename_rule', 'rename_by_rule', 'column_order']
+
+# Cell
+import pandas as pd
+from typing import Callable
+
+# Cell
+def list_vc(
+    df, colname: str, value: str
+) -> pd.DataFrame:
+    """
+    Count how many cells of `df[colname]` contain `value`
+
+    The column is selected first and the counting itself is delegated
+    to the `list_vc` method of that column.
+    """
+    column = df[colname]
+    return column.list_vc(value)
+
+def col_list_vc(
+    col, value: str
+) -> pd.DataFrame:
+    """
+    Count the cells of `col` that do / do not contain `value`
+
+    Each cell is tested with `value in cell`; the resulting booleans
+    are tallied with `value_counts` and wrapped in a DataFrame.
+    """
+    def has_value(cell):
+        return value in cell
+
+    contains = col.apply(has_value)
+    counts = contains.value_counts()
+    return pd.DataFrame(counts)
+
+# Register the helpers as pandas methods so they can be called fluently:
+#   df.vc(col)                  -> value counts of one column, as a DataFrame
+#   series.list_vc(value)       -> col_list_vc(series, value)
+#   df.list_vc(colname, value)  -> list_vc(df, colname, value)
+pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())
+pd.Series.list_vc = col_list_vc
+pd.DataFrame.list_vc = list_vc
+
+# Cell
+def default_rename_rule(x: str) -> str:
+    """
+    Default renaming rule: turn spaces and hyphens into underscores
+    and lower-case the whole name
+    """
+    underscored = x.replace(" ", "_").replace("-", "_")
+    return underscored.lower()
+
+
+def rename_by_rule(
+    df,
+    rule: Callable = default_rename_rule
+) -> pd.DataFrame:
+    """
+    Rename every column of `df` by passing its label through `rule`
+
+    `rule` is called once per column label and returns the new label.
+    The renamed frame is returned; `df` itself is not renamed in place.
+    """
+    mapping = {old: rule(old) for old in df.columns}
+    return df.rename(columns=mapping)
+
+# expose the helper as a DataFrame method: df.rename_by_rule(rule)
+pd.DataFrame.rename_by_rule = rename_by_rule
+
+# Cell
+def column_order(df, *col_names) -> pd.DataFrame:
+    """
+    df = df.column_order("col1", "col2", "col3")
+    will put col1, col2, and col3 as the first 3 columns,
+    the remaining columns keep their relative order after them
+    """
+    ordered = list(df.columns)
+
+    # walk the requested names back-to-front: each one is moved to the
+    # front, so the first requested name ends up first
+    for name in reversed(col_names):
+        if name in ordered:
+            position = ordered.index(name)
+            ordered.insert(0, ordered.pop(position))
+        else:
+            # warn about (and skip) a column that is not in the dataframe
+            print(f"Column:'{name}' not in dataframe")
+    return df[ordered]
+
+# expose the helper as a DataFrame method: df.column_order("col1", ...)
+pd.DataFrame.column_order = column_order
\ No newline at end of file
diff --git a/nbs/01_pandas_extra.ipynb b/nbs/01_pandas_extra.ipynb
new file mode 100644
index 0000000..edb13bf
--- /dev/null
+++ b/nbs/01_pandas_extra.ipynb
@@ -0,0 +1,691 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Pandas Extra functions\n",
+ "> Extra pandas functions at import"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# default_exp pdenhanced"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# export\n",
+ "import pandas as pd\n",
+ "from typing import Callable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Value counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# export\n",
+ "def list_vc(\n",
+ " df, colname: str, value: str\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " count the values in a column\n",
+ " that each cell is a list\n",
+ " \"\"\"\n",
+ " return df[colname].list_vc(value)\n",
+ "\n",
+ "def col_list_vc(\n",
+ " col, value: str\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " count the values in a column\n",
+ " that each cell is a list\n",
+ " \"\"\"\n",
+ " return pd.DataFrame(\n",
+ " col.apply(lambda x: value in x).value_counts()\n",
+ " )\n",
+ "\n",
+ "pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())\n",
+ "pd.Series.list_vc = col_list_vc\n",
+ "pd.DataFrame.list_vc = list_vc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Rename by rule"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# export\n",
+ "def default_rename_rule(x: str) -> str:\n",
+ " return x.replace(\" \", \"_\").replace(\"-\", \"_\").lower()\n",
+ "\n",
+ "\n",
+ "def rename_by_rule(\n",
+ " df,\n",
+ " rule: Callable = default_rename_rule\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " rename the columns by a rule function\n",
+ " \"\"\"\n",
+ " df = df.rename(\n",
+ " columns=dict((c, rule(c)) for c in df.columns))\n",
+ " return df\n",
+ "\n",
+ "pd.DataFrame.rename_by_rule = rename_by_rule"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Rearrange Columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# export\n",
+ "def column_order(df, *col_names) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " df = df.column_order(\"col1\", \"col2\", \"col3\")\n",
+ " will put col1, col2, and col3 as the 1st 3 column\n",
+ " \"\"\"\n",
+ " cols = list(df.columns)\n",
+ " \n",
+ " for col_name in list(col_names)[::-1]:\n",
+ " \n",
+ " # warn if the column exist\n",
+ " if col_name not in cols:\n",
+ " print(f\"Column:'{col_name}' not in dataframe\")\n",
+ " continue\n",
+ " cols.insert(0, cols.pop(cols.index(col_name)))\n",
+ " return df[cols]\n",
+ "\n",
+ "pd.DataFrame.column_order = column_order"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Testing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.datasets import california_housing\n",
+ "\n",
+ "cdata = california_housing.fetch_california_housing()\n",
+ "\n",
+ "df = pd.DataFrame(cdata[\"data\"], columns=cdata[\"feature_names\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"old\"] = df.HouseAge>20"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " old | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " True | \n",
+ " 14347 | \n",
+ "
\n",
+ " \n",
+ " False | \n",
+ " 6293 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " old\n",
+ "True 14347\n",
+ "False 6293"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.vc(\"old\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " medinc | \n",
+ " houseage | \n",
+ " averooms | \n",
+ " avebedrms | \n",
+ " population | \n",
+ " aveoccup | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " old | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8.3252 | \n",
+ " 41.0 | \n",
+ " 6.984127 | \n",
+ " 1.023810 | \n",
+ " 322.0 | \n",
+ " 2.555556 | \n",
+ " 37.88 | \n",
+ " -122.23 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8.3014 | \n",
+ " 21.0 | \n",
+ " 6.238137 | \n",
+ " 0.971880 | \n",
+ " 2401.0 | \n",
+ " 2.109842 | \n",
+ " 37.86 | \n",
+ " -122.22 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 7.2574 | \n",
+ " 52.0 | \n",
+ " 8.288136 | \n",
+ " 1.073446 | \n",
+ " 496.0 | \n",
+ " 2.802260 | \n",
+ " 37.85 | \n",
+ " -122.24 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 5.6431 | \n",
+ " 52.0 | \n",
+ " 5.817352 | \n",
+ " 1.073059 | \n",
+ " 558.0 | \n",
+ " 2.547945 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3.8462 | \n",
+ " 52.0 | \n",
+ " 6.281853 | \n",
+ " 1.081081 | \n",
+ " 565.0 | \n",
+ " 2.181467 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 20635 | \n",
+ " 1.5603 | \n",
+ " 25.0 | \n",
+ " 5.045455 | \n",
+ " 1.133333 | \n",
+ " 845.0 | \n",
+ " 2.560606 | \n",
+ " 39.48 | \n",
+ " -121.09 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 20636 | \n",
+ " 2.5568 | \n",
+ " 18.0 | \n",
+ " 6.114035 | \n",
+ " 1.315789 | \n",
+ " 356.0 | \n",
+ " 3.122807 | \n",
+ " 39.49 | \n",
+ " -121.21 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 20637 | \n",
+ " 1.7000 | \n",
+ " 17.0 | \n",
+ " 5.205543 | \n",
+ " 1.120092 | \n",
+ " 1007.0 | \n",
+ " 2.325635 | \n",
+ " 39.43 | \n",
+ " -121.22 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 20638 | \n",
+ " 1.8672 | \n",
+ " 18.0 | \n",
+ " 5.329513 | \n",
+ " 1.171920 | \n",
+ " 741.0 | \n",
+ " 2.123209 | \n",
+ " 39.43 | \n",
+ " -121.32 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 20639 | \n",
+ " 2.3886 | \n",
+ " 16.0 | \n",
+ " 5.254717 | \n",
+ " 1.162264 | \n",
+ " 1387.0 | \n",
+ " 2.616981 | \n",
+ " 39.37 | \n",
+ " -121.24 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20640 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " medinc houseage averooms avebedrms population aveoccup latitude \\\n",
+ "0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
+ "1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
+ "2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
+ "3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
+ "4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n",
+ "20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n",
+ "20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n",
+ "20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n",
+ "20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n",
+ "\n",
+ " longitude old \n",
+ "0 -122.23 True \n",
+ "1 -122.22 True \n",
+ "2 -122.24 True \n",
+ "3 -122.25 True \n",
+ "4 -122.25 True \n",
+ "... ... ... \n",
+ "20635 -121.09 True \n",
+ "20636 -121.21 False \n",
+ "20637 -121.22 False \n",
+ "20638 -121.32 False \n",
+ "20639 -121.24 False \n",
+ "\n",
+ "[20640 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.rename_by_rule()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " old | \n",
+ " AveOccup | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " True | \n",
+ " 2.555556 | \n",
+ " 8.3252 | \n",
+ " 41.0 | \n",
+ " 6.984127 | \n",
+ " 1.023810 | \n",
+ " 322.0 | \n",
+ " 37.88 | \n",
+ " -122.23 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " True | \n",
+ " 2.109842 | \n",
+ " 8.3014 | \n",
+ " 21.0 | \n",
+ " 6.238137 | \n",
+ " 0.971880 | \n",
+ " 2401.0 | \n",
+ " 37.86 | \n",
+ " -122.22 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " True | \n",
+ " 2.802260 | \n",
+ " 7.2574 | \n",
+ " 52.0 | \n",
+ " 8.288136 | \n",
+ " 1.073446 | \n",
+ " 496.0 | \n",
+ " 37.85 | \n",
+ " -122.24 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " True | \n",
+ " 2.547945 | \n",
+ " 5.6431 | \n",
+ " 52.0 | \n",
+ " 5.817352 | \n",
+ " 1.073059 | \n",
+ " 558.0 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " True | \n",
+ " 2.181467 | \n",
+ " 3.8462 | \n",
+ " 52.0 | \n",
+ " 6.281853 | \n",
+ " 1.081081 | \n",
+ " 565.0 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 20635 | \n",
+ " True | \n",
+ " 2.560606 | \n",
+ " 1.5603 | \n",
+ " 25.0 | \n",
+ " 5.045455 | \n",
+ " 1.133333 | \n",
+ " 845.0 | \n",
+ " 39.48 | \n",
+ " -121.09 | \n",
+ "
\n",
+ " \n",
+ " 20636 | \n",
+ " False | \n",
+ " 3.122807 | \n",
+ " 2.5568 | \n",
+ " 18.0 | \n",
+ " 6.114035 | \n",
+ " 1.315789 | \n",
+ " 356.0 | \n",
+ " 39.49 | \n",
+ " -121.21 | \n",
+ "
\n",
+ " \n",
+ " 20637 | \n",
+ " False | \n",
+ " 2.325635 | \n",
+ " 1.7000 | \n",
+ " 17.0 | \n",
+ " 5.205543 | \n",
+ " 1.120092 | \n",
+ " 1007.0 | \n",
+ " 39.43 | \n",
+ " -121.22 | \n",
+ "
\n",
+ " \n",
+ " 20638 | \n",
+ " False | \n",
+ " 2.123209 | \n",
+ " 1.8672 | \n",
+ " 18.0 | \n",
+ " 5.329513 | \n",
+ " 1.171920 | \n",
+ " 741.0 | \n",
+ " 39.43 | \n",
+ " -121.32 | \n",
+ "
\n",
+ " \n",
+ " 20639 | \n",
+ " False | \n",
+ " 2.616981 | \n",
+ " 2.3886 | \n",
+ " 16.0 | \n",
+ " 5.254717 | \n",
+ " 1.162264 | \n",
+ " 1387.0 | \n",
+ " 39.37 | \n",
+ " -121.24 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20640 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " old AveOccup MedInc HouseAge AveRooms AveBedrms Population \\\n",
+ "0 True 2.555556 8.3252 41.0 6.984127 1.023810 322.0 \n",
+ "1 True 2.109842 8.3014 21.0 6.238137 0.971880 2401.0 \n",
+ "2 True 2.802260 7.2574 52.0 8.288136 1.073446 496.0 \n",
+ "3 True 2.547945 5.6431 52.0 5.817352 1.073059 558.0 \n",
+ "4 True 2.181467 3.8462 52.0 6.281853 1.081081 565.0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "20635 True 2.560606 1.5603 25.0 5.045455 1.133333 845.0 \n",
+ "20636 False 3.122807 2.5568 18.0 6.114035 1.315789 356.0 \n",
+ "20637 False 2.325635 1.7000 17.0 5.205543 1.120092 1007.0 \n",
+ "20638 False 2.123209 1.8672 18.0 5.329513 1.171920 741.0 \n",
+ "20639 False 2.616981 2.3886 16.0 5.254717 1.162264 1387.0 \n",
+ "\n",
+ " Latitude Longitude \n",
+ "0 37.88 -122.23 \n",
+ "1 37.86 -122.22 \n",
+ "2 37.85 -122.24 \n",
+ "3 37.85 -122.25 \n",
+ "4 37.85 -122.25 \n",
+ "... ... ... \n",
+ "20635 39.48 -121.09 \n",
+ "20636 39.49 -121.21 \n",
+ "20637 39.43 -121.22 \n",
+ "20638 39.43 -121.32 \n",
+ "20639 39.37 -121.24 \n",
+ "\n",
+ "[20640 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.column_order(\"old\",\"AveOccup\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nbs/02_imports.ipynb b/nbs/02_imports.ipynb
index da2b0ad..c7dc359 100644
--- a/nbs/02_imports.ipynb
+++ b/nbs/02_imports.ipynb
@@ -25,34 +25,11 @@
"source": [
"# export\n",
"\n",
- "__all__ = [\"pd\",\"np\",\"partial\",\"Path\",\"json\",\"Counter\",\"list_vc\",\"col_list_vc\",\n",
+ "__all__ = [\"pd\",\"np\",\"partial\",\"Path\",\"json\",\"Counter\",\n",
" \"plt\",\"os\",\"sys\",\"glob\",\"Image\",]\n",
- "import pandas as pd\n",
- "\n",
- "def list_vc(\n",
- " df, colname: str, value: str\n",
- ") -> pd.DataFrame:\n",
- " \"\"\"\n",
- " count the values in a column\n",
- " that each cell is a list\n",
- " \"\"\"\n",
- " return df[colname].list_vc(value)\n",
- "\n",
- "def col_list_vc(\n",
- " col, value: str\n",
- ") -> pd.DataFrame:\n",
- " \"\"\"\n",
- " count the values in a column\n",
- " that each cell is a list\n",
- " \"\"\"\n",
- " return pd.DataFrame(\n",
- " col.apply(lambda x: value in x).value_counts()\n",
- " )\n",
- "\n",
- "pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())\n",
- "pd.Series.list_vc = col_list_vc\n",
- "pd.DataFrame.list_vc = list_vc\n",
"\n",
+ "# import enhanced version of pandas\n",
+ "from forgebox.pdenhanced import pd\n",
"import numpy as np\n",
"from pathlib import Path\n",
"import json\n",
diff --git a/nbs/06_flatten.ipynb b/nbs/06_flatten.ipynb
new file mode 100644
index 0000000..04bf542
--- /dev/null
+++ b/nbs/06_flatten.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Flatten"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# default_exp flatten"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Flattening the tree structure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# export\n",
+ "\n",
+ "from typing import List, Callable, Any, Dict\n",
+    "class Flatten:\n",
+    "    \"\"\"\n",
+    "    Flatten a tree structure dictionary into a single-level dictionary\n",
+    "\n",
+    "    Nested dictionaries are walked recursively. Every value that is not a\n",
+    "    dict is stored under a key made of all the keys on its path from the\n",
+    "    root, joined by `key_connection`. Lists and other non-dict values are\n",
+    "    treated as leaves and are not descended into.\n",
+    "\n",
+    "    Flatten({\"a\": {\"b\": {\"c\": 1}}, \"d\": 2})() == {\"a_b_c\": 1, \"d\": 2}\n",
+    "    \"\"\"\n",
+    "    def __init__(\n",
+    "        self, data,\n",
+    "        key_callback: Callable = None,\n",
+    "        key_connection: str = \"_\",\n",
+    "    ):\n",
+    "        \"\"\"\n",
+    "        data: the (possibly nested) dictionary to flatten\n",
+    "        key_callback: optional function applied to every key before it is used\n",
+    "        key_connection: separator inserted between parent and child keys\n",
+    "        \"\"\"\n",
+    "        self.data = data\n",
+    "        self.key_callback = key_callback\n",
+    "        self.key_connection = key_connection\n",
+    "\n",
+    "    def flattening(\n",
+    "        self, data,\n",
+    "        result=None,\n",
+    "        upper_key=\"\"\n",
+    "    ) -> Dict[str, Any]:\n",
+    "        \"\"\"\n",
+    "        Recursive flatten function\n",
+    "\n",
+    "        data: the dictionary (or sub-dictionary) being walked\n",
+    "        result: accumulator shared by the whole recursion, created on first call\n",
+    "        upper_key: prefix built from the ancestor keys, \"\" at the root\n",
+    "        returns the `result` dictionary\n",
+    "        \"\"\"\n",
+    "        if result is None:\n",
+    "            result = {}\n",
+    "        for key, value in data.items():\n",
+    "            if self.key_callback is not None:\n",
+    "                key = self.key_callback(key)\n",
+    "            if isinstance(value, dict):\n",
+    "                # extend the prefix with this key instead of replacing it, so\n",
+    "                # keys nested three or more levels deep keep every ancestor\n",
+    "                self.flattening(\n",
+    "                    value, result,\n",
+    "                    upper_key=f\"{upper_key}{key}{self.key_connection}\")\n",
+    "            else:\n",
+    "                result[f\"{upper_key}{key}\"] = value\n",
+    "        return result\n",
+    "\n",
+    "    def __call__(self) -> Dict[str, Any]:\n",
+    "        \"\"\"\n",
+    "        Flatten the dictionary given to the constructor\n",
+    "        \"\"\"\n",
+    "        return self.flattening(self.data)\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Testing a tree structure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'a': [1, 2, {'c': 'd'}], 'b=>g': 1}"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Flatten({\"a\":[1,2,{\"c\":\"d\"}],\"b\":{\"g\":1}}, key_connection=\"=>\")()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nbs/30_traceable_edit_in_flask.ipynb b/nbs/30_traceable_edit_in_flask.ipynb
index 795d8af..e7f3db7 100644
--- a/nbs/30_traceable_edit_in_flask.ipynb
+++ b/nbs/30_traceable_edit_in_flask.ipynb
@@ -234,70 +234,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * Serving Flask app \"__main__\" (lazy loading)\n",
- " * Environment: production\n",
- "\u001b[31m WARNING: This is a development server. Do not use it in a production deployment.\u001b[0m\n",
- "\u001b[2m Use a production WSGI server instead.\u001b[0m\n",
- " * Debug mode: off\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " * Running on http://127.0.0.1:4242/ (Press CTRL+C to quit)\n",
- "127.0.0.1 - - [21/Jun/2020 18:09:40] \"\u001b[33mGET /table1/workspace HTTP/1.1\u001b[0m\" 404 -\n",
- "127.0.0.1 - - [21/Jun/2020 18:09:43] \"\u001b[37mGET /table1 HTTP/1.1\u001b[0m\" 200 -\n",
- "127.0.0.1 - - [21/Jun/2020 18:09:43] \"\u001b[37mGET /table1/df_api?page=0&where= HTTP/1.1\u001b[0m\" 200 -\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "SELECT * FROM sample_table \n",
- " ORDER BY id ASC LIMIT 0,20\n",
- " \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "127.0.0.1 - - [21/Jun/2020 18:10:02] \"\u001b[37mPOST /table1/save_api HTTP/1.1\u001b[0m\" 200 -\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " idx col valtype original changed ip table_name \\\n",
- "0 0 name str Darrow Reaper 127.0.0.1 sample_table \n",
- "1 0 house str Andromedus Rising 127.0.0.1 sample_table \n",
- "2 0 age int 20 21 127.0.0.1 sample_table \n",
- "\n",
- " ts \\\n",
- "0 2020-06-21 18:10:02.067516 \n",
- "1 2020-06-21 18:10:02.071400 \n",
- "2 2020-06-21 18:10:02.072681 \n",
- "\n",
- " sql \n",
- "0 UPDATE sample_table \\n SET name='Re... \n",
- "1 UPDATE sample_table \\n SET house='R... \n",
- "2 UPDATE sample_table \\n SET age=21 W... \n",
- "SELECT * FROM sample_table \n",
- " ORDER BY id ASC LIMIT 0,20\n",
- " \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"app = Flask(__name__)\n",
"\n",
@@ -310,7 +249,7 @@
" log_con=con\n",
" )\n",
"\n",
- "app.run(port = 4242,debug=False)"
+ "app.run(host=\"0.0.0.0\",port = 4242,debug=False)"
]
},
{
@@ -331,103 +270,9 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " idx | \n",
- " col | \n",
- " valtype | \n",
- " original | \n",
- " changed | \n",
- " ip | \n",
- " table_name | \n",
- " ts | \n",
- " sql | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0 | \n",
- " name | \n",
- " str | \n",
- " Darrow | \n",
- " Reaper | \n",
- " 127.0.0.1 | \n",
- " sample_table | \n",
- " 2020-06-21 18:10:02.067516 | \n",
- " UPDATE sample_table \\n SET name='Reaper' WHERE id=0\\n | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0 | \n",
- " house | \n",
- " str | \n",
- " Andromedus | \n",
- " Rising | \n",
- " 127.0.0.1 | \n",
- " sample_table | \n",
- " 2020-06-21 18:10:02.071400 | \n",
- " UPDATE sample_table \\n SET house='Rising' WHERE id=0\\n | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0 | \n",
- " age | \n",
- " int | \n",
- " 20 | \n",
- " 21 | \n",
- " 127.0.0.1 | \n",
- " sample_table | \n",
- " 2020-06-21 18:10:02.072681 | \n",
- " UPDATE sample_table \\n SET age=21 WHERE id=0\\n | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " idx col valtype original changed ip table_name \\\n",
- "0 0 name str Darrow Reaper 127.0.0.1 sample_table \n",
- "1 0 house str Andromedus Rising 127.0.0.1 sample_table \n",
- "2 0 age int 20 21 127.0.0.1 sample_table \n",
- "\n",
- " ts \\\n",
- "0 2020-06-21 18:10:02.067516 \n",
- "1 2020-06-21 18:10:02.071400 \n",
- "2 2020-06-21 18:10:02.072681 \n",
- "\n",
- " sql \n",
- "0 UPDATE sample_table \\n SET name='Reaper' WHERE id=0\\n \n",
- "1 UPDATE sample_table \\n SET house='Rising' WHERE id=0\\n \n",
- "2 UPDATE sample_table \\n SET age=21 WHERE id=0\\n "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"with PandasDisplay(max_colwidth = 0,max_rows=100):\n",
" display(pd.read_sql('editable_log',con = con))"
@@ -458,6 +303,19 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
}
},
"nbformat": 4,
diff --git a/settings.ini b/settings.ini
index 538f4d0..d6fe5ad 100644
--- a/settings.ini
+++ b/settings.ini
@@ -7,7 +7,7 @@ author = xiaochen(ray) zhang
author_email = b2ray2c@gmail.com
copyright = xiaochen(ray) zhang
branch = master
-version = 0.4.5
+version = 0.4.7
min_python = 3.6
audience = Developers
language = English