From d68ac8d3e646cee4ca93d075f14872465edb91e2 Mon Sep 17 00:00:00 2001 From: raynardj Date: Thu, 19 Aug 2021 11:08:27 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9B=88=20flatten=20function,=20enhanced=20pa?= =?UTF-8?q?ndas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_data/sidebars/home_sidebar.yml | 6 + docs/flatten.html | 111 +++++ docs/imports.html | 54 +- docs/pandas_extra.html | 709 +++++++++++++++++++++++++++ docs/sidebar.json | 2 + docs/traceable_edit_in_flask.html | 235 +++------ forgebox/__init__.py | 2 +- forgebox/_nbdev.py | 20 +- forgebox/flatten.py | 43 ++ forgebox/imports.py | 31 +- forgebox/pdenhanced.py | 69 +++ nbs/01_pandas_extra.ipynb | 691 ++++++++++++++++++++++++++ nbs/02_imports.ipynb | 29 +- nbs/06_flatten.ipynb | 142 ++++++ nbs/30_traceable_edit_in_flask.ipynb | 178 +------ settings.ini | 2 +- 16 files changed, 1885 insertions(+), 439 deletions(-) create mode 100644 docs/flatten.html create mode 100644 docs/pandas_extra.html create mode 100644 forgebox/flatten.py create mode 100644 forgebox/pdenhanced.py create mode 100644 nbs/01_pandas_extra.ipynb create mode 100644 nbs/06_flatten.ipynb diff --git a/docs/_data/sidebars/home_sidebar.yml b/docs/_data/sidebars/home_sidebar.yml index db069da..449d4f7 100644 --- a/docs/_data/sidebars/home_sidebar.yml +++ b/docs/_data/sidebars/home_sidebar.yml @@ -9,6 +9,9 @@ entries: - output: web,pdf title: Overview url: / + - output: web,pdf + title: Pandas Extra functions + url: pandas_extra.html - output: web,pdf title: 03 Imports url: imports.html @@ -24,6 +27,9 @@ entries: - output: web,pdf title: Interactive tools url: inter_widgets.html + - output: web,pdf + title: Flatten + url: flatten.html - output: web,pdf title: spacy toolkit url: spacy.html diff --git a/docs/flatten.html b/docs/flatten.html new file mode 100644 index 0000000..61bccf0 --- /dev/null +++ b/docs/flatten.html @@ -0,0 +1,111 @@ +--- + +title: Flatten + + +keywords: fastai +sidebar: home_sidebar + + + +nb_path: "nbs/06_flatten.ipynb" +--- + + +
+ + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Flattening the tree structure

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

class Flatten[source]

Flatten(data, key_callback:Callable=None, key_connection:str='_')

+
+

Flatten a tree structure dictionary

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Testing a tree structure

+
+
+
+ {% raw %} + +
+
+ +
+
+
Flatten({"a":[1,2,{"c":"d"}],"b":{"g":1}}, key_connection="=>")()
+
+ +
+
+
+ +
+
+ +
+ + + +
+
{'a': [1, 2, {'c': 'd'}], 'b=>g': 1}
+
+ +
+ +
+
+ +
+ {% endraw %} + +
+ + diff --git a/docs/imports.html b/docs/imports.html index c21fe53..bb3faf8 100644 --- a/docs/imports.html +++ b/docs/imports.html @@ -2,11 +2,13 @@ title: 03 Imports + keywords: fastai sidebar: home_sidebar summary: "standard imports" description: "standard imports" +nb_path: "nbs/02_imports.ipynb" --- + +
+ + {% raw %} + +
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Value counts

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

list_vc[source]

list_vc(df, colname:str, value:str)

+
+

count the values in a column + that each cell is a list

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + +
+

col_list_vc[source]

col_list_vc(col, value:str)

+
+

count the values in a column + that each cell is a list

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Rename by rule

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

default_rename_rule[source]

default_rename_rule(x:str)

+
+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + +
+

rename_by_rule[source]

rename_by_rule(df, rule:Callable=default_rename_rule)

+
+

rename the columns by a rule function

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Rearrage Columns

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

column_order[source]

column_order(df, *col_names)

+
+

df = df.column_order("col1", "col2", "col3") +will put col1, col2, and col3 as the 1st 3 column

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Testing

+
+
+
+ {% raw %} + +
+
+ +
+
+
from sklearn.datasets import california_housing
+
+cdata = california_housing.fetch_california_housing()
+
+df = pd.DataFrame(cdata["data"], columns=cdata["feature_names"])
+
+ +
+
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
df["old"] = df.HouseAge>20
+
+ +
+
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
df.vc("old")
+
+ +
+
+
+ +
+
+ +
+ + +
+
+ + + + + + + + + + + + + + + + + + +
old
True14347
False6293
+
+
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
df.rename_by_rule()
+
+ +
+
+
+ +
+
+ +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
medinchouseageaveroomsavebedrmspopulationaveoccuplatitudelongitudeold
08.325241.06.9841271.023810322.02.55555637.88-122.23True
18.301421.06.2381370.9718802401.02.10984237.86-122.22True
27.257452.08.2881361.073446496.02.80226037.85-122.24True
35.643152.05.8173521.073059558.02.54794537.85-122.25True
43.846252.06.2818531.081081565.02.18146737.85-122.25True
..............................
206351.560325.05.0454551.133333845.02.56060639.48-121.09True
206362.556818.06.1140351.315789356.03.12280739.49-121.21False
206371.700017.05.2055431.1200921007.02.32563539.43-121.22False
206381.867218.05.3295131.171920741.02.12320939.43-121.32False
206392.388616.05.2547171.1622641387.02.61698139.37-121.24False
+

20640 rows × 9 columns

+
+
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
df.column_order("old","AveOccup")
+
+ +
+
+
+ +
+
+ +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
oldAveOccupMedIncHouseAgeAveRoomsAveBedrmsPopulationLatitudeLongitude
0True2.5555568.325241.06.9841271.023810322.037.88-122.23
1True2.1098428.301421.06.2381370.9718802401.037.86-122.22
2True2.8022607.257452.08.2881361.073446496.037.85-122.24
3True2.5479455.643152.05.8173521.073059558.037.85-122.25
4True2.1814673.846252.06.2818531.081081565.037.85-122.25
..............................
20635True2.5606061.560325.05.0454551.133333845.039.48-121.09
20636False3.1228072.556818.06.1140351.315789356.039.49-121.21
20637False2.3256351.700017.05.2055431.1200921007.039.43-121.22
20638False2.1232091.867218.05.3295131.171920741.039.43-121.32
20639False2.6169812.388616.05.2547171.1622641387.039.37-121.24
+

20640 rows × 9 columns

+
+
+ +
+ +
+
+ +
+ {% endraw %} + +
+ + diff --git a/docs/sidebar.json b/docs/sidebar.json index c3362a8..9e2b594 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -1,11 +1,13 @@ { "forgebox": { "Overview": "/", + "Pandas Extra functions": "pandas_extra.html", "03 Imports": "imports.html", "Async": "async.html", "Tools on pandas dataframe": "df.html", "HTML operation": "html.html", "Interactive tools": "inter_widgets.html", + "Flatten": "flatten.html", "spacy toolkit": "spacy.html", "FreeMap": "freemap.html", "Controllable loop process": "loop.html", diff --git a/docs/traceable_edit_in_flask.html b/docs/traceable_edit_in_flask.html index da57d2e..a32f857 100644 --- a/docs/traceable_edit_in_flask.html +++ b/docs/traceable_edit_in_flask.html @@ -2,10 +2,13 @@ title: 01 A logged editable table + keywords: fastai sidebar: home_sidebar summary: "Traceable editable table in flask" +description: "Traceable editable table in flask" +nb_path: "nbs/30_traceable_edit_in_flask.ipynb" ---
- {% raw %} + {% raw %} +
+ {% endraw %} + + {% raw %} +
-
from flask import Flask
+
from flask import Flask
 app = Flask(__name__)
 
 @app.route('/')
@@ -41,18 +49,23 @@
 
+ {% endraw %} +
-

Run a simple applitcation

+

Run a simple applitcation

+ {% raw %} +
-
+ {% endraw %} -
+ {% raw %} +
@@ -73,9 +86,17 @@

get_static

+ {% endraw %} + + {% raw %} +
@@ -96,6 +117,10 @@

edit_js
@@ -105,7 +130,7 @@

edit_js -

class DefaultTemp[source]

DefaultTemp(source, block_start_string='{%', block_end_string='%}', variable_start_string='{{', variable_end_string='}}', comment_start_string='{#', comment_end_string='#}', line_statement_prefix=None, line_comment_prefix=None, trim_blocks=False, lstrip_blocks=False, newline_sequence='\n', keep_trailing_newline=False, extensions=(), optimized=True, undefined='Undefined', finalize=None, autoescape=False, enable_async=False) :: Template

+

class DefaultTemp[source]

DefaultTemp(source, block_start_string='{%', block_end_string='%}', variable_start_string='{{', variable_end_string='}}', comment_start_string='{#', comment_end_string='#}', line_statement_prefix=None, line_comment_prefix=None, trim_blocks=False, lstrip_blocks=False, newline_sequence='\n', keep_trailing_newline=False, extensions=(), optimized=True, undefined=Undefined, finalize=None, autoescape=False, enable_async=False) :: Template

Jinjia template with some default render config

@@ -117,12 +142,23 @@

class DefaultTemp

+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} +
-

Create sample data

+

Create sample data

+ {% raw %} + -
-
- -
- -
-
 * Serving Flask app "__main__" (lazy loading)
- * Environment: production
-   WARNING: This is a development server. Do not use it in a production deployment.
-   Use a production WSGI server instead.
- * Debug mode: off
-
-
-
- -
- -
-
 * Running on http://127.0.0.1:4242/ (Press CTRL+C to quit)
-127.0.0.1 - - [21/Jun/2020 18:09:40] "GET /table1/workspace HTTP/1.1" 404 -
-127.0.0.1 - - [21/Jun/2020 18:09:43] "GET /table1 HTTP/1.1" 200 -
-127.0.0.1 - - [21/Jun/2020 18:09:43] "GET /table1/df_api?page=0&where= HTTP/1.1" 200 -
-
-
-
- -
- -
-
SELECT * FROM sample_table 
-        ORDER BY id ASC LIMIT 0,20
-        
-
-
-
- -
- -
-
127.0.0.1 - - [21/Jun/2020 18:10:02] "POST /table1/save_api HTTP/1.1" 200 -
-
-
-
- -
- -
-
  idx    col valtype    original changed         ip    table_name  \
-0   0   name     str      Darrow  Reaper  127.0.0.1  sample_table   
-1   0  house     str  Andromedus  Rising  127.0.0.1  sample_table   
-2   0    age     int          20      21  127.0.0.1  sample_table   
-
-                          ts  \
-0 2020-06-21 18:10:02.067516   
-1 2020-06-21 18:10:02.071400   
-2 2020-06-21 18:10:02.072681   
-
-                                                 sql  
-0  UPDATE sample_table \n            SET name='Re...  
-1  UPDATE sample_table \n            SET house='R...  
-2  UPDATE sample_table \n            SET age=21 W...  
-SELECT * FROM sample_table 
-        ORDER BY id ASC LIMIT 0,20
-        
-
-
-
- -
+ {% endraw %} -

-

Retrieve the log

+

Retrieve the log

+ {% raw %} +
diff --git a/forgebox/__init__.py b/forgebox/__init__.py index 98a433b..a34b2f6 100644 --- a/forgebox/__init__.py +++ b/forgebox/__init__.py @@ -1 +1 @@ -__version__ = "0.4.5" +__version__ = "0.4.7" diff --git a/forgebox/_nbdev.py b/forgebox/_nbdev.py index c11fd92..e8c03ee 100644 --- a/forgebox/_nbdev.py +++ b/forgebox/_nbdev.py @@ -2,12 +2,17 @@ __all__ = ["index", "modules", "custom_doc_links", "git_url"] -index = {"list_vc": "02_imports.ipynb", - "col_list_vc": "02_imports.ipynb", +index = {"list_vc": "01_pandas_extra.ipynb", + "col_list_vc": "01_pandas_extra.ipynb", + "pd.DataFrame.vc": "01_pandas_extra.ipynb", + "pd.Series.list_vc": "01_pandas_extra.ipynb", + "pd.DataFrame.list_vc": "01_pandas_extra.ipynb", + "default_rename_rule": "01_pandas_extra.ipynb", + "rename_by_rule": "01_pandas_extra.ipynb", + "pd.DataFrame.rename_by_rule": "01_pandas_extra.ipynb", + "column_order": "01_pandas_extra.ipynb", + "pd.DataFrame.column_order": "01_pandas_extra.ipynb", "__all__": "02_imports.ipynb", - "pd.DataFrame.vc": "02_imports.ipynb", - "pd.Series.list_vc": "02_imports.ipynb", - "pd.DataFrame.list_vc": "02_imports.ipynb", "Path.ls": "02_imports.ipynb", "Async": "03_async.ipynb", "PandasDisplay": "03_df.ipynb", @@ -24,6 +29,7 @@ "make_hboxes": "05_inter_widgets.ipynb", "SingleButton": "05_inter_widgets.ipynb", "Labeler": "05_inter_widgets.ipynb", + "Flatten": "06_flatten.ipynb", "l2norm": "06_spacy.ipynb", "normal": "06_spacy.ipynb", "distance": "06_spacy.ipynb", @@ -135,11 +141,13 @@ "eng_twt_tk": "dataframe_pipeline.ipynb", "Opts": "optimizers.ipynb"} -modules = ["imports.py", +modules = ["pdenhanced.py", + "imports.py", "asyncing.py", "df.py", "html.py", "widgets.py", + "flatten.py", "spacy.py", "freemap.py", "loop.py", diff --git a/forgebox/flatten.py b/forgebox/flatten.py new file mode 100644 index 0000000..b7710bb --- /dev/null +++ b/forgebox/flatten.py @@ -0,0 +1,43 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/06_flatten.ipynb (unless otherwise specified). + +__all__ = ['Flatten'] + +# Cell + +from typing import List, Callable, Any, Dict +class Flatten: + """ + Flatten a tree structure dictionary + """ + def __init__( + self, data, + key_callback: Callable = None, + key_connection: str = "_", + ): + self.data = data + self.key_callback = key_callback + self.key_connection = key_connection + + def flattening( + self, data, + result=None, + upper_key="" + ) -> Dict[str, str]: + """ + Recursive flatten function + """ + if result is None: + result = {} + for key, value in data.items(): + if self.key_callback is not None: + key = self.key_callback(key) + if isinstance(value, dict): + self.flattening(value, result, + upper_key=f"{key}{self.key_connection}") + else: + result[f"{upper_key}{key}"] = value + return result + + def __call__(self) -> Dict[str, str]: + return self.flattening(self.data) + diff --git a/forgebox/imports.py b/forgebox/imports.py index 303bd26..a460cfd 100644 --- a/forgebox/imports.py +++ b/forgebox/imports.py @@ -1,37 +1,14 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/02_imports.ipynb (unless otherwise specified). -__all__ = ['list_vc', 'col_list_vc', '__all__'] +__all__ = ['__all__'] # Cell -__all__ = ["pd","np","partial","Path","json","Counter","list_vc","col_list_vc", +__all__ = ["pd","np","partial","Path","json","Counter", "plt","os","sys","glob","Image",] -import pandas as pd - -def list_vc( - df, colname: str, value: str -) -> pd.DataFrame: - """ - count the values in a column - that each cell is a list - """ - return df[colname].list_vc(value) - -def col_list_vc( - col, value: str -) -> pd.DataFrame: - """ - count the values in a column - that each cell is a list - """ - return pd.DataFrame( - col.apply(lambda x: value in x).value_counts() - ) - -pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts()) -pd.Series.list_vc = col_list_vc -pd.DataFrame.list_vc = list_vc +# import enhanced version of pandas +from .pdenhanced import pd import numpy as np from pathlib import Path import json diff --git a/forgebox/pdenhanced.py b/forgebox/pdenhanced.py new file mode 100644 index 0000000..a683e96 --- /dev/null +++ b/forgebox/pdenhanced.py @@ -0,0 +1,69 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01_pandas_extra.ipynb (unless otherwise specified). + +__all__ = ['list_vc', 'col_list_vc', 'default_rename_rule', 'rename_by_rule', 'column_order'] + +# Cell +import pandas as pd +from typing import Callable + +# Cell +def list_vc( + df, colname: str, value: str +) -> pd.DataFrame: + """ + count the values in a column + that each cell is a list + """ + return df[colname].list_vc(value) + +def col_list_vc( + col, value: str +) -> pd.DataFrame: + """ + count the values in a column + that each cell is a list + """ + return pd.DataFrame( + col.apply(lambda x: value in x).value_counts() + ) + +pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts()) +pd.Series.list_vc = col_list_vc +pd.DataFrame.list_vc = list_vc + +# Cell +def default_rename_rule(x: str) -> str: + return x.replace(" ", "_").replace("-", "_").lower() + + +def rename_by_rule( + df, + rule: Callable = default_rename_rule +) -> pd.DataFrame: + """ + rename the columns by a rule function + """ + df = df.rename( + columns=dict((c, rule(c)) for c in df.columns)) + return df + +pd.DataFrame.rename_by_rule = rename_by_rule + +# Cell +def column_order(df, *col_names) -> pd.DataFrame: + """ + df = df.column_order("col1", "col2", "col3") + will put col1, col2, and col3 as the 1st 3 column + """ + cols = list(df.columns) + + for col_name in list(col_names)[::-1]: + + # warn if the column exist + if col_name not in cols: + print(f"Column:'{col_name}' not in dataframe") + continue + cols.insert(0, cols.pop(cols.index(col_name))) + return df[cols] + +pd.DataFrame.column_order = column_order \ No newline at end of file diff --git a/nbs/01_pandas_extra.ipynb b/nbs/01_pandas_extra.ipynb new file mode 100644 index 0000000..edb13bf --- /dev/null +++ b/nbs/01_pandas_extra.ipynb @@ -0,0 +1,691 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas Extra functions\n", + "> Extra pandas functions at import" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# default_exp pdenhanced" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "import pandas as pd\n", + "from typing import Callable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Value counts" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def list_vc(\n", + " df, colname: str, value: str\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " count the values in a column\n", + " that each cell is a list\n", + " \"\"\"\n", + " return df[colname].list_vc(value)\n", + "\n", + "def col_list_vc(\n", + " col, value: str\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " count the values in a column\n", + " that each cell is a list\n", + " \"\"\"\n", + " return pd.DataFrame(\n", + " col.apply(lambda x: value in x).value_counts()\n", + " )\n", + "\n", + "pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())\n", + "pd.Series.list_vc = col_list_vc\n", + "pd.DataFrame.list_vc = list_vc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rename by rule" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def default_rename_rule(x: str) -> str:\n", + " return x.replace(\" \", \"_\").replace(\"-\", \"_\").lower()\n", + "\n", + "\n", + "def rename_by_rule(\n", + " df,\n", + " rule: Callable = default_rename_rule\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " rename the columns by a rule function\n", + " \"\"\"\n", + " df = df.rename(\n", + " columns=dict((c, rule(c)) for c in df.columns))\n", + " return df\n", + "\n", + "pd.DataFrame.rename_by_rule = rename_by_rule" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rearrage Columns" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def column_order(df, *col_names) -> pd.DataFrame:\n", + " \"\"\"\n", + " df = df.column_order(\"col1\", \"col2\", \"col3\")\n", + " will put col1, col2, and col3 as the 1st 3 column\n", + " \"\"\"\n", + " cols = list(df.columns)\n", + " \n", + " for col_name in list(col_names)[::-1]:\n", + " \n", + " # warn if the column exist\n", + " if col_name not in cols:\n", + " print(f\"Column:'{col_name}' not in dataframe\")\n", + " continue\n", + " cols.insert(0, cols.pop(cols.index(col_name)))\n", + " return df[cols]\n", + "\n", + "pd.DataFrame.column_order = column_order" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import california_housing\n", + "\n", + "cdata = california_housing.fetch_california_housing()\n", + "\n", + "df = pd.DataFrame(cdata[\"data\"], columns=cdata[\"feature_names\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"old\"] = df.HouseAge>20" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
old
True14347
False6293
\n", + "
" + ], + "text/plain": [ + " old\n", + "True 14347\n", + "False 6293" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.vc(\"old\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
medinchouseageaveroomsavebedrmspopulationaveoccuplatitudelongitudeold
08.325241.06.9841271.023810322.02.55555637.88-122.23True
18.301421.06.2381370.9718802401.02.10984237.86-122.22True
27.257452.08.2881361.073446496.02.80226037.85-122.24True
35.643152.05.8173521.073059558.02.54794537.85-122.25True
43.846252.06.2818531.081081565.02.18146737.85-122.25True
..............................
206351.560325.05.0454551.133333845.02.56060639.48-121.09True
206362.556818.06.1140351.315789356.03.12280739.49-121.21False
206371.700017.05.2055431.1200921007.02.32563539.43-121.22False
206381.867218.05.3295131.171920741.02.12320939.43-121.32False
206392.388616.05.2547171.1622641387.02.61698139.37-121.24False
\n", + "

20640 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " medinc houseage averooms avebedrms population aveoccup latitude \\\n", + "0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n", + "1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n", + "2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n", + "3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n", + "4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n", + "... ... ... ... ... ... ... ... \n", + "20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n", + "20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n", + "20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n", + "20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n", + "20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n", + "\n", + " longitude old \n", + "0 -122.23 True \n", + "1 -122.22 True \n", + "2 -122.24 True \n", + "3 -122.25 True \n", + "4 -122.25 True \n", + "... ... ... \n", + "20635 -121.09 True \n", + "20636 -121.21 False \n", + "20637 -121.22 False \n", + "20638 -121.32 False \n", + "20639 -121.24 False \n", + "\n", + "[20640 rows x 9 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename_by_rule()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
oldAveOccupMedIncHouseAgeAveRoomsAveBedrmsPopulationLatitudeLongitude
0True2.5555568.325241.06.9841271.023810322.037.88-122.23
1True2.1098428.301421.06.2381370.9718802401.037.86-122.22
2True2.8022607.257452.08.2881361.073446496.037.85-122.24
3True2.5479455.643152.05.8173521.073059558.037.85-122.25
4True2.1814673.846252.06.2818531.081081565.037.85-122.25
..............................
20635True2.5606061.560325.05.0454551.133333845.039.48-121.09
20636False3.1228072.556818.06.1140351.315789356.039.49-121.21
20637False2.3256351.700017.05.2055431.1200921007.039.43-121.22
20638False2.1232091.867218.05.3295131.171920741.039.43-121.32
20639False2.6169812.388616.05.2547171.1622641387.039.37-121.24
\n", + "

20640 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " old AveOccup MedInc HouseAge AveRooms AveBedrms Population \\\n", + "0 True 2.555556 8.3252 41.0 6.984127 1.023810 322.0 \n", + "1 True 2.109842 8.3014 21.0 6.238137 0.971880 2401.0 \n", + "2 True 2.802260 7.2574 52.0 8.288136 1.073446 496.0 \n", + "3 True 2.547945 5.6431 52.0 5.817352 1.073059 558.0 \n", + "4 True 2.181467 3.8462 52.0 6.281853 1.081081 565.0 \n", + "... ... ... ... ... ... ... ... \n", + "20635 True 2.560606 1.5603 25.0 5.045455 1.133333 845.0 \n", + "20636 False 3.122807 2.5568 18.0 6.114035 1.315789 356.0 \n", + "20637 False 2.325635 1.7000 17.0 5.205543 1.120092 1007.0 \n", + "20638 False 2.123209 1.8672 18.0 5.329513 1.171920 741.0 \n", + "20639 False 2.616981 2.3886 16.0 5.254717 1.162264 1387.0 \n", + "\n", + " Latitude Longitude \n", + "0 37.88 -122.23 \n", + "1 37.86 -122.22 \n", + "2 37.85 -122.24 \n", + "3 37.85 -122.25 \n", + "4 37.85 -122.25 \n", + "... ... ... \n", + "20635 39.48 -121.09 \n", + "20636 39.49 -121.21 \n", + "20637 39.43 -121.22 \n", + "20638 39.43 -121.32 \n", + "20639 39.37 -121.24 \n", + "\n", + "[20640 rows x 9 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.column_order(\"old\",\"AveOccup\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/02_imports.ipynb b/nbs/02_imports.ipynb index da2b0ad..c7dc359 100644 --- a/nbs/02_imports.ipynb +++ b/nbs/02_imports.ipynb @@ -25,34 +25,11 @@ "source": [ "# export\n", "\n", - "__all__ = [\"pd\",\"np\",\"partial\",\"Path\",\"json\",\"Counter\",\"list_vc\",\"col_list_vc\",\n", + "__all__ = [\"pd\",\"np\",\"partial\",\"Path\",\"json\",\"Counter\",\n", " \"plt\",\"os\",\"sys\",\"glob\",\"Image\",]\n", - "import pandas as pd\n", - "\n", - "def list_vc(\n", - " df, colname: str, value: str\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " count the values in a column\n", - " that each cell is a list\n", - " \"\"\"\n", - " return df[colname].list_vc(value)\n", - "\n", - "def col_list_vc(\n", - " col, value: str\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " count the values in a column\n", - " that each cell is a list\n", - " \"\"\"\n", - " return pd.DataFrame(\n", - " col.apply(lambda x: value in x).value_counts()\n", - " )\n", - "\n", - "pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())\n", - "pd.Series.list_vc = col_list_vc\n", - "pd.DataFrame.list_vc = list_vc\n", "\n", + "# import enhanced version of pandas\n", + "from forgebox.pdenhanced import pd\n", "import numpy as np\n", "from pathlib import Path\n", "import json\n", diff --git a/nbs/06_flatten.ipynb b/nbs/06_flatten.ipynb new file mode 100644 index 0000000..04bf542 --- /dev/null +++ b/nbs/06_flatten.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Flatten" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# default_exp flatten" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Flattening the tree structure" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "\n", + "from typing import List, Callable, Any, Dict\n", + "class Flatten:\n", + " \"\"\"\n", + " Flatten a tree structure dictionary\n", + " \"\"\"\n", + " def __init__(\n", + " self, data,\n", + " key_callback: Callable = None,\n", + " key_connection: str = \"_\",\n", + " ):\n", + " self.data = data\n", + " self.key_callback = key_callback\n", + " self.key_connection = key_connection\n", + "\n", + " def flattening(\n", + " self, data,\n", + " result=None,\n", + " upper_key=\"\"\n", + " ) -> Dict[str, str]:\n", + " \"\"\"\n", + " Recursive flatten function\n", + " \"\"\"\n", + " if result is None:\n", + " result = {}\n", + " for key, value in data.items():\n", + " if self.key_callback is not None:\n", + " key = self.key_callback(key)\n", + " if isinstance(value, dict):\n", + " self.flattening(value, result,\n", + " upper_key=f\"{key}{self.key_connection}\")\n", + " else:\n", + " result[f\"{upper_key}{key}\"] = value\n", + " return result\n", + "\n", + " def __call__(self) -> Dict[str, str]:\n", + " return self.flattening(self.data)\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing a tree structure" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': [1, 2, {'c': 'd'}], 'b=>g': 1}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Flatten({\"a\":[1,2,{\"c\":\"d\"}],\"b\":{\"g\":1}}, key_connection=\"=>\")()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/30_traceable_edit_in_flask.ipynb b/nbs/30_traceable_edit_in_flask.ipynb index 795d8af..e7f3db7 100644 --- a/nbs/30_traceable_edit_in_flask.ipynb +++ b/nbs/30_traceable_edit_in_flask.ipynb @@ -234,70 +234,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * Serving Flask app \"__main__\" (lazy loading)\n", - " * Environment: production\n", - "\u001b[31m WARNING: This is a development server. Do not use it in a production deployment.\u001b[0m\n", - "\u001b[2m Use a production WSGI server instead.\u001b[0m\n", - " * Debug mode: off\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " * Running on http://127.0.0.1:4242/ (Press CTRL+C to quit)\n", - "127.0.0.1 - - [21/Jun/2020 18:09:40] \"\u001b[33mGET /table1/workspace HTTP/1.1\u001b[0m\" 404 -\n", - "127.0.0.1 - - [21/Jun/2020 18:09:43] \"\u001b[37mGET /table1 HTTP/1.1\u001b[0m\" 200 -\n", - "127.0.0.1 - - [21/Jun/2020 18:09:43] \"\u001b[37mGET /table1/df_api?page=0&where= HTTP/1.1\u001b[0m\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SELECT * FROM sample_table \n", - " ORDER BY id ASC LIMIT 0,20\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [21/Jun/2020 18:10:02] \"\u001b[37mPOST /table1/save_api HTTP/1.1\u001b[0m\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " idx col valtype original changed ip table_name \\\n", - "0 0 name str Darrow Reaper 127.0.0.1 sample_table \n", - "1 0 house str Andromedus Rising 127.0.0.1 sample_table \n", - "2 0 age int 20 21 127.0.0.1 sample_table \n", - "\n", - " ts \\\n", - "0 2020-06-21 18:10:02.067516 \n", - "1 2020-06-21 18:10:02.071400 \n", - "2 2020-06-21 18:10:02.072681 \n", - "\n", - " sql \n", - "0 UPDATE sample_table \\n SET name='Re... \n", - "1 UPDATE sample_table \\n SET house='R... \n", - "2 UPDATE sample_table \\n SET age=21 W... \n", - "SELECT * FROM sample_table \n", - " ORDER BY id ASC LIMIT 0,20\n", - " \n" - ] - } - ], + "outputs": [], "source": [ "app = Flask(__name__)\n", "\n", @@ -310,7 +249,7 @@ " log_con=con\n", " )\n", "\n", - "app.run(port = 4242,debug=False)" + "app.run(host=\"0.0.0.0\",port = 4242,debug=False)" ] }, { @@ -331,103 +270,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idxcolvaltypeoriginalchangediptable_nametssql
00namestrDarrowReaper127.0.0.1sample_table2020-06-21 18:10:02.067516UPDATE sample_table \\n SET name='Reaper' WHERE id=0\\n
10housestrAndromedusRising127.0.0.1sample_table2020-06-21 18:10:02.071400UPDATE sample_table \\n SET house='Rising' WHERE id=0\\n
20ageint2021127.0.0.1sample_table2020-06-21 18:10:02.072681UPDATE sample_table \\n SET age=21 WHERE id=0\\n
\n", - "
" - ], - "text/plain": [ - " idx col valtype original changed ip table_name \\\n", - "0 0 name str Darrow Reaper 127.0.0.1 sample_table \n", - "1 0 house str Andromedus Rising 127.0.0.1 sample_table \n", - "2 0 age int 20 21 127.0.0.1 sample_table \n", - "\n", - " ts \\\n", - "0 2020-06-21 18:10:02.067516 \n", - "1 2020-06-21 18:10:02.071400 \n", - "2 2020-06-21 18:10:02.072681 \n", - "\n", - " sql \n", - "0 UPDATE sample_table \\n SET name='Reaper' WHERE id=0\\n \n", - "1 UPDATE sample_table \\n SET house='Rising' WHERE id=0\\n \n", - "2 UPDATE sample_table \\n SET age=21 WHERE id=0\\n " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "with PandasDisplay(max_colwidth = 0,max_rows=100):\n", " display(pd.read_sql('editable_log',con = con))" @@ -458,6 +303,19 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4, diff --git a/settings.ini b/settings.ini index 538f4d0..d6fe5ad 100644 --- a/settings.ini +++ b/settings.ini @@ -7,7 +7,7 @@ author = xiaochen(ray) zhang author_email = b2ray2c@gmail.com copyright = xiaochen(ray) zhang branch = master -version = 0.4.5 +version = 0.4.7 min_python = 3.6 audience = Developers language = English