diff --git a/nbs/01_pandas_extra.ipynb b/nbs/01_pandas_extra.ipynb index edb13bf..168c6c3 100644 --- a/nbs/01_pandas_extra.ipynb +++ b/nbs/01_pandas_extra.ipynb @@ -8,162 +8,55 @@ "> Extra pandas functions at import" ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# default_exp pdenhanced" - ] - }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "# export\n", - "import pandas as pd\n", - "from typing import Callable" + "from typing import Callable\n", + "from forgebox.imports import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Value counts" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# export\n", - "def list_vc(\n", - " df, colname: str, value: str\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " count the values in a column\n", - " that each cell is a list\n", - " \"\"\"\n", - " return df[colname].list_vc(value)\n", - "\n", - "def col_list_vc(\n", - " col, value: str\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " count the values in a column\n", - " that each cell is a list\n", - " \"\"\"\n", - " return pd.DataFrame(\n", - " col.apply(lambda x: value in x).value_counts()\n", - " )\n", - "\n", - "pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())\n", - "pd.Series.list_vc = col_list_vc\n", - "pd.DataFrame.list_vc = list_vc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rename by rule" + "### Testing" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# export\n", - "def default_rename_rule(x: str) -> str:\n", - " return x.replace(\" \", \"_\").replace(\"-\", \"_\").lower()\n", - "\n", + "from sklearn.datasets import california_housing\n", "\n", - "def rename_by_rule(\n", - " df,\n", - " rule: Callable = default_rename_rule\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " rename the columns by a rule function\n", - " \"\"\"\n", - " df = df.rename(\n", - " columns=dict((c, rule(c)) for c in df.columns))\n", - " return df\n", + "cdata = california_housing.fetch_california_housing()\n", "\n", - "pd.DataFrame.rename_by_rule = rename_by_rule" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rearrage Columns" + "df = pd.DataFrame(cdata[\"data\"], columns=cdata[\"feature_names\"])" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "# export\n", - "def column_order(df, *col_names) -> pd.DataFrame:\n", - " \"\"\"\n", - " df = df.column_order(\"col1\", \"col2\", \"col3\")\n", - " will put col1, col2, and col3 as the 1st 3 column\n", - " \"\"\"\n", - " cols = list(df.columns)\n", - " \n", - " for col_name in list(col_names)[::-1]:\n", - " \n", - " # warn if the column exist\n", - " if col_name not in cols:\n", - " print(f\"Column:'{col_name}' not in dataframe\")\n", - " continue\n", - " cols.insert(0, cols.pop(cols.index(col_name)))\n", - " return df[cols]\n", - "\n", - "pd.DataFrame.column_order = column_order" + "df[\"old\"] = df.HouseAge>20" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Testing" + "## Value Counts" ] }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.datasets import california_housing\n", - "\n", - "cdata = california_housing.fetch_california_housing()\n", - "\n", - "df = pd.DataFrame(cdata[\"data\"], columns=cdata[\"feature_names\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"old\"] = df.HouseAge>20" - ] - }, - { - "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -192,11 +85,11 @@ " \n", " \n", " \n", - " True\n", + " True\n", " 14347\n", " \n", " \n", - " False\n", + " False\n", " 6293\n", " \n", " \n", @@ -209,7 +102,7 @@ "False 6293" ] }, - "execution_count": 13, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -220,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -257,184 +150,40 @@ " \n", " \n", " \n", - " 0\n", + " 0\n", " 8.3252\n", " 41.0\n", " 6.984127\n", - " 1.023810\n", + " 1.02381\n", " 322.0\n", " 2.555556\n", " 37.88\n", " -122.23\n", " True\n", " \n", - " \n", - " 1\n", - " 8.3014\n", - " 21.0\n", - " 6.238137\n", - " 0.971880\n", - " 2401.0\n", - " 2.109842\n", - " 37.86\n", - " -122.22\n", - " True\n", - " \n", - " \n", - " 2\n", - " 7.2574\n", - " 52.0\n", - " 8.288136\n", - " 1.073446\n", - " 496.0\n", - " 2.802260\n", - " 37.85\n", - " -122.24\n", - " True\n", - " \n", - " \n", - " 3\n", - " 5.6431\n", - " 52.0\n", - " 5.817352\n", - " 1.073059\n", - " 558.0\n", - " 2.547945\n", - " 37.85\n", - " -122.25\n", - " True\n", - " \n", - " \n", - " 4\n", - " 3.8462\n", - " 52.0\n", - " 6.281853\n", - " 1.081081\n", - " 565.0\n", - " 2.181467\n", - " 37.85\n", - " -122.25\n", - " True\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 20635\n", - " 1.5603\n", - " 25.0\n", - " 5.045455\n", - " 1.133333\n", - " 845.0\n", - " 2.560606\n", - " 39.48\n", - " -121.09\n", - " True\n", - " \n", - " \n", - " 20636\n", - " 2.5568\n", - " 18.0\n", - " 6.114035\n", - " 1.315789\n", - " 356.0\n", - " 3.122807\n", - " 39.49\n", - " -121.21\n", - " False\n", - " \n", - " \n", - " 20637\n", - " 1.7000\n", - " 17.0\n", - " 5.205543\n", - " 1.120092\n", - " 1007.0\n", - " 2.325635\n", - " 39.43\n", - " -121.22\n", - " False\n", - " \n", - " \n", - " 20638\n", - " 1.8672\n", - " 18.0\n", - " 5.329513\n", - " 1.171920\n", - " 741.0\n", - " 2.123209\n", - " 39.43\n", - " -121.32\n", - " False\n", - " \n", - " \n", - " 20639\n", - " 2.3886\n", - " 16.0\n", - " 5.254717\n", - " 1.162264\n", - " 1387.0\n", - " 2.616981\n", - " 39.37\n", - " -121.24\n", - " False\n", - " \n", " \n", "\n", - "

20640 rows × 9 columns

\n", "" ], "text/plain": [ - " medinc houseage averooms avebedrms population aveoccup latitude \\\n", - "0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n", - "1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n", - "2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n", - "3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n", - "4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n", - "... ... ... ... ... ... ... ... \n", - "20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n", - "20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n", - "20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n", - "20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n", - "20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n", - "\n", - " longitude old \n", - "0 -122.23 True \n", - "1 -122.22 True \n", - "2 -122.24 True \n", - "3 -122.25 True \n", - "4 -122.25 True \n", - "... ... ... \n", - "20635 -121.09 True \n", - "20636 -121.21 False \n", - "20637 -121.22 False \n", - "20638 -121.32 False \n", - "20639 -121.24 False \n", + " medinc houseage averooms avebedrms ... aveoccup latitude longitude old\n", + "0 8.3252 41.0 6.984127 1.02381 ... 2.555556 37.88 -122.23 True\n", "\n", - "[20640 rows x 9 columns]" + "[1 rows x 9 columns]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.rename_by_rule()" + "df.rename_by_rule().head(1)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -471,7 +220,7 @@ " \n", " \n", " \n", - " 0\n", + " 0\n", " True\n", " 2.555556\n", " 8.3252\n", @@ -483,7 +232,7 @@ " -122.23\n", " \n", " \n", - " 1\n", + " 1\n", " True\n", " 2.109842\n", " 8.3014\n", @@ -495,7 +244,7 @@ " -122.22\n", " \n", " \n", - " 2\n", + " 2\n", " True\n", " 2.802260\n", " 7.2574\n", @@ -506,144 +255,26 @@ " 37.85\n", " -122.24\n", " \n", - " \n", - " 3\n", - " True\n", - " 2.547945\n", - " 5.6431\n", - " 52.0\n", - " 5.817352\n", - " 1.073059\n", - " 558.0\n", - " 37.85\n", - " -122.25\n", - " \n", - " \n", - " 4\n", - " True\n", - " 2.181467\n", - " 3.8462\n", - " 52.0\n", - " 6.281853\n", - " 1.081081\n", - " 565.0\n", - " 37.85\n", - " -122.25\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 20635\n", - " True\n", - " 2.560606\n", - " 1.5603\n", - " 25.0\n", - " 5.045455\n", - " 1.133333\n", - " 845.0\n", - " 39.48\n", - " -121.09\n", - " \n", - " \n", - " 20636\n", - " False\n", - " 3.122807\n", - " 2.5568\n", - " 18.0\n", - " 6.114035\n", - " 1.315789\n", - " 356.0\n", - " 39.49\n", - " -121.21\n", - " \n", - " \n", - " 20637\n", - " False\n", - " 2.325635\n", - " 1.7000\n", - " 17.0\n", - " 5.205543\n", - " 1.120092\n", - " 1007.0\n", - " 39.43\n", - " -121.22\n", - " \n", - " \n", - " 20638\n", - " False\n", - " 2.123209\n", - " 1.8672\n", - " 18.0\n", - " 5.329513\n", - " 1.171920\n", - " 741.0\n", - " 39.43\n", - " -121.32\n", - " \n", - " \n", - " 20639\n", - " False\n", - " 2.616981\n", - " 2.3886\n", - " 16.0\n", - " 5.254717\n", - " 1.162264\n", - " 1387.0\n", - " 39.37\n", - " -121.24\n", - " \n", " \n", "\n", - "

20640 rows × 9 columns

\n", "" ], "text/plain": [ - " old AveOccup MedInc HouseAge AveRooms AveBedrms Population \\\n", - "0 True 2.555556 8.3252 41.0 6.984127 1.023810 322.0 \n", - "1 True 2.109842 8.3014 21.0 6.238137 0.971880 2401.0 \n", - "2 True 2.802260 7.2574 52.0 8.288136 1.073446 496.0 \n", - "3 True 2.547945 5.6431 52.0 5.817352 1.073059 558.0 \n", - "4 True 2.181467 3.8462 52.0 6.281853 1.081081 565.0 \n", - "... ... ... ... ... ... ... ... \n", - "20635 True 2.560606 1.5603 25.0 5.045455 1.133333 845.0 \n", - "20636 False 3.122807 2.5568 18.0 6.114035 1.315789 356.0 \n", - "20637 False 2.325635 1.7000 17.0 5.205543 1.120092 1007.0 \n", - "20638 False 2.123209 1.8672 18.0 5.329513 1.171920 741.0 \n", - "20639 False 2.616981 2.3886 16.0 5.254717 1.162264 1387.0 \n", - "\n", - " Latitude Longitude \n", - "0 37.88 -122.23 \n", - "1 37.86 -122.22 \n", - "2 37.85 -122.24 \n", - "3 37.85 -122.25 \n", - "4 37.85 -122.25 \n", - "... ... ... \n", - "20635 39.48 -121.09 \n", - "20636 39.49 -121.21 \n", - "20637 39.43 -121.22 \n", - "20638 39.43 -121.32 \n", - "20639 39.37 -121.24 \n", + " old AveOccup MedInc HouseAge ... AveBedrms Population Latitude Longitude\n", + "0 True 2.555556 8.3252 41.0 ... 1.023810 322.0 37.88 -122.23\n", + "1 True 2.109842 8.3014 21.0 ... 0.971880 2401.0 37.86 -122.22\n", + "2 True 2.802260 7.2574 52.0 ... 1.073446 496.0 37.85 -122.24\n", "\n", - "[20640 rows x 9 columns]" + "[3 rows x 9 columns]" ] }, - "execution_count": 24, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.column_order(\"old\",\"AveOccup\")" + "df.column_order(\"old\",\"AveOccup\").head(3)" ] }, { @@ -656,7 +287,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/nbs/02_imports.ipynb b/nbs/02_imports.ipynb deleted file mode 100644 index c7dc359..0000000 --- a/nbs/02_imports.ipynb +++ /dev/null @@ -1,160 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 03 Imports\n", - "> standard imports" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# default_exp imports" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# export\n", - "\n", - "__all__ = [\"pd\",\"np\",\"partial\",\"Path\",\"json\",\"Counter\",\n", - " \"plt\",\"os\",\"sys\",\"glob\",\"Image\",]\n", - "\n", - "# import enhanced version of pandas\n", - "from forgebox.pdenhanced import pd\n", - "import numpy as np\n", - "from pathlib import Path\n", - "import json\n", - "from functools import partial\n", - "from collections import Counter\n", - "\n", - "\n", - "import os\n", - "import sys\n", - "\n", - "try:\n", - " from matplotlib import pyplot as plt\n", - "except:\n", - " pass\n", - "\n", - "try:\n", - " from forgebox.widgets import search_box,paginate\n", - " pd.DataFrame.search_box = search_box\n", - " pd.DataFrame.paginate = paginate\n", - "except:\n", - " pass\n", - "\n", - "try:\n", - " import torch\n", - " from torch import nn\n", - " from torch.utils.data.dataset import Dataset\n", - " from torch.utils.data.dataloader import DataLoader\n", - " __all__+=[\"nn\",\"torch\",\"Dataset\",\"DataLoader\"]\n", - "except:\n", - " pass\n", - "\n", - "try:\n", - " from ipywidgets import interact, interact_manual\n", - " from IPython.display import HTML\n", - " __all__+=[\"interact\",\"interact_manual\",\"HTML\"]\n", - "except:\n", - " pass\n", - "\n", - "try:\n", - " import redis\n", - " def read_redis(key, host='localhost', port=6379, db=3):\n", - " rd = redis.Redis(host=host, port=port, db=db)\n", - " return pd.read_json(rd.get(key))\n", - " \n", - " __all__+=[\"read_redis\",]\n", - " \n", - " def to_redis(self, key:str, con:redis.Redis = None, ex:int=600):\n", - " \"\"\"\n", - " Save dataframe to redis\n", - " \"\"\"\n", - " if con is None:\n", - " con = redis.Redis(host='localhost', port=6379, db=3)\n", - " return con.set(key, self.to_json())\n", - " \n", - " pd.DataFrame.to_redis = to_redis\n", - " \n", - " def df_cache(ex=600, host='localhost', port=6379, db=3):\n", - " \"\"\"\n", - " Caching pd.DataFrame, use as decorator\n", - " \n", - " @df_cache(ex=300)\n", - " def get_table_abc(uuid):\n", - " return pd.read_sql(f\"select * from abc where uuid='{uuid}'\", con=conn)\n", - " \n", - " within 5 minutes, under same uuid, the code will query data base only once\n", - " \"\"\"\n", - " rd = redis.Redis(host=host, port=port, db=db)\n", - " def decorator(f):\n", - " def wrapper(*args, **kwargs):\n", - " key = \"_\".join(str(i).lower() for i in args)\n", - " if len(kwargs)>0:\n", - " key += \"_\"\n", - " key += \"_\".join(f\"{k}-{v}\" for k,v in kwargs.items())\n", - " if rd.exists(key):\n", - " return pd.read_json(rd.get(key))\n", - " df = f(*args, **kwargs)\n", - " df.to_redis(key, con=rd)\n", - " return df\n", - "\n", - " return wrapper\n", - " return decorator\n", - " __all__+=[\"df_cache\",]\n", - " \n", - "except:\n", - " pass\n", - "\n", - "from glob import glob\n", - "from PIL import Image\n", - "\n", - "Path.ls = lambda x:os.listdir(x)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/setup.py b/setup.py index 9364ca8..4775b04 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ ] py_versions = ( - "2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8".split() + "2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 3.10".split() ) for exp in expected: