class DataFrameRowling:
    """
    Read a dataframe row by row.

    Wraps a pandas ``DataFrame`` and exposes its rows through ``len()``,
    positional indexing, ``next()`` and ``for`` loops.

    All state (the frame and the read cursor) lives on the instance, so
    several ``DataFrameRowling`` objects can be used side by side without
    clobbering each other.

    Parameters
    ----------
    df:
        The dataframe to read from. It is stored by reference, not copied.
    """

    def __init__(self, df):
        self.df = df
        self.restart()

    def restart(self):
        """Move the ``next()`` cursor back to the first row."""
        self._cursor = 0

    def __repr__(self) -> str:
        return f"DataFrameRowling:\t{len(self)} Rows"

    def __len__(self) -> int:
        """Return the number of rows in the wrapped dataframe."""
        return len(self.df)

    def __iter__(self):
        """
        Yield every row, first to last.

        The cursor is reset when iteration starts, so the object can be
        looped over repeatedly, including after manual ``next()`` calls.
        """
        self.restart()
        # Re-check the cursor on every step (rather than counting to a fixed
        # length) so that a ``next()`` call made from inside the loop body
        # cannot push the generator past the last row.
        while self._cursor < len(self.df):
            yield next(self)

    def __getitem__(self, idx):
        """
        Return the row(s) at *position* ``idx``.

        Lookup is positional (``DataFrame.iloc``), so it works regardless of
        the index labels and never selects a column by mistake.
        """
        return self.df.iloc[idx]

    def __next__(self):
        """
        Return the row under the cursor and advance the cursor by one.

        Raises
        ------
        StopIteration
            When every row has already been returned.
        """
        if self._cursor >= len(self.df):
            raise StopIteration
        # Positional access yields exactly one row even when index labels
        # are duplicated.
        row = self.df.iloc[self._cursor]
        self._cursor += 1
        return row
--git a/nbs/09_multiprocess.ipynb b/nbs/09_multiprocess.ipynb index 6e42775..49ebc19 100644 --- a/nbs/09_multiprocess.ipynb +++ b/nbs/09_multiprocess.ipynb @@ -164,6 +164,75 @@ "res = Parallel(backend=\"multiprocessing\", n_jobs=6)(delayed(get_line)(i) for i in sfl)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read dataframe row by row" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "class DataFrameRowling:\n", + " \"\"\"\n", + " Read dataframe row by row\n", + " \"\"\"\n", + " def __init__(self, df):\n", + " self.df = df\n", + " self.restart()\n", + " \n", + " def restart(self):\n", + " global DataFrameRowling_df\n", + " DataFrameRowling_df = self.df\n", + " global DataFrameRowling_ct\n", + " DataFrameRowling_ct = 0\n", + "\n", + " def __repr__(self) -> str:\n", + " return f\"DataFrameRowling:\\t{len(self)} Rows\"\n", + " \n", + " def __len__(self):\n", + " return len(self.df)\n", + " \n", + " def __iter__(self):\n", + " for i in range(len(self)):\n", + " yield next(self)\n", + " \n", + " def __getitem__(self, idx):\n", + " global DataFrameRowling_df\n", + " return DataFrameRowling_df[DataFrameRowling_df.index[idx]]\n", + " \n", + " def __next__(self):\n", + " global DataFrameRowling_df\n", + " global DataFrameRowling_ct\n", + " row = DataFrameRowling_df.loc[DataFrameRowling_df.index[DataFrameRowling_ct]]\n", + " DataFrameRowling_ct+=1\n", + " return row" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.DataFrame({\"col1\":list(range(100))})" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "res = Parallel(backend=\"multiprocessing\", n_jobs=6)(delayed(get_line)(i) for i in DataFrameRowling(df))" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/settings.ini b/settings.ini 
index fecbddf..3baed78 100644 --- a/settings.ini +++ b/settings.ini @@ -7,7 +7,7 @@ author = xiaochen(ray) zhang author_email = b2ray2c@gmail.com copyright = xiaochen(ray) zhang branch = master -version = 0.4.18 +version = 0.4.18.2 min_python = 3.6 audience = Developers language = English