diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0fc33f2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,149 @@
+# Manual additions
+dataset/
+
+
+# Created by https://www.gitignore.io/api/code,macos,python,jupyternotebook
+
+### Code ###
+# Visual Studio Code - https://code.visualstudio.com/
+.settings/
+.vscode/
+tsconfig.json
+jsconfig.json
+
+### JupyterNotebook ###
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# Remove previous ipynb_checkpoints
+# git rm -r .ipynb_checkpoints/
+#
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+.pytest_cache/
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule.*
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+
+# End of https://www.gitignore.io/api/code,macos,python,jupyternotebook
+
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..a4f0c7d
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,703 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T20:59:25.682883Z",
+ "start_time": "2018-03-28T20:59:19.498143Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "DUMMY_DATA_PATH = 'dataset/dummy/'\n",
+ "DUMMY_BANK_DATA = DUMMY_DATA_PATH+'BSA.csv'\n",
+ "DUMMY_MAIN_DATA = DUMMY_DATA_PATH+'data.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T21:03:06.490558Z",
+ "start_time": "2018-03-28T21:03:06.465137Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "main_df = pd.read_csv(DUMMY_MAIN_DATA)\n",
+ "bank_df = pd.read_csv(DUMMY_BANK_DATA)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T21:03:06.752422Z",
+ "start_time": "2018-03-28T21:03:06.705442Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " address | \n",
+ " ads_cmpid | \n",
+ " ads_creative | \n",
+ " ads_matchtype | \n",
+ " ads_network | \n",
+ " ads_targetid | \n",
+ " amount | \n",
+ " application_id | \n",
+ " birthdate | \n",
+ " browser | \n",
+ " ... | \n",
+ " registered_office_city | \n",
+ " registered_office_state | \n",
+ " role_in_firm | \n",
+ " role_on_application | \n",
+ " seo_city | \n",
+ " state | \n",
+ " utm_medium | \n",
+ " utm_source | \n",
+ " utm_term | \n",
+ " year_of_incorporation | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1024.0 | \n",
+ " NaN | \n",
+ " Opera | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " At. Pandharbodi, Gondiya | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 300000.0 | \n",
+ " 716.0 | \n",
+ " 23/10/1982 | \n",
+ " Chrome | \n",
+ " ... | \n",
+ " Gondia | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Near Heena Manjeel, Serve No. 53,, kale padal ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 200000.0 | \n",
+ " 1031.0 | \n",
+ " 08/09/1987 | \n",
+ " Chrome | \n",
+ " ... | \n",
+ " PUNE | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2016.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Mangasule Gali | \n",
+ " 977169039.0 | \n",
+ " 2.312225e+11 | \n",
+ " e | \n",
+ " {google_search} | \n",
+ " kwd-11424241 | \n",
+ " 300000.0 | \n",
+ " 2056.0 | \n",
+ " 02/04/1982 | \n",
+ " Chrome | \n",
+ " ... | \n",
+ " Pune | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " KARNATAKA | \n",
+ " ppc | \n",
+ " adwords | \n",
+ " business loans | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Near Pratiksha Building,, 1, Natraj Niwas, Ata... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 500000.0 | \n",
+ " 9047.0 | \n",
+ " 13/04/1979 | \n",
+ " Chrome | \n",
+ " ... | \n",
+ " THANE | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " Banner | \n",
+ " Facebook | \n",
+ " Carousel-Ad | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 41 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " address ads_cmpid \\\n",
+ "0 NaN NaN \n",
+ "1 At. Pandharbodi, Gondiya NaN \n",
+ "2 Near Heena Manjeel, Serve No. 53,, kale padal ... NaN \n",
+ "3 Mangasule Gali 977169039.0 \n",
+ "4 Near Pratiksha Building,, 1, Natraj Niwas, Ata... NaN \n",
+ "\n",
+ " ads_creative ads_matchtype ads_network ads_targetid amount \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN 300000.0 \n",
+ "2 NaN NaN NaN NaN 200000.0 \n",
+ "3 2.312225e+11 e {google_search} kwd-11424241 300000.0 \n",
+ "4 NaN NaN NaN NaN 500000.0 \n",
+ "\n",
+ " application_id birthdate browser ... \\\n",
+ "0 1024.0 NaN Opera ... \n",
+ "1 716.0 23/10/1982 Chrome ... \n",
+ "2 1031.0 08/09/1987 Chrome ... \n",
+ "3 2056.0 02/04/1982 Chrome ... \n",
+ "4 9047.0 13/04/1979 Chrome ... \n",
+ "\n",
+ " registered_office_city registered_office_state role_in_firm \\\n",
+ "0 NaN NaN NaN \n",
+ "1 Gondia MAHARASHTRA 1.0 \n",
+ "2 PUNE MAHARASHTRA 1.0 \n",
+ "3 Pune MAHARASHTRA 1.0 \n",
+ "4 THANE MAHARASHTRA 1.0 \n",
+ "\n",
+ " role_on_application seo_city state utm_medium utm_source \\\n",
+ "0 0 NaN NaN NaN NaN \n",
+ "1 4 NaN MAHARASHTRA NaN NaN \n",
+ "2 4 NaN MAHARASHTRA NaN NaN \n",
+ "3 4 NaN KARNATAKA ppc adwords \n",
+ "4 4 NaN MAHARASHTRA Banner Facebook \n",
+ "\n",
+ " utm_term year_of_incorporation \n",
+ "0 NaN NaN \n",
+ "1 NaN 2014.0 \n",
+ "2 NaN 2016.0 \n",
+ "3 business loans 2014.0 \n",
+ "4 Carousel-Ad 2014.0 \n",
+ "\n",
+ "[5 rows x 41 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "main_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T21:03:09.592675Z",
+ "start_time": "2018-03-28T21:03:09.515172Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ads_matchtype | \n",
+ " ads_network | \n",
+ " amount | \n",
+ " application_id | \n",
+ " birthdate | \n",
+ " browser | \n",
+ " campaign_city | \n",
+ " city | \n",
+ " company_size | \n",
+ " country | \n",
+ " ... | \n",
+ " registered_office_city | \n",
+ " registered_office_state | \n",
+ " role_in_firm | \n",
+ " role_on_application | \n",
+ " seo_city | \n",
+ " state | \n",
+ " utm_medium | \n",
+ " utm_source | \n",
+ " utm_term | \n",
+ " year_of_incorporation | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1024.0 | \n",
+ " NaN | \n",
+ " Opera | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 300000.0 | \n",
+ " 716.0 | \n",
+ " 23/10/1982 | \n",
+ " Chrome | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " India | \n",
+ " ... | \n",
+ " Gondia | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 200000.0 | \n",
+ " 1031.0 | \n",
+ " 08/09/1987 | \n",
+ " Chrome | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " India | \n",
+ " ... | \n",
+ " PUNE | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2016.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " e | \n",
+ " {google_search} | \n",
+ " 300000.0 | \n",
+ " 2056.0 | \n",
+ " 02/04/1982 | \n",
+ " Chrome | \n",
+ " Pune | \n",
+ " Mumbai | \n",
+ " 5.0 | \n",
+ " India | \n",
+ " ... | \n",
+ " Pune | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " KARNATAKA | \n",
+ " ppc | \n",
+ " adwords | \n",
+ " business loans | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 500000.0 | \n",
+ " 9047.0 | \n",
+ " 13/04/1979 | \n",
+ " Chrome | \n",
+ " NaN | \n",
+ " Mumbai | \n",
+ " NaN | \n",
+ " India | \n",
+ " ... | \n",
+ " THANE | \n",
+ " MAHARASHTRA | \n",
+ " 1.0 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " MAHARASHTRA | \n",
+ " Banner | \n",
+ " Facebook | \n",
+ " Carousel-Ad | \n",
+ " 2014.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 35 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ads_matchtype ads_network amount application_id birthdate \\\n",
+ "0 NaN NaN NaN 1024.0 NaN \n",
+ "1 NaN NaN 300000.0 716.0 23/10/1982 \n",
+ "2 NaN NaN 200000.0 1031.0 08/09/1987 \n",
+ "3 e {google_search} 300000.0 2056.0 02/04/1982 \n",
+ "4 NaN NaN 500000.0 9047.0 13/04/1979 \n",
+ "\n",
+ " browser campaign_city city company_size country ... \\\n",
+ "0 Opera NaN NaN NaN NaN ... \n",
+ "1 Chrome NaN NaN NaN India ... \n",
+ "2 Chrome NaN NaN NaN India ... \n",
+ "3 Chrome Pune Mumbai 5.0 India ... \n",
+ "4 Chrome NaN Mumbai NaN India ... \n",
+ "\n",
+ " registered_office_city registered_office_state role_in_firm \\\n",
+ "0 NaN NaN NaN \n",
+ "1 Gondia MAHARASHTRA 1.0 \n",
+ "2 PUNE MAHARASHTRA 1.0 \n",
+ "3 Pune MAHARASHTRA 1.0 \n",
+ "4 THANE MAHARASHTRA 1.0 \n",
+ "\n",
+ " role_on_application seo_city state utm_medium utm_source \\\n",
+ "0 0 NaN NaN NaN NaN \n",
+ "1 4 NaN MAHARASHTRA NaN NaN \n",
+ "2 4 NaN MAHARASHTRA NaN NaN \n",
+ "3 4 NaN KARNATAKA ppc adwords \n",
+ "4 4 NaN MAHARASHTRA Banner Facebook \n",
+ "\n",
+ " utm_term year_of_incorporation \n",
+ "0 NaN NaN \n",
+ "1 NaN 2014.0 \n",
+ "2 NaN 2016.0 \n",
+ "3 business loans 2014.0 \n",
+ "4 Carousel-Ad 2014.0 \n",
+ "\n",
+ "[5 rows x 35 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "to_remove_cols = [\n",
+ " 'address', # textual, and hence not helpful\n",
+ " 'ads_cmpid', # unique key\n",
+ " 'ads_creative', # unique key\n",
+ "# 'ads_matchtype',\n",
+ "# 'ads_network',\n",
+ " 'ads_targetid', # unique key\n",
+ "# 'amount',\n",
+ "# 'application_id',\n",
+ "# 'birthdate',\n",
+ "# 'browser',\n",
+ "# 'campaign_city',\n",
+ "# 'city',\n",
+ "# 'company_size',\n",
+ " 'country', # all are india as-is, so no point keeping it..\n",
+ " 'created_date', # has no correlation on the model.. \n",
+ "# 'email',\n",
+ " 'firm_name', # each name is potentially unique, also string so can't do much\n",
+ " 'firm_pan', # unique for each company, NOTE: there is a pattern that can be used to extract features!! (TODO)\n",
+ "# 'firm_type',\n",
+ "# 'gender',\n",
+ "# 'industry',\n",
+ "# 'ip',\n",
+ "# 'last_fy_profit',\n",
+ "# 'latitude',\n",
+ "# 'loan_created',\n",
+ "# 'longitude',\n",
+ "# 'name',\n",
+ "# 'network',\n",
+ " 'pan', # unique for each individual, NOTE: there is a pattern that can be used to extract features!! (TODO)\n",
+ "# 'pincode',\n",
+ "# 'platform',\n",
+ "# 'registered_office_city',\n",
+ "# 'registered_office_state',\n",
+ "# 'role_in_firm',\n",
+ "# 'role_on_application',\n",
+ "# 'seo_city',\n",
+ "# 'state',\n",
+ "# 'utm_medium',\n",
+ "# 'utm_source',\n",
+ "# 'utm_term',\n",
+ "# 'year_of_incorporation',\n",
+ "]\n",
+ "\n",
+ "main_df = main_df.drop(columns=to_remove_cols)\n",
+ "main_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T21:03:19.300420Z",
+ "start_time": "2018-03-28T21:03:19.294635Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "432\n"
+ ]
+ }
+ ],
+ "source": [
+ "'''\n",
+ "- make 0 amounts as NaNs\n",
+ "- birthdate -> age\n",
+ "- browser: some really low counts\n",
+ "- campaign_city: some really low counts\n",
+ "- city: some really low counts\n",
+ "- email: publicly hosted email domain or personal email domain\n",
+ "- firm_type: is skewed (need to figure things out..)\n",
+ "\n",
+ "'''\n",
+ "print(len(main_df))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T21:11:05.008409Z",
+ "start_time": "2018-03-28T21:11:04.998455Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Male 339\n",
+ "Female 36\n",
+ "Name: gender, dtype: int64"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# list(main_df)\n",
+ "main_df['gender'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T00:03:50.014321Z",
+ "start_time": "2018-03-28T00:03:50.002483Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# only keep applications that are in both data sets\n",
+ "appln_id = pd.Series(list(set(main_df['application_id']) & set(bank_df['appl_id'])))\n",
+ "main_df = main_df.loc[main_df['application_id'].isin(appln_id)]\n",
+ "bank_df = bank_df.loc[bank_df['appl_id'].isin(appln_id)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2018-03-28T00:03:51.265344Z",
+ "start_time": "2018-03-28T00:03:50.999445Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def _aggregate_columns(df, application_id_col):\n",
+ "    '''\n",
+ "    Collapse `df` to one row per application id.\n",
+ "\n",
+ "    Every column other than `application_id_col` becomes a column of\n",
+ "    Python lists holding the values of all rows of that application.\n",
+ "\n",
+ "    df: frame with one or more rows per application\n",
+ "    application_id_col: name of the column that identifies the application\n",
+ "    returns: new frame with `application_id_col` first, then one list\n",
+ "             column per remaining input column\n",
+ "    '''\n",
+ "    # group the frame that was passed in (not the notebook-level bank_df),\n",
+ "    # so the helper also works on trimmed copies and on other frames\n",
+ "    grouped = df.groupby(application_id_col)\n",
+ "    value_cols = [k for k in list(df) if k != application_id_col]\n",
+ "    if not value_cols:\n",
+ "        # nothing to merge: return just the distinct application ids\n",
+ "        ids = df[[application_id_col]].dropna().drop_duplicates()\n",
+ "        return ids.sort_values(application_id_col).reset_index(drop=True)\n",
+ "    # all pieces share the same group index, so a single concat aligns them\n",
+ "    new_df = pd.concat([grouped[k].apply(list) for k in value_cols], axis=1)\n",
+ "    # name the index explicitly so reset_index() restores the id column\n",
+ "    new_df.index.name = application_id_col\n",
+ "    return new_df.reset_index()\n",
+ "\n",
+ "\n",
+ "def setup_aggregations(df, application_id_col):\n",
+ "    '''\n",
+ "    Reduce the bank data to a single row per application\n",
+ "    (applications can span multiple rows). Per column, by name:\n",
+ "    - contains 'average' -> average of the averages\n",
+ "    - contains 'max'     -> max of all the maxs\n",
+ "    - contains 'min'     -> min of all the mins\n",
+ "    - anything else      -> summed (high_credit_cp, inward returns,\n",
+ "                            outward_returns, ...)\n",
+ "    - the total_* columns are dropped up front (because average is\n",
+ "      better and normalized)\n",
+ "    The application id column is passed through untouched.\n",
+ "    '''\n",
+ "    totals = ['total_bi_inflow', 'total_bi_outflow', 'total_inflow', 'total_outflow']\n",
+ "    merged = _aggregate_columns(df.drop(columns=totals), application_id_col)\n",
+ "\n",
+ "    # keyword -> reducer; the first keyword found in the column name wins\n",
+ "    reducers = (('average', np.average), ('max', np.max), ('min', np.min))\n",
+ "\n",
+ "    result = pd.DataFrame()\n",
+ "    for name in list(merged):\n",
+ "        column = merged[name]\n",
+ "        if name != application_id_col:\n",
+ "            # fall back to summing when no keyword matches\n",
+ "            reducer = next((fn for key, fn in reducers if key in name), np.sum)\n",
+ "            column = column.apply(reducer)\n",
+ "        result = pd.concat([result, column], axis=1)\n",
+ "    return result\n",
+ "\n",
+ "bank_df = setup_aggregations(bank_df, 'appl_id')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}