diff --git a/.gitignore b/.gitignore
index 1e8f214..ff31749 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Testing
+tester.py
+
# s3 entire data
data/
*.pem
diff --git a/notebook/feature_test_with_DEG.ipynb b/notebook/feature_test_with_DEG.ipynb
new file mode 100644
index 0000000..6786a16
--- /dev/null
+++ b/notebook/feature_test_with_DEG.ipynb
@@ -0,0 +1,365 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
+ "\n",
+ "deg_path = \"resultFiles/DEG_RRvsCIS_by_Jun/\"\n",
+ "expr_path = \"../data/counts_normalized/rawFiles/\"\n",
+ "deg_df = pd.read_csv(deg_path+\"CD4_DEG.result\",sep=' ', index_col=0).dropna()\n",
+ "sig_df = deg_df.loc[deg_df['pvalue'] < 0.05]  # rows with p-value below 0.05\n",
+ "sig_df = sig_df.loc[sig_df['log2FoldChange'].abs() > 1]  # |log2FC| > 1, either direction\n",
+ "\n",
+ "expr_df = pd.read_csv(expr_path+\"counts_norm_CD4.csv\", index_col=0)\n",
+ "expr_df = expr_df.loc[sig_df.index.tolist()]  # keep only rows selected in sig_df (result was previously discarded)\n",
+ "expr_df.columns = [x.split('.')[0] for x in expr_df.columns.tolist()]  # drop everything after the first '.'\n",
+ "expr_df = np.log2(expr_df + 1)  # vectorised log2(x + 1)\n",
+ "expr_df = expr_df.subtract(expr_df.median(axis=1), axis=0)  # centre each row on its own median\n",
+ "\n",
+ "meta_data = pd.read_csv('../data/annotation_metadata/EPIC_HCvB_metadata_baseline_updated-share.csv')"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "source": [
+ "sample_list, sample_category = exttoolkit.get_sample_name_by_category(dataframe=meta_data, sampleColumn='HCVB_ID', dataColname='DiseaseCourse')\n",
+ "sample_list[0] = sorted(set(expr_df.columns).intersection(sample_list[0]))  # sorted: raw set order changes between sessions\n",
+ "sample_list[4] = sorted(set(expr_df.columns).intersection(sample_list[4]))  # same, so the seeded splits stay reproducible\n",
+ "ext_samples = sample_list[0] + sample_list[4] # RR + CIS\n",
+ "\n",
+ "ext_category = [0]*len(sample_list[0])+[1]*len(sample_list[4])  # 0 for sample_list[0] samples, 1 for sample_list[4] samples\n",
+ "\n",
+ "expr_df = expr_df[ext_samples].loc[sig_df.index]\n",
+ "expr_df = expr_df.replace(0, np.nan).dropna(thresh=len(expr_df.columns)-2).replace(np.nan, 0)\n"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "source": [
+ "len(ext_samples)"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "119"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "source": [
+ "X = expr_df.T.values  # transposed: one row per column (sample) of expr_df\n",
+ "y = ext_category  # 0/1 labels, in the same order as ext_samples"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "source": [
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn import metrics\n",
+ "\n",
+ "# One [seed, auc] row per random split, for the test and validation parts.\n",
+ "auc_arr = []\n",
+ "val_auc = []\n",
+ "\n",
+ "for t in range(100):\n",
+ "    # 60% train; the remaining 40% is halved into test and validation parts.\n",
+ "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=t)\n",
+ "    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=t)\n",
+ "    clf = SVC(kernel=\"linear\")\n",
+ "    clf.fit(X_train, y_train)\n",
+ "\n",
+ "    # Use continuous decision scores: thresholded predict() labels give a one-point ROC curve.\n",
+ "    y_score = clf.decision_function(X_test)\n",
+ "    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_score, pos_label=1)\n",
+ "    auc_arr.append([t, metrics.auc(fpr, tpr)])\n",
+ "\n",
+ "    y_val_score = clf.decision_function(X_val)\n",
+ "    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_val_score, pos_label=1)\n",
+ "    val_auc.append([t, metrics.auc(fpr, tpr)])\n",
+ "\n",
+ "auc_test_df = pd.DataFrame(data=auc_arr, columns=['state', 'auc']).set_index('state')\n",
+ "auc_val_df = pd.DataFrame(data=val_auc, columns=['state', 'auc']).set_index('state')"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "source": [
+ "auc_df = auc_test_df.rename(columns={'auc': 'test_auc'})\n",
+ "auc_df['val_auc'] = auc_val_df['auc']\n",
+ "auc_df['diff'] = auc_df['test_auc'] - auc_df['val_auc']\n",
+ "auc_df"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " test_auc | \n",
+ " val_auc | \n",
+ " diff | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.718750 | \n",
+ " 0.755556 | \n",
+ " -0.036806 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.708333 | \n",
+ " 0.888889 | \n",
+ " -0.180556 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.626050 | \n",
+ " 0.584034 | \n",
+ " 0.042017 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.685714 | \n",
+ " 0.773684 | \n",
+ " -0.087970 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.900000 | \n",
+ " 0.642857 | \n",
+ " 0.257143 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " 0.750000 | \n",
+ " 0.611888 | \n",
+ " 0.138112 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " 0.638655 | \n",
+ " 0.697479 | \n",
+ " -0.058824 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " 0.688889 | \n",
+ " 0.687500 | \n",
+ " 0.001389 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " 0.677778 | \n",
+ " 0.750000 | \n",
+ " -0.072222 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " 0.742857 | \n",
+ " 0.888889 | \n",
+ " -0.146032 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " test_auc val_auc diff\n",
+ "state \n",
+ "0 0.718750 0.755556 -0.036806\n",
+ "1 0.708333 0.888889 -0.180556\n",
+ "2 0.626050 0.584034 0.042017\n",
+ "3 0.685714 0.773684 -0.087970\n",
+ "4 0.900000 0.642857 0.257143\n",
+ "... ... ... ...\n",
+ "95 0.750000 0.611888 0.138112\n",
+ "96 0.638655 0.697479 -0.058824\n",
+ "97 0.688889 0.687500 0.001389\n",
+ "98 0.677778 0.750000 -0.072222\n",
+ "99 0.742857 0.888889 -0.146032\n",
+ "\n",
+ "[100 rows x 3 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "source": [
+ "import seaborn as sns\n",
+ "sns.histplot(auc_test_df['auc'].values.tolist(), kde=True, stat=\"density\", color=\"C0\")\n",
+ "sns.histplot(auc_val_df['auc'].values.tolist(), kde=True, stat=\"density\", color=\"C1\")"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n",
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD4CAYAAADmWv3KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA0aUlEQVR4nO3dd3xc1Znw8d+Z0cyo925LtiT3gptsY2zAFAOhmZDQewhkE5IQkjfvZrPZTd1kX3bTNm0DCTWUAKGa3otxwQZ32ZZVrC6NujTS9PP+MQJsY0tja2buzOj5fj7zsXTnzJ2Hy+jR0XNPUVprhBBCxB+T0QEIIYQID0nwQggRpyTBCyFEnJIEL4QQcUoSvBBCxKkEowM4VG5urp46darRYQghRMzYunVrp9Y672jPRVWCnzp1Klu2bDE6DCGEiBlKqYPHek5KNEIIEackwQshRJySBC+EEHFKErwQQsQpSfBCCBGnJMELIUSckgQvhBBxShK8EELEKUnwQggRp6JqJqsQ4fLwpoaQnOfq5aUhOY8QkSA9eCGEiFOS4IUQIk5JghdCiDglCV4IIeKUJHghhIhTkuCFECJOSYIXQog4JQleCCHilEx0EgJwe/3sbetnwOkl2WpmVmE6SVaz0WEJMS6S4MWEprVmY103L+9uw+31f3I8waQ4fWYeq2fkYzYpAyMU4sRJghcTll9rntjaxLbGXqbnp3L6zDwK0xPpdrh570Anr1d1cLBziOtWTMFilmqmiD1h/9QqpcxKqY+UUuvC/V5CBEtrzTPbmtnW2MvZs/O58ZSplOemkmxNYHJWMlcuLeXSRZOosQ/y0KaD+Pza6JCFOG6R6JbcDlRF4H2ECNrWgz18UN/D6TPyOHNWAUp9tgxTOTWbixcWs799kDf2thsQpRDjE9YEr5SaDFwA/CWc7yPE8egccPHcjhbK81JYM6dg1LbLy3JYMiWLt/bZqbEPRihCIUIj3D343wD/F/Afq4FS6lal1Bal1Ba73R7mcMREp7XmuR0tmE2Ky5eUYDpKz/1IF51UTHaKlac/asbl9UUgSiFCI2wJXil1IdChtd46Wjut9V1a60qtdWVeXl64whECgKrWfqo7Bjl7dgHpSZagXmNNMHHRgmK6HG7ufqc2zBEKETrh7MGvBC5WStUDjwJnKqX+Fsb3E2JUPr/mpd1t5KfZWF6Wc1yvnVGQxtzidP74Vg2dg64wRShEaIUtwWut/0VrPVlrPRW4EnhDa31tuN5PiLHsaOqlc9DN2bMLTmhs+zlzCnF6fPzprZowRCdE6Mk4eBG1QrXNHgTGvL+xt4OijETmFKef0Dny0mxcungyD248yC2nllOYkRiy+IQIh4jM3tBav6W1vjAS7yXE0VS19tPlcLN6Zn5QN1aP5ZtnTsfr83PP+roQRidEeEgPXkwI6w90kpVsYe5Reu8VDY8HfZ5SczYXTE7n4fcPcFvmJjKsY0yAqrzpeEMVImRk/rWIe809w9R3DbGiIndcvfePfWXGEINeEw/XJoUgOiHCRxK8iHub6rqwmk1UTskKyfnmZXlZkefmodokfLKCgYhikuBFXHN5fOxo6uOkyRkkWkK3/O815cM0DZl5p80asnMKEWqS4EVc29HUh9vnp3JqdkjPe84kF7k2Hw9JmUZEMUnwIq5tOdhNQbqNkqzQJmKrCa4oc/JGq5XmIfkxEtFJPpkibnUOumjsGWZxadZRV4scryvLhtHA3+ukFy+ikyR4Ebe2NfaigJMmZ4bl/CUpfs4odPNoXSKeYy6nJ4RxJMGLuKS1ZltjL+V5KWQEuajYibimfJgOp5nXW+Vmq4g+kuBFXGruHabb4WZhSWZY3+f0Qjd5iT6ePChlGhF9JMGLuLSruR+TgtlFJ7buTLASTHBJiYs3W610u2RzbhFdJMGLuKO1ZndLH+V5gT1Ww+3SKcN4tOK5Rll8TEQXSfAi7rQPuOhyuI+67kw4zM70MSfT
w5MHJcGL6CIJXsSd3c19KGBOmMszh7q01Mn2HgsH+kM3W1aI8ZIEL+LO7pZ+SnOSSUsM3+iZI60tdWJWmn9IL15EEUnwIq50Dbpo63cytzgjou+bl6g5vcDNUw2JsgCZiBqS4EVc2d3SD8DcCJZnPvb5KU7ahs1sskfuLwchRiMJXsSV3S19FGcmkpUS+YlHZxe5SE3w83SDlGlEdJAEL+LGoMtLU89w2Me+H0tSApw7ycWLTTacPkNCEOIwkuBF3KhuH0ADswqMSfAAny91MuA18UarzbAYhPiYJHgRN/a1D5BiS6Ao07gSyYp8D/mJPp6SMo2IApLgRVzwa011+yAzC1JDsu/qiTIruLjExVutVnrdsnSBMJYkeBEXGruHGPb4mFGQZnQoXFLqxKMVLzRJmUYYK/wLdQgRAfvaBzApmJ5vfIKfm+llWpqXpxsSuXrLveF5k8qbwnNeEVekBy/iwv72AUqzk0myGr9UgFKBXvzmTitNDvkRE8aRT5+Ief1ODy29zqgoz3xsbakTgGdkhUlhIEnwIuZVtw8AMLMwehJ8SYqfyhw3TzckomXpAmEQSfAi5u1vHyQtMYHC9OjqLV9S6qS6P4E9fXKrSxhDPnkipmmtqbUPMqMgDRWm4ZFWdx+Zg9WkDTVCcz94hkEB1jRIzYfMKZA/GxIPX+DsgskufrRN80yDjbmZ3rDEJsRoJMGLmNY+4MLh9lGelxLyc6cONTLJ/i4ZgwdQgDshDTKLICkL0OAagNbt0LAh8ILscpiyEooWgslMlk2zutDNMw2J/PN8B2YZFi8iTBK8iGm19kEAyvNSQ3bOBK+Dqa0vktO/B485hea8U+nKmI/TmsPy8pzDG2sNA23QvhMaN8NHD8K+F2DGeTBpCZeUOnmtNYNNdgun5HtCFqMQwZAEL2Jard1BdoqVrOTQrB6ZPljLtKYnMftdNOWdTmvuCvymUc6tFKQXBR7Tzob2PVD9Emx7CA6uZ83cK0hNmM9TDYmS4EXEyU1WEbP8WlPbOUh5bmjKM/ndW5h18CE8CSnsKr+F5vzTR0/uR1ImKJwHq74NC64Chx3b+//NzzOf5aUmq6wwKSJOevAiZrX2OXF6/CEpzxTb36Wk4016UqdzYPKl+M3jWGZAmaBkOeTPgR1/5+L2x/CrJt5svoLPlcqYSRE50oMXMevT+vv4evDF9vco6XiTzoz57C+9YnzJ/VC2NKi8Gf/MC7jYvIE5u38Fwz2hObcQQZAEL2JWjX2QvDQb6ePYXLui8QlKOt6gM2M+NZMuCfS+Q0kpTNPX8ETubeR42/G99xtw2EP7HkIcgyR4EZN8fk1959C46u/5XR+wdPfP6E2dRu2kiwM3TMNk8ZyZfNH9IzweL2z4vSR5ERGS4EVMau4Zwu3zU3GC9ffk4TZWbfs/DCSXcGDyF9AqvIuUTUv3kZxVxDdM/4L2eWHDHyTJi7CTBC9iUk2nA+CEevAmn4tTP/oWZp+Ldxf/Fl+oau5juKJsmFcHy6ma/Q3weWDjH8E1GJH3FhNT2BK8UipRKbVZKbVdKbVbKfXjcL2XmHhq7IMUZSSSbDv+gWCVe35OTt9u3l/wc/pTy8MQ3dFdMNlFstnP/R3TYflXAsl96z3gl2UMRHiEswfvAs7UWi8AFgLnKaVODuP7iQnC4/PT0HVi9feStleZ1vQkuypuobngzDBEd2ypFs0FJS7WNdlwpE6BBVdCdy3sfAJZclKEQ9gSvA74+O9Py8hDPsVi3Bq7h/D69XHX3xOddpbt+jFdGXPZOe2rYYpudFdMdeLwmni+yQaTlsD0c6BxI9S9Y0g8Ir6FtQavlDIrpbYBHcCrWutNR2lzq1Jqi1Jqi90uN53E2GrsDkwKph5PD15rTt75b5h9Lt4/6Rdo04kPrRyPJTkeKtK8PFSTFDgw4zwomA9Vz0BfoyExifgV1gSvtfZprRcCk4FlSql5R2lzl9a6UmtdmZeXF85wRJyotQ9SnJlEoiX4
kS/TG/5Oced6Ppr1HQZSy8IY3eiUgusqhtneY2FHd0Jg3P2CKwOToj56EHxuw2IT8Scio2i01r3Am8B5kXg/Eb/cXj+NPUPHVZ5JGWpm0b5f0ZK7kurSK8IYXXAuneIk2ezngY978dYUWHgNDHbAnmeNDU7ElbCtRaOUygM8WutepVQSsAb4f+F6PzEx1Hc58OvjWJ5Aa5bu/ikaxeZ5Pxz3ZKZNdd3jev3HVmYl8FxjJv960iBZNg25M6DsdKh7GwrmBNaxEWKcwtmDLwLeVErtAD4gUINfF8b3ExNArX0Qs1JMyQ4uwU9tWUdx53q2z7ydoaSiMEcXvHPye3D5FY/VJ316cNaFkFYE2x8Fj9O44ETcCOcomh1a60Va65O01vO01j8J13uJiaO200FJdhLWhLE/ujZXN4ur7qQz86SoKM0cqjTJzbJcNw/WJOH7eGyZ2QInXRnYKWr/i4bGJ+KDzGQVMWPY7aO5Zzjo5YEX7/0vLN5BNs37cdiXIjgR11cM0zRk5o3WQ9acz5oCU1YEhk32NRkXnIgLkuBFzKjvcqAhqBusRfb3KGtZx56KL9OXNi38wZ2Acye5mJTs4+79yYc/MfPCwI3XnY+D9hsTnIgLkuBFzKixD5JgUpRkJY3aLsE7xNLdP6UvpYzd5bdEKLrjZzHBl6YPsbnTykddh4x3sCbDnLXQexAaPjN1RIigSYIXMaPW7mBqTgoJ5tE/tidV/57U4RY2z/sRfnNo9moNlyvLnKRb/Nx1ZC9+UiVkV8DedeAZNiY4EfMkwYuYMOjy0tbvHHN4ZHbvLmbUP8T+0iuwZy+OUHQnLiVBc235MC8126gfPOQ+gVIw9/PgcUDN68YFKGKaJHgRE+pGlgcerf6u/B6W7/oRTlsu22fcHqnQxu3G6cNYTPCX/UeUnjImB3rytW/DcK8hsYnYJptui5hQax/ElmCiOPPY9fdZ9Q+SNbCPdxb9Bo8lLYLRnZhDJ02tyk7g73XpnJraTKbF98lxa/IpLNAf0bn1aeomXfzJ8Rpfw2Hnunp5afgDFjFHevAiJtSM1N/NpqPPRE11NDK/+o80FpxFU+FZEY5u/NYWdOHTimfbcg477rZm0p69lLzebSQ52w2KTsQqSfAi6vUNe+gcdFFxrPq71izb/WP8Jgtb5vxLZIMLkcJED6uy+3nVnkmv5/Ax+825p+IzJVLaLrV4cXwkwYuoV2sPbCtwrAlOZS3PUdi1iW0zvsVwYkEkQwupS4s68WjFc+3Zhx33JSTRkruSzMEDpA7J5CcRPEnwIurVdjpIspgpzEj8zHM2VzeLqv4Le+ZCDpReZkB0oVOU6OHU7H5e6cj6TC++PXspHnMyk+xvGxSdiEWS4EXUq7UPUp6XgukoK0Eu3nvnyHIEPwqsrR7jPu7FH1mL95uttOauIHOwhtQh2RhEBCf2fyJEXOt2uOkZ8hy1PBNYjuB59lR8mf60CgOiC71PevH2TDrdhw9y+7QXL9v7ieAEleCVUk8qpS5QKg66SCKmfFJ/P2J7Pounj+U7f0hvagW7y79sRGhhc1lxJxp4oiX3sON+k5XW3FPIHKwht2ebIbGJ2BLsOPg/AjcB/6OUehy4V2u9L3xhiZi05d6Qnq6ioZsXDhaRkZBCfprtsOcq9/wnie4u3l7yP/jNtmOcITbl2zycm9fLCx1ZXFDQTUnSp9v4tWdXUtT5PvMP/Ik3l/7ZwChFLAiqR661fk1rfQ2wGKgHXlNKva+UukkpZczuxSLuaQ27B5KZl+ZAHVJ/n9z2GmUt69hdcSs9GXMNjDB8Pl/USaLJzyPNh+9T7DdZac1ZQVHn+2T17TYoOhErgi65KKVygBuBLwMfAb8lkPBfDUtkYsJrcVnp8ViYmzb0yTGbq4tlu35Cd/psdlVE70qR45WW4GdtYRdb+9KoGjh89m5HdiXuhDTm1N5jUHQiVgRbg38KeBdIBi7SWl+stf671vobQPC7HwtxHHYPBFZYnJs+
kuC1Ztnun2DxOdhw0s/Rpvj+4/H8gh6yLB4eas5H60+P+8w2qksvp7TtVdIcB40LUES9YHvwd2ut52itf6G1bgVQStkAtNaVYYtOTGi7+pPJtXoosHqAwP6qJe1vsH36N6J2E49Qspk0lxV3Uu1I4oPew/tR+6Zei99kYVbdfcYEJ2JCsAn+Z0c5tiGUgQhxKL+GPQPJzE1zoBSkDDVRuefndGQtYl/ZdUaHFzGrc/ooTnTxcHMe3kN68U5bLrWTLqG86RkSnXbjAhRRbdQEr5QqVEotAZKUUouUUotHHqsJlGuECIt9fWYGfAnMTRtC+X2s3PZ/ARUozUTh/qrhYlZw7aQOWl02XunIOuy5qrIbUdrHrPoHDYpORLuxhkmeS+DG6mTgV4ccHwC+H6aYhOB9e2AnprlpQ0zueIPcvp28u/CXOJInGxxZ5C3OcHBS+iBPtOayKqef9ITAcsKDKSU0Fp3DtMbHwfljSMwwOFIRbUbtwWut79danwHcqLU+45DHxVrrJyMUo5iA3u+wUmRzU+GqorhrA9Ull9NYdI7RYRlCKbh+cgdDPtNnJj/tKfsSVu8gfPBXg6IT0WysEs21I19OVUp9+8hHBOITE5DHDxvtFlamNlPR/AxDtnw+nP1do8MyVEmSmzV5vbxqz6Rx+NN9ZnsyZtOSuxI2/gk8TgMjFNForJusH88PTwXSjvIQIuS2d1sY9sLtnnsw+T1Ul3wBn/mzK0lONJcVd5Jk9vNAUz76kHGTe8q/BI4O2P6wgdGJaDRqDV5r/eeRf38cmXCEgPUdFr5ufpoSdw01xRfjtOWN/aIJID3BxxeKOnmgqYCF7QPMKkwHoCN7KRQtDPTiF98IJlkySgQEO9HpTqVUulLKopR6XSllP6R8I0RIdbfWcbvlH3RmzKczc4HR4USVc/N7KLa5eGFnKz7/SC9eKVjxdejcDwdkYrn4VLC/6s/RWvcDFxJYi2YaMLGLoiIsHENDfHXof+lLyKOu6PxA8hKfSFBwXUkHnYNuNtZ2ffrE3EsgrRg2/N6w2ET0CTbBf1zKuQB4XGvdF6Z4xESmNcNbHyaLAQ7OuCnuVokMlUXpDqbnp/L63nYcLm/goNkCy78Cde9A6w5jAxRRI9gEv04ptRdYAryulMoD5Ja9CK26d8jt28WdvquYPaXI6GiillJw/vwi3F4/r1W1f/rEkhvAkgIb/2hccCKqBLUevNb6e0qpO4E+rbVPKeUA1oY3NDGh9DXB3mfZaFrI3qwzSTQf/kdiRcPjBgUWnQrSE1lWlsOm2i6Wl49s75eUBYuuhS33wFk/hHT5JTnRHc/t9lnAFUqp64EvAhNz1okIPa8LPnwAnyWVrw79E6cUeIyOKCacPSufRIuZF3a2fjps8uR/Ar8XPrjb2OBEVAh2FM2DwH8Dq4ClIw9ZRVKExu6nwGFn4+Sb6CGdVfnusV8jSLYlcNbsfA50DPLG3o7AwexymH1hoBfvdhgboDBcsFv2VQJz9KGzK4QIhZYPoXEjTFvDs4PzSbf4mZflNTqqmLG8LIdNtd387PkqTp2ehzXBFBgyWfUcbH8ElsbXfrXi+ARbotkFFIYzEDEBDXXBjscgayp6+nm812FlRZ4bs4yMDJrZpDh/fhF1nQ4e2FAfOFiyHCYtgQ1/BL/f0PiEsYJN8LnAHqXUy0qpZz9+hDMwEef8PvhoZJnbRdfRMGylecjMKqm/H7eZhWmsnpnHb1+vpmvQNTLx6TboroHql40OTxgo2AT/I+AS4OfALw95CHFiat+EnnqYfxkk5/BeR2D7vZVSfz8hP7hgNkNuH796dX/gwOy1kFECG/5gbGDCUEEleK312wRmsFpGvv4A+DCMcYl41t8K+1+EogVQvBiA99qtFCX5KEv1GRxcbJqWn8Z1J0/hkc0N7G3rB3NCYOJT/bvQss3o8IRBgh1FcwvwBPDnkUOTgKfDFJOIZ34fbH8IEpJg3mWgFB5/IMGfXuiWlQnG4Vtn
Tyc9ycJP1+0JDJtcfD1YU2Xi0wQWbInmNmAl0A+gta4G8kd7gVKqRCn1plJqj1Jqt1Lq9vGFKuLCgdcCk5rmXwa2wEbSH3ZZGPCaWF0o5ZnxyEy2csfZM1h/oItX97QHdnhafD3s+gf0NRsdnjBAsAnepbX+5KdPKZUAjDVk0gt8R2s9BzgZuE0pNefEwhRxYaANql8JlGWKPl0l8q02KwlKS/09BK5ZXsr0/FT+44UqXF5foEyj/bD5LqNDEwYINsG/rZT6PoHNt9cAjwPPjfYCrXWr1vrDka8HgCoCpR0xEWk/7HwMEmww99LDnnqzzUplroc0i0yzGK8Es4kfXDiHg11D3P9+PWRNhdkXwdZ7wTVodHgiwoJN8N8D7MBO4CvAC8APgn0TpdRUYBGw6SjP3aqU2qKU2mK324M9pYg1TR9Ady3MvviT0gxA27CJvX0WKc+E0Okz8jhzVj6/e/0AnYOuwMQnZ19g4pOYUIIdReMncFP1a1rrL2qt7w52VqtSKhX4B/CtkTXljzz3XVrrSq11ZV6e7NwTl9yDsOdZyCqDkmWHPfV2W2B/0dWFLiMii1v/esFshj0+fvnK/sA1n1QZuNkqE58mlFGXKlBKKeCHwNcZ+WWglPIBv9Na/2SskyulLASS+0Na6yfHH66ISVXPgncYTroc1OF9irfarBQm+ZiZLsMjx+PhTQ2fOba8LJtHNzeQm2pled4VrNr2Xd5e9yDNBWcc8zxXLy8NZ5giwsbqwd9BYPTMUq11ttY6G1gOrFRK3THaC0d+OfwVqNJa/yok0YrY01MPjZuh/AxIO3z52o+HR66W4ZFhceasApKsZtbtaKUh/ywciUXMqn/A6LBEBI2V4K8DrtJa1318QGtdC1wLXD/Ga1eOvP5MpdS2kcf544pWxBatYc/TYEuH6Z9dXVqGR4ZXktXM2bMLqOt0sLt9iH1Tr6GgewtZfXuMDk1EyFgJ3qK17jzyoNbaDlhGe6HW+j2ttdJan6S1XjjyeGE8wYoY0/JRoAc/8/zA6JkjvNZiwyLDI8Nq6dRsCtJtvLirjX1Fl+AxJzOr/kGjwxIRMlaCH+0nT34qxbH53LD3OUif9JkbqxDo3L/SYuWUfLcMjwyjj1eb7Ha4eavBTc3kS5nS+hJJzvaxXyxi3lgJfoFSqv8ojwFgfiQCFDGq9m0Y7oE5l3zmxipAdb+Zg44E1hTL6Jlwm56fxqzCNN7a18GHRZejtI8ZBx81OiwRAaMmeK21WWudfpRHmtZ61BKNmMBcA4ElCQrmQ+70ozZ5pSVQsllTLH8IRsL584vw+jRP1lloLDiLaY2PY/YOGR2WCLPj2ZNViOAceC1Qopl94TGbvNpiY2G2h4IkGZcdCbmpNlZU5LD1YA/rcy/H5umjrHnUyegiDkiCF6E13AsH10PJUkgtOGqT1iET23ssUp6JsDNm5pNkNfPnunw6M+Yx6+DfAktIiLglCV6EVvXLgTuo0887ZpPXWgPlmXMlwUdUktXMmjkF1HcP8Vr6F0h31FNsf9fosEQYSYIXoeOwQ+MmmHIKJGcfs9krLVbKU71UpMns1UirnJJNYXoiv2yahcNWIEMm45wkeBE6+18CZYZpa47ZpM+t2NhhZU2xS2avGsBsUlxwUhH2Yc2LKWsp7NpEZv9eo8MSYSIJXoSGfR80fwhlp0Fi+jGbvdJiw6MV502S8oxRKvJSmVOUzi/al+MxJ0kvPo5Jgheh8e4vwWwJrDkziucabZSk+FiY7Y1QYOJoPjevkF5/Mq9Y1zCl5QUSnbJUdzySBC/Gr7sWdj4OU1Yettb7kbpcivUdVi6a7JTyjMFyUm2cMi2HO3vPwKR9zGiQteLjkSR4MX7v/gpMFihfPWqzF5ts+LTiohJnZOISozpjZj6d1km8Z17GtIbHMfuGjQ5JhJgkeDE+vY2BnYKW3BDY5HkUzzUmMi3Ny6wMGT0TDRItZs6ZXcDvhs4h0dMrE5/i0KgbfggxpvW/
ARSsvB2qXz1ms7ZhE5s7LXxrjkPKMyFQ0fB4SM5TZoLv2aayR09lXvWf4IMsWHpzSM4tjCc9eHHiBtrhwwdh4dWQMXnUps832dAoLiyR0TPRxKTghlI7f/acT7K7EzpkyGQ8kQQvTtzmuwJrzqy8fcymzzYkMjfTI5ObotDctCE60ubSprNw1bxtdDgihCTBixPjdsAHf4FZF0BOxahN9/eZ2d5j4fOlcnM1Wl1V0s2DvjXYuvdB2y6jwxEhIglenJiPHgJnb1C998frk0hQms9PkQQfrQpsHrqzFjOkbXS9/hujwxEhIgleHD+/Dzb8HkqWH3W3pkN5/PBkQyJnF7vIscnOTdHs3ElO1rGK9Oqn0ANtRocjQkASvDh+Vc9B70E45RtjNn2j1UqXy8TlU6X3Hu2SzH5Sp5+GWfuofv63RocjQkASvDg+WsP7/wPZ5YHNtMfweH0S+Yk+TiuQnZtiwbkzM9hkWUr+3r/hcTqMDkeMkyR4cXwaNkLzVlhxG5jMozbtGDbxZpuVL0xxkiCftJhgVmBZ9Q0y6Wfruj8bHY4YJ/mxE8fn/d9BUjYsuHrMpv84mIhPKy6T8kxMWXLahdQmVJC/668Mu2RRuFgmCV4Er7Ma9r0Ay24Ba/KoTX0a/labxPJcN+Uy9j2mKJMJVnyNcpp4dd3DRocjxkGWKhDB2/B7MFth6S1jNn2txUrzkJkfnDQYgcBEqGyq66bG14Ap+QwyVDbZO+7mL/mrSLYef6q4enlpGCIUx0N68CI4g3bY9ggsvApS88Zsfv+BZIqTfLKxdozymyxUlVzJKrWDml2bjQ5HnCBJ8CI4H9wNPhes+PqYTff3mXnfbuWaimG5uRrD2qZfhQsbi1oepXdIRkHFIvnxE2NzD8HmuwPDInOnj9n8/pokrCbNVWWyvngsc1szqS66kLWm9/ioqtrocMQJkAQvxrb9YRjuDmpiU59b8eTBJNaWOsmWmasxr27aDdiUh7ktj0svPgZJghej8/tgwx9gUiWUrhiz+cO1SQz7FDdUSO89HgykllGfvYprza+yfl+z0eGI4yQJXoxu3wuBPVdP+QZj7dTh9MFfq5M4Nd/NvCwZPx0vaqbdSK7qZ3LjOunFxxhJ8GJ07/8OMqfA7IvGbPqPg4l0usx8dZZMcY8n7dnL6EyZzpfML/LWvg6jwxHHQRK8OLaGTdC4KTByZoxlCbw+P3/el8KCLA8r8jwRClBEhFJUl9/ATFMjiQ3v0CO9+JghE50EAA9vavjMsVUf3kmBJZ2nvafhO8rzh9rR1EuDw8z3TxqQPVfj0MGiz7Fg76/5ku9F/rjvdC5ZNMnokEQQpAcvjirNcZCS9tepLr0CX8LoyxJorXl7v51im4tziqV3F4/8ZivVU69itXkbvQ27pBcfIyTBi6OaVXcffpOF/VPGXlSsqnWA1j4nawu7MUnvPW4dKL0cr8nGTeYXebfabnQ4IgiS4MVnJLo6KW9+ltpJa3Hackdt69ea1/e2k5Ni5dScvghFKIzgsmZRN+kiLjW/S039QQaccq8l2kmCF58xs/5vKL+XqrIbx2y7p6Wf1j4nZ87Kxyy997i3b+p12HBzpXqN92u6jA5HjEESvDhMgmeQ6Q2P0Vh4NoMpo68G6Nea16rayU21saAkMzIBCkP1p5bTkreKL1lfZWttG06PLAUdzcKW4JVS9yilOpRSu8L1HiL0pjU+gdU7QFX5l8Zsu6u5j44BF2fNzsckQ2cmjKqp15OleznXv55Ndd1GhyNGEc4e/H3AeWE8vwgxk8/NrPoHactZTnfG3FHb+vyB3nt+mo35kzIiFKGIBu05J9OTNp2vJb7E+gN2PD6/0SGJYwhbgtdavwPIr/cYMrXleZJdHewpG7v3vuVgN52Dbs6ZUyi994lGKfZOvZ4Kfz2Vnq182NBjdETiGAyvwSulblVKbVFKbbHbZeiVYbSf2XX30J02i7bc0RcVc3l9vF7VwZScZGYXpUUoQBFN
6osvwJFYxHcSn+Hd/XZ8flk5NBoZPpNVa30XcBdAZWWlfEoiYcu9nzlUuXsDGY56Dky+lIrGJ0Z9+RMtOQy68vjnqQeY1vjhYc9JTTb2VTQ8HlS7jqxFzG99gZnubdh31LMye+DwBubsz76o8qYQRCiCZXgPXkQBrSnufBenJZOu9DmjNu3zmHmuPZtlmQPMSHVGKEARjeyZC3EnpPId65M83ZaDlu5Z1JEEL6CjitThFlryVoEa/SPxj9Zc3H4TV02SctpEp00JtOasoJIqcp0NbOtPMTokcYRwDpN8BNgAzFRKNSmlbg7Xe4lx0Br2v4TTkklnxoJRm7Y6Lbxmz+Ss3F6KE2UtEgEd2UvwmJO4w/oUT7flGB2OOELYavBa66vCdW4RQvYq6GugpfhC9BhLAj/akkeCSfPF4s4IBSeind9kpS3nZE7teBOzo4O9g0nMSpXdvKKFlGgmspHeO0lZY/beqwaS2NiTzkUF3WRaZPai+FR79lK8Jhu3W57i2baj3FgVhpEEP5HZq6C3AaadM2rv3a/hvsYCciweLi6U9UfE4XzmRNqzl3G2aQt9/X00DFuNDkmMkAQ/UWl/YL/VpCwoWTpq07e6MqgfTuSayR3YTDJUQnxWW85y/CqBOxKe5FmpxUcNSfATVctH0NcEM88H07FvxQz5TDzanMfMlCFOyRo4ZjsxsXkTkmnPWc4F5o309HRjdxk+xUYgCX5i8nlh7/OQXgyTloza9MnWHPq9Zm4saZet+MSoWnNPwW1K4v8kPMa6dqnFRwNJ8BPRwfdguBtmXzzquPdWp4UXOrJZndNHeYorggGKWOQzJ9KWt5LV5u30d7XR5ZIegdEkwU80w71Q/QrkzoS8WaM2/VtTPlbl50qZ1CSC1Ja9lGFzGnckPMb91UlGhzPhSYKfaN77FXiGYPZFozbb3p/Mlr40Pl/UJcMiRdC0yUJb/mksNe2nrmYfgx7pxRtJEvxEYt8HG/4Ik5dCxuRjNvNpeKCxgAKbm/PzZSlYcXzsWQvpT8jhdvUIf6+1GB3OhCYJfqLQGp7/DliTA7X3UbzUkUWT08Z1kzuwyLBIcZy0MtNavIZpphb6q9fjkj8ADSMJfqLY+QTUvwtn/RBsx17DvcudwGMtuSxMH6QyYzCCAYp40ps6ne70Odysn+L5Go/R4UxYkuAnAmcfvPx9KF4MS24cten9jfn4tOJLpTIsUoyDUmQtWkuycpGw/3ncsqufISTBTwSv/RgcdrjwVzDKkgTb+lLY1JvOpUVdFNik1yXGR6UV0JK/mgv127y5t8PocCYkSfDxrvo12PJXOPlrULzomM3cfsU9DQUU21xcVCC7MonQKFl4Nv0qjdLaR/B4pRgfaZLg49lQNzxzW2C8+1n/PmrTp9tyaHdbubm0XW6sipBR1iQayy9nNnXs+fA9o8OZcCTBxyutYd23YKgLLr0bLInHbFozYOaZtmxWZfcxL30ocjGKCWHe7HmsN1Uyq2Md7ra9RoczoUiCj1fbH4E9z8CZ/wpFJx2zmU/DP29Jw2bSXDdZ6qQi9JQC5n2BIW2j95FbwS+lmkiRBB+PWrbBum/DlJVwyjdHbXr/gSS2dFm5oaRdZqyKsDmlJImHkq4mv287znd/Z3Q4E4Yk+HgzaIdHr4HkHLjsvlFHzRwcNHPnrlRWF7o4Lbs/cjGKCUcpOHXRfF7yLcXy1k+gcbPRIU0IkuDjidcNj10PQ51w5UOQmn/Mpn4N/7w1DYvS/GLxgIx5F2G3IMfHazP+nRZ/Dr6/3wAO2R0s3CTBxwu/H9bdAQ3vw9o/QPHCUZv/ZX8SG+1WfrBgkKJkmYUiIuPrn1vC173fwu/ohCe/LPX4MJMEHw+0hpe+B9v+Bqf/M8z/4qjNd/Yk8F+7UjlvkpPLpzojFKQQMDU3hUXLV/ND93VQ8wa8+R9GhxTXJMHHOq3htR/C5j/Diq/D6n8ZtbnD5eWbm9LJTfTzn0ukNCMi79vnzOCVxM/x
qm0NvPtL2Hy30SHFLUnwsUxreOOnsP63UHkznPMzRsvYWmv+7Zld1A+a+fWyfjKtMqFJRF56ooV/u2gO/9R3PU15p8EL34XdTxsdVlySBB+rvG54+muBHtDiG+D8/x41uQPc9349T37YzDdnD3Fynqw1I4xz8YJilpXn8wX7rXiKK+HJW6DmTaPDijuS4GORsw8evgy2Pwyrvw8X/RZMo/+vXH+gk589X8WaOQXcPscRoUCFODqlFD+9ZC7dHjPfT/wBOmcaPHx5YHKeCJkEowMQx7Dl3qMf72+GrfcHhkIuuDowFHLrfaOe6uCgma+9nkVFqp9fT9+FSeruIgpMy0/jjjUzuPOlfaxaezdr93wbHrsBLvhvWPplo8OLC9KDjxVaQ/178N6vweuE5V+FkmVjvqxt2MR172aiFPzllF5SLVJ3F9HjK6dVsKwsm++/2ETjBQ/DjHMDO4+9/K/gkzLieEmCjwXDPYElf3c9ATnT4bTvQu70MV9mdyqufieTbpfivlW9lKbKeHcRXcwmxa+vWIjJpPjWU/vxXvZgoPe+4fdwz7nQXWd0iDFNEnw0036oexve+s/AhtlzLoFlt4y65d7HOp2Ka9/JonXIzL2r+liY7Q1/vEKcgEmZSfzsknlsPdjDT1/YDxf8Ei67HzoPwJ9Pg4/+FpjIJ46b1OCjlX0fVD0bqLnnzQ5MXkrOCeqle/vM3Lw+ky6Xib+e0svSXPlTV0S3tQsnsau5j7vfraMsN4UbV14S2KDmyVsDexpsuRfOvxMmLTE61JgiPfho07INHvw8bPoTeIYDQyCX3Rp0cn+9xcoX3szC64fHTu9hZYEkdxEbvve52ayZU8BP1u3hjb3tkDUFbnoRLvlf6GuEu8+EJ26Gtl1GhxozlNbRc9OtsrJSb9myxegwwurhTQ2fPag1+d0fMLf2rxR1vo/Lkk5b9sm0Z1eiTcf+I2t5WfYnXw974c5dqdx7IJl5mR7+srKPwqTg/6zdVCfb9InQOvTz+YnKm0Z9zZDby2X/u4Fau4O7rl/CqdPzAk+4Bj6d9eoehGlrYMVtUHb6mEOE451SaqvWuvJoz03sK2MwpX1MbnuNczZcw9mbbyazfx/bZtzOs6e/RFvuyaMm94/5NDzTYOOsl3O490AyN1QM8fjqnuNK7kJEi2RrAvfdtIwpOcncfN8WXt7dFnjClgZn/wju2AVn/gBaPoQHL4HfLgjco5KbsUclPfgIe3hTA8nDrVQ0PUV505OkONsZSJpMVflN1E5ai99sA6Ci4fFRz9PpTqBBF/J4fSK1gwnMyfTwowWDLDvBGarSgxehdtQefJD63Iob3stkZ08CP188wBVlRyyK5/NA2w5o3ASd+wPHCubD7Ath1oVQMHfMmd3xYrQevCT4SPEM49n7Mq1v/YXJXetRaBqzT6aq6FIO5p6OMltQCkxKoRRMa3oKExo/in6vmV5PAq1OKzVDiVQ7kqhxJKJRLM728OUZQ5w3yTWuCUyS4EWojSfBAwx6FF/dkMG7HVY+X+rkp4sGjj6PY6gLWnfAYHsg4aMhaypUnAXlq6HsVEjKGlcs0UwSfIT0Oz1Utw9S1+mgrnOQhvYeijrXs9TxNqd4N5OinLTrTB7zrebvvjNo0nnH/R42k5+yZCfz0xx8fYGJKamhWU9bErwItfEmeAiUIH9flcxv96RQkuLjF0sGOCX/GH+lVt4EA+2w7wXY92JgYqDHAcoUGJFTvjpQs5+8FKzJ444tWkiCDwOHy8vuln52NPWyo6mPnc191HU6mKw6OM20k9PNO1hl2kUKwwya09mXuZqGonN5wzkTTGbUSE8dApNUtdb4D/k3q3cHWitAk27xkZngJc/mYVKiG/PI60LxA/QxSfAi1EL5+dxst3DHB+k0D5lZXejie/MHmZVxROfmyBu4Xjc0b4Xat6D2TWjaAtoHpgQoPAlKT4aS5YFHelHIYo00SfDjNOjyUtXaz86mPna19LGruY8DHYNo
7adCtbA65SCnJh1kgXcHmcOBUTI6YzKq4iyYsxbKTgOzBTjGKJqjGKsGD5LgRXQL5ecTwOmDBw4k8fu9KfR7TKzMd3Nl2TDnFLuwmRlzhA7OfmjYAA0bA6Wc5q2BZT8AMkuhaAEULoDC+YFHenFM1PFHS/BhneiklDoP+C1gBv6itf7PcL7feDk9Puo6HdTYB6npcHDAPsjulj7qOgcp1N2Um1pYkGjn/BQ7M/ObKXJUkeB1gBdwpwd6BBW3wbSzUDnTYuLDIUSsSDTDrTOHubzMyQMHkvh7fRLf2JRBWoKfFfkeTvUeZOnULCryUrGYjzJAMDE9sNbNjHMD33vdn96obdwMbTuh6rlP2ydlQ+G8wPIg2eWBR05F4JeBJSky/9HjFLYEr5QyA38A1gBNwAdKqWe11ntC/V5aazw+jdvnx+P14/b5cXu8eNxOvB43HreLYaeLwaEhHEPDDDuHcQw7GRpyMDzQw/BgL15HLyb3AGlqiAwcTFc9nJHQT7G5l6zELsx6pO7nB1ypkDsDZlwVmFk3qRJypk348bhCREKmVfPNOUPcNnuI99qtvNRs4512K688HZgAZTWbmJafSkl2EoXpiRRmJFGYYSMjyUKyNYEUawLJNjMp1gSSshdgzV+EbfnXMJlUYLx9+55A4m/bCe27AmtAOfsODyIxE9KKIK0w8EjKgsSMTx+29MAvlIQkSLCC2XbIvzYwWwN/1StzoGQUptwRzh78MuCA1roWQCn1KLAWCHmCn/3vL+H0fDru+1bzc3zf8sjxnyhQRcFnSUOlF2JKL4K0BZBaELgrnzs98Ns8rVB650IYzKzg9EI3pxe6A4utll3OjqZe9rT2s7d1gLpOB+/XdDHgHHsdpoe+vJyV03ID4+1Llwcehxrqhu5a6KoJzKodaIOB1sC/ndWBXwDugRP/j0nJh+9Wn/jrjyFsNXil1BeB87TWXx75/jpgudb660e0uxW4deTbmcC+sAQUeblAp9FBxAC5TmOTaxSciXqdpmh99CF5hi82prW+C7jL6DhCTSm15Vg3PsSn5DqNTa5RcOQ6fVY4i8bNQMkh308eOSaEECICwpngPwCmK6XKlFJW4Erg2TC+nxBCiEOErUSjtfYqpb4OvExgmOQ9Wuvd4Xq/KBR3Zacwkes0NrlGwZHrdISomugkhBAidGTgthBCxClJ8EIIEackwY+TUuo8pdQ+pdQBpdT3Rmn3BaWUVkpNuGFcwVwjpdTlSqk9SqndSqmHIx1jNBjrOimlSpVSbyqlPlJK7VBKnW9EnEZSSt2jlOpQSh113z4V8D8j13CHUmpxpGOMKlpreZzgg8DN4xqgHLAC24E5R2mXBrwDbAQqjY472q4RMB34CMga+T7f6Lij9DrdBXx15Os5QL3RcRtwnU4DFgO7jvH8+cCLgAJOBjYZHbORD+nBj88nyzFord3Ax8sxHOmnwP8DnEd5Lt4Fc41uAf6gte4B0Fp3RDjGaBDMddJA+sjXGUBLBOOLClrrd4DRlj5dCzygAzYCmUqp2F0LeJwkwY/PJKDxkO+bRo59YuRPxBKt9fORDCyKjHmNgBnADKXUeqXUxpFVSCeaYK7Tj4BrlVJNwAvANyITWkwJ5jpOGJLgw0gpZQJ+BXzH6FiiXAKBMs1q4CrgbqVUppEBRamrgPu01pMJlCIeHPmMCXFU8uEYn7GWY0gD5gFvKaXqCdQEn51gN1qDWbKiCXhWa+3RWtcB+wkk/IkkmOt0M/AYgNZ6A5BIYIEt8SlZIuUQkuDHZ9TlGLTWfVrrXK31VK31VAI3WS/WWkfftlXhE8ySFU8T6L2jlMolULKpjWCM0SCY69QAnAWglJpNIMHbIxpl9HsWuH5kNM3JQJ/WutXooIxi+GqSsUwfYzkGpdRPgC1a6wm/9k6Q1+hl4Byl1B7AB3xXa91lXNSRF+R1+g6B8tUdBG643qhHho5MFEqpRwh0BnJH7kX8kJGdHLTW/0vg3sT5wAFgCBhj
H7/4JksVCCFEnJISjRBCxClJ8EIIEackwQshRJySBC+EEHFKErwQQsQpSfBCCBGnJMELIUSc+v/ZpPhI9bmdqgAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "source": [
+ "sns.histplot(auc_df['diff'].values.tolist(), kde=True, stat=\"density\")"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD4CAYAAADhNOGaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAqCUlEQVR4nO3deXxcdb3/8ddnZjLZp9mTJmm2pk0XoLRNFyhQkKXgVXoVRRBEEAUuetWrXkW91+vV6wOXn1z1uqGIoMgiFJGlUGUrULql+5o2TbPv+77O9/dHptzekiZpyZkzy+f5eMyjmZmTzPu0zXzmfFcxxqCUUip8OewOoJRSyl5aCJRSKsxpIVBKqTCnhUAppcKcFgKllApzLrsDnKmUlBSTl5dndwyllAoqO3bsaDHGpI73XNAVgry8PEpKSuyOoZRSQUVEKk/3nDYNKaVUmNNCoJRSYU4LgVJKhTktBEopFea0ECilVJjTQqCUUmFOC4FSSoU5LQRKWcwYw/Co1+4YSp1W0E0oUyrQeb2GzeWtrN9XT0lFOxWtvQyOeImLdFGYFselRancuDyHdE+U3VGVArQQKDVthke9PLWjhl9vPEZlax+xbifFeUlcMjeFuMgI2vuG2FPTwU9fOcovXz/GLStz+fJVRUS7nXZHV2FOC4FS0+DVw4381/OHKG/p5bzsGfz0hvNZszCDqIh3v8lXt/Xx81fLeOCt47xW2sT9n1hKYVq8DamVGiPBtlVlcXGx0bWGVKBo6Rnk288e4Pm99RSkxvLN98/nffPSEJFJv/fNo838yxN7GPF6efDWZSzJSfRDYhWuRGSHMaZ4vOe0s1ips2CM4ZldtVx530b+dqCRL185l5e+cAmXz0+fUhEAuHhOKuv+6QJmREdwy++2cbCuy+LUSo1PC4FSZ6iuo5/bHy7hi0/sJi8llhc+fxH/fPkc3K4z/3XKTY7l8TtWEh/l4tbfb6O2o9+CxEpNTAuBUlM06jU8+NZxrrxvI5uPtfKtDyzgqbsuZE76e2vfnzkjmoduW07f0Cif/dNOhkZ0qKnyL+0sVmfs0a1Vfn/Nj6/IeddjjV0D7KnuoKqtj8ERL7FuJxkzolk0awYzZ0S/p9c79RyrWnt5bm89tR39zE2PY+2iLKIinDyxvfo9vc7JPrgok8e2VfGph7bz/nNnTtvPHc94f58qfGkhUEHFGMPfDjby4FvH2Xq87bTHpXsiWZKTyMVzUrlkbgrZiTFn9XrVbX28criRI409xEe6uGHZLM7NmjHlfoAzcW7WDMrzk3irrIX5Mz3kp8RO+2soNR4tBCpo1Hf285Un97CprJXc5Bi+fOVcLixMoTA1jii3g97BUSpbe9lT3cHu6g62HW/jxf0NABSkxnLJnFRWz01lZUHyhGP323uH2FnVzs7KdspbeolxO7l6YQYrC5LPqh/gTFxzzkyONHbz9M4aPn/5HCKc2nqrrKeFQAWFkoo27npkBwPDXr67diE3Ls/BdcqbZKTLSVKsm8W+YZjGGI4197DxSAtvHGnmsW1VPPR2BRFOoTAtnoLUWNLiI4l0ORkcGaWlZ4ijjd2UNnZjDCTFulmzIJ2VBclEjjMfwApul4MPLc7mwU1jcwyuWpDhl9dV4U0LgQp4FS29fPf5g2TMiOLxO6Y++Upk7A2/MC2e2y/KZ2B4lO0VbWw+1sqBui4O1XWxsXuQwZFR3E4HyXGRFKbFcfU5GQwOe8lKjMZhQRPQZArT4jh/VgJvHW2hODeJpFi33zOo8KKFQAW0hq4BHt5cQVZCNI/fuZK0+LNfnycqwsnFc1K5eE7qpMfa0SF+sjULMzhQ18n6ffXcvDLX1iwq9GkDpApYA8Oj/GlLJW6ngz99ZsV7KgLBZkZ0BKvnpnGwvovK1l6746gQp4VABaynd9XS3jfEjctz3vNw0GB0UWEKcZEuNhxoJNiWglHBRQuBCkj7ajvZX9vJFfPTyQvTYZRul4PL
5qVR0drL0aYeu+OoEKaFQAWcvqERnt1TR2ZC1JTa80PZsrxEEmIiePVwk14VKMtoIVAB59XDTfQNjvDhxdk4Hf4ftRNIXA4HF89Jpaqtj4rWPrvjqBClhUAFlJbuQbaUt1Kcl0hmQvj1C4xnaU4isW4nG4802R1FhSgtBCqgbDjYgMvp4Ir56XZHCRhul4NVhSkcaeyhTlcnVRbQQqACRn1nPwfqurioMIX4qAi74wSUFfnJRLocbDzSbHcUFYK0EKiA8VppM5EuB6tmp9gdJeBEu52syE9mf20nrT2DdsdRIUYLgQoITV0DHKjt5ILZEy8IF85WFSbjdAhvlrXYHUWFGC0EKiBsOtaC0yF6NTCB+KgIFmUnsKuqnf6hUbvjqBCihUDZrndwhF1VHSzOSSQ2Upe/msjK2ckMjxp2VLXbHUWFEC0EynbbKtoY8RounJ1sd5SAl5UQTU5SDFvKW/HqBDM1TSwrBCIyS0ReE5GDInJARL4wzjEiIj8TkTIR2SsiS6zKowLTqNewtbyVwrQ40j3hs6jce3HB7GTaesf2TlBqOlh5RTACfNkYswBYCXxWRBaccsw1wBzf7Q7gVxbmUQHoSGM3XQMjrMxPsjtK0FiY6SE+ysXm8la7o6gQYVkhMMbUG2N2+r7uBg4BWaccthb4gxmzBUgQEWt37VYBZdvxNuKjXBRleOyOEjRcDgfL85I40thDiw4lVdPAL30EIpIHLAa2nvJUFlB90v0a3l0sVIjq6BviSGM3S3MTw35NoTO1PD8Jpwhb9KpATQPLC4GIxAHrgC8aY7rO8mfcISIlIlLS3KwzK0PFruoODFCcq81CZyo+KoIFmR52VXUwPOq1O44KcpYWAhGJYKwI/MkY8/Q4h9QCs066n+177P8wxvzGGFNsjClOTQ3vZYlDhTGG3dUd5CbH6J68Z2lZXhL9w6McqDurz1dKvcPKUUMC/A44ZIy57zSHPQvc4hs9tBLoNMbUW5VJBY76zgGauwc5f1aC3VGCVkFqLIkxEWyvaLM7igpyVs7eWQV8AtgnIrt9j30DyAEwxvwaWA+8HygD+oDbLMyjAsju6g4cAudmzrA7StByiLAsL4m/HWykpWeQlLhIuyOpIGVZITDGvAVM2ANoxrZc+qxVGVRg8hrD3poO5qbHE6Mzid+TJTmJvHyokZKKNq4+RwfcqbOjM4uV3x1v6aVrYESbhaaBJzqCovR4dlR1MOrVmcbq7GghUH63p7oDt8vBPJ07MC2W5SXROzjCoXrtNFZnR6/LlV8Nj3rZX9fJwpke3K6pfw55dGuVhamC25z0eDxRLkoq2zgnS/tc1JnTKwLlV0cauxkY9rJIm4WmjdMhLM1N4mhjD+19Q3bHUUFIC4Hyq/21ncS4ncxOjbM7Skgpzk0EYEelLk+tzpwWAuU3I14vhxu6mT/To0tKTLPEWDeFaXHsqGzX5anVGdNCoPymvLmXwREvC2dqJ7EVivOS6Owf1uWp1RnTQqD85kBdF26Xg9lp2ixkhfkz44lxOynR5iF1hrQQKL/wGsPB+i6K0uOJcOp/Oyu4HA4Wz0rgcH03PYMjdsdRQUR/I5VfVLX20Ts4woJMbRay0tK8JEZ9C/opNVVaCJRfHKzvwukQitLj7Y4S0jI8UWQnRlNS0YbRTmM1RVoIlOWMMRyo66QwNY6oCKfdcUJecW4STd2D1LT32x1FBQktBMpy9Z0DtPcNs1CbhfzivOwZRDhFO43VlGkhUJY7WN+FAPN02KhfREU4OSdzBntrOhga0d3L1OS0ECjLlTZ0MysphjhdctpvluYlMjgytq6TUpPRQqAs1T0wTG1HP/MytJPYn/KTY0mOdeuSE2pKtBAoSx3xzXKdq6OF/EpEWJqbyPGWXlp7Bu2OowKcFgJlqcMN3XiiXMycEWV3lLCzOCcRQReiU5PTQqAsM+L1UtbUQ1FGPCK6yJy/zYiOYG56PDur2nX3MjUhLQTKMpWtfQyOeClK19FC
dlmam0jXwAhHm3QhOnV6WgiUZUobunE6hNlpsXZHCVvzZsYT63Zq85CakBYCZZnShm7yU2KJdOlsYru4HA4W5yRyqL5LF6JTp6WFQFmirXeI5p5BXVsoACzNTcRrYHeVXhWo8WkhUJYobegC0PkDASDdE8WsxGhKKtt1ITo1Li0EyhKljd2kxLlJjou0O4pCF6JTE9NCoKbd0IiX8uZebRYKIOe+sxBdm91RVADSQqCm3bHmHka8hqIMHTYaKKIinJybNYO9NZ26EJ16Fy0EatqVNnTjdjnIS4mxO4o6ydLcpLGF6Gp1ITr1f2khUNPKGENpYzeFqXG4HPrfK5DkJceQHOvW5iH1LvqbqqZVY9cgnf3DOlooAIkIxbmJVLT2Ud7cY3ccFUC0EKhpdWLYqK42GphOLET35I4au6OoAKKFQE2rw43dZM6IwhMdYXcUNQ5PdARFGfGs21HDyKh2GqsxWgjUtOkfGqWqtY8ibRYKaEtzE2nqHmTjkWa7o6gAoYVATZsjTd0Y0GGjAW5ehoeUODd/Lqm2O4oKEFoI1LQpbegmxu0kOzHa7ihqAk6H8OEl2bxyqInmbt29TGkhUNPEawxHGruZmx6PQzehCXjXF2cz4jX8ZZd2GisLC4GIPCgiTSKy/zTPXyoinSKy23f7llVZlPVq2vroGxrV/oEgUZgWz5KcBP5cUqML0SlLrwgeAq6e5Jg3jTHn+27fsTCLslhpYzcOgblpWgiCxfXFsyhr6mFnVYfdUZTNLCsExpg3AJ3CGCZKG7rJSYoh2q2b0ASLDyzKJDrCyZPaaRz27O4juEBE9ojIiyKy8HQHicgdIlIiIiXNzTrkLdB09Q9T1zmgo4WCTFyki384bybP7amjV3cvC2t2FoKdQK4xZhHwP8AzpzvQGPMbY0yxMaY4NTXVX/nUFJU2jm2MrstOB5+PLZtF79Ao6/fV2x1F2ci2QmCM6TLG9Pi+Xg9EiEiKXXnU2Ttc30VCTATpHt2EJtgU5yZSkBKrcwrCnG2FQEQyRMbGGYrIcl+WVrvyqLMzPOqlrLmHeRnxiA4bDToiwkeLZ7G9ol0XogtjVg4ffQzYDBSJSI2I3C4id4nIXb5DPgLsF5E9wM+AG4yOYws65c29DI8a5mn/QNC6bkkWTofw5xKdUxCuXFb9YGPMjZM8/3Pg51a9vvKPww1dRDiF/JRYu6Oos5TmieKyolTW7azhK1fNxeW0ewyJ8jf9F1dnzRhDaUM3hWnxROibR1C7vngWzd2DvFaqo/LCkf72qrPW2DVIR/8w83U2cdC7bF4aKXGRPLFdO43DkRYCddYOn9iERgtB0ItwOri+OJtXDzdS39lvdxzlZ1MqBCLytIj8g4ho4VDvONzQTVZCNJ4o3YQmFNywLAevgT9v107jcDPVN/ZfAh8HjorI90WkyMJMKgj0Do5Q3danexOHkJzkGC6ek8IT26sY9eoAvnAypUJgjHnZGHMTsASoAF4WkbdF5DYR0Y+DYehI49gmNDpsNLTcuDyHus4B3tDdy8LKlJt6RCQZuBX4NLAL+CljheHvliRTAe1wQzfxUS5mJkTZHUVNoyvmp5MS5+bRbVV2R1F+NNU+gr8AbwIxwAeNMdcaY54wxvwzEGdlQBV4Rr1jm9AU6SY0IcftcvCRpbN49XATDZ0DdsdRfjLVK4LfGmMWGGPuNcbUA4hIJIAxptiydCogVbT2Mjji1WahEHXj8lmMeo0uTx1GploI/mucxzZPZxAVPEobunE5hNlpOps4FOUmx3JRYQqPb6/WTuMwMWEh8C0MtxSIFpHFIrLEd7uUsWYiFWaMMRys76IgNZZIl25CE6puXJ5DbUc/bxzVTuNwMNlaQ2sY6yDOBu476fFu4BsWZVIBrLSxm7beIS6Zo/tChLIrF6STHOvmsa1VXFaUZnccZbEJC4Ex5mHgYRG5zhizzk+ZVAB7aX8DAsyfqfMHQpnb5eAjxdk88OZxGrsGSPfo6LBQNlnT
0M2+L/NE5Eun3vyQTwWYDQcayUmKIV5nE4e8G5blaKdxmJiss/hEb2AcED/OTYWR6rY+DtV3sSBTRwuFg/yUWC6cncxj26rxaqdxSJusaeh+35//6Z84KpBtONAAwMLMGTYnUf7y8RU5fO7RXbxxtJlLta8gZE11QtkPRcQjIhEi8oqINJ/UbKTCxEv7G5g/00NSrNvuKMpPrlqQMdZprDONQ9pUdyi7yhjzVRH5EGNrDX0YeAN4xKpgKrA0dw+yo6qdL1w+x+4oaho8unXqb+wLMj38/WAjv9547KxXmv34ipyz+j7lH1OdUHaiYPwD8KQxptOiPCpA/f1gI8bAmoUZdkdRfrYsNwmvgZ2V7XZHURaZaiF4XkQOA0uBV0QkFdCFSMLIhgMN5CTF6LLTYSglPpKClFi2V7ThNdppHIqmugz1PcCFQLExZhjoBdZaGUwFjvbeITaVtXDNuRmILjIXlpblJ9HeN8yxph67oygLTLWPAGAeY/MJTv6eP0xzHhWAXjrQwIjX8MHzMu2OomyycKaHGLeTbRVtzEnXq8JQM6VCICJ/BGYDu4FR38MGLQRh4bk9deSnxLJQ5w+ELZfTwdKcRDYda6F7YFgnFIaYqV4RFAMLjNEGwnDT1D3AlvJWPndZoTYLhblleUm8WdbCjsp2nVMQYqbaWbwf0OEiYejFfQ14DXxgkTYLhbuU+EjytdM4JE21EKQAB0Vkg4g8e+JmZTAVGJ7fW8fc9DjmaruwApbn+TqNm7XTOJRMtWno21aGUIGprqOf7RXtfPnKuXZHUQFiQeZYp/H2423MSdMPB6FiqsNHNzI2ozjC9/V2YKeFuVQAWL+vHtBmIfW/IpwOluQkcrC+i+6BYbvjqGky1bWGPgM8BdzveygLeMaiTCpAPLenjnOyPOSn6JaU6n8ty/PNNK7qsDuKmiZT7SP4LLAK6AIwxhwFdNhACCtr6mFPTSdrF2XZHUUFmFTtNA45Uy0Eg8aYoRN3fJPK9H9ACFu3swanQ1i7WJuF1Lsty0uirXeI8uZeu6OoaTDVQrBRRL7B2Cb2VwJPAs9ZF0vZadRr+MvOWlbPTSUtXrcoVO+2MNNDdMTYTGMV/KZaCO4BmoF9wJ3AeuDfrAql7PX2sRYauga4bkm23VFUgBrrNE7gYF2ndhqHgKmOGvIy1jl8tzHmI8aY3+os49D11I4aPFEuLp+v3UDq9E50Gu/STuOgN9nm9SIi3xaRFqAUKPXtTvYt/8RT/tY9MMyGAw18cFEmURFOu+OoAJbmiSIvWTuNQ8FkVwT/wthooWXGmCRjTBKwAlglIv9ieTrld+v31TMw7OW6pdospCa3PD+R1t4hjrdop3Ewm6wQfAK40Rhz/MQDxphy4Gbglom+UUQeFJEmEdl/mudFRH4mImUisldElpxpeDX91u2opSAllsWzEuyOooLAwswZY53Gx7XTOJhNtsREhDGm5dQHjTHNIjLZOrQPAT/n9EtVXwPM8d1WAL/y/anO0JnsPzuRpu4BtlW0sWZBOo9tq56Wn6lC24lO4y3lbfQMjhAXeSZbnKhAMdkVwdBZPocx5g1goo8Ja4E/mDFbgAQRmTlJHmWh7cfbcIqwJDfR7igqiBTnJTFqjO5pHMQmKwSLRKRrnFs3cO57fO0s4OSPnTW+x95FRO4QkRIRKWlubn6PL6vGMzzqZWdVBwsyPbrpiDoj6Z4ocpNj2F7Rhg4mDE4TFgJjjNMY4xnnFm+M8du7hTHmN8aYYmNMcWpqqr9eNqzsq+2kf3iUFflJdkdRQWh5XhKtvUOUa6dxUJrqhDIr1AKzTrqf7XtM2WDb8TZS4iJ1gTl1Vs7JGus03q4zjYOSnYXgWeAW3+ihlUCnMabexjxhq76zn6q2PpbnJ+l2lOqsRDgdLM5J4EBdFz2DI3bHUWfIskIgIo8Bm4EiEakRkdtF5C4Ruct3yHqgHCgDfgvcbVUWNbFtx9twOYQlOQl2R1FBbFleEqNew64q
7TQONpaN9TLG3DjJ84ax5a2VjQaGR9ld3cG5WTOIcevQP3X20j1R5CbFsO14GxcVpujVZRCxs2lIBYAdle0Mjni5YHay3VFUCFiWn6QzjYOQFoIw5jWGt4+1kJsUQ3ZijN1xVAg4N2sGUREOXZ46yGghCGOH6rto7xtmVWGK3VFUiBjrNE7UTuMgo4UgjG0qayEhJoL5Mz12R1EhZLmv01hnGgcPLQRhqra9n4rWPi4sSMbp0E49NX3SPVHkp8SyTZenDhpaCMLUpmMtuF0OivN0JrGafsvzx/Y0LmvqsTuKmgItBGGos3+YfTWdLM1N1M1nlCUWZnqIdTvZqstTBwUtBGHoraPNGAwXzdZOYmUNl2PsavNwfRcdfRMuVKwCgBaCMNM3OML2inbOy04gMdZtdxwVwpb5mh1LtNM44GkhCDOby1sZGvWyeq6u4qqslRTrZk56HNsr2hge9dodR01AC0EYGRwZ5e1jrczPiCfdE2V3HBUGVuQn0z0wwiuHGu2OoiaghSCMlFS00z88qlcDym+KMuKZER3BI1umZztVZQ0tBGFixOvlrbIW8lNiyUnWPQeUfzhEWJaXxFtlLbr+UADTQhAmdlS209k/rFcDyu+K8xJxOYRHt1baHUWdhhaCMDDi9fJ6aTM5STHMSYuzO44KM56oCK5amM6TO2oYGB61O44ahxaCMHDiauDyeWm6Rryyxc0rcunoG2b9Pt2EMBBpIQhxI6P/ezVQqFcDyiYXzE6mICWWR7Zo81Ag0kIQ4nZU6dWAsp+IcNPKXHZWdbC/ttPuOOoUWghCmF4NqEDy0eJsYtxOHtx03O4o6hRaCEKYXg2oQOKJiuAjS7N5fk89zd2DdsdRJ9FCEKL0akAFok9emMfQqJdHt+oEs0CihSBElZwYKTRfrwZU4JidGsfquak8srWSoRFdfyhQaCEIQcOjXl4vbRq7GkjVqwEVWG5blUdz96AOJQ0gWghC0NbyVroGRrhqQbpeDaiAc8mcVApSYvn9puMY3coyIGghCDEDw6O8fqSZwrQ4CvRqQAUgh0O4dVUee2o62VXdYXcchRaCkLPpWAt9Q6NctSDd7ihKndZ1S7KJj3Tx+00VdkdRaCEIKX2DI7x1tIUFMz1kJ8bYHUep04qNdHH9slm8uK+ehs4Bu+OEPS0EIWTj0WaGRrxcqVcDKgh88oI8Ro3hj1sq7I4S9rQQhIiu/mE2H2vl/FkJuvuYCgo5yTFctSCdP26upHdwxO44YU0LQYh4rbQJrzFcPl+vBlTwuHP1bLoGRnh8e7XdUcKaFoIQ0NY7xPaKNpblJZEU67Y7jlJTtiQnkeV5SfzuzXLd4N5GWghCwCuHGnGIcFlRmt1RlDpjd64uoK5zgOf31tkdJWxpIQhyRxq72V3dwQWzk/FER9gdR6kzdllRGnPT47h/Y7lOMLOJFoIg9+O/leJ2OVg9R/ciVsHJ4RDuuGQ2hxu62Xik2e44YUkLQRDbUdnOhgONXDQnhZhIl91xlDpr1y7KJMMTxS9eK9OrAhtYWghE5GoRKRWRMhG5Z5znbxWRZhHZ7bt92so8ocQYww9ePExKXCQXFabYHUep98TtcnDX6gK2V7SzpbzN7jhhx7JCICJO4BfANcAC4EYRWTDOoU8YY8733R6wKk+oeeVQE9sq2vjiFXOIdDntjqPUe3bD8hxS4yP56StH7I4Sdqy8IlgOlBljyo0xQ8DjwFoLXy9sjHoNP3jpMAUpsXxs2Sy74yg1LaIinNy1ejZbytvYWt5qd5ywYmUhyAJOniVS43vsVNeJyF4ReUpExn1XE5E7RKREREqam7Uzad2OGo429fCva4qIcGo3jwodH1+eQ0pcJP/zapndUcKK3e8izwF5xpjzgL8DD493kDHmN8aYYmNMcWpqeI+O6R8a5b6/H+H8WQlcfU6G3XGUmlbRbid3XlLAW2Ut7KjUvgJ/sbIQ1AInf8LP9j32DmNMqzHmxC7WDwBLLcwTEh56
u4KGrgHuuWaebjqjQtJNK3NIjnVz39+1r8BfrCwE24E5IpIvIm7gBuDZkw8QkZkn3b0WOGRhnqDX3jvEL18v433z0lhZkGx3HKUsEeN2cfdlhWwqa+XNo9oU7A+WFQJjzAjwOWADY2/wfzbGHBCR74jItb7DPi8iB0RkD/B54Far8oSCX75eRs/gCF+7ep7dUZSy1M0rc8hKiOYHLx3G69V5BVaztI/AGLPeGDPXGDPbGPM932PfMsY86/v668aYhcaYRcaYy4wxh63ME8xq2vt4+O1KrluSTVFGvN1xlLJUpMvJV9bMZX9tF8/rJveWs7uzWE3RfX87AgJfunKu3VGU8ou1i7KYlxHPj/9WytCIrkxqJS0EQWBfTSdP76rltgvzyEyItjuOUn7hcAhfu2Yela19PLatyu44IU0LQYAzxvCfzx0gOdbNZ99XaHccpfzq0rmprCxI4icvH6Gjb8juOCFLC0GAe2FfPSWV7XxlTRGeKF1mWoUXEeE/PriQzv5hHU5qIS0EAWxgeJR71x9m/kwP1xfrUhIqPM2f6eETK3N5ZEslB+o67Y4TkrQQBLAH3iyntqOff//AfJwOnTymwteXriwiIcbNf/z1gC5TbQEtBAGqsWuAX75+jDUL07lwti4zrcLbjJgIvrqmiJLKdp7ZXTv5N6gzooUgQP3wpVJGRg3ffP94K3crFX6uL57FouwZfO+FQ7T3asfxdNJCEID21nSwbmcNn7oon5zkGLvjKBUQHA7h3g+fR0ffMP/53AG744QU3d8wwIx6Df/+zH5S4iL57GWz7Y6j1LR4dOv0zQO4ZG4qz+yuIz4qgvkzPeMe8/EVOdP2euFArwgCzKNbK9lT08m/f2A+8TpcVKl3ubQolQxPFM/srqV/aNTuOCFBC0EAaeoe4IcbSllVmMy1izLtjqNUQHI5HFy3NJvewRFe2Fdnd5yQoIUggHzvhUMMDnv57tpzdK8BpSaQlRDN6rmp7KzqYE91h91xgp4WggCxqayFv+6u465LZ1OQGmd3HKUC3vvmpZObFMNfdtfS0jM4+Teo09JCEAB6B0e45+m95CXHcPel2kGs1FQ4HcLHls3CKcJj26oYHtUVSs+WFoIA8P0XD1PT3s+PPrqIqAin3XGUChoJMW4+WpxNfecA63XfgrOmhcBmm8pa+OOWSm5flc+yvCS74ygVdOZleLi4MIWtx9vYdlw3vD8bOo/ARt0Dw3z1qb0UpMTylTVFdsdRKmitOSeDpu5Bnt1TS1Ks2+44QUevCGz0necOUt/Zz/+7XpuElHovHDLWX5AaH8mj2yopa+q2O1JQ0UJgk3U7anhyRw2fvayQJTmJdsdRKuhFRTi55YI8nA4Htz20nYbOAbsjBQ0tBDY42tjNvz2zn5UFSXzxCt2DWKnpkhjj5paVubT1DHHTA1to7tZhpVOhhcDP+oZGuPtPO4mNdPKzGxbrPgNKTbNZSTH8/rbl1HUM8InfbdWVSqdAC4EfGWP4+tP7KGvu4ac3LCbNE2V3JKVC0vL8JB74ZDHlLb3c/LuttOqEswlpIfCj/375KH/dXcdXripiVaFuNqOUlVYVpnD/J5ZS1tTDR369meq2PrsjBSwtBH7yZEk1P3vlKNcXZ+vsYaX85LKiNB79zAraeof48K/eZn+t7nk8Hi0EfrCprIWvP72PiwpT+N6HztUF5ZTyo6W5Saz7pwuIcAgfu38zL+3XGcin0kJgsbePtfDph0soSI3llzcvIcKpf+VK+VthWjxP372KwvR47npkJ/euP8SIrk30Dn1XstAbR5q57ffbyU6M5pFPr8CjG80oZZuMGVH8+c6V3Lwyh/vfKOemB7ZS19Fvd6yAoIXAIq8ebuTTfyihIDWOx+9YSVq8jhBSym6RLif/9Y/n8uOPLmJvTSdr/vsNnthehTHG7mi20kIwzYwx3L/xGJ9+uISi9Hge+8wKkuMi7Y6llDrJdUuz2fDFS1iQ6eFr6/Zx6++3U9na
a3cs22ghmEZ9QyN87rFd3PviYa45dyaP37GShBhdAEupQJSTHMNjn1nJd9YuZHtFG1fe9wb3rj9E18Cw3dH8TlcfnSY7Ktv42rp9lDf3cM8187jzkgIdHaRUgHM4hFsuyOPqhRn8aEMpv3mznKd21HDHJQXctDKXuMjweIvUK4L3qGtgmH97Zh/X/Woz/UOj/OFTK7hr9WwtAkoFkTRPFD/66CKe+9xFLMj0cO+Lh1n1/Vf5yctHwmIbzPAodxboHRzhkS2V/PbNctp6h/jUqny+fNVcYsPkE4RSoeicrBn88fYV7K7u4OevlvGTl4/yi9fKuGphBjctz2FlQTKOEFwfTN+1zlBtRz/rdtTw+03Hae8b5qLCFP51TRGLZiXYHU0pNU3On5XAA58spqypm0e3VrNuZw0v7K0nwxPF1edk8P5zZ7I0NzFkFo3UQjAF9Z39vHGkmWd21bHleCvGwGVFqfzz5XN0LwGlQlhhWjzf+uACvnp1ERsONPDC3noe3VbFQ29X4IlyccHsZC4qTKE4L4k5aXG4gnTCqKWFQESuBn4KOIEHjDHfP+X5SOAPwFKgFfiYMabCykyTGRge5WhjDwfrO9lX28nmY60cax4bVpaXHMMXL5/LhxZnkZMcY2dMpZQfRUU4WXt+FmvPz6JncITXS5t462gLbx5tYcOBRgCiI5yck+VhXoaHgtRY8lNimZ0aR2ZCdMBfOVhWCETECfwCuBKoAbaLyLPGmIMnHXY70G6MKRSRG4AfAB+zIk9n/zAVLb10DQzT1T/i+3OYlp5B6joGqO3op76zn6buQU7MLYl1O1mWn8SNy3O4cHYK82fGayewUmEuLtLFB87L5APnZWKMoaqtj93VHeyu7mBPdQfP7K6le2DknePdTgep8ZGkxkeSFh9JmieSpBg3cVEu4iIjiItyER/pIi7KRXSEE7fLQYTTgcsh73wd4RTfnw5LioqVVwTLgTJjTDmAiDwOrAVOLgRrgW/7vn4K+LmIiLFgmt+bR5v53KO73vV4pMtBVkI0mQnRXDInlcyEaIoy4lkw00NOUkxIdgwppaaHiJCbHEtucixrz88CxiaVtvQMUd7cw/GWXipa+2jqGqCpe5CK1l62VbTR2T/M2bzL3bm6gK9fM3+az8LaQpAFVJ90vwZYcbpjjDEjItIJJAMtJx8kIncAd/ju9ohI6XQGPTL1Q1M4JVsYCedzh/A+/6A795um70cF1Ll/4wfwjbP/9tzTPREUncXGmN8Av7E7h4iUGGOK7c5hh3A+dwjv89dzD/1zt7KLuxaYddL9bN9j4x4jIi5gBmOdxkoppfzEykKwHZgjIvki4gZuAJ495ZhngU/6vv4I8KoV/QNKKaVOz7KmIV+b/+eADYwNH33QGHNARL4DlBhjngV+B/xRRMqANsaKRSCzvXnKRuF87hDe56/nHuJEP4ArpVR4C85pcEoppaaNFgKllApzWggmICJJIvJ3ETnq+/O0CwuJiEdEakTk5/7MaJWpnLuInC8im0XkgIjsFRFLZoX7i4hcLSKlIlImIveM83ykiDzhe36riOTZENMyUzj/L4nIQd+/9Ssictpx6cFmsnM/6bjrRMSISEgNKdVCMLF7gFeMMXOAV3z3T+e7wBt+SeUfUzn3PuAWY8xC4GrgJyKS4L+I0+ekJVGuARYAN4rIglMOe2dJFOC/GVsSJSRM8fx3AcXGmPMYWwngh/5NaY0pnjsiEg98Adjq34TW00IwsbXAw76vHwb+cbyDRGQpkA78zT+x/GLSczfGHDHGHPV9XQc0Aan+CjjN3lkSxRgzBJxYEuVkJ/+dPAVcLqGz+NSk52+Mec0Y0+e7u4WxuUGhYCr/9jD2Ye8HwIA/w/mDFoKJpRtj6n1fNzD2Zv9/iIgD+DHwFX8G84NJz/1kIrIccAPHrA5mkfGWRMk63THGmBHgxJIooWAq53+y24EXLU3kP5Oeu4gsAWYZY17wZzB/CYolJqwkIi8DGeM8
9c2T7xhjjIiMN9b2bmC9MaYm2D4cTsO5n/g5M4E/Ap80xninN6UKNCJyM1AMrLY7iz/4PuzdB9xqcxTLhH0hMMZccbrnRKRRRGYaY+p9b3ZN4xx2AXCxiNwNxAFuEekxxkzUnxAQpuHcEREP8ALwTWPMFoui+sOZLIlSE4JLokzl/BGRKxj7oLDaGBMqm/lOdu7xwDnA674PexnAsyJyrTGmxG8pLaRNQxM7eQmMTwJ/PfUAY8xNxpgcY0weY81DfwiGIjAFk567b+mQvzB2zk/5MZsVwn1JlEnPX0QWA/cD1xpjxv1gEKQmPHdjTKcxJsUYk+f7Pd/C2N9BSBQB0EIwme8DV4rIUeAK331EpFhEHrA1mfWmcu7XA5cAt4rIbt/tfFvSvke+Nv8TS6IcAv58YkkUEbnWd9jvgGTfkihfYuJRZEFliuf/I8auep/0/VufWiiD0hTPPaTpEhNKKRXm9IpAKaXCnBYCpZQKc1oIlFIqzGkhUEqpMKeFQCmlwpwWAqWUCnNaCJRSKsz9fwaVNRCloqMMAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "source": [],
+ "outputs": [],
+ "metadata": {}
+ }
+ ],
+ "metadata": {
+ "orig_nbformat": 4,
+ "language_info": {
+ "name": "python",
+ "version": "3.8.2",
+ "mimetype": "text/x-python",
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "pygments_lexer": "ipython3",
+ "nbconvert_exporter": "python",
+ "file_extension": ".py"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.2 64-bit ('r-py-test': conda)"
+ },
+ "interpreter": {
+ "hash": "7508a6b53ffb04362d156591e4bfb20c197555e37f3cce3b1ec90fd899bbfe63"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/notebook/feature_test_with_act.ipynb b/notebook/feature_test_with_act.ipynb
new file mode 100644
index 0000000..851f954
--- /dev/null
+++ b/notebook/feature_test_with_act.ipynb
@@ -0,0 +1,404 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "source": [
+ "import pandas as pd\n",
+ "## Utils and Library for notebook\n",
+ "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
+ "\n",
+ "# Root data path\n",
+ "DATA_PATH = '../data/'\n",
+ "\n",
+ "#Data loading\n",
+ "df = pd.read_csv(\"resultFiles/featureExtractionV5_by_Jun/CD4.Ranksum.RFECV.act.csv\", engine='c', index_col=0)\n",
+ "meta_data = pd.read_csv(DATA_PATH+'annotation_metadata/EPIC_HCvB_metadata_baseline_updated-share.csv')\n"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "source": [
+ "## Utils and Library for notebook\n",
+ "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
+ "import itertools\n",
+ "def _LoadDiseaseDuration(df, meta_data, returntype='long'):\n",
+ " \"\"\"\n",
+ " df : Expression or activation score matrix\n",
+ " meta_data : meta data which contains duration and sample ID\n",
+ " output: long DD samples and short DD samples by list, or healthy samples and short DD samples by list\n",
+ " \"\"\"\n",
+ " # Reject returntype values that contain more than two comma-separated elements\n",
+ " if returntype.count(',')>1: raise ValueError('No more than 2 elements for returntype')\n",
+ "\n",
+ " if returntype.find(',')==-1: # if returnType is single(long and healthy)\n",
+ " # Sample by disease category\n",
+ " sample_list, sample_category = exttoolkit.get_sample_name_by_category(dataframe=meta_data, sampleColumn='HCVB_ID', dataColname='DiseaseCourse')\n",
+ " \n",
+ " # Sort by disease category and exclude unknown samples\n",
+ " patient_samples = [] # patient samples\n",
+ " healthy_samples = [] # healthy samples\n",
+ " for samples, category in zip(sample_list, sample_category):\n",
+ " if category=='Healthy':\n",
+ " healthy_samples = samples\n",
+ " else:\n",
+ " if category!='Unknown':# Excluding unknown samples\n",
+ " patient_samples.append(samples)\n",
+ "\n",
+ " patient_samples = list(itertools.chain(*patient_samples)) # flatten\n",
+ " patient_samples = list(set(patient_samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ " healthy_samples = list(set(healthy_samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ " patient_meta = meta_data.loc[meta_data['HCVB_ID'].isin(patient_samples)] # Make patient metadata\n",
+ "\n",
+ " longDD_samples, shortDD_samples = exttoolkit.get_sample_name_by_contValues(patient_meta, 'HCVB_ID', 'DiseaseDuration', 25)\n",
+ " longDD_samples = list(set(longDD_samples.values.tolist()).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ " shortDD_samples = list(set(shortDD_samples.values.tolist()).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ "\n",
+ " else: # if returnType is multiple(List)\n",
+ " # Sample by disease category\n",
+ " sample_list, sample_category = exttoolkit.get_sample_name_by_category(dataframe=meta_data, sampleColumn='HCVB_ID', dataColname='DiseaseCourse')\n",
+ " category1 = returntype.split(',')[0]\n",
+ " category2 = returntype.split(',')[1]\n",
+ " \n",
+ " # Collect the samples that belong to the two requested categories\n",
+ " patient_samples = [] # patient samples\n",
+ " healthy_samples = [] # healthy samples\n",
+ " for samples, category in zip(sample_list, sample_category):\n",
+ " if category==category1:\n",
+ " category1_samples = list(set(samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ " elif category==category2:\n",
+ " category2_samples = list(set(samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
+ "\n",
+ " # return result\n",
+ " if returntype=='long':\n",
+ " return longDD_samples, shortDD_samples\n",
+ " elif returntype=='healthy':\n",
+ " return healthy_samples, shortDD_samples\n",
+ " else:\n",
+ " return category1_samples, category2_samples\n"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "source": [
+ "df_cd4 = df.copy()\n",
+ "longDD_samples, shortDD_samples = _LoadDiseaseDuration(df_cd4, meta_data, 'RR,CIS')\n",
+ "df_cd4 = df_cd4[longDD_samples+shortDD_samples]\n",
+ "df_cd4 = df_cd4.subtract(df_cd4.median(axis=1), axis=0)\n",
+ "\n",
+ "\n",
+ "X = df_cd4.T.values # Training sample\n",
+ "y = [0]*len(longDD_samples)+[1]*len(shortDD_samples) # Training y\n",
+ "X.shape"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(119, 556)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "source": [
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn import metrics\n",
+ "\n",
+ "auc_arr = []\n",
+ "val_auc = []\n",
+ "\n",
+ "for t in list(range(0,100)):\n",
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=t)\n",
+ " X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=t)\n",
+ "\n",
+ " #randomState = list(range(0,5))\n",
+ "\n",
+ " clf = SVC(kernel=\"linear\")\n",
+ " clf.fit(X_train, y_train)\n",
+ "\n",
+ " y_pred = clf.predict(X_test)\n",
+ " fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=1)\n",
+ " auc_arr.append([t, metrics.auc(fpr, tpr)])\n",
+ " \n",
+ " y_val_pred = clf.predict(X_val)\n",
+ " fpr, tpr, thresholds = metrics.roc_curve(y_val, y_val_pred, pos_label=1)\n",
+ " val_auc.append([t, metrics.auc(fpr, tpr)])\n",
+ "\n",
+ "auc_test_df = pd.DataFrame(data=auc_arr, columns=['state', 'auc']).set_index('state')\n",
+ "auc_val_df = pd.DataFrame(data=val_auc, columns=['state', 'auc']).set_index('state')"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "source": [
+ "auc_df = pd.concat([auc_test_df, auc_val_df], axis=1)\n",
+ "auc_df.columns = ['test_auc', 'val_auc']\n",
+ "auc_df['diff'] = auc_df['test_auc'] - auc_df['val_auc']\n",
+ "auc_df"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " test_auc | \n",
+ " val_auc | \n",
+ " diff | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.875000 | \n",
+ " 0.822222 | \n",
+ " 0.052778 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ " 0.944444 | \n",
+ " 0.055556 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.857143 | \n",
+ " 0.970588 | \n",
+ " -0.113445 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.828571 | \n",
+ " 0.873684 | \n",
+ " -0.045113 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.900000 | \n",
+ " 0.900000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " 0.687500 | \n",
+ " 0.954545 | \n",
+ " -0.267045 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " 0.869748 | \n",
+ " 0.941176 | \n",
+ " -0.071429 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " 0.911111 | \n",
+ " 0.937500 | \n",
+ " -0.026389 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " 1.000000 | \n",
+ " 0.888889 | \n",
+ " 0.111111 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " 0.950000 | \n",
+ " 0.916667 | \n",
+ " 0.033333 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " test_auc val_auc diff\n",
+ "state \n",
+ "0 0.875000 0.822222 0.052778\n",
+ "1 1.000000 0.944444 0.055556\n",
+ "2 0.857143 0.970588 -0.113445\n",
+ "3 0.828571 0.873684 -0.045113\n",
+ "4 0.900000 0.900000 0.000000\n",
+ "... ... ... ...\n",
+ "95 0.687500 0.954545 -0.267045\n",
+ "96 0.869748 0.941176 -0.071429\n",
+ "97 0.911111 0.937500 -0.026389\n",
+ "98 1.000000 0.888889 0.111111\n",
+ "99 0.950000 0.916667 0.033333\n",
+ "\n",
+ "[100 rows x 3 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "source": [
+ "import seaborn as sns\n",
+ "sns.distplot(auc_test_df['auc'].values.tolist())\n",
+ "sns.distplot(auc_val_df['auc'].values.tolist())"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n",
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAurklEQVR4nO3dd3wc1b338c9vV6veLcnqli133C0XMMWYYJoTIBBa6Fy4AR4SEiCXlCckXC7JTfLADTcJiYEQehLAIeCEYnqzjYVtjLst2ZbVLMnqddt5/pi1Ma4rWbsjrX7v12tfWs3OzvxGtvarM2fOGTHGoJRSamhz2F2AUkop+2kYKKWU0jBQSimlYaCUUgoNA6WUUkCU3QUEIyMjwxQVFdldhlJKDSqffvppgzEmM5h1B0UYFBUVUVpaancZSik1qIjIrmDX1dNESimlNAyUUkppGCillELDQCmlFBoGSiml0DBQSimFhoFSSik0DJRSSqFhoJRSikEyAlkpFaFKH+/f7ZVc17/bG0K0ZaCUUkrDQCmllIaBUkopNAyUUkqhYaCUUgoNA6WUUmgYKKWUQsNAKaUUIQwDEfmTiNSJyPoDlv1KRDaLyDoR+buIpIZq/0oppYIXypbBn4GzD1q2DJhkjJkCbAV+EML9K6WUClLIwsAY8z7QeNCyN4wx3sC3K4D8UO1fKaVU8OzsM7geeNXG/SullAqwJQxE5EeAF3jmKOvcJCKlIlJaX18fvuKUUmoICnsYiMi1wCLgm8YYc6T1jDGLjTElxpiSzMzMsNWnlFJDUVinsBaRs4HvA6cZYzrDuW+llFJHFspLS58DlgPjRKRSRG4AfgskActEZK2I/CFU+1dKKRW8kLUMjDGXH2bxY6Han1JKqb7TEchKKaU0DJRSSuk9kJVSYfLsyopDlhVXNB5mzb6bU9KvmxtStGWglFJKw0AppZSGgVJKKTQMlFJKoWGglFIKDQOllFJoGCillELDQCmlFBoGSiml0DBQSimFhoFSSik0DJRSSqFhoJRSCg0DpZRSaBgopZRCw0AppRQaBkoppdAwUEophYaBUkopNAyUUkqhYaCUUooQhoGI/ElE6kRk/QHL0kVkmYhsC3xNC9X+lVJKBS+ULYM/A2cftOxu4C1jzBjgrcD3SimlbBayMDDGvA80HrT4fOCJwPMngAtCtX+llFLBC3efwXBjTE3geS0w/EgrishNIlIqIqX19fXhqU4ppYYo2zqQjTEGMEd5fbExpsQYU5KZmRnGypRSaugJdxjsEZEcgMDXujDvXyml1GGEOwxeBq4JPL8G+EeY96+UUuowQnlp6XPAcmCciFSKyA3AL4AzRWQb8JXA90oppWwWFaoNG2MuP8JLZ4Rqn0oppfpGRyArpZTSMFBKKaVhoJRSCg0DpZRSaBgopZRCw0AppRQaBkoppdAwUEophYaBUkopNAyUUkqhYaCUUgoNA6WUUmgYKKWUQsNAKaUUGgZKKaXQMFBKKYWGgVJKKTQMlFJKoWGglFKKEN4DWSkVYUofP663F1c09lMhKhS0ZaCUUkrDQCmllIaBUkopbAoDEfmuiGwQkfUi8pyIxNpRh1JKKUvYw0BE8oBvAyXGmEmAE7gs3HUopZT6gl2niaKAOBGJAuKBapvqUEophQ1hYIypAn4NVAA1QIsx5o2D1xORm0SkVERK6+vrw12mUkoNKXacJkoDzgdGArlAgohcefB6xpjFxpgSY0xJZmZmuMtUSqkhxY7TRF8Bdhhj6o0xHmAJcJINdSillAqwIwwqgLkiEi8iApwBbLKhDqWUUgF29BmsBF4AVgOfB2pYHO46lFKDmPET7WnB4ffYXUnECGpuIhFZAjwGvGqM8R/vTo0x9wD3HO92lFJDi/g95NV/SFZTKS5fFwahMXkCu7IX4nEl213eoBbsRHW/B64DHhKR54HHjTFbQleWUkp9WbS7hXEVzxLfU09j0nhaEkcR624k
q7GUxK5KNhVdY3eJg1pQYWCMeRN4U0RSgMsDz3cDjwBPBzqClVIqJGLcTUzc8WccfjebC6+gJWn0/tcaUqYwftdTjK34C3h+BK44GysdvILuMxCRYcC1wL8Ba4DfADOAZSGpTCmlAKe3i/G7nsFhPGwaec2XggCgMy6bsvwLie+ph7fvs6nKwS+oMBCRvwMfYI0W/qox5mvGmL8aY24DEkNZoFJqCDN+RlctIdrTwpbCy+mMzT7sai2Jo6lLnQ4r/wiNO8JcZGQItmXwiDFmojHm58aYGgARiQEwxpSErDql1JCWX/ceqe1l7Mw+m/b4gqOuW5k1H5wueOf+8BQXYYINg8O1vZb3ZyFKKXWg1NYt5DV8QF3qNOrTZhxzfY8rCUquhw1LoKUqDBVGlqOGgYhki8hMrEnlpovIjMBjPtYpI6WU6nexPXsprnqJ9thcduacCyLBvXH2TWD88IkOXeqtY11NdBZWp3E+8MABy9uAH4aoJqXUEObwuRmz+28YcbCt4GKMoxe3ak8bAePOhbXPwIIfW6eNVFCO+lM2xjwBPCEiFxljXgxTTUqpocoYiqteIq6ngc0jvok7OrX325h2BWxeCmVvw9iz+r3ESHXUMBCRK40xTwNFIvK9g183xjxwmLcppVSf5NW/T3rbZnYNX0hr4qi+bWT0mRA/DNY+q2HQC8dqfyUEvurlo0qpkMpoXkd+/XvUp0yhdticvm8oKhomXgCfPQfuTojW7s1gHOs00R8DX38WnnKUUkNRestGRlX9g5aEkezIXRR8h/GRTPwalD4GZW/BhK/2T5ERLthBZ78UkWQRcYnIWyJSf7gb0iilVG9lNq2huHIJbfH5bC28tHcdxkcyYh7EpcHGl49/W0NEsOMMFhpjWoFFwE5gNHBXqIpSSkU+8Xspqv4no6pfoS1hBFsLL8fviP7SOg3uKFY1J/Le3mQ+bkyittuFMUFs3OmCsWfD9jfBf9wTLQ8JwUbwvvXOA543xrTI8TbjlFJDkvi9DGvdSF7de8R6mqjOOIndWQtArL9N3X7hnYYU3mpIZVdX7CHvL4jt5vzsRk5Obz362aTiBVa/Qe1nkDs9REcTOYINg6UishnoAm4WkUygO3RlKaUGJZ8bulvB2w3eHuvh6wZPD3l1NSR01ZDUWUGUv4fOmCw2jbhy/1VDxsDypiSersxir8fFqPgurs7fw5iELlJdPjp9Dra0x/FGfRq/3ZnLx03J3DyihmSX7/C1jJpvfS17W8MgCMFOYX23iPwSaDHG+ESkA+um9kqpocwYaN4Flatg73ZorwMOfx4nD6ErJoPG5Ik0pkykJWHU/o7idq+Dh3fmUNqSxKj4Lm4pqmFScuch2yiK7+HMzGZer0/j6cpMfrJlBD8ZW0F6tPfQHSZmwfDJUPYOnHJHfx51ROpNT814rPEGB77nyX6uRyk1WDTtgPUvQkslOFyQMRZypkJ8BkTFQlRM4BELUbGsqu7BOA4dEbyzM4YHyvJo8Li4Mr+O87IacRzl9I9D4JysJkbGd/Pzbfncu7WAe8dXkBx1mBZC8emw4mFwd0B0wqGvq/2Cve3lU0AxsBbY9xM3aBgoNfT4vNZkcBUfQ2wqTL7UOg3jOvT8/oGMo/GQZR82JvPHndkkRvn46dhdjE0M/uzz+MQufjCmkvu2FvC/O3L4wejKQ1cqPh0+fgh2fQxjzgx620NRsC2DEmCiMUH14yulIpW7HUr/BI3lMOp064qdqJheb8YYeKk2nb9UZzEhsZPbR1WReqRz/0cxPrGL6wr2sLgihyU1wzjx4BUKT7RaJmVvaxgcQ7BhsB7IBmpCWItSaiDraYflD0FnI0y/GvKOPa304fgNPF2ZxT/r0jk5vYWbi2qIOo6LExdktLCxPZ4lNRncUNvK+OzkL150xVmBUPZO33cwRAQ7ziAD2Cgir4vIy/seoSxMKTWA9LRb00J3NsGcm/scBD4DD+/M4Z916ZyT1citxxkEYPVBX1NQ
R7zTxw+XfI7ff9AJjKJ5UL/JCjF1RMG2DH4ayiKUUgOY3wfPXwOtldbNY4YV920zBn63I4ePmlK4JLeer2fvPe5ZJ/ZJjvJxVUEdv98ZxZI1VVw8M/+LFwsDJ48qV+nEdUcRVMvAGPMe1shjV+D5KmB1COtSSg0U7//KGsk76SIYPqlPm/AZ+P1OKwiuyKvjopz+C4J9Tk1vZXJeCg8u20q354D+h9wZ4IiCihX9u8MIE+zcRDcCLwB/DCzKA17q605FJFVEXhCRzSKySUQO6fdRSg0AO96Hd38BUy6FwpP6tAm/gf8oTeKDxhQuza3n/OzQnK4Rgf84ezxVzV08s7Liixei461LXjUMjirYPoNbgXlAK4AxZhuQdRz7/Q3wmjFmPDAV2HQc21JKhUJXE7x4IwwbDec90KeZRP0Gfrg6iRd2xfGNnHq+nrM3BIV+4eQxGcwbPYyH393+5dZB4YlQvRq87pDufzALNgx6jDH7f4qBgWd9usxURFKAU4HHAIwxbmNMc1+2pZQKoTd+DB31cNGjENP7W5oYAz9Zk8hfdsTx7QkdXJwb2iDY59bTR9PQ7uaFTw8Yd1Awx5oio+azsNQwGAUbBu+JyA+BOBE5E3geeKWP+xwJ1AOPi8gaEXlURA4ZGigiN4lIqYiU1tfX93FXSqk+KX8X1jwNJ90GudP6tIkHNybwdHk8/z62g+9O7OjX8o7mxFHDmJqfwiMflOPbd2VR4Vzra8XysNUx2AQbBndjfYB/Dvw78C/gx33cZxQwA3jYGDMd6Ahs/0uMMYuNMSXGmJLMzMw+7kop1WueLnjlO5BeDPMP+dUMyuPb4nhoUwKXjezi7skd/d5ZfDQiwrdOK2bX3k5e31BrLUzMgvRRsHtl+AoZZIK9msiP1WF8izHmYmPMI8cxGrkSqDTG7PtXeQErHJRSA8Hy30HTTlj0oDVoq5f+URHDzz5L4qzcbu6b3hbWINhn4QnZ5KXG8dTyXV8sLJhrdSLrRAqHddQwEMtPRaQB2AJsCdzl7Cd93aExphbYLSLjAovOADb2dXtKqX7UVgsfPADjF8Go03r99ndro7ljVTJzMtz8Zk4rUcGee+hnTofwzbmFLC/fy/a6Nmth3gzobICW3fYUNcAd65/qu1hXEc0yxqQbY9KBOcA8Efnucez3NuAZEVkHTAPuP45tKaX6y9v/ad2T4Mx7e/3WtY1R3Lw8hbEpXh6Z10KsMwT19cIlJQVEOx08vSJwmem+UdNVOkTqcI4VBlcBlxtjduxbYIwpB64Eru7rTo0xawP9AVOMMRcYY5r6ui2lVD+pXgtrnoE5/97rUcaVHQ7+7aMUMmL9PHFyM8ku+0/FZCTGcO7kbF5cXWldZjp8kjXVdvUau0sbkI4VBi5jTMPBC40x9cChE5MrpQYnY+D1H0F8Opzau9ubt3qE6z9KpccvPD6vmcxY+4Ngn0tKCmjr9lodyVExkD3JGm+gDnGsMDjaCA0dvaFUpNj2Buz6EOb/AOJSg36bxw+3Lk+hvM3JH09sYXRy76ehDqW5o4aRlxr3xZiD3OlWC8jvt7WugehYYTBVRFoP82gDJoejQKVUiPn98PZ9kFYEM68N+m3WoLIkPqiL5v6ZbZyU5QlZiX3lcAgXzcznw+0NVDV3WfMU9bRCY5ndpQ04Rw0DY4zTGJN8mEeSMUZPEykVCTa/ArXrrFaBM/hf60e2xvHcjjhuGdfBJUXB36Es3C6ekW/dTGdNlXYiH0Vv7oGslIo0fh+8cz8tCSP5V9cczIETvB2kuOKLCeY+a43n59symZvWyikJ1azcccS32a5wWDzTClJZuq6GW089EVzxVr/B1EvtLm1AsekqYKXUgLD+RajfzLoxt2IkuGtB63pcPFSeR0FcDzePqDnqzesHikVTcthU00pZY7c1g6m2DA6hYaDUUOXzwLs/h+GT2Z0d3P2B3X7hgfI8/MAdo6qIdQ6cK4eOZtGUXERg6Wc1Vr9B7Trwee0u
a0DRMFBqqPrsOevG9gt+BHLsjwJj4LGK4ezojOX/FFWTHTvwOoyPJDslllkj0lm6rtrqN/B2W7fCVPtpGCg1FHl74L1fQt5MGHt2UG95qyGFd/emclFOAzNTwzcLaX9ZNDWHbXXtlLvGWAv0VNGXaBgoNRStftKao+f0HwV105rdjZ08vns4U5PbuTjnkHGog8I5k3JwCPx9VwzEpujgs4NoGCg11Hi6rcnoCuZC8YJjrt7e4+XZTypIc3n59sjqQdFhfDiZSTHMHTWMpZ/XYnKna8vgIBoGSg01q5+Atmo4/YfHbBX4/Ia/rKqgo8fLHcVVJEYN7pG7i6bksqOhg4bkE6BuoxWMCtAwUGpo8XRZrYIR82DkqcdcfdnGPZTXd3D+tDxGxveEocDQOntSNk6H8FFHPvi9ULfB7pIGDA0DpYaS0sehvTaoVsH6qhbe31bP7JHpzByRFqYCQys9IZrZRem8UJNhLdAZTPfTMFBqqHB3wocPQtEpUHTyUVeta+vmhdWVFKTFsWhyTpgKDI+FJwznw4Z4fLFp1qR1CtAwUGroKH0MOuqsVsFR9Hh8PLOyApdDuGLOCKKckfUxcebE4YBQHTcOatbaXc6AEVn/ykqpw3N3wIf/A6Pmw4iTjriaMYYXV1fS0NbDZbMLSYmLvPko89PiOSE3mU96RkDdJu1EDtAwUGooWPWodf/f+UdvFXy4vYH11a2cPSmb4szEMBUXfmdOHM6bLTnaiXwADQOlIl1PO3z0Gyg+AwrnHHG18vp2Xt9Qy6TcZE4enRHGAsNv4cRsPvePtL7RfgNAw0CpyPfJYujce9S+guZON8+t2s2whBgumpGPBDEqeTCbkJMEKQW0O5K03yBAw0CpSNbdCh8/BGMWQn7J4VcJdBh7fX6+OaeQGFdwU1kPZiLCwhNyWOstwle11u5yBgQNA6Ui2YqHoasJ5t992JeNMdz94jqqm7u4pKSArOTYMBdon4UnDGedfySiI5EBDQOlIldHg9UqGL/Imp30MB79YAcvra3mjAnDmZCTHOYC7VUyIo0drtE4jHYig41hICJOEVkjIkvtqkGpiPb+r8HTCWfcc/iXt9bz81c3cc6kbE4flxnm4uwX5XSQMmo2AL4qHYlsZ8vgO4DeXUKpUGjaaV1OOv1KyBx7yMs7Gzq47bk1jB2exK+/MTXiO4yPZMbUqTSZRPZu/cTuUmwXZcdORSQfOA/4L+B7dtSgVER7537ra9ooaz6iAzS7hRveSUN8DhZPqyHh87Iv3ex+KDl1XBZrzUhG6xxFtrUM/gf4PnDE+XBF5CYRKRWR0vr6+rAVptSgV/s5rPubNStpXOqXXur2wY0fpbC7w8kfTmyhMHFwT0l9vBJjomhKPYFhneXW3d+GsLCHgYgsAuqMMZ8ebT1jzGJjTIkxpiQzc+idz1Sqz978GcQmW4PMDuAz8N1Pklm1N5oHZrUyN3Pw3MM4lJJHluDCS+WWUrtLsZUdLYN5wNdEZCfwF2CBiDxtQx1KRZ4dH8D2ZXDKHRAdv3+xMXDv2kRerYrlx1PaWFQwtP8KPtD4GacAUP7ZhzZXYq+wh4Ex5gfGmHxjTBFwGfC2MebKcNehVMTx++D1H0JyHsy+af9iY+C+dYk8URbPjWM6+bexXTYWOfAMLxxHmyTi3j20b4Op4wyUihRrnobadXDmveCKA6wguH9dIo9ti+fa0Z38cEq7zUUOQCI0Jk8ku2Mze9uHbovJ1jAwxrxrjFlkZw1KRYTuFnjrXusm95MuAqwg+PnnCTyyLZ6rizu5Z2r7sW5uNmTFjZjJWNnNexur7C7FNtoyUCoSvPdLazK6c/4bRHB7/Xx3VTKLtyZwdXEnP5umQXA0mWPnEC0+tqxbYXcptrFlnIFSkeLZlRUh38cVcwqPvkLDNlj5B5hxFeROo6XLw7ee+pTlFbHcdUI7t4zv1CA4BsmdBkBPxad0ey4ldghM1ncwbRkoNdi9/kNwxcOC/8uW2ja+
/vuPKN3VyP/MbuHWCRoEQUkrwhOdwlh/GSvK99pdjS20ZaDUAFBc8fyRX3SmH/m1PRth2xuYCefz11ff4p41SSS5/Dw5r5UTs3QcQdBEcORNY2r5Tp7btIf547LsrijstGWg1GDlc8OGJXjjs7i97jzu/jSZ2RkeXj2zUYOgD5y50xknu3lvQxXGGLvLCTsNA6UGKbPtTehs4Oa2G/hXdQJ3TWrniVOayYwdeh9k/SJ3OlF4SW/fwobqVrurCTsNA6UGGb+Bj8v24t3+Fkt8J9OWOo5Xz2zk1vGdOLR/oO8Cd4Kb7ijjzU17bC4m/DQMlBokenzwt52xLHw9DceGF+giluhJX+O5U5sZneyzu7zBLzkPknJYkFQxJMNAO5CVGuBaPcJz5XH8aVsce7qd3Jb4NnMdm/BOupRFRdF2lxc5RCBvJlN3rWV9VSs1LV3kpMTZXVXYaBgoNUA1uqN4tS6NZWtS6fI7mZTUwS05u7mi7hna4gvY6B8LO4bmfQhCJn8WKZuXkkYrb26q46q5I+yuKGw0DJQaYCq7olm6J533G1PwG5ib1sZXhzdSnNDNqKp/4PT1sCPnPHQAQQgE+g3OSq3ijQ21GgZKqfDb3B7HK7XplLYk4RI/Z2Q0c97wRrJjrMtEU9q3k9n8GVUZ8+iKHXrXwYdF7nQQB18dVsU1ZXtp7nSTGj80TsVpGChls/L6dp7eUsDG9gQSnT4uymng7Mwmkl1fdAo7fT2MrF5KV0wGVZmn2VhthItOgKwTmMJ2vP6vsGzjHr5RUmB3VWGhYaCUTXY3dvLahlp2NHSQGhXD1fl7OCOjmVjnoeMECva8SbSnjY0jr8M49Nc2pPJLSFz/IvkpMby6vlbDQCkVGi1dHl7fUMva3c0kxUSxaEoOlzrfJdpx+MFiye07GN70KTXD5tIenx/eYoei/BLk08f55gluHlzbQGu3h+RYl91VhZyGgVJh4jeGj8v2smxjLcbA/LGZnDYuk5goJ9EVhw8Ch8/NyOpX6IpOZ3fW6WGueIgqmAvAuSk7+W/fCN7eVMcF0/NsLir0NAyUCoO97T28uLqSnXs7GZ+dxKIpuaQnHLtjsqDubWI8zWwsuhbjiPy/TgeEYcWQkElh2xqGJ4/l1fU1GgZKqeNXurORV9ZV43QIF8/IZ3phKhLEZaFJHbvIbvyE2vTZtCcc454Gqv+IQOGJSMUKzpl0O899UkFHj5eEmMj+uNTpKJQKEY/Pz5LVlSxZU0VBejzfOWMsM0akBRUEDp+bUdWv0O1KZffwBWGoVn3JiHnQUsHXinz0eP28u6Xe7opCTsNAqRBo6nDzx/fLKN3VxPxxmVw/byQpccGf5hmx5w1i3I2U552P3zE0rnMfUEacCMA0s4mMxGj++Xm1zQWFnoaBUv1sd2Mnv3+vjMYON1fPHcHCidk4ejFaOK11C1lNq6nJmEdbwtAZATugDJ8EMck4KpZz3uQc3txUR2t3ZN8jQsNAqX60sbqFRz8sJ9opfOu0YsbnJPfq/S5POyOrX6EjNpvKzPmhKVIdm8MJBbOhYjkXzsjH7fXz6uc1dlcVUhoGSvWT5eV7eWZlBdnJsdw8fzRZSbG924AxjKp+GaffTVnehRjH0Lsp+4BSeCLUb2ZqupeRGQn8fU2V3RWFVNi7x0WkAHgSGA4YYLEx5jfhrkMNIaWP9/82S6770rfvb63ntQ21TMhJ5rJZBbicvf87K6uplNT27ezMPpuu2Mz+qlT1VdHJAMjOD7lw+gk8sGwrVc1d5KVG5rTWdrQMvMAdxpiJwFzgVhGZaEMdSh03Ywxvbd7DaxtqmZKfwhWzC/sUBHHdexhRu4zmxGL2pM8KQaWq1/JmQnQSlL/DBdOscQYvRXDrIOxhYIypMcasDjxvAzYBkT+iQ0UcYwxvbNzDW5vqmFGYxiUlBTj7cN9Jl6eNsbufx+uMpSzvfJ2aeqBwumDkqVD2NoXpcZSMSOPva6owJjLvMW1rn4GIFAHT
gZWHee0mESkVkdL6+si/xlcNLsYY7l26kfe21jN7ZDpfn5HXqyuGDtgQcz7/CTHuJrbnX4Q3KrH/i1V9V3w6NFdAYzkXzshje107G6pb7a4qJGwLAxFJBF4EbjfGHPLTNcYsNsaUGGNKMjP1/KkaOPwGfvTSeh7/aCfziodx/tTcvgUBMG7nUxTueZOK4WfoZaQDUXFgwF/Z2yyanEu008GS1ZF5qsiWMBARF1YQPGOMWWJHDUr1hdcPd5Ym8ezKCm49vZhzJ+cENaL4cIY3rGD6lgfZPXwBtcNO7OdKVb9IHwWphVD2DinxLs6YkMVLa6vo8fqO/d5BJuxhINZvzmPAJmPMA+Hev1J95fHDdz5JZsmuOO44cyx3nTW+z0GQ3F7OKWu+R2tCESsm36f9BAOViNU62PkB+DxcPruQxg43r62vtbuyfmdHy2AecBWwQETWBh7n2lCHUkHr8cEtK1L4Z2UsP5rSxm1njOnztmJ7Gjit9FZ8DhfvzfwtHldSP1aq+l3xAuhphcpSTh6dQWF6PM+urLC7qn5nx9VEHxpjxBgzxRgzLfD4V7jrUCpYnV648eMUllXH8J/T27hxbFeftxXtbmbBJzcS597L+zMeoiNeL6Qb8EbNB4cLtvwTh0O4bHYBK3c0sr2u3e7K+pWOQFbqKFrcwtUfpPLhnmh+WdLKVcXHFwSnr/p3kjoreG/m/7I3bWo/VqpCJjbFCoRNr4AxXFJSQLTTwZPLd9pdWb/SMFDqCBq6hcvfT+WzRhe/m9vKJUXdfd5WQmcVZ664itT27Xww/UH2DJvTj5WqkJvwVWjaCXvWk5EYw9em5fJ8aSUtnZEzeZ2GgVKHUdXp4JJ30yhvi+LReS2ck9/T521lNpaycMWVxPXs5e1Zi6nOOrUfK1VhMe5cEIfVOgCum1dEl8fHX0sjp+9Aw0Cpg2xvdfKNd9Ko73Hw9CnNnJbt7tN2HD43U7Y+xFdWXo/XGc8bc5+iPn1mP1erwiIxEwpP2h8GJ+SmMHdUOo9/tBO3129zcf0jsu/jptSReHugsQwatkH7HuhqBJ+HLp/g7k7gF6RxQkEaw7qyoSUPkrLBEdyvi8Pnpqjmn0ze9jAJ3TWU5V/IpxPuxhsVH+KDUiE14avw2n/A3jIYVsy3Tivm2sdX8dKaKi6ZVWB3dcdNw0ANLS2VsPNDqPoU/B7rAz4xC+IzKO9KYF1bFLmudubG1xO9ZyNUB1oF4oSk4ZCcD8l5kDYCErOJ72pHjIcYdwsp7dvJalpNQe2bRHvb2JtyAism/4w9GTqgLCJMWASv3Q2fPw/z7+a0sZlMykvm4ffKuGhmfp/mpRpINAzU0NBeD1uWQs1n4Iy2ZqTMnQ7pI+kmmp+tTeK5ujgWZPfw0JxWol0GjB866qGlCloDj7qNUPkJbPw7ABcctBuPM4HK4QvYmXsuNRnzdDBZJEnJt64qWvMMnPp9xOHg1vmjufmZ1SxdV8350wb3ZcIaBiqy+Tyw9XXY9oZ196qxZ1szUbqsUzbbW53cuiKFLa1R3DyugzsndeDc9/ktDkgcbj3yZnyxze5WyJ8J7XWs3LwTv7jwRCXSklRMe3wBRvSmNBFrxlXwwvWw410oXsBZJ2QzISeZ//fGVs6ZlEN01ODthtUwUJGrsRz+dg3UroPcGTDxAoi1bkPpM/BseRz3r0skPsrwxMm96CiOTd5/45Oyjsi5mkQFYfwiiEuD1U9B8QIcDuH7Z43juj+v4q+lu7lq7uCdbFDDQEWm8netIBCBmddDzpT9L21qdvKD1cmsbXRxSpabX89qZXhcZFwRokIsKgYmXwKfPg6djRCfzvxxmcwuSuc3b27l/Gm5JMe67K6yTwZvm0apwzEGVi6Gp75uXQF049v7g2BPl4N71iTy1bfSqWh38uCsFp48pVmDQPXOjKvA54Y1TwEgIvx40QT2drj5zZvbbC6u7zQMVOTwuuGV78Crd8GYhXDDMkgfRW2Xg3vXJnLqq8N4
pjyOS0Z289ZZe7lwRI/276rey55s9Tst/x14rFHpU/JTuWxWIX/+eCdbattsLrBvNAxUZOhogCfPh9VPwMnfw3vJ0ywr7+KGP6/ipH8O44myOM4v7Oads/dy/4w20mIi89aFKkxOudManxJoHQDcddY4kmOj+P4Ln+H1Db7WpvYZqMGv9nPMc5dDez0rp/+Sp/bM4v373qKt20tWUgw3j+/k0qIuChMH3y+oGqBGngr5s+Gjh2DmteB0kZ4Qzb3nT+K259aw+INybpk/2u4qe0XDQNmiL/PBG2Po9vjpcHtp7vTQ2OFmVP1b3LT3v2k2Cdzo/jGfL88nKbaeccOT+NZpxcwfl0nUmidCcARqSBOBU++EZy+Btc/CzGsAWDQlh1fX1/Dgsq2cVJzBtIJUe+vsBQ0DZRuvz09bt5cOt5dOt4+OnsBXt5fOHt8hyzvdXvyBszuCn9ucL/Ft1wuslzE8OOynFGXkcVp6PDkpsYgIX5k43N4DVJFtzEIomANv/cyaqiI+HRHh/gsn89nuFm59ZjVLbzuZtIRouysNioaBCqmOHi/b69rZVtfO9rp2djd1UtXURVldO2093sO+R4D4aCcJMVHER0eRmRRDfHQUCTFOEqKjSHN2c2XtLxjf/B7luYtYP+mnnOGMCe+BKSUC5z0AfzwV3vwpfO0hAFLjo/n9N2dw8R8+5uZnPuWJ62cTEzXwByJqGKh+09Hj5bPKZtZUWI9NNa1UNX9xMxiXU8hLjSMvLY6xw5NIjXeREucKfOhbH/TxMU5iXU4cR7jMJ7mtjFPW3E5S524+HX8XW4qu0ikflH2yJ8GJt8DH/wvTroDCuQBMLUjlVxdP5fa/ruWu59fx4KXTBvzcRRoGqk/8fkN5QzurK/Z9+DexdU/b/tM4ozITmDkijctnFzA6K4mxwxMpTI8nymldwNbrPgNjKKpeyqwN/4nXGcfbsx6hbtisfj4qpfrgtLth4z+saSpuetea+BC4YHoe1S1d/PK1LUQ5hF99Y+qADgQNA3WIw31Qd7q97G7sYndTJ7sbO9nd1Em3x7o6J9bloCAtnvnjsihMjyc/LY746C/+azV2uFlR3siK8sY+1RPTs5dZG+6jcM+b1KVN56Npv6Ir9tj9AfuOo7iib/s9mjKfTkOhAmIS4dKn4bGz4G9Xw9UvQ5TVT3DL/NF4fYYHlm2lvcfL/1w27Uu/GwPJwKxK2crt9VPV3EVVUyeVzV1UNnXR2GHN2yNAdkosU/JSKUiPpyA9jozEmCOe1jkuxjCi5jVmbvoFLk8ba8Z9j80jr9aJ4NTAkzMVzv8tvHgDvPQtuOAP+wPh22eMISk2iv9cupGLH17Ob6+YzqjMRJsLPpSGwRDW7fGxa29noIO3jW117Wzb08a2Pe3sG5KVEuciPy2OWSPSyE+PJz81jhhX6D+MhzV/zvTNvyaraTV7kyeyYvajtCSNCfl+leqzyRdb98t48x7oboFLnoToBACumzeSomEJ3P7XtZz30Ifcfc54rpw7YkCdNhJjBv5IzJKSElNaWmp3GQOWMYYuj4/2bi+t3V7ae7y0dXto7/bS1u2lLfB9S5eHmuZuqlu6qG7uoqH9i1k6RaAgLZ7RWYkYAwVpVkdvUjgn3TKGrMZSJpY/Rm7DR3RHp/PZ2G9Tnn/BcbUGiiue78ci1UA25xt32F0CfPpnWPpdyBgLF/zeundGQE1LF99/YR0fbGtgQk4ydy4cy4LxWUiILoIQkU+NMSXBrKstgwFi3wd6Y4ebpg4PTZ1umjrdge/dNHV6aO32sKW2jW6Pj26Pnx7vF1/9QWR6TJSDlDgXqfEuRmYkMr3QRVp8NFlJMWQmxeBy2jM7SUJnJYW1yxhV+XdSOnbQHZ3OmrG3s23EZXijEmypSak+m3mtdSOcf9wGj34FZv0bnHQbpBaSkxLHk9fP5l+f1/Lfr23mhidKGZ2VyEUz8rlweh7ZKbG2lW1Ly0BEzgZ+AziBR40xvzja+oOx
ZdDt8R3wYe6hsdP6UG/scAc+6D1f+r6xw03PEW6sLWKdrkmOdeH1+YlxOYmNclhfXQ5io5z7n8dEHbjMQazLuf95SM7r95YxxHXvIaN5HVlNpWQ1lpLWZs302JAyhW2F36Ai5yx8zrh+26W2DIaOAdEy2Ke7BZbdY81fZAyMP9e6p8aYhRCbjMfn5x9rq/nLJxWU7mrCITBzRBpzRg5j7qhhTCtMJTHm+P5e703LIOxhICJOYCtwJlAJrAIuN8ZsPNJ7+iMM/H6D12/wG4Nv3/ODlu1/GEOPx0+Xx0e3x0eX20enx0e320eXx0dn4GtHjzUtQkuXh5YuNy1dHpo7PTR3eXAf4YMdrA/29IRo0uKtv8zTEqID30eTnvDFMuv7aFLiXPvPLfZlGoeQMQanvwenr5soXzdOX5f11d9NlLeLWHcjse69xPY0ENdTT1JHBckdO4j2tgPgdcZRnzqV2owTqcg+k4740NxUXMNg6BhQYbBP825Y8TCsf8Ga3C69GL69+kur7GjoYMnqSt7f1sD6qhZ8gaZ+flocv7x4CicVZ/Rp1wP9NNFsYLsxphxARP4CnA8cMQz66qcvb+DpFbvwBnMOpQ8Sop2kxkeTHOciNc7FqIzE/QOpkvd/4Fsf6OkJLlLjo0mNc+2/1n6wy2n4mNNLv3XM9XyOaLpiMmiPL2Bn7iJaEkfRmHICjckTMI7BeSMQpYKWWgBn3w8L74PKVdDZcMgqIzMSuGPhOO5YOI72Hi+lOxtZX9XClj3tZCWFZ3S9HWGQB+w+4PtKYM7BK4nITcBNgW/bRWRLL/aRARz6Ex8aBuixNwCbw7GjAXr8YTPEj//OiDv+3/Zu9YOPP+j7cA7YDmRjzGJgcV/eKyKlwTaNIs1QPnbQ49fj1+Pv6/Hbcb6iCjjw5HB+YJlSSimb2BEGq4AxIjJSRKKBy4CXbahDKaVUQNhPExljvCLyf4DXsS4t/ZMxZkM/76ZPp5cixFA+dtDj1+Mf2vp8/INiBLJSSqnQioxrHJVSSh0XDQOllFKDNwxE5GwR2SIi20Xk7iOsc4mIbBSRDSLybLhrDKVjHb+IPCgiawOPrSLSbEOZIRPE8ReKyDsiskZE1onIuXbUGSpBHP8IEXkrcOzviki+HXWGgoj8SUTqRGT9EV4XEXko8LNZJyIzwl1jKAVx/ONFZLmI9IjInUFv2Bgz6B5YHc9lwCggGvgMmHjQOmOANUBa4Pssu+sO5/EftP5tWB31ttcexn//xcDNgecTgZ121x3m438euCbwfAHwlN119+PxnwrMANYf4fVzgVexbr8xF1hpd81hPv4sYBbwX8CdwW53sLYM9k9pYYxxA/umtDjQjcDvjDFNAMaYujDXGErBHP+BLgeeC0tl4RHM8RsgOfA8BagOY32hFszxTwTeDjx/5zCvD1rGmPeBo92+7nzgSWNZAaSKSE54qgu9Yx2/MabOGLMK8PRmu4M1DA43pUXeQeuMBcaKyEcisiIwU2qkCOb4Aet0ATCSLz4YIkEwx/9T4EoRqQT+hdU6ihTBHP9nwNcDzy8EkkRkWBhqGwiC/v1QXxisYRCMKKxTRfOx/jJ+RERS7SzIJpcBLxhjfHYXEmaXA382xuRjnTZ4SkQi+f/7we4EThORNcBpWKP8h9r/AdULA3ZuomMIZkqLSqxzhR5gh4hsxQqHVeEpMaR6M6XHZcCtIa8ovII5/huAswGMMctFJBZrEq9IOF14zOM3xlQTaBmISCJwkTGmOVwF2kynvOmDwfqXUjBTWryE1SpARDKwThuVh7HGUApqSg8RGQ+kAcvDXF+oBXP8FcAZACIyAYgF6sNaZegc8/hFJOOAltAPgD+FuUY7vQxcHbiqaC7QYoypsbuogW5QtgzMEaa0EJF7gVJjzMuB1xaKyEas5vFdxpi99lXdf4I8frA+JP5iApcYRIogj/8OrFOD38XqTL42Un4OQR7/fODnImKA94mg1qGIPId1fBmBPqF7ABeA
MeYPWH1E5wLbgU7gOnsqDY1jHb+IZAOlWBdQ+EXkdqyrzVqPut0I+f1QSil1HAbraSKllFL9SMNAKaWUhoFSSikNA6WUUmgYKKWUQsNAKaUUGgZKKaWA/w+RmwOosbSK5QAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "source": [
+ "sns.distplot(auc_df['diff'].values.tolist())"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+ " warnings.warn(msg, FutureWarning)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD4CAYAAADmWv3KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAoEUlEQVR4nO3dd3hc9Z3v8fd3ZjTqvVrdcpErbsIF0wyhpQDZhAQIEIckXrLZPJvsbvay2U2e3NxsyWY3N2RvssEBQiChBAKEAKHYdHCTe5HcZFuSVW2rd8387h8aE+PI0sieM2fm6Pt6nnnURnM+B6SPj37nd35HjDEopZRyHpfdAZRSSllDC14ppRxKC14ppRxKC14ppRxKC14ppRzKY3eAM2VlZZnS0lK7YyilVNTYunXrCWNM9mhfi6iCLy0tpbKy0u4YSikVNUTk2Lm+ZtkQjYiUi8iOMx6dIvJ1q7anlFLqwyw7gjfG7AcWAoiIGzgOPGvV9pRSSn1YuE6yXg0cNsac808JpZRSoRWugr8VeHy0L4jIGhGpFJHK1tbWMMVRSinns7zgRcQL3Ag8NdrXjTFrjTEVxpiK7OxRTwQrpZQ6D+E4gr8B2GaMaQ7DtpRSSgWEo+Bv4xzDM0oppaxjacGLSCJwDfCMldtRSin15yy90MkY0wNkWrkNpZRSo4uoK1mVilSPbaoNyevcvqw4JK+jVDB0sTGllHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoLXillHIoSwteRNJE5GkRqRaRKhFZYeX2lFJK/YnH4te/D3jZGPNpEfECCRZvTymlVIBlBS8iqcDlwGoAY8wgMGjV9pRSSn2YlUM0U4FW4Jcisl1EHhCRxLOfJCJrRKRSRCpbW1stjKOUUpOLlQXvARYD/2OMWQT0APee/SRjzFpjTIUxpiI7O9vCOEopNblYWfD1QL0xZlPg46cZKXyllFJhYFnBG2OagDoRKQ986mpgn1XbU0op9WFWz6L5GvCbwAyaGuALFm9PKaVUgKUFb4zZAVRYuQ2llFKj0ytZlVLKobTglbLBsM+P32/sjqEczuoxeKVUwMCwj5+/dZg/7m5kT0MnPr8hNyWWVeU53LWilDn5KXZHVA6jBa9UGBxs7uK3W+vpGRimOCOBldOy8LiFlq4Bnt1+nCe31LGkJJ2PX5SP1zP+H9a3LysOQ2oV7bTglbLYe4dO8NLuRnJT4rhzWTHFmR++oLtv0Mcb+1t479AJ6tp6uWNZCZlJsTalVU6iY/BKWWhDzUle3N3InPwU7rli2p+VO0C8181H509h9cpSOvuG+cU7NZzsHrAhrXIaLXilLFLV2MkfdjYwe0oKt15cPO7Qy4ycZL502VSG/YZfvFNDR99QmJIqp9KCV8oCbT2DPLW1joK0eG69uAi3S4L6vimp8Xzx0qn0D/t5dONRBof9FidVTqYFr1SI+Y3hyco6AG5bWkyMe2K/ZlNSR/5RaGzv59nt9Rij0ynV+dGCVyrENh05Re2pXj5xUT4Zid7zeo1ZeSlcPTuXnfUdbKttC3FCNVlowSsVQh19Q7y6t4npOUksLEq7oNe6sjybsuxEnt/ZoCdd1XnRglcqhF7b18yw33DTgnxEght3PxeXCLcsKcIlwu93NOhQjZowLXilQqSxo4/ttW2sKMsM2Tz21PgYrpubx6HWbrbXtofkNdXkoQWvVIi8sreJuBg3q8pzQvq6S6dmUJKRwIu7G+keGA7paytn04JXKgTqTvVyoLmby2dmE+91h/S1XSLcvKiAwWE/L+5qCOlrK2fTglcqBF6vbiE+xs3yqRmWvH5uShxXlmezs76Dw63dlmxDOY8WvFIXqKG9j/3NXayc
nkVsTGiP3s90+cxs0hJieGl3Iz5dalgFQQteqQv03qETeD0uVpRlWrqdGLeL6+fm0djRz++21lu6LeUMWvBKXYDugWF2He9gcXF6yMfeRzO/IJWi9Hh++Op+evSEqxqHFrxSF2DzkVP4/Mbyo/fTRISPXZRPa9cA9791OCzbVNFLC16p8+TzGzYfOcmMnCSyk8O3fntxRgI3Lshn7Ts1NHb0hW27KvpYWvAiclREdovIDhGptHJbSoXb3oYOOvuHw3b0fqZvXleOz2/479cPhX3bKnqE4wh+lTFmoTGmIgzbUipsNtScJCPRy8y85LBvuygjgduXFvPbLXUcO9kT9u2r6KBDNEqdh4b2Po6d7GV5WSauC1xz5nx99arpeNzCj9cdtGX7KvJZXfAGeFVEtorImtGeICJrRKRSRCpbW1stjqNUaGw5eooYt7CkON22DDnJcay+ZCrP7TjO/qYu23KoyGV1wV9qjFkM3AB8VUQuP/sJxpi1xpgKY0xFdna2xXGUunDDPj+76juYMyUlLFMjx3LPFWUkeT3816v7bc2hIpOlBW+MOR542wI8Cyy1cntKhUNVUxd9Qz4Wl9h39H5aWoKXL11Wxqv7mtlV3253HBVhLCt4EUkUkeTT7wPXAnus2p5S4bLtWBspcR6mZSfZHQWAuy8tJTU+hp+s1xk16sOsPILPBd4VkZ3AZuBFY8zLFm5PKct19Q9xsKWLRcXptp1cPVtyXAxfWFnKuqpm9jV02h1HRRDLCt4YU2OMWRB4zDXG/ItV21IqXHbUteM3sKg4ze4oH/KFS6aSFOvhp2/oUbz6E50mqVSQjDFsr22nKD2enOQ4u+N8SGpCDJ+/pISX9jRyqEVn1KgRWvBKBamxo5+mzn4W2Tg1cixfvLSM+Bg3/0+vblUBWvBKBWl7bRtul3BRYardUUaVkejljuUlPL+zgSMn9OpWpQWvVFD8xrD7eAczc5JI8HrsjnNOX7psKjFuFz/TsXiFFrxSQak92Utn/zDzC9PsjjKmnOQ4bltazLPbj1N3qtfuOMpmWvBKBWHX8Q48LmG2DQuLTdQ9V0zDJcL/6Hrxk54WvFLj8PkNe493UJ6XbOk9V0MlLzWOT1cU8nRlPS1d/XbHUTbSgldqHJuPnKJrYJj5BZF5cnU0X76sjCG/n1+9f9TuKMpGWvBKjeOFXQ3EuIVZeSl2Rwna1KxErpuTx6831uq9WycxLXilxjDs8/PyniZm5aXg9UTXr8uaK8ro6BviyS11dkdRNomun1ilwmxDzUlO9gxG7Nz3sSwuTmdpaQYPvnuEIZ/f7jjKBlrwSo3hpd2NJHrdzMyN/Nkzo1lzeRnH2/t4aXej3VGUDbTglToHn9/w2r5mVs3KIcYdnb8qV83KYVp2Ive/VYMxxu44Ksyi86dWqTDYeqyNE92DXDc3z+4o583lEr50WRn7GjvZfOSU3XFUmGnBK3UOr+xtwut2sWpWjt1RLsjNCwtIS4jhYZ0yOelowSs1CmMML+9p4tIZWSTFRu7aM8GI97q59eJiXtnbRH2bLl8wmWjBKzWKvQ2dHG/v47q5uXZHCYk7V5QgIjy68ZjdUVQYacErNYpX9zbhEvjIbGcUfEFaPNfNzeWJzXX0DuqFT5OFFrxSo3h5bxMXl2aQmRRrd5SQWX3JVDr6hnhue4PdUVSYaMErdZYjJ3o40NzN9fOid/bMaC4uTWdufgoPv39Ep0xOElrwSp3llb1NAFwbxdMjRyMifH5FKQeau9lytM3uOCoMgip4EXlGRD4mIhP+B0FE3CKyXURemHg8pcLvtX3NzCtIoSAt3u4oIfeJBfkkx3n4zSY92ToZBFvYPwNuBw6KyL+LSPkEtvE3QNWEkyllg5PdA2yrbePqWc44uXq2eK+bTy0u5I+7mzjZPWB3HGWxoAreGLPOGPM5YDFwFFgnIu+LyBdEJOZc3ycihcDHgAdCEVYpq72xvxVjnDN7ZjS3Lytm0Ofn6a31dkdR
Fgt6yEVEMoHVwJeA7cB9jBT+a2N824+BfwDOuZSdiKwRkUoRqWxtbQ02jlKWWF/VTG5KLPMKomft94mamZvM0tIMHt9ci9+vJ1udLKhL9ETkWaAceBT4hDHm9NJ0T4pI5Tm+5+NAizFmq4hcea7XNsasBdYCVFRU6E+bss3gsJ+3D7Ry48ICRMTuOGN6bFPtBX1/aVYim4+e4v3DJ7l0RlaIUqlIE+wR/C+MMXOMMf92utxFJBbAGFNxju9ZCdwoIkeBJ4CrROTXFxpYKatsOnKSnkEfH5kd3WvPBGNefgoJXreebHW4YAv++6N8bsNY32CM+UdjTKExphS4FXjdGHPHBPMpFTbrq1qI9bi4ZJrzj2g9bhdLitN5dV+z3pjbwcYseBHJE5ElQLyILBKRxYHHlUBCOAIqFQ7GGNZVNXPp9CzivW6744TFktJ0fH7Dc9uP2x1FWWS8MfjrGDmxWgj86IzPdwHfCnYjxpg3gTcnFk2p8DnQ3E19Wx9/deV0u6OETU5yHIuL03iqsp4vX1YW8ecd1MSNeQRvjPmVMWYVsNoYs+qMx43GmGfClFEpy62ragbg6kkw/n6mWyqKONjSzY66drujKAuMN0Rzesy8VET+9uxHGPIpFRbrq5qZX5BKbkqc3VHC6uMXTSEuxsVTOifekcY7yZoYeJsEJI/yUCrqneweYHtd+6Q7egdIjovho/Om8IcdDfQN+uyOo0JszDF4Y8z9gbf/OzxxlAq/yXD16lhuqSjime3HeWVvEzcvKrA7jgqhYC90+g9Gpkr2AS8DFwHfMMbovHYV0YK5IOg3m46REudhZ107u+o7wpAqcjy2qRa/MaQnxPCT1w/Se55H8bcvKw5xMhUKwc6Dv9YY0wl8nJG1aKYD37QqlFLhMuzzc7Clm/K8lEk7i8QlwuKSdGpae2jrHbQ7jgqhYAv+9JH+x4CnjDGT6zBHOdaREz0MDvuZnTe5TyktKkoHYKfOpnGUYAv+BRGpBpYA60UkG9DL31TUq2rqIsYtTMtJsjuKrTISvZRkJLC9rl3v9uQgwS4XfC9wCVBhjBkCeoCbrAymlNWMMVQ3dTItO4kYt97cbGFxGq1dAzR26LGbU0zkp3oW8FkRuQv4NHCtNZGUCo/mrgHae4eYnefcpYEnYn5+Km4RvejJQYKdRfMoMA3YAZw+zW6AR6yJpZT1qhs7ASifMrnH309LiPUwMy+ZnfXtXD8vD9ckPensJEEVPFABzDE6OKccpLqpi4K0eFLiznlTsklnYVEaVY2d1LT2MH2Sn5dwgmCHaPYAzrrFvJrUugeGqTvVyyw9ev+QWXnJxHpc7KhrszuKCoFgj+CzgH0ishn44E69xpgbLUmllMX2N3VhQMffzxLjdjGvIJXdxzu4cYEfr0dPPkezYAv+u1aGUCrcqps6SYnzMCV1ci0uFoyFRWlsPdZGVVMnCwrT7I6jLkCw0yTfYuQK1pjA+1uAbRbmUsoyp69enTWJr14dy9SsRFLjY9hR2253FHWBgip4Efky8DRwf+BTBcBzFmVSylKnr17V8ffRuURYUJjKwZYuugeG7Y6jLkCwA2xfZeQm2p0AxpiDwORbW1U5wgdXr2brLJFzWViUjt/A7uO6Kkk0C7bgB4wxH6xCJCIeRubBKxVVTl+9Ol2vXh1TXmoceSlx7KjV2TTRLNif8LdE5FuM3Hz7GuAp4A/WxVLKGs2dI1evzpqis2fGs6Aojbq2Pk716AqT0SrYgr8XaAV2A38JvAT8s1WhlLJKdVPg6tVJvnpkMBYUpgLo0gVRLKhpksYYv4g8BzxnjGm1NpJS1qlq7NSrV4OUluClNDOBnXXtrCrP1hlHUWi8m26LiHxXRE4A+4H9ItIqIt8Z74VFJE5ENovIThHZKyJ62z9lq67+Ierb+pits2eCtqAojdZuXWEyWo03RPMNRmbPXGyMyTDGZADLgJUi8o1xvncAuMoYswBYCFwvIssvNLBS5+uDq1d1/D1o
8/NTcYneCCRajVfwdwK3GWOOnP6EMaYGuAO4a6xvNCO6Ax/GBB4680bZpqqpi7T4GPJS9OrVYCXEepiZO7LCpF/XGow64xV8jDHmxNmfDIzDjzuIKSJuEdkBtACvGWM2jfKcNSJSKSKVra06vK+sMeTzc6ili1lT9OrViVpQlEZn/zBHT/TYHUVN0HgFP9b8qHHnThljfMaYhUAhsFRE5o3ynLXGmApjTEV2dvZ4L6nUeTnc0s2Qz+j4+3mYnZeC1+PS2TRRaLyCXyAinaM8uoD5wW7EGNMOvAFcfwFZlTpvVU2dxHpcTM1KtDtK1PF6XMydksKehg6GfX6746gJGLPgjTFuY0zKKI9kY8yYQzQiki0iaYH344FrgOqQJVcqSH5jqG7sYmZuMh6XXr16PhYUpdE/5OdAc5fdUdQEWPnTPgV4Q0R2MbL65GvGmBcs3J5Sozre1kfXwLAOz1yAadlJJHrd7KjXtWmiSbDrwU+YMWYXsMiq11cqWFVNnbgEZuZqwZ8vt0uYX5hG5dFT9A/5iItx2x1JBUH/XlWOV93YRUlmIgley45nJoWFRWkM+w17GzrtjqKCpAWvHK2tZ5Cmzn69uCkEitLjyUj0srO+3e4oKkha8MrRqgKLi83WxcUumARuBHK4pZuu/iG746ggaMErR6tu7CI7OZbMpFi7ozjCgsI0DLBLT7ZGBS145Vid/UPUnOhmdp4Oz4RKTkoc+alxOkwTJbTglWO9ub8Vv0GnR4bYgqI06tv6ONE9YHcUNQ4teOVYr+xtIjnWQ1FGgt1RHOWiwjQEXWEyGmjBK0fqH/LxRnULs/NTcOniYiGVGh/D1KxEdta3Y3SFyYimBa8c6Z2DJ+gd9DE3X8ffrbCgKI0T3YMcb++zO4oagxa8cqSX9zSREuehLCvJ7iiONC8/FY9L2FbbZncUNQYteOU4Qz4/66qa+cicXNwuHZ6xQrzXzdz8FHbUtTOkK0xGLC145Tgba07S0TfEDfOm2B3F0ZaUZNA/5KeqUZcuiFRa8MpxXt7TRILXzWUzsuyO4mhl2Ymkxcew9ZgO00QqLXjlKD6/4ZW9zawqz9EVDy3mEmFxSTqHWrr1ZGuE0oJXjrKtto0T3QNcNy/P7iiTwuLidAzwzNZ6u6OoUWjBK0d5cVcjXo+LVeV6f99wyEj0UpadyFNb6/H7dU58pNGCV47h8xte3N3IVeU5JMeNeUdJFUIVJenUnupl45GTdkdRZ9GCV46xqeYkrV0D3Lgw3+4ok8rc/FRS42P4zaZau6Oos2jBK8f4w64GEr1uVpXn2B1lUolxu/hMRSGv7GmiubPf7jjqDFrwyhEGh/28tLuJa+bkEu/V2TPh9rllJQz7DY/pUXxE0YJXjvDuoVY6+oZ0eMYmpVmJXDEzm8c31+qVrRHEsoIXkSIReUNE9onIXhH5G6u2pdTzOxpIjY/h0uk6e8Yud60ooaVrgFf2NtkdRQVYeQQ/DPydMWYOsBz4qojMsXB7apLqG/Tx2r5mbpiXh9ejf5Ta5cryHArT43lkwzG7o6gAy34bjDGNxphtgfe7gCqgwKrtqcnr9eoWegZ93LhAh2fs5HYJdywvYfORU1Q36fo0kSAshzsiUgosAjaN8rU1IlIpIpWtra3hiKMc5plt9eQkx7KsLNPuKJPeZyqKiPW4ePCdI3ZHUYSh4EUkCfgd8HVjzJ/9s26MWWuMqTDGVGRn6/ipmpiWrn7ePNDKp5YU6tLAESAj0ctnLy7iuR3HadD1aWxnacGLSAwj5f4bY8wzVm5LTU7PbjuOz2+4ZUmh3VFUwJcvK8Nv4MF39SjeblbOohHgQaDKGPMjq7ajJi9jDL+trGNJSTpl2XrnpkhRlJHAjQvyeXxzLW09g3bHmdSsPIJfCdwJXCUiOwKPj1q4PTXJbK9r53Brjx69R6B7rphG76CPX204aneUSc1j1QsbY94FdFBUWeapynriY9x87CK9c1OkKc9L5iOzc3j4/aOs
ubyMBK9lVaPGoJOGVVTqG/Txws4GbpifpytHRqivXDmd9t4hfrNRly+wixa8ikqv7G2ia2CYW5YU2R1FncOSknQum5HFz948RGf/kN1xJiUteBWVHt14jJLMBJZNzbA7ihrD/7p+Fm29Q6x9q8buKJOSFryKOrvq29l6rI3PryjFpXPfI9q8glRuXJDPg+8eoUWXEg47LXgVdR5+/yiJXjefrtDZM9Hg766dyZDPz33rD9odZdLRgldRpbVrgBd2NvLpJYWk6MnVqFCSmcjnlhXzxJY6alq77Y4zqWjBq6jy+OZaBn1+7rqk1O4oagK+dvUM4jwuvv9iFcbozbnDRQteRY3BYT+/3niMK2ZmM02vXI0qWUmxfOOambxe3aLrxYeRFryKGn/c00hL1wCrV5baHUWdh9WXlDJnSgrffX4f3QPDdseZFLTgVVTw+w0/f6uGsqxErpihq45GI4/bxb/+xXyau/r5r1f32x1nUtCCV1Hhtapmqho7+eqq6To1MootLErjjmUl/Or9o+yu77A7juNpwauIZ4zhJ+sPUpqZwE16U+2o9/fXlZOZFMs3n95J/5DP7jiOpisAqYi3rqqFvQ2d/OctC/C49Zgk2qXGx/Afn7qILzy8hR++sp9vf/xPt2p+bFNo1q25fVlxSF4n2ulvi4poxhjuW3+AkswEbtajd8dYNSuHu1aU8OC7R3j7gN6q0ypa8CqivV7dwp7jnfz1qul69O4w3/robKbnJPH3T+3klN4YxBL6G6Mils9v+OEr+ynOSODmRQV2x1EhFhfj5r5bF9LWO8g3n9qJ368XQIWaFryKWE9vraO6qYt/uL6cGD16d6S5+al866OzWV/dwk/fOGR3HMfR3xoVkboHhvnPVw+wuDiNj83XOzY52epLSrl5YT4/WneAA81ddsdxFC14FZHuW3eA1q4Bvv3xOYzcv105lYjwb39xEeW5yTy5pU7H40NIC15FnKrGTh567yi3LS1iUXG63XFUGMR73dx/5xIMhl9vPMaAzo8PCZ0HryKKz2/41rO7ifW4mJadFLJ50SrylWQmctvFxfxqw1Ge2FLHHctLcOtVyxfEsiN4EXlIRFpEZI9V21DO88A7NWyvbefjF+WT4NXjj8lmRm4yn1iQz/7mLl7a02h3nKhn5RDNw8D1Fr6+cpj9TV3812sHuHZOLgsKU+2Oo2yybGomK6dlsuHwSTYcPmF3nKhmWcEbY94GTln1+spZegaG+cpvtpISF8O/fHK+nlid5G6YP4VZecm8sKuR/U2ddseJWrafZBWRNSJSKSKVra16yfJkZIzhH5/ZzdETPfzktoVkJ8faHUnZzCXCZy8uIi81jse31NHY0Wd3pKhke8EbY9YaYyqMMRXZ2brO92T06021PL+zgb+9ZiaXTMuyO46KELEeN3etKCXO4+Lh94/S1qvTJyfK9oJXk9u22jb+zx/2cWV5Nn915XS746gIkxofw+qVUxny+fnle0fp1TtBTYgWvLLN4dZuvvjwFqakxfGjzyzUG3moUeWlxHHn8lLaewd5ZOMxBof9dkeKGlZOk3wc2ACUi0i9iHzRqm2p6NPc2c9dD27G7RIeuXspGYleuyOpCDY1K5HPVBRRd6qXJ7bU4tOFyYJi5Sya24wxU4wxMcaYQmPMg1ZtS0WXzv4hVv9yC229g/xy9VJKMhPtjqSiwLyCVD6xIJ/qpi6e33kcY7Tkx6NXkqiwau8d5K6HNnOwuYuHVl/MfJ3vriZgeVkmnX1DvHmglZS4GK6enWt3pIimBa/C5kT3AHc8sIma1h7uv3MJl8/UWVNq4q6Zk0tn/zDrq1tIjoth6dQMuyNFLC14FRbNnf187oFN1Lf18uDqCi6boeWuzo+I8MlFBXQPDPH7HcdJjvMwe0qK3bEiks6iUZarauzk5p++R2N7Hw9/YamWu7pgbpdw+9ISCtLjeXxzLTUnuu2OFJG04JWl3jrQyi0/34Ax8Nt7VrC8LNPuSMoh
vB4Xn19RSkail0c2HKPuVK/dkSKOFryyhDGGh987wt0Pb6EoI4Fnv3oJc/P1hKoKrcRYD3evnEpSrIeH3z9KU0e/3ZEiiha8CrnewWG+/uQOvvuHfawqz+ape1YwJTXe7ljKoVLiY7h75VRi3MJD7x3hRPeA3ZEihha8CqkjJ3r45E/f5/mdDfz9tTNZe2cFSbF6Ll9ZKyPRy90rp+I3hofePcLxdl2cDEAi6WKBiooKU1lZaXeMsAjVnYpuX1YcktcJhVf3NvG1x7fjEuHWi4uYkZtsdyQ1yTS09/HAuzVkJ8fy2JeWU5SRYHcky4nIVmNMxWhf0yN4dcH6h3x85/d7WPPoVrKSYvnrq6ZruStb5KfFc/fKqXT0DnHr2o3UnpzcJ1614NUF2dvQwSf++10e2XCMu1dOZc3lZaQn6Loyyj6F6Qk89uXl9AwO85n7N1DTOnmnUGrBq/Pi9xseeKeGT/70fdr7hnjk7qV85xNziHHrj5Sy37yCVB7/8nKGfH5u+fkGdtS12x3JFvrbqCbsyIkebn9gI99/sYory7N55euX67IDKuLMnpLC01+5hMRYD7et3cj6qma7I4WdFrwK2pDPz/+8eZjrf/w2exs6+cGn5nP/nUt0qV8VsaZmJfK7r1zC9JwkvvxIJY9uODqpVqHU+WsqKO8fOsH3XthHdVMX183N5Xs3zSM3Jc7uWEqNKzs5lifWLOdrj2/n27/fy876Dr5/8zziYtx2R7OcFrwa09ETPfzrS1W8uq+ZwvR4fn7HYq6fN8XuWEpNSGKsh1/cVcFP1h/kvvUH2dfQyc/vWEJxprOnUWrBh4kxhqMne9lZ105NazdvHmjlVM8gvYM+Bob9DA77cIkQ63ER43aREh9DRoKX9MQYclPiKExPICXOg4g1t7U7e15+S1c/b+1vZWd9Ox6Xi2vn5LJyehaneoZCNodfqXByu4RvXDOTBUWpfP2JHdxw39vc+9HZfG5psWNvF6kFb6G6U728sb+Ftw+0sq22nVM9I3eFdwmkJXjJTBx5xHrceD0u/MYwOOxnYNhPZ98QB1u66Oz/002Gk2M9FKTHU5ieQElmAoXpob383+c3HGzpovJoG1WNnXjcwiXTsrh0RhYpcTEh3ZZSdrlqVi4v/c1l3Pu73Xz7uT28tKuRf//UfEfeWUyvZA2hIZ+frcfaeKO6hderWzjYMjL/tiQzgaWlGSwuSWdRcRplWUk8vbU+qNccHPbT1NlPfVsvx9v6qG/rozWw1oZLYE5+CkuK01lSmsHi4jQK0uIndJTfOzjM5iOn+MXbNew63kFX/zCJXjcVpRmsnJ6lywyoqBTMFd7GGJ7cUse/vFjFwLCfu1aU8NVV00mPskkDY13JqgV/gTp6h3jzQAvrqlp4a38Lnf3DeFzC0qkZXDUrh6tm5VCWnfRn33chwxx9gz7q2no5drKHgWE/O+ra6R30AZAS52FGbjIzcpLITYkjM8lLRqIXQRj2j/x10Njez7GTPdSc6GFvQwdDPoPbJczISaKiJJ2Zecl4XDrBSkWviSzh0dTRz49e28/TW+tJjPWw5rIyPre8JGpmh2nBh9iREz2sr2pmXVUzW4624fMbspK8rCofKfRLZ2SRPM6QRijXohn2+alu6mJbbRsHmrs40NzN4ZZuTgaGhEaTnxpHcWYCC4rSWDkti5rWHrweLXXlDOezRtP+pi5+8HI1r1e3EOtxcfPCAu5cUcLc/BTLzn2FwlgFr39/j8MYQ+2pXjbVnGLjkZNsqjn1wUp1s/KSueeKMq6encvCwjTbTtR43C7mFaQyr+DD660P+/y09Q59MPYf4xZi3C6yk2P/bIpYfZuuvqcmt/K8ZB5afTEHmrt4+P2jPLOtnicr65ialcgN8/K4bm4e8wpScUfRCVlLC15ErgfuA9zAA8aYf7dyexeqf8jH0ZM9VDd2UdXUSXVjF/saO2ntGhnzzkz0sqwsg7+8ooxV5TkRv1KdJ1Dm2cmxdkdR
KmrMzE3mXz85n3+4rpyXdjfxxz2N3P92DT978zDJsR6WlKZzcWkGs6ckU56XQn5qXMQe4VtW8CLiBn4KXAPUA1tE5HljzL5Qb8sYw7DfMOTzMzRsGPT5GfL5GfaNvD8w7KOrf5jOviE6A287Ao/Gjj4a2vtpaO/70JCG1+1iek4Sl83IYnFxOsvLMpiWnRSx/yOVUqGVluDl9mXF3L6smFM9g7xzsJXNR06x6cgp3ty//4PnJXrdTEmLZ0pqXOART2aSl+Q4D8mxMSNv42Lwelx43S5iPCN/Sce4/vS+xyWWdIuVR/BLgUPGmBoAEXkCuAkIecHP/s7L9A/5J/x9ybEe8lLjyE+LZ15BKgVpcRRlJDB7SgpTsxJ14SylFDByQ5GbFhZw08ICADr7hzjQ1EV1UxeHWrpp6uinsbOf/U2ttHYPMNFTm1lJXir/+ZqQ57ay4AuAujM+rgeWnf0kEVkDrAl82C0i+89+jpX2nN+3ZQEnQhrkPH3uwl8iYvYlRJy0P7ov5ykEvxdjCfm+HAPk2+f97SXn+oLtJ1mNMWuBtXbnmAgRqTzXWeto46R9AWftj+5LZIqmfbFyDOI4UHTGx4WBzymllAoDKwt+CzBDRKaKiBe4FXjewu0ppZQ6g2VDNMaYYRH5a+AVRqZJPmSM2WvV9sIsqoaUxuGkfQFn7Y/uS2SKmn2JqCtZlVJKhY7OA1RKKYfSgldKKYfSgg+CiGSIyGsicjDwNn2U55SIyDYR2SEie0XkHjuyjifIfVkoIhsC+7FLRD5rR9ZgBLM/gee9LCLtIvJCuDOORUSuF5H9InJIRO4d5euxIvJk4OubRKTUhphBCWJfLg/8jgyLyKftyBisIPblb0VkX+D3Y72InHMuup204INzL7DeGDMDWB/4+GyNwApjzEJGLui6V0TywxcxaMHsSy9wlzFmLnA98GMRSQtfxAkJZn8AfgjcGbZUQThjOY8bgDnAbSIy56ynfRFoM8ZMB/4v8IPwpgxOkPtSC6wGHgtvuokJcl+2AxXGmIuAp4H/CG/K4GjBB+cm4FeB938F3Hz2E4wxg8aYgcCHsUTuf9tg9uWAMeZg4P0GoAXIDlfACRp3fwCMMeuBrjBlCtYHy3kYYwaB08t5nOnM/XsauFoic0GkcffFGHPUGLMLmPi6IuEVzL68YYzpDXy4kZHrfCJOpJZQpMk1xjQG3m8Cckd7kogUicguRpZo+EGgHCNNUPtymogsBbzAYauDnacJ7U+EGW05j4JzPccYMwx0AJlhSTcxwexLtJjovnwR+KOlic6T7UsVRAoRWQfkjfKlfzrzA2OMEZFR55YaY+qAiwJDM8+JyNPGmObQpx1bKPYl8DpTgEeBzxtjbDvqCtX+KBVqInIHUAFcYXeW0WjBBxhjPnKur4lIs4hMMcY0BkqvZZzXahCRPcBljPxZHVah2BcRSQFeBP7JGLPRoqhBCeX/mwgTzHIep59TLyIeIBU4GZ54E+KkpUmC2hcR+QgjBxlXnDE8G1F0iCY4zwOfD7z/eeD3Zz9BRApFJD7wfjpwKRDWlTGDFMy+eIFngUeMMWH/B2qCxt2fCBbMch5n7t+ngddNZF6d6KSlScbdFxFZBNwP3GiMidyDCmOMPsZ5MDLmuR44CKwDMgKfr2DkTlUwcmOTXcDOwNs1due+gH25AxgCdpzxWGh39vPdn8DH7wCtQB8jY6rX2Z09kOujwAFGznH8U+Bz32OkOADigKeAQ8BmoMzuzBewLxcH/tv3MPJXyF67M1/AvqwDms/4/Xje7syjPXSpAqWUcigdolFKKYfSgldKKYfSgldKKYfSgldKKYfSgldKKYfSgldKKYfSgldKKYf6/5X7Cy051eJuAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "source": [],
+ "outputs": [],
+ "metadata": {}
+ }
+ ],
+ "metadata": {
+ "orig_nbformat": 4,
+ "language_info": {
+ "name": "python",
+ "version": "3.8.2",
+ "mimetype": "text/x-python",
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "pygments_lexer": "ipython3",
+ "nbconvert_exporter": "python",
+ "file_extension": ".py"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.2 64-bit ('r-py-test': conda)"
+ },
+ "interpreter": {
+ "hash": "7508a6b53ffb04362d156591e4bfb20c197555e37f3cce3b1ec90fd899bbfe63"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/notebook/notebook_archive/Jun09262021/SVM_test.ipynb b/notebook/notebook_archive/Jun09262021/SVM_test.ipynb
deleted file mode 100644
index 5cacee6..0000000
--- a/notebook/notebook_archive/Jun09262021/SVM_test.ipynb
+++ /dev/null
@@ -1,430 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 115,
- "source": [
- "import pandas as pd\n",
- "## Utils and Library for notebook\n",
- "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
- "\n",
- "# Root data path\n",
- "DATA_PATH = '../data/'\n",
- "\n",
- "#Data loading\n",
- "df = pd.read_csv(\"resultFiles/featureExtractionV2_by_Jun/LongDiseaseDuration/CD4.Ranksum.RFECV.act.csv\", engine='c', index_col=0)\n",
- "meta_data = pd.read_csv(DATA_PATH+'annotation_metadata/EPIC_HCvB_metadata_baseline_updated-share.csv')\n"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 116,
- "source": [
- "## Utils and Library for notebook\n",
- "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
- "import itertools\n",
- "def _LoadDiseaseDuration(df, meta_data, returntype='long'):\n",
- " \"\"\"\n",
- " df : Expression or activation score matrix\n",
- " meta_data : meta data which contains duration and sample ID\n",
- " output: long DD samples and short DD samples by list, or healthy samples and short DD samples by list\n",
- " \"\"\"\n",
- " # Sample by disease category\n",
- " sample_list, sample_category = exttoolkit.get_sample_name_by_category(dataframe=meta_data, sampleColumn='HCVB_ID', dataColname='DiseaseCourse')\n",
- " \n",
- " # Sort by disease category and exclude uknown samples\n",
- " patient_samples = [] # patient samples\n",
- " healthy_samples = [] # healthy samples\n",
- " for samples, category in zip(sample_list, sample_category):\n",
- " if category=='Healthy':\n",
- " healthy_samples = samples\n",
- " else:\n",
- " if category!='Unknown':# Excluding unknown samples\n",
- " patient_samples.append(samples)\n",
- "\n",
- " patient_samples = list(itertools.chain(*patient_samples)) # flatten\n",
- " patient_samples = list(set(patient_samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
- " healthy_samples = list(set(healthy_samples).intersection(df.columns.tolist())) # intersected with act score matrix\n",
- " patient_meta = meta_data.loc[meta_data['HCVB_ID'].isin(patient_samples)] # Make patient metadata\n",
- "\n",
- " longDD_samples, shortDD_samples = exttoolkit.get_sample_name_by_contValues(patient_meta, 'HCVB_ID', 'DiseaseDuration', 25)\n",
- " longDD_samples = list(set(longDD_samples.values.tolist()).intersection(df.columns.tolist())) # intersected with act score matrix\n",
- " shortDD_samples = list(set(shortDD_samples.values.tolist()).intersection(df.columns.tolist())) # intersected with act score matrix\n",
- "\n",
- " if returntype=='long':\n",
- " return longDD_samples, shortDD_samples\n",
- " elif returntype=='healthy':\n",
- " return healthy_samples, shortDD_samples"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 117,
- "source": [
- "df_cd4 = df.copy()\n",
- "longDD_samples, shortDD_samples = _LoadDiseaseDuration(df_cd4, meta_data, 'long')\n",
- "df_cd4 = df_cd4[longDD_samples+shortDD_samples]\n",
- "df_cd4 = df_cd4.subtract(df_cd4.median(axis=1), axis=0)\n",
- "\n",
- "\n",
- "X = df_cd4.T.values # Training sample\n",
- "y = [0]*len(longDD_samples)+[1]*len(shortDD_samples) # Training y\n",
- "X.shape"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "(86, 402)"
- ]
- },
- "metadata": {},
- "execution_count": 117
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 118,
- "source": [
- "import matplotlib.pyplot as plt\n",
- "from sklearn.svm import SVC\n",
- "from sklearn.model_selection import StratifiedKFold\n",
- "from sklearn.feature_selection import RFECV\n",
- "\n",
- "## Reference: \n",
- "## https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html\n",
- "\n",
- "estimator = SVC(kernel=\"linear\") # linear\n",
- "min_features_to_select = 1\n",
- "rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(2),\\\n",
- " scoring='accuracy', min_features_to_select=min_features_to_select)\n",
- "rfecv.fit(X, y)\n",
- "\n",
- "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n",
- "\n",
- "# Plot number of features VS. cross-validation scores\n",
- "plt.figure()\n",
- "plt.xlabel(\"Number of features selected\")\n",
- "plt.ylabel(\"Cross validation score (nb of correct classifications)\")\n",
- "plt.plot(range(min_features_to_select, len(rfecv.grid_scores_) + min_features_to_select), rfecv.grid_scores_)\n",
- "plt.show()"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Optimal number of features : 259\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEaCAYAAAAL7cBuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA0nUlEQVR4nO3deXicdbn/8fcnaZY2S7ekQPdSilgQClRAQCweRVQUNwSUIxw98gMV3D3iinhcOeJRRBQFWVw4KB7lVBRQCqKytVCWFipQoAuFJt2ydpKZ3L8/nu+kk3SSedJklib367rmyjzbzD1TeO757jIznHPOuf7Kih2Ac8650uQJwjnnXFaeIJxzzmXlCcI551xWniCcc85l5QnCOedcVp4gnHPOZeUJwjnnXFbj4pwkaTHwamA60Ak8DtxhZtvyGJtzzrkiGrQEIenfJD0EXASMB9YAm4HjgT9Luk7S7PyH6ZxzrtBylSAmAMeZWWe2g5IWAQuAdSMcl3POuSKTz8XknHMum1iN1JK+LaleUoWkv0hqknRWvoNzzjlXPHF7MZ1kZi3AKcBzwAHAp/MVlHPOueKLmyDSbRVvBn5tZjvyFI9zzrkSEaubK7BU0pNEXVzPl9QI7MxfWM4554otdiO1pCnADjNLSZoA1JvZi3mNzjnnXNHELUEAHATMlZR5zfUjHI9zzrkSEXck9Q3AfGAlkAq7DU8Qzjk3asWqYpL0BLDQfNCEc86NGXF7MT0O7JvPQJxzzpWWuG0QDcBqSQ8AifROM3trXqJyzjlXdHETxMX5DMI551zpGUo3132AV4bNB8xsc96ics45V3Rx52J6N/AAcBrwbuB+Se/KZ2DOOeeKK24vpkeA16dLDWEk9Z/N7LA8xxdbQ0ODzZ07t9hhOOfcXmXFihXNZtaY7VjcNoiyflVKWyix5Urnzp3L8uXLix2Gc87tVSQ9P9CxuAniT5JuA34Vtk8Hbh1uYM4550pXrARhZp+W9E7guLDrKjP73/yF5Zxzrthiz8VkZjcDN+cxFueccyVk0AQh6W9mdrykVqK5l3oPAWZm9XmNzjnnXNEMmiDM7Pjwt64w4TjnnCsVccdB3BBnn3POudEjblfVgzM3wpoQR458OM4550pFrjaIi4DPAeMltaR3A13AVXmOzTlXYDu7U1z7j+foSCSLHYobgn0njuc9R88e8dfN1QbxDeAbkr5hZhcN9cUlnQx8DygHfmpm3+x3fA5wDdAIbAXOMrMN4VgKeCycus5njnUu/+5bu4Vv/vFJAKQiB+NiWzRrUuETRJqZXSRpMrAAqM7Y/9eBrpFUDlwBvB7YADwo6RYzW51x2n8B15vZdZJeC3wD+NdwrNPMFg3lwzjnhmdzazSb/z2fOZFZUyYUORpXbHGXHP134KPATKJlR48B7gVeO8hlRwFPm9na8Bo3AqcCmQliIfCJ8HwZ8Lv4oTvnRlpzW5QgGmqrihyJKwVxG6k/SjTV9/NmdiJwOLA9xzUzgPUZ2xvCvkyPAO8Iz98O1EmaGrarJS2XdJ+kt2V7A0nnhnOWNzU1xfwozrmBNLUmqK0ax/jK8mKH4kpA3ASx08x2AkiqMrMngZeNwPt/CniNpIeB1wAbgVQ4NsfMFgPvAf5b0vz+F5vZVWa22MwWNzZmnYzQOTcETa0JGuu89OAicafa2CBpElEV0B2StgEDzgAYbARmZWzPDPt6mdkLhBKEpFrgnWa2PRzbGP6ulXQXUanlmZjxOuf2QHNbgobaymKH4UpE3Ebqt4enF0taBkwE/pTjsgeBBZLmESWGM4hKA70kNQBbzawHuIioRxOhQbzDzBLhnOOAb8f7SM65PdXUmuBl+/rECS4St5H6GGCVmbWa2d2S6ol+0d8/0DVmlpT0EeA2om6u15jZKkmXAMvN7BZgCVE3WgP+Cnw4XP5y4MeSeoiqwb7Zr/eTc3uFf77UykstO4sdRmybWxIcf0BDscNwJSJu
FdOVwBEZ221Z9u3GzG6l37oRZvaljOe/AX6T5bp/AK+IGZtzJamzK8Up3/8bXameYocyJN691aXFTRCyjLVJzawnTLfhnBvA5taddKV6uPC1B3DCgXtHJ4qyMvGKGROLHYYrEXFv8mslXUhUagD4ELA2PyE5Nzo0hUFnR86dwuK5U4ocjXNDF7eb63nAsUSNzRuAo4Fz8xWUc6NBOkE0+qAzt5eK24tpM1EvJOdcTL2jkuu826jbO+WazfUzZvZtSZfTd0U5AMzswrxF5txerqk1QZlgao2XINzeKVcJIt21dHm+A3FutGlq62JKTSXlZT4tqts75UoQpwNLgUlm9r0CxOOKxMxIJHd1x6waV4YGmO+5O9WDgHHlu5qwenpswO6cma+V6jG697DbZ2V5GT1mJHt2K8yWpM0tO33SO7dXy5UgjpQ0HXi/pOuJFgvqZWZb8xaZK6jzfr6C21a91Lt97PyprHmxld9+6FjmTK3p3f9Sy06O/9adjK8o5/7Pva53Urezrr6ffzyzJetrv+9Vc7jk1EPo6TGW/Ncy1m/t3KMYD9q3ji3tXb2Nv3uDvaV7q3PZ5EoQPwL+AuwPrKBvgrCw340Cj27YwWEzJ3LyIfvx24c29N7sn9jU0idBPLO5je6U0Z1K8sKOTuY31vZef9TcKZx40LQ+r/vrFet5dMMOALZ3drN+aycnLdyHw2dPHlJ8/3immXueagbgLYdNZ+F+9Xv8WQvpNZ4g3F4s14py3we+L+lKMzu/QDG5AjMzmtsSnLpoBucvmc9zze08tbkNYLdf601tu7abWxPMb6ylsytFWyLJkoMaOX9J30l3//lSKw8+t7XPa7110XROOXT6kGKsqSrvTRCnHTnTf5k7VwC5ejHVm1kL8HlJu4308Sqm0WFHZzfdKeud5jmzW2ZTW1efczMTRjpZDLbITENtJU2tid4kNNB5uWRe4/X6zhVGriqmXwKnEFUvGV7FNCrtunFHiSFzYFeuEgTsWqYy2zoCjXVVJJI9tCWSuwaO7cF6A5nX+HoFzhVGriqmU8LfeYUJxxVD/xt8Q90gCaI1wX4Tq9ncmuhNFoONGE7/2m9qTfSeN5wSRJlgSo0PPHOuEGJNtSHpOEk14flZki6TNDu/oblCSd+4p4XEkHmjb27rmyCa27qYVlfVW3WUec5AJYj0ezS3JagcV0Z99dDneUy/zpSaKh9X4FyBxJ2L6UqgQ9JhwCeJVna7IW9RuYJqDu0M6V/puUoQDbVVNNRW9V6XPifbL/v0aza3Rd1TG2urBhxfMZiaynKqK8p8tTPnCijuT7mkmZmkU4EfmNnVkj6Qz8DcyPvdwxu5+m/PUlEuXnvQNFoTSZ7Z3M6jG7ZTUS4mjq8A+pYENm7v5Kq/PsO5J8znF/c/zxObWjhs5kRSZty3dgtvufxvbNrRyZSaSirKd/+9kX6tr9/6BC07u9k/dIsdKkk01lV5+4NzBRQ3QbRKugg4CzhBUhlQkb+wXD4sfXQTa5vaaO9K8dC67b37D55ez5lHze79ZV9XNY4L/2UBMyZV8x83P8ZvH9rIuSfM5/crXwDgbYfPYEdnN2Xh/Ma6Ko6al30666k1lZxz7FzWbe0A4M2v2G+P4//IiQf4vEbOFVDcBHE60XrSHzCzF0P7w6X5C8vlQ1NbgsNnT+a+tVv6TFfxmZMP6jOgSxKfeP2BAKxcv4M7Vr8IRL2W3nzofhyz/1QA3nDwvjnfUxIXv/XgEYn/9Fd6s5dzhRS3DaIV+J6Z3SPpQGAR8Ku8ReXyork1wbT6qt16EQ22XkFjXRVb27tI9RhNbQlf28C5MSRugvgrUCVpBnA78K/AtfkKyo08s103+P7rEwxWr99YW0mPwQvbO2ndmfQ2AOfGkLgJQmbWAbwD+KGZnQYckr+w3Ehr2ZmkK9kTNfRmlAJyjStIJ4QnNrVE216CcG7MiJ0gJL0KeC/whyFe60pA5iC1zCqmXOMK0uc+sak12vbV0Zwb
M+Le5D8KXAT8r5mtkrQ/sCx/YbmRljmYbSjTVuxegqjOU4TOuVITK0GY2V/N7K1m9q2wvdaXGy1tqR7jmr89S2dXilSP8bU/PAHsXoLINfAsfe4DYUZWL0E4N3bE6uYqqRH4DHAw0PsT0sxem6e43DD93yMvcMnS1WxpT/CmV+zHYxujNRlmTRnPUfOmcMiMeuZOrWHRrEmDvk5N1TheOXcyT77YysL96r0NwrkxJO44iF8A/0M0s+t5wNlAU76CcsO3uXUnADu7e3rbH24+/1gmVI7jkBkTWXrBq2O/1q/POzYvMTrnSlvcNoipZnY10G1md5vZ+4GcpQdJJ0taI+lpSZ/NcnyOpL9IelTSXZJmZhw7W9JT4XF27E/kAGhPpACYUFk+6Gyrzjk3kLgJojv83STpzZIOB7LPrRBIKgeuAN4ILATOlLSw32n/BVxvZocClwDfCNdOAb4MHA0cBXxZ0tDWqBzjOrqSAFRXlO+ajM/bD5xzQxA3QfynpIlEM7l+Cvgp8PEc1xwFPB0atLuAG4FT+52zELgzPF+WcfwNwB1mttXMtgF3ACfHjNUBbYkoQSS6UzS1JqipLGdC5dCn2XbOjV2x7hhmtjQ83QGcGPO1ZwDrM7Y3EJUIMj1CNPjue8DbgTpJUwe4dkb/N5B0LnAuwOzZPk9PpnSpob0rRXNbwkdAO+eGLNea1JcTLS2a1Qh0df0U8ANJ5xBN57ERSMW92MyuAq4CWLx48YBxjkXpdoeOrlTvGg7OOTcUuUoQy4fx2huBWRnbM8O+Xmb2AlEJAkm1wDvNbLukjcCSftfeNYxYxpQXd+xk5frtADzX3M69a7fwxkNyz7zqnHOZcq1Jfd0wXvtBYIGkeUSJ4QyiKcN7SWoAtppZD9FI7WvCoduAr2c0TJ8UjrsYvnzL473P7127BYA5U2uKFY5zbi8Vd03qOyRNytieLOm2wa4xsyTwEaKb/RPATWGajkskvTWctgRYI+mfwD7A18K1W4GvEiWZB4FLwj4XQ1siydSayt5BcDMnj+eTJx1Y3KCcc3uduN1aGs1se3rDzLZJmpbrIjO7Fbi1374vZTz/DfCbAa69hl0lCjcE3SnjgGm1VI6L8v8r507Juhyoc84NJu5dIxVWkQOiAW4M0njtiivVY1SUlzGhshzIPd+Sc85lE7cE8Xngb5LuBgS8mtC91JWeZKqH8qpxWEjh3sXVObcn4o6D+JOkI4Bjwq6PmVlz/sJyw9GdMirKRUdX1GPYE4Rzbk/EHlobEsLSnCe6okv29DCurIyOrmiwnK/h4JzbE95yOQole4zycvVO2De5pqLIETnn9kaeIEahZMqoKBMH7FMLwNQar2Jyzg1d3HEQN8TZ50pDMtXDuPIyvvXOQ/nVB49h34lexeScG7q4JYiDMzfCVN5Hjnw4biQke6JG6tqqcbxq/tRih+Oc20sNmiAkXSSpFThUUkt4tAKbgd8XJEI3ZMkeo7xMxQ7DObeXGzRBmNk3zKwOuNTM6sOjzsymmpnPjVSiulNRLybnnBuOuHeRB8KCQQBImiTpbfkJyQ1XMoyDcM654YibIL5sZjvSG2Fepi/nJSI3bKkeo9xLEM65YYp7F8l2nq9fWaK6e3q8BOGcG7a4CWK5pMskzQ+Py4AV+QzM7ZlUj2GGt0E454Yt7l3kAqAL+B/gRmAn8OF8BeX2XLKnB4BxXoJwzg1T3Mn62oHPSqoJz12JSqaiKVzHeTdX59wwxR1Jfayk1UQrwyHpMEk/zGtkbo/0JghfIMg5N0xx7yLfBd4AbAEws0eAE/IVlNtz6Somb6R2zg1X7J+ZZra+367UCMfiRkCyJypB+Ehq59xwxe2qul7SsYBJqgA+SqhucqWlOxVKEN6LyTk3THHvIucR9VqaAWwEFuG9mErSrjYIL0E454YnZwkizNz6PTN7bwHiccPkVUzOuZGSswRhZilgjqTKAsTjhmlXI7VXMTnnhiduG8Ra4O+SbgF6x0GY
2WV5icrtMR8H4ZwbKXETxDPhUQbU5S8cN1zpKiYvQTjnhituG8SB3gaxd0iGXkzeBuGcG668tkFIOlnSGklPS/psluOzJS2T9LCkRyW9KeyfK6lT0srw+NFQ33us6vZeTM65EZK3NohQ8rgCeD2wAXhQ0i1mtjrjtC8AN5nZlZIWArcCc8OxZ8xsUdwP4iLeSO2cGyn5bIM4CnjazNYCSLoROBXITBAG1IfnE4EXYr62G4B3c3XOjZS4s7l+BUBSbdhui3HZDCBzeo4NwNH9zrkYuF3SBUAN8LqMY/MkPQy0AF8ws3vixDqabW7dyd1rmigvE69fuA911RV9jt+/dgu/f3gj4COpnXPDFytBSDoEuAGYErabgfeZ2aphvv+ZwLVm9h1JrwJuCO+1CZhtZlskHQn8TtLBZtbSL65zgXMBZs+ePcxQSt8P7nya6+99HoAvnrKQDxw/r/dYMtXD6Vfd17vtbRDOueGK+zPzKuATZjbHzOYAnwR+kuOajcCsjO2ZYV+mDwA3AZjZvUA10GBmCTNLzxy7gqh668D+b2BmV5nZYjNb3NjYGPOj7L027djJ/MYaqivKeHFHZ59jW9u7+mz7bK7OueGKmyBqzGxZesPM7iKqEhrMg8ACSfNCD6gzgFv6nbMO+BcASS8nShBNkhpDIzeS9gcWEDWUj2lNrQmmTxpPY10VTa2JPsc299su9yom59wwxe7FJOmLRNVMAGeR44ZtZklJHwFuA8qBa8xslaRLgOVmdguhJCLp40QN1ueYmUk6AbhEUjfQA5xnZluH/OlGmea2BPMaamhLJGlu69rtWCYfSe2cG664CeL9wFeA3xLdyO8J+wZlZrcSdV3N3PeljOergeOyXHczcHPM2MYEM6OpNUFjXRXtiSTPb+noc7x/icK7uTrnhituL6ZtwIV5jsUNoi2RJJHsoaG2krZEkhXPb+tzvH+Jwru5OueGK+6a1HdImpSxPVnSbXmLyu0mXUJorKuisbaKrR1dvdNqpI/XVJb3bnsjtXNuuOJWMTWY2fb0hpltkzQtPyGNbolkipd2JJg1ZTxS7pt4qsdYt7WDx1+Ievg21lbTnkhhBo9s2M6UmioA1m1tp6GuivZQ9TTOq5icc8MUN0H0SJptZusAJM0haotwQ3TBLx/m9tUvcfFbFnLOcfNynv+tPz3JVX/d1R9g34nVdKWi5cDfeeW9fc49Zv8pzJ4ygXueavYShHNu2OImiM8Df5N0NyDg1YQBam5oNm6Pxi+8sGNn7PP3qa/ioje+nIkTKjhgWi1zpk7gR2cdyc7uVJ9zF82axLT6KtY2tVM1rnyAV3TOuXjiNlL/SdIRwDFh18fMrDl/YY1enV3RTb2jKxn7/Gl11bzt8Bm9+yrKyzj5kH0HvOaQGROHF6RzzhG/BEFICEvzGMuY0B4SQ0cilePMcH4iyYRKLw045wrPWzILLJ0Y2mOWIDq6Up4gnHNFMWiCkJS7FdXFZma7ShBdMUsQXUkmVMUu6Dnn3IjJVYL4DYCkvxQgllEvkewhLNdAeyJ+G0SNlyCcc0WQ66dpmaTPAQdK+kT/g4OtKOd2l1lqiF2CSCSZUOklCOdc4eW685wBvC2cF3clOZfFhm0dvO2Kv/duZ0sQ/37dcp7Y1MJPz17Mz/7+LIvnTPE2COdc0QyaIMxsDfAtSY+a2R8LFNOo9NC67b3zJU2eULFbN9dUj/HnJ14CYOX67fzfI5vY0tZFsseo8TYI51wRxO3F9A9Jl0laHh7fkeSd7YegOWO21WhG1r4liG0duybbe25LO53dKZ7fGk2b4SUI51wxxE0Q1wCtwLvDowX4Wb6CGo2aMtZraKitorM7RU/PrtlKMtdzeGJTKwDrtniCcM4VT9y6i/lm9s6M7a9IWpmHeEatzBJEQ200wV5nd6q3+ihzPYcnNkUT83WF2Vq9kdo5VwxxSxCdko5Pb0g6Dugc5HzXT2YJorEuShCZg+XSJYh966t3W/ynpspL
EM65wov70/Q84PqMdodtwNn5CWl0as6SIDoSqd6+Yemk8PL96nixpe9Efl6CcM4VQ9zJ+h4BDpNUH7Zb8hrVKNSUpYopswTR1JqguqKMuQ01sKapz7XeBuGcK4Yh/TT1xJDbph2dfOuPT5JI9nDiy6Zx79ot7N9Q02dJ0Km1lQB8delqTl00g2c2t/FsczsNtVW9pYtMXoJwzhWD33lG2N1rmvjdyheoLC/jj4+/2OfYkpc1MqWmkoOn13P47Ems2tjCfWu39h4/fPYkjpvfwML9NnHEnEk8vbkNIWZOHl/oj+Gcc54gRlq6Kuno/adwz1N9l8x415EzOeXQ6QD874eO46LfPsavHljXe7yxtorDZk3i1o++unABO+fcAGIlCEnVwIeA44mWGv0bcKWZxVsWbQxpbkswcXwF0yfu/qu/sbZv9VH/6qSGLNVLzjlXLHFLENcTDZS7PGy/B7gBOC0fQe3NmtoSNNRW0lBXudux/gmgsbay37YnCOdc6YibIA4xs4UZ28skrc5HQHu7ptYEjXVVvTf7+upxtOyMeiv1LzHk2nbOuWKKO1DuIUnp9aiRdDSwPD8h7d2a27porKvuLS28fL96ACrHlVHXb9K9hn4lhv7bzjlXTIOWICQ9RtTmUEE0Yd+6sD0HeDL/4e19mlqjKqZ0CWL/xhpWrt9OQ20Vkvqc6yUI51wpy1XFdMpwXlzSycD3gHLgp2b2zX7HZwPXAZPCOZ81s1vDsYuADwAp4EIzu204seTDui0dtOzs5pAZ0QDzzq4UbYkkjXVVvSWIxtoqGmqrsjZA9y8xeBuEc66U5FoP4vn0c0nlwD65rul3/hXA64ENwIOSbjGzzLaLLwA3mdmVkhYCtwJzw/MzgIOB6cCfJR1oZvGWYSuQS29fw6oXdnDnJ5cAsDVM2T21ppLpE8czra6KQ2ZM5JmmdqbU7N5oXVM1jgXTaplWX8Uzm9uZVu8JwjlXOuLe7C8Avgy8BPSE3QYcOshlRwFPm9na8Bo3AqcCmQnCgPrwfCLwQnh+KnCjmSWAZyU9HV7v3jjxFsqOzm5e2rGrp29HWGe6pmoc4yvLeeDzrwPgpIP3HfA17vjEa/IbpHPO7aG4vZg+CrzMzLYM4bVnAOsztjcAR/c752Lg9pCAaoDXZVx7X79rZ/R/A0nnAucCzJ49ewihjYyORJL2rhTtiSQ1VeNoD8uI1vjUGM65USBuL6b1wI48vP+ZwLVmNhN4E3CDpLgxYWZXmdliM1vc2NiYh/AGl04I6Zla0yWI8T65nnNuFIj7U3ctcJekPwC905Ka2WWDXLMRmJWxPTPsy/QB4OTwWveGEdsNMa8tus4wG2tzW4I5U2vo8BKEc24UiftrfR1wB1BJtIJB+jGYB4EFkuZJqiRqdL4ly+v+C4CklwPVQFM47wxJVZLmAQuAB2LGWjDpEkR6/qX09N0TfIEf59woEHc9iK8M9YXNLCnpI8BtRF1YrzGzVZIuAZab2S3AJ4GfSPo4UYP1OWZmwCpJNxE1aCeBD5daDybYVaWUThBegnDOjSa5Bsr9BPi+mT2W5VgNcDqQMLNfZLs+jGm4td++L2U8Xw0cN8C1XwO+lusDFFKqx1jzYiszJo+nvnocHd1RQtjcmuDJF1vYsK0D8DYI59zokOun7hXAFyW9AnicqPqnmqjKpx64BsiaHEajXz6wji/+7nGOmjeF6/7tKMyi/b+4fx2X3/l073m+ApxzbjTINVBuJfBuSbXAYmA/oBN4wszW5D+80vJ8czsAW9u7+iwXurV912pxleVlVJTH7ojlnHMlK24bRBtwV35DKX3p7qxdyR46EtmbRAwrZEjOOZc3/lN3CJpCgkgkU70liPEVfauTulOeIJxzo4MniCFobo2qkhLJnt4eS3OmTihmSM45lzdDShCSxvTdsLcE0d1DRyhBzJ4SfSUHTKstWlzOOZcPsRKEpGPDCnJPhu3DJP0wr5GVmO5UD9vCbK1dqR7aQxvE
3IYaYNfCQM45N1rELUF8F3gDsAXAzB4BTshXUKVoa3sXZrBvfTWpHqNlZzewqwSxwEsQzrlRZigT463vt6vkRjbn06k/+DsAMyePB2Bb6No6L5Qg9q2vLk5gzjmXJ3HnhFgv6VjAJFUQTf/9RP7CKi3tiSQvtuxkWl0Vr335NJY/v623BPGKmRP52tsP4U2H7se0+ir2mzi+yNE659zIiJsgziNaOnQG0ayqtwMfzldQpSY9/uEzJx9EdypaL6mlM2qkrh5XznuPngPAkpdNK06AzjmXBzkTRFg69Htm9t4CxFOS0pPxNdRW9o6abtnZjQQV5SpmaM45lzc52yDCLKpzwpTdY1K6BNFYV0XVuGhgXEtnN5XlZUieIJxzo9NQFgz6u6RbgPb0zhwLBo0a6RJEY10Vm7ZHa1C37kxSNc7HGTrnRq+4CeKZ8Cgj90JBo05TawIJpkyopKoiSgotO7upqvBZW51zo9eQFgwKs7qmJ+8bM5rauphaU8m48jIqw0ytLZ1egnDOjW5xR1IfIulhYBXRam8rJB2c39CKb1t7F+fdsII7Vr9EQ20VQG+poWVntycI59yoFvcOdxXwCTObY2ZzCEuF5i+s0rDi+W38adWLNNRW8q4jZwL0JoWOrlRvg7Vzzo1GcdsgasxsWXrDzO4KS46OauneS1ef80pmTIoGwGWWGiq9BOGcG8Vi92KS9EXghrB9FlHPplEtc/xDWmbDtFcxOedGs7h3uPcDjcBvgZuBhrBvVGtuSzBxfEWfqqTMpOC9mJxzo1ncXkzbgAvzHEvJaWpL9Ck9QL8E4SUI59woFrcX0x2SJmVsT5Z0W96iKhFNrQka66r67Kv0BOGcGyPi3uEazGx7eiOUKEb9zHTNbV001vWdxjs9DgLwXkzOuVEtboLokTQ7vSFpDmD5Cal0NLXuXsUkqbfkkB5V7Zxzo1HcXkyfB/4m6W5AwKuBc/MWVQnoTvXQlkgyecLucxTWVI0jkezqU5pwzrnRJm4j9Z8kHQEcE3Z9zMyac10n6WSidSTKgZ+a2Tf7Hf8ucGLYnABMM7NJ4VgKeCwcW2dmb40T60hJJKN1H8Zn6ak0tSaa9ttLEM650SxuI/VxQKeZLQUmAZ8L1UyDXVMOXAG8EVgInClpYeY5ZvZxM1tkZouAy4m60aZ1po8VOjkAJLqjFVWzJYHeaTe8DcI5N4rF/Ql8JdAh6TDgE0Qzu16f45qjgKfNbK2ZdQE3AqcOcv6ZwK9ixpN3XWHluGw9lSZNqBjwmHPOjRZx73BJMzOiG/wVZnYFuaf9ngGsz9jeEPbtJpRG5gF3ZuyulrRc0n2S3jbAdeeGc5Y3NTXF/CjxJLqjBJFtOo266qhmzhOEc240i9tI3SrpIqIpNk6QVAZUjGAcZwC/CavXpc0xs42S9gfulPSYmT2TeZGZXUU0kSCLFy8e0V5V6TaIbNVI9dXRR0/1jPqOXM65MSzuT+DTgQTwATN7EZgJXJrjmo3ArIztmWFfNmfQr3rJzDaGv2uBu4DDY8Y6IhLJ0AaRpZRQPz5KEK07k4UMyTnnCipWgjCzF83sMjO7J2yvM7NcbRAPAgskzQvrWZ8B3NL/JEkHAZOBezP2TZZUFZ43AMcBq+PEOlIGK0Gkq5hadnYXMiTnnCuouFVMQ2ZmSUkfAW4j6uZ6jZmtknQJsNzM0sniDODG0MaR9nLgx5J6iJLYN82ssAkitEFk68V06MxJABw8vb6QITnnXEHlLUEAmNmtwK399n2p3/bFWa77B/CKfMaWy2BVTEfOmcw9nzmRmZPHFzos55wrmLwmiL3ZYFVMALOmTChkOM45V3CxEkQYKHcxMCdcI8DMbP/8hVZc6RKErxrnnBur4pYgrgY+DqwAUjnOHRW6kgMPlHPOubEgboLYYWZ/zGskJSbhCcI5N8bFTRDLJF1KNFdSIr3TzB7KS1QlYFcvJp9vyTk3NsVNEEeHv4sz9hnw2pENp3QM1ovJOefGgrjT
fZ+Y+6zRJZHsoUwwrkzFDsU554oi7nTfEyVdlp4YT9J3JE3Md3DFlEj2UDWuHMkThHNubIpbf3IN0Aq8OzxagJ/lK6hSkOhOeRdX59yYFrcNYr6ZvTNj+yuSVuYhnpIRlSA8QTjnxq64d8BOScenN9IrzOUnpNKQSPb4kqLOuTEtbgnifOC60O4gYCtwTr6CKgVdoQ3COefGqri9mFYCh0mqD9st+QyqFCSSKa9ics6NaYMmCElnmdnPJX2i334AzOyyPMZWVN4G4Zwb63KVIGrC32zrT4/q9TYT3T3ei8k5N6YNmiDM7Mfh6Z/N7O+Zx0JD9ajV2Z2iobay2GE451zRxP2JfHnMfaNGe1eSCVW+XIZzbuzK1QbxKuBYoLFfO0Q90TKio1ZHIkVN5aj+iM45N6hcP5ErgdpwXmY7RAvwrnwFVQo6upJMqPQShHNu7MrVBnE3cLeka83s+QLFVHRmRkdXiglegnDOjWFxfyJ3hPUgDgaq0zvNbFRO992V6iHZY9R4G4RzbgyL20j9C+BJYB7wFeA54ME8xVR0HYloLQgvQTjnxrK4CWKqmV0NdJvZ3Wb2fkbxYkEd3VGCqPE2COfcGBb3Dtgd/m6S9GbgBWBKfkIqvo5EEoDxXoJwzo1hcRPEf4aJ+j5JNP6hHvh43qIqsvauUIKo8gThnBu74k7WtzQ83QGM+uVH0yUI7+bqnBvLcg2Uu5xB5lwyswtHPKIS0NHlbRDOOZerkXo5sIKoa+sRwFPhsYhoEN2gJJ0saY2kpyV9Nsvx70paGR7/lLQ949jZkp4Kj7Pjf6Tha+8KJQivYnLOjWG5BspdByDpfOB4M0uG7R8B9wx2raRy4Arg9cAG4EFJt5jZ6ozX/3jG+RcAh4fnU4AvA4uJSjArwrXbhvwJ90C6BOHdXJ1zY1ncbq6TiRqm02rDvsEcBTxtZmvNrAu4ETh1kPPPBH4Vnr8BuMPMtoakcAdwcsxYh63d2yCccy52L6ZvAg9LWka05OgJwMU5rpkBrM/Y3gAcne1ESXOIBuHdOci1M7Jcdy5wLsDs2bNzfYZYHtuwg1UvRAvmeQnCOTeWxe3F9DNJf2TXDf4/zOzFEYzjDOA3ZpYaykVmdhVwFcDixYuHvYBRWyLJ23/4d5I9xtSaSirKfcEg59zYNegdUNJB4e8RwHSiX/Xrgelh32A2ArMytmeGfdmcwa7qpaFeO2I2t+wk2WN86qQD+ePHXp3vt3POuZKWqwTxSeCDwHeyHDMGn27jQWCBpHlEN/czgPf0PykkocnAvRm7bwO+LindznEScFGOWIetqTUBwKJZk5lWV53jbOecG91y9WL6YPg75MFxZpaU9BGim305cI2ZrZJ0CbDczG4Jp54B3GhmlnHtVklfZdeEgJeY2dahxjBUzW1dADTWVeX7rZxzruTlGij3jsGOm9lvcxy/Fbi1374v9du+eIBrrwGuGez1R1pT604AX4vaOefIXcX0lkGOGTBogtjbNLd1UV4mJk/wBOGcc7mqmP6tUIGUgqbWBFNrKikrU7FDcc65oos9EixM891/RblL8hFUsTS1Jbz9wTnnglgd/cPUGqcDFxANlDsNmJPHuIqiuS1BQ60nCOecg/hTbRxrZu8DtpnZV4BXAQfmL6ziaGr1EoRzzqXFTRCd4W+HpOlEK8ztl5+QisPMaPYqJuec6xW3DWKppEnApcBDRD2YfpKvoIphR2c33SnzKibnnAvizsX01fD0ZklLgWoz25G/sAqvuS0aRe0lCOeci8RtpH5U0uckzTezxGhLDgCbwzQbPkjOOecicdsg3gIkgZskPSjpU5JGZn7tEpGeZmOalyCccw6ImSDM7Hkz+7aZHUk04d6hwLN5jazAmnpLEJ4gnHMOhjZQbg7RWIjTgRTwmXwFVUjbO7o47Uf3sqW9i4pyMXF8RbFDcs65khArQUi6H6gAbgJOM7O1eY2qgMrKxIJ9alkAHDJjIpJPs+GccxC/BPE+M1uT
10iKpL66gh++98hih+GccyUnbhvEqEwOzjnnBuaLLjvnnMvKE4Rzzrms4g6UO01SXXj+BUm/lXREfkNzzjlXTHFLEF80s1ZJxwOvA64GrsxfWM4554otboJIhb9vBq4ysz8APieFc86NYnETxEZJPyYaJHerpKohXOucc24vFPcm/27gNuANZrYdmAJ8Ol9BOeecKz6ZWe6TpPnABjNLSFpCNBfT9SFZlARJTcDze3h5A9A8guGMFI9raDyuofG4hma0xjXHzBqzHYibIFYCi4G5wK3A74GDzexNwwiqZEhabmaLix1Hfx7X0HhcQ+NxDc1YjCtuFVOPmSWBdwCXm9mnGWVLjjrnnOsrboLolnQm8D5gadjn054659woFjdB/BvwKuBrZvaspHnADfkLq+CuKnYAA/C4hsbjGhqPa2jGXFyx2iAAJFUCB4bNNWbWna+gnHPOFV/cRuolwHXAc4CAWcDZZvbXPMbmnHOuiOImiBXAe9LTfks6EPhVWILUOefcKBS3DaIic00IM/sno6CRWtLJktZIelrSZ4scy3OSHpO0UtLysG+KpDskPRX+Ti5AHNdI2izp8Yx9WeNQ5Pvh+3s0nxM4DhDXxZI2hu9spaQ3ZRy7KMS1RtIb8hjXLEnLJK2WtErSR8P+on5ng8RV1O9MUrWkByQ9EuL6Stg/T9L94f3/J1RpI6kqbD8djs8tcFzXSno24/taFPYX7L/98H7lkh6WtDRsF+b7MrOcD+BnwE+BJeHxE+CaONeW6gMoB54B9ieaV+oRYGER43kOaOi379vAZ8PzzwLfKkAcJwBHAI/nigN4E/BHomrHY4D7CxzXxcCnspy7MPx7VgHzwr9zeZ7i2g84IjyvA/4Z3r+o39kgcRX1OwufuzY8rwDuD9/DTcAZYf+PgPPD8w8BPwrPzwD+J0/f10BxXQu8K8v5BftvP7zfJ4BfAkvDdkG+r7gliPOA1cCF4bEaOD/mtaXqKOBpM1trZl3AjcCpRY6pv1OJ2n4If9+W7ze0qF1pa8w4TiUaUW9mdh8wSVJexscMENdATgVuNLOEmT0LPE30752PuDaZ2UPheSvwBDCDIn9ng8Q1kIJ8Z+Fzt4XNivAw4LXAb8L+/t9X+nv8DfAv0sgvHD9IXAMp2H/7kmYSTZT607AtCvR95UwQksqBR8zsMjN7R3h818wSe/qmJWIGsD5jewOD/w+UbwbcLmmFpHPDvn3MbFN4/iKwT3FCGzCOUvgOPxKK+NdkVMEVJa5QnD+c6NdnyXxn/eKCIn9nobpkJbAZuIOotLLdosG4/d+7N65wfAcwtRBxmVn6+/pa+L6+q2ii0j5xZYl5pP038BmgJ2xPpUDfV84EYWYpYI2k2Xv6Ji6W483sCOCNwIclnZB50KIyY7w+yXlUKnEEVwLzgUXAJuA7xQpEUi1wM/AxM2vJPFbM7yxLXEX/zswsZWaLgJlEpZSDCh1DNv3jknQIcBFRfK8kmqT0PwoZk6RTgM1mtqKQ75sWt4ppMrBK0l8k3ZJ+5DOwAthI1F03bWbYVxRmtjH83Qz8L9H/OC+li63h7+YihTdQHEX9Ds3spfA/dQ9Ru1i6SqSgcUmqILoJ/8LMfht2F/07yxZXqXxnIZbtwDKiQbiTJI3L8t69cYXjE4EtBYrr5FBVZ6HG5GcU/vs6DnirpOeIqsFfC3yPAn1fsVeUA04BLiH6xZF+7M0eBBaE3gCVRA06RUl6kmq0a0nXGuAk4PEQz9nhtLOJJkkshoHiuAV4X+jRcQywI6NaJe/61fm+neg7S8d1RujRMQ9YADyQpxhEtMLiE2Z2Wcahon5nA8VV7O9MUqOkSeH5eOD1RO0jy4B3hdP6f1/p7/FdwJ2hRFaIuJ7MSPIiqufP/L7y/u9oZheZ2Uwzm0t0j7rTzN5Lob6vHC3nBwDHZdl/PDB/OK3jpfAg6onwT6I60M8XMY79iXqQPAKsSsdCVHf4F+Ap4M/AlALE8iuiqoduorrN
DwwUB1EPjivC9/cYsLjAcd0Q3vfR8D/Gfhnnfz7EtQZ4Yx7jOp6o+uhRYGV4vKnY39kgcRX1OyNaKuDh8P6PA1/K+H/gAaLG8V8DVWF/ddh+Ohzfv8Bx3Rm+r8eBn7Orp1PB/tvPiHEJu3oxFeT7GnSgXOhze5GZPdZv/yuAr5vZWwa82Dnn3F4tVxXTPv2TA0DYNzcvETnnnCsJuRLEpEGOjR/BOJxzzpWYXAliuaQP9t8p6d+BonS7cs45Vxi52iD2Iepy2cWuhLCYaGqKt5vZi3mP0DnnXFHEnc31ROCQsLnKzO7Ma1TOOeeKLtY4CDNbZmaXh4cnBzcgSSbpOxnbn5J08Qi99rWS3pX7zGG/z2mSnpC0LMuxSxXN9nnpHrzuImXMnlpqJC1Jzxa6B9d+TNKEQr2fK4y4A+WciysBvENSQ7EDyZQx6jSODwAfNLMTsxw7FzjUzD69B2EsIhqLEFsYiLU3/H/6MWBICcKVvr3hPzy3d0kSrZH78f4H+pcAJLWFv0sk3S3p95LWSvqmpPcqmp//MUnzM17mdZKWS/pnmKcmPcnapZIeDJOq/b+M170nTAuzOks8Z4bXf1zSt8K+LxENMru6fykhvE4tsELS6WH07c3hfR+UdFw47yhJ9yqav/8fkl4WRutfApyuaF2B0xWtzfCpjNd/XNLc8Fgj6XqiAVqzJH064/Ol1yqokfQHRWsYPC7p9Cyf8UJFa0I8KunGjOuuCd/vw5J2m8V4oHPCd/1f4f0elXSBpAuB6cCydKlL0knhO3hI0q8VzQmVXoPlSUkPAe/o/76uxOR79J8/xtYDaAPqida3mAh8Crg4HLuWjLn1gbbwdwmwnWgNgyqi+WS+Eo59FPjvjOv/RPTDZgHRiOpqol/1XwjnVAHLidY0WAK0A/OyxDkdWAc0AuOIRsy+LRy7iwFGxqZjDs9/STTJIsBsomktCJ9/XHj+OuDm8Pwc4AcZ119MxtoMRMlgbnj0AMeE/ScRJV2Fz76UaH2MdwI/ybh+YpZ4X2DXKNtJ4e/XgbPS+4hmE6ih70jdgc45n2ga6fTnS48Qf46wngnQAPwVqAnb/wF8KfxbrQ//diJa02Bpsf+b9cfAj6EUu52Lxcxawq/fC4HOmJc9aGEuG0nPALeH/Y8BmVU9N1k00dxTktYSzbR5EnBoRulkItFNqAt4wKL1Dfp7JXCXmTWF9/wF0U33dzHjhejmv1C7ptuvD7+UJwLXSVpANN3Fnqy++LxF6wxA9PlOIpoKAqJSzALgHuA7ofSz1MzuyfI6jwK/kPQ7dn22k4gmgEuXXqqJElymgc55HdGCNEkAM8u2RscxRAsQ/T18N5XAvUT/Vs+a2VMAkn5OlNxdifIE4fLlv4GHiGbATEsSqjVDvXplxrHM9UV6MrZ76Pvfaf9ud0b0a/QCM7st84CkJUQliHwpI/qVv7Pf+/4AWGZmb1e0FsNdA1zf+30E1RnPM+MW8A0z+3H/F1C01OWbgP+U9Bczu6TfKW8mSnxvAT6vaJocAe+0jGWEw2tlrjcy0DkDfJS+YRGtp3Bmv2sXxbnYlQ5vg3B5EX5Z3kTU4Jv2HHBkeP5W9uyX9WmSykK7xP5EE8vdBpyvaHprJB2oaFbcwTwAvEZSg6JFsc4E7h5iLLcDF6Q3Mm6AE9k1/fI5Gee3Ei3/mfYc0TKq6Rv9vAHe5zbg/Rn1+DMkTZM0Hegws58Dl6ZfKyOeMmCWmS0jquaZSFT6uA24QOFuL+nwAd4z2zl3AP9PodFf0pQsn+0+4DhJB4RzaiQdCDwJzNWuNqU+CcSVHk8QLp++Q1QfnfYTopvyI0RrAOzJr/t1RDf3PwLnhV/vPyVqhH5I0uPAj8lROg7VWZ8lmjb5EWCFmQ11OvULgcWhsXY10dK8EK1H/Q1JD/eLYxlRldTK0KB8MzBF0irgI0T1/NlivZ2oveNeSY8RtQHUAa8AHlC0CtqXgf/sd2k58PNwzcPA9y1a6+CrRMn5
0fDeX83ytgOd81Oif4NHw7/je8L+q4A/SVoWqu3OAX4l6VFC9VL4tzoX+ENopC7W+iYuplgD5Zxzzo09XoJwzjmXlScI55xzWXmCcM45l5UnCOecc1l5gnDOOZeVJwjnnHNZeYJwzjmX1f8HF41yqtYfi3kAAAAASUVORK5CYII=",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- }
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 119,
- "source": [
- "rfecv.n_features_"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "259"
- ]
- },
- "metadata": {},
- "execution_count": 119
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 120,
- "source": [
- "import numpy as np\n",
- "selected_features = df_cd4.index[np.where(rfecv.ranking_==1)] # Top100\n",
- "selected_df = df_cd4.loc[selected_features]\n",
- "\n",
- "fold_change = (selected_df[longDD_samples].mean(axis=1) - selected_df[shortDD_samples].mean(axis=1)).apply(abs)\n",
- "fold_change = fold_change.sort_values(ascending=False)[:200].index.tolist()\n",
- "selected_df = selected_df.loc[fold_change]\n",
- "\n",
- "X = selected_df.T.values\n",
- "y = [0]*len(longDD_samples)+[1]*len(shortDD_samples) # Training y\n",
- "y = np.array(y)"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 121,
- "source": [
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.svm import SVC\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn import metrics\n",
- "\n",
- "auc_arr = []\n",
- "val_auc = []\n",
- "\n",
- "for t in list(range(0,100)):\n",
- " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=t)\n",
- " X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=t)\n",
- "\n",
- " #randomState = list(range(0,5))\n",
- "\n",
- " clf = SVC(kernel=\"linear\")\n",
- " clf.fit(X_train, y_train)\n",
- "\n",
- " y_pred = clf.predict(X_test)\n",
- " fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=1)\n",
- " auc_arr.append([t, metrics.auc(fpr, tpr)])\n",
- " \n",
- " y_val_pred = clf.predict(X_val)\n",
- " fpr, tpr, thresholds = metrics.roc_curve(y_val, y_val_pred, pos_label=1)\n",
- " val_auc.append([t, metrics.auc(fpr, tpr)])\n",
- "\n",
- "auc_test_df = pd.DataFrame(data=auc_arr, columns=['state', 'auc']).set_index('state')\n",
- "auc_val_df = pd.DataFrame(data=val_auc, columns=['state', 'auc']).set_index('state')"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 125,
- "source": [
- "auc_df = pd.concat([auc_test_df, auc_val_df], axis=1)\n",
- "auc_df.columns = ['test_auc', 'val_auc']\n",
- "auc_df['diff'] = auc_df['test_auc'] - auc_df['val_auc']\n",
- "auc_df"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " test_auc | \n",
- " val_auc | \n",
- " diff | \n",
- "
\n",
- " \n",
- " state | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1.000000 | \n",
- " 0.916667 | \n",
- " 0.083333 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.928571 | \n",
- " 0.900000 | \n",
- " 0.028571 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1.000000 | \n",
- " 0.928571 | \n",
- " 0.071429 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1.000000 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.900000 | \n",
- " 0.750000 | \n",
- " 0.150000 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 95 | \n",
- " 0.875000 | \n",
- " 1.000000 | \n",
- " -0.125000 | \n",
- "
\n",
- " \n",
- " 96 | \n",
- " 1.000000 | \n",
- " 0.750000 | \n",
- " 0.250000 | \n",
- "
\n",
- " \n",
- " 97 | \n",
- " 1.000000 | \n",
- " 0.928571 | \n",
- " 0.071429 | \n",
- "
\n",
- " \n",
- " 98 | \n",
- " 0.875000 | \n",
- " 1.000000 | \n",
- " -0.125000 | \n",
- "
\n",
- " \n",
- " 99 | \n",
- " 0.833333 | \n",
- " 1.000000 | \n",
- " -0.166667 | \n",
- "
\n",
- " \n",
- "
\n",
- "
100 rows × 3 columns
\n",
- "
"
- ],
- "text/plain": [
- " test_auc val_auc diff\n",
- "state \n",
- "0 1.000000 0.916667 0.083333\n",
- "1 0.928571 0.900000 0.028571\n",
- "2 1.000000 0.928571 0.071429\n",
- "3 1.000000 1.000000 0.000000\n",
- "4 0.900000 0.750000 0.150000\n",
- "... ... ... ...\n",
- "95 0.875000 1.000000 -0.125000\n",
- "96 1.000000 0.750000 0.250000\n",
- "97 1.000000 0.928571 0.071429\n",
- "98 0.875000 1.000000 -0.125000\n",
- "99 0.833333 1.000000 -0.166667\n",
- "\n",
- "[100 rows x 3 columns]"
- ]
- },
- "metadata": {},
- "execution_count": 125
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 126,
- "source": [
- "sns.distplot(auc_test_df['auc'].values.tolist())\n",
- "sns.distplot(auc_val_df['auc'].values.tolist())"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
- " warnings.warn(msg, FutureWarning)\n",
- "/opt/miniconda3/envs/r-py-test/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
- " warnings.warn(msg, FutureWarning)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "execution_count": 126
- },
- {
- "output_type": "display_data",
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD4CAYAAAD2FnFTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAwuElEQVR4nO3dd3hU55n38e89Rb0XhBBFFNFsugA7FOOGMXbs2I5jO3GJ1wkpziZO86a9m7LZJLvJJrteJ/Hi3u047jHuDXDBiF5tMEUIEGqo15m53z/OgBUyAgGaOZK4P9c118ycNvdBaH46z3nOc0RVMcYYY47kcbsAY4wxvZMFhDHGmIgsIIwxxkRkAWGMMSYiCwhjjDER+dwuoCfl5ORoYWGh22UYY0yfsWrVqipVzY00r18FRGFhISUlJW6XYYwxfYaI7O5qnjUxGWOMicgCwhhjTEQWEMYYYyKKWkCIyBAReVNENovIJhH5Vnh6loi8KiLbws+ZXax/Q3iZbSJyQ7TqNMYYE1k0jyACwHdVdTxwBnCziIwHfgC8rqpFwOvh939HRLKAnwIzgRnAT7sKEmOMMdERtYBQ1f2qujr8ugHYAhQAlwL3hxe7H/hMhNUvAF5V1RpVPQi8CiyIVq3GGGP+UUzOQYhIITAFWAHkqer+8KxyIC/CKgXAnk7vy8LTjDHGxEjUA0JEUoAngVtUtb7zPHXGGj+p8cZFZJGIlIhISWVl5clsyhhjTCdRDQgR8eOEw8Oq+lR48gERyQ/PzwcqIqy6FxjS6f3g8LR/oKqLVbVYVYtzcyNeDGiMMeYERO1KahER4G5gi6r+vtOs54AbgN+En5+NsPrLwK86nZieD/wwWrUaY3qBknt7fpvFN/b8Nk8h0TyCmAVcB5wjImvDj4U4wXC+iGwDzgu/R0SKReQuAFWtAf4NWBl+/CI8zRhjTIxE7QhCVZcD0sXscyMsXwJ8qdP7e4B7olOdMcaYY7ErqY0xxkRkAWGMMSYiCwhjjDERWUAYY4yJyALCGGNMRBYQxhhjIrKAMMYYE5EFhDHGmIgsIIwxxkRkAWGMMSYiCwhjjDERWUAYY4yJyALCGGNMRBYQxhhjIrKAMMYYE5EFhDHGmIgsIIwxxkRkAWGMMSaiqN1yVETuAS4GKlT19PC0x4Ex4UUygFpVnRxh3V1AAxAEAqpaHK06jTHGRBa1gADuA24HHjg0QVWvOvRaRP4LqDvK+meralXUqjPGGHNUUQsIVV0qIoWR5omIAJ8DzonW5xtjjDk5bp2DmAMcUNVtXcxX4BURWSUii462IRFZJCIlIlJSWVnZ44UaY8ypyq2AuAZ49CjzZ6vqVOBC4GYRmdvVgqq6WFWLVbU4Nze3p+s0xphTVswDQkR8wOXA410to6p7w88VwNPAjNhUZ4wx5hA3jiDOA7aqalmkmSKSLCKph14D84GNMazPGGMMUQwIEXkUeA8YIyJlInJTeNbVHNG8JCKDRGRJ+G0esFxE1gEfAC+o6kvRqtMYY0xk0ezFdE0X078YYdo+YGH49Q5gUrTqMsb0Po+sKGVkaU2Pb3emXUF1UuxKamOMMRFZQBhjjInIAsIYY0xEFhDGGGMisoAwxhgTkQWEMcaYiCwgjDHGRGQBYYwxJiILCGOMMRFZQBhjjInIAsIYY0xEFhDGGGMisoAwxhgTkQWEMcaYiCwgjDHGRGQBYYwxJiILCGOMMRFF85aj94hIhYhs7DTtZyKyV0TWhh8Lu1h3gYh8KCLbReQH0arRGGNM16J5BHEfsCDC9D+o6uTwY8mRM0XEC/wRuBAYD1wjIuOjWKcxxpgIohYQqroUOJGbzM4AtqvqDlVtBx4DLu3R4owxxhyTG+cgviEi68NNUJkR5hcAezq9LwtPi0hEFolIiYiUVFZW9nStxhhzyop1QPwZGAlMBvYD/3WyG1TVxaparKrFubm5J7s5Y4wxYTENCFU9oKpBVQ0Bd+I0Jx1p
LzCk0/vB4WnGGGNiKKYBISL5nd5eBmyMsNhKoEhEhotIHHA18Fws6jPGGPMJX7Q2LCKPAvOAHBEpA34KzBORyYACu4CvhJcdBNylqgtVNSAi3wBeBrzAPaq6KVp1GmOMiSxqAaGq10SYfHcXy+4DFnZ6vwT4hy6wxhhjYseupDbGGBORBYQxxpiILCCMMcZEZAFhjDEmIgsIY4wxEVlAGGOMicgCwhhjTEQWEMYYYyKygDDGGBORBYQxxpiILCCMMcZEZAFhjDEmIgsIY4wxEVlAGGOMicgCwhhjTEQWEMYYYyKygDDGGBNR1AJCRO4RkQoR2dhp2m9FZKuIrBeRp0Uko4t1d4nIBhFZKyIl0arRGGNM16J5BHEfsOCIaa8Cp6vqROAj4IdHWf9sVZ2sqsVRqs8YY8xRRC0gVHUpUHPEtFdUNRB++z4wOFqfb4wx5uS4eQ7in4AXu5inwCsiskpEFh1tIyKySERKRKSksrKyx4s0xphTlSsBISI/BgLAw10sMltVpwIXAjeLyNyutqWqi1W1WFWLc3Nzo1CtMcacmmIeECLyReBi4AuqqpGWUdW94ecK4GlgRswKNMYYA8Q4IERkAXArcImqNnexTLKIpB56DcwHNkZa1hhjTPREs5vro8B7wBgRKRORm4DbgVTg1XAX1jvCyw4SkSXhVfOA5SKyDvgAeEFVX4pWncYYYyLzRWvDqnpNhMl3d7HsPmBh+PUOYFK06jLGGNM93TqCEJGnROQiEbErr40x5hTR3S/8PwGfB7aJyG9EZEwUazLGGNMLdCsgVPU1Vf0CMBXYBbwmIu+KyI0i4o9mgcYYY9zR7SYjEckGvgh8CVgD/A9OYLwalcqMMca4qlsnqUXkaWAM8CDwaVXdH571uA2mZ4wx/VN3ezHdqapLOk8QkXhVbbPB9Iwxpn/qbhPTLyNMe68nCzHGGNO7HPUIQkQGAgVAoohMASQ8Kw1IinJtxhhjXHSsJqYLcE5MDwZ+32l6A/CjKNVkjDGmFzhqQKjq/cD9InKFqj4Zo5qMMcb0AsdqYrpWVR8CCkXkO0fOV9XfR1jNGGNMP3CsJqbk8HNKtAsxxhjTuxyrien/ws8/j005xhhjeovuDtb3nyKSJiJ+EXldRCpF5NpoF2eMMcY93b0OYr6q1uPcCW4XMAr4frSKMsYY477uBsShpqiLgCdUtS5K9RhjjOklujvUxt9EZCvQAnxNRHKB1uiVZYwxxm3dHe77B8CngGJV7QCagEuPtZ6I3CMiFSKysdO0LBF5VUS2hZ8zu1j3hvAy20Tkhu7tjjHGmJ5yPHeIGwtcJSLXA58F5ndjnfuABUdM+wHwuqoWAa+H3/8dEckCfgrMBGYAP+0qSIwxxkRHd3sxPQj8DpgNTA8/jjmKq6ouBWqOmHwpcH/49f3AZyKsegHwqqrWqOpBnHtOHBk0xhhjoqi75yCKgfGqqj3wmXmd7idRDuRFWKYA2NPpfVl42j8QkUXAIoChQ4f2QHnGGGOg+01MG4GBPf3h4cA5qdBR1cWqWqyqxbm5uT1UmTHGmO4eQeQAm0XkA6Dt0ERVveQEPvOAiOSr6n4RyQcqIiyzF5jX6f1g4K0T+CxjjDEnqLsB8bMe/MzngBuA34Sfn42wzMvArzqdmJ4P/LAHazDGGHMM3e3m+jbOFdT+8OuVwOpjrScij+LceW6MiJSJyE04wXC+iGwDzgu/R0SKReSu8OfVAP8W/pyVwC/C04wx5qhCCh8cTOGu0jx+8+JWNu6163pPlHTnvLOIfBnnRHCWqo4UkSLgDlU9N9oFHo/i4mItKSlxuwxjzHF6ZEUpI0ufOOntNAQ8/GbbELY3J5LoCRIQH8GQ8uW5I/jBgrGIyLE3cooRkVWqGrFXandPUt8MzALqAVR1GzCgZ8ozxpiT1xjw8MuPhrK7JZ6vDdvP3ZO3UfKT87ly2hD+7+0d/OG1bW6X2Od09xxEm6q2H0pfEfFxkr2PjDGmJ91dOpA9
rfHcOrKMyelNAKQn+vnNFRMIqnLb69sYn5/KgtPzXa607+juEcTbIvIjIFFEzgeeAJ6PXlnGGNN9K2tTePdgGlfkVx0Oh0NEhF9dNoHx+Wn8/PnNNLUFXKqy7+luQPwAqAQ2AF8BlgA/iVZRxhjTXe0h4Z7SPAoTW7l0YHXEZeJ8Hn5x6Wnsr2vl9je3x7jCvqu7vZhCwDPA11X1s6p6Zw9dVW2MMSfl9aoMajr8XDekAt9RzkEXF2Zx2ZQC7lm+k6rGtq4XNIcdNSDE8TMRqQI+BD4M303uX2NTnjHGdK0tJDy9P5vxKU2cntp8zOW/cc4o2oMhHnh3V/SL6weOdQTxbZzeS9NVNUtVs3BGWJ0lIt+OenXGGHMUb1WlUxfwceWgqsPT/B0NDKhZSUHF27BiMdTsODxvZG4K54/L4/73dtu5iG44Vi+m64DzVfXwv76q7gjfj/oV4A/RLM4YY7qiCi9VZjIiqYVxKS14gm0MK3+Z3Np1yKFOli++DS8CE6+CC34FyTl85awRvLL5AE+v2cu1ZwxzdR96u2MFhL9zOByiqpUi4o9STcYYc0wbG5LY1xrP1wv3kdBezdjSR4lvP0h51gwqsqbRGpfFzPOugDUPwTu3wc5lcMPzTB06krEDU3miZI8FxDEcq4mp/QTnGWNMVL1cmUmqL8C8lD2M2/0g3mArWwqvpzT/Alrjc0A8kDUCzv1X+PLrEGyH+xYiNTv4XPEQ1pXVsbW83u3d6NWOFRCTRKQ+wqMBmBCLAo0x5kj1HV5W16YwP6uCCaUP4w22s3XYtTQkd3FEkD8JvvgCBDvg8ev4zOmZ+L3CX1aWxbbwPuaoAaGqXlVNi/BIVVVrYjLGuGJ5TRpB4JuhB0lor2bb0CtpTjzGLWsGjIUr7oKKzWS9+UPOG5fHc+v2EgxZj/2uHM89qY0xpld4uzqdLye+xbDGdZTlnkV98vDurTjqXJj7PVj3CDfmfUxVYzsrdka+uM5YQBhj+pjSlnhqWzq4hUdoTCxgX+7s49vA3O9DdhHFm35Jpj/A39bvP/Y6pygLCGNMn/JOTSo/9D1KorawY9DFzsno4+GLh0//N57a3fxywBu8tLGcQDAUnWL7OAsIY0yfoQrVNQe50vc25dkzaUnIO7ENFc6GcZdwQd1foKmKFTvtfmSRWEAYY/qMHU3xfEX/Qqsksi93zslt7Jz/hzfYyj/HPcsbWyt6psB+JuYBISJjRGRtp0e9iNxyxDLzRKSu0zI29pMxhoOVe5nt3URZ7lyC3oST21juaGTy57nW8xprN2/tmQL7mZgHhKp+qKqTVXUyMA1oBp6OsOiyQ8up6i9iWqQxptfRkDK76RUqyKI2e1rPbHT2d/AS5Lz6p9hZ1XTs5U8xbjcxnQt8rKq7Xa7DGNPLtRzcxyTZzpqUs1CPt2c2mj2SlqJP8wXvayzb+HHPbLMfcTsgrgYe7WLemSKyTkReFJHTutqAiCwSkRIRKamsrIxOlcYY1xVULaNS0/EM7PLr4IQkn/Nd0qQF/5r7e3S7/YFrASEiccAlOLcvPdJqYJiqTgL+F+dmRRGp6mJVLVbV4tzc3KjUaoxxV3LzXkYHPuIZz/mkxffw11b+JHanTmF27XM0ttoQc525eQRxIbBaVQ8cOUNV61W1Mfx6CeAXkZxYF2iM6R1yKt6hVpPZn1kcle23Tb6RIVLB1mWRToeeutwMiGvoonlJRAaKiIRfz8Cp066HN+YUlNh6gIFNW7k3sIBJWR1R+Yzhc66iinSS1t8Xle33Va4EhIgkA+cDT3Wa9lUR+Wr47WeBjSKyDrgNuNrugW3MqSm/+n1aiOc1/1kMjI9OQPjjEijJvJgxDe8TOlgalc/oi1wJCFVtUtVsVa3rNO0OVb0j/Pp2VT1NVSep6hmq+q4bdRpj3OULNJFdt5EnAnMZmxndzwpN/SKiStXbi6P7QX2I272YjDGmSwMOrsKjQe4LXsD0jIaoflbx5Im8EZpMyqZH
IGAnq8ECwhjTS4kGyaspYbWcRoM/l8LEtqh+3oDUBJamX0JSRzVs/VtUP6uvsIAwxvRKmfVbiAs08sf2hRRnNOB0W4mupHHzKdMcgqsfjP6H9QEWEMaYXmlg9Qcc9ObwRnAS0zMaY/KZs0cP5KngbDw734J6u0+EBYQxptdJbtlLaksZz3vOIdmrjE1pjsnnFhdm8jfOQjQEG/4Sk8/szSwgjDG9zsDqDwh44ri9+TymZTTijUHzEkCC38vAEaezyTMG1j7q3IDiFGYBYYzpVfwdjWTVb2JrUjEVwZSo91460tyiHB5pmwWVW2D/uph+dm9jAWGM6VWcrq0hHtfziZMQE9NiOwz3nKJcng+eQdDjh3WPxfSzexsLCGNMryGhIAMOruJgyiiW1I9gYloT8Z7YNvOMzkshITWbdUmfgg1PQDA6V2/3BRYQxpheI6t+E3GBRtYmz6a6wx+z3kudiQhzinK5t+lMaK6C7a/FvIbewgLCGNM7qDKw+gNa4nJ4vnUKgjLVhYAAmDs6hxdbxtORkA3rurplTf9nAWGM6RVSWspIad1HefZ0VtalMi6lhTRf0JVaZo/KIYCPzVnnwYcvQWvdsVfqhywgjDG9Ql71BwQ88WxKmMae1gSKY9x7qbPslHhOL0jj8bYzIdgGm59zrRY3+dwuwBgTHY+siM6w1Z+fObTHt5nYeoCs+i0cyJ7B+w3OvcHcOP/Q2ZyiXO5cWs8vBwzHs+EvMPU6V+txgx1BGGNcV1T6OEKIA1nTKalNYVhiKwOidO+H7ppTlEMgBLsGXQQ7l0H9PlfrcYMFhDHGXR0tjCp9goOpYyiXHD5sTHT96AFg2rBMEv1entfZgMKGv7pdUsxZQBhj3LXhryR01HIgewbv1qShCJ/Kqne7KuJ9Xs4YkcWzexJh0NRTcmwm1wJCRHaJyAYRWSsiJRHmi4jcJiLbRWS9iEx1o05jTBSpwor/ozZlFPVJhbxbk0ZhYisFCb3jhj1zinLZUdXEwVGXQfkGqNjidkkx5fYRxNmqOllViyPMuxAoCj8WAX+OaWXGmOjb/Q4c2MCHhddS3hbH9uZEZveCo4dD5o52Tpi/6ZsN4oX1p9ZRhNsBcTSXAg+o430gQ0Ty3S7KGNOD3vsTJGaya9BC3jmYBsCZvSggRuamkJ+ewKulCiPmOechQiG3y4oZNwNCgVdEZJWILIowvwDY0+l9WXiaMaY/qNgCH74AMxYR8CSwvCaNcSnN5MQF3K7sMGfYjRze2V5F8PQroa4U9qxwu6yYcTMgZqvqVJympJtFZO6JbEREFolIiYiUVFZW9myFxpjoWf7f4E+GmV9lf10r+1rjmdWLjh4OmVOUS31rgA1pc8CfBOsfd7ukmHEtIFR1b/i5AngamHHEInuBIZ3eDw5PO3I7i1W1WFWLc3Nzo1WuMaYnHdzljJQ67YuQlMX6slq8KDMz3bt6uiuzR+UgAm/vbIExC2HzMxDoHSfRo82VgBCRZBFJPfQamA9sPGKx54Drw72ZzgDqVNVuEmtMf/Du/4J44FPfIBhS1pXVMSm9ybWxl44mMzmOiQXpLNtWCROvgpaDp8wIr24dQeQBy0VkHfAB8IKqviQiXxWRr4aXWQLsALYDdwJfd6dUY0yPajgAqx+EyddA2iCWbqukrqWDudm9d0C8OUW5rNlTS33BbEjKPmWamVwZi0lVdwCTIky/o9NrBW6OZV3GmBh4/48Q6oBZtwDw6IpSkuN9TE/vfc1Lh8wpyuH2N7fz3q56LjjtcljzILTWQ0Ka26VFVW/u5mqM6W/q98GKxXD6FZA9kgP1rby+tYJpQzPx9eJvoylDM0mO837SzBRohS3Pu11W1PXiH4kxpt9569cQCsA5PwGcEWeDIWV6YabLhR1dnM/DmSOzWbatCgYXQ+bwU6KZyQLCGBMbFVthzUMw/UuQWUhrR5CH3t/NeeMGkJ0S73Z1xzSnKJfd1c3srmmGCVfCzqVQ37/7zVhAGGNi
4/WfQ1wKzP0+AE+v2Ut1Uzs3zR7hcmHdM6fIGXZj6bYqmPg5QGHjk+4WFWUWEMaY6Nv9Hny4BGZ9C5KzCYaUO5ft4LRBaZwxIsvt6rpleE4yBRmJLPuoEnKKYNCUft/MZAFhjImuUBBe+TGk5sMZTm/1Z9fuZUdlEzefPQoRcbnA7hER5o7O4b2Pq+kIhmDC56B8vdN01k9ZQBhjomvVfbB3FZz3c4hLoiMY4r9f28Zpg9JYcNpAt6s7LnOKcmloC7BuTy1M+Cx4fE6X137KAsKYU4SqEgwpziVGMdJwAF77OQyfG263d3ouldY0853zR+Px9I2jh0NmjczBI+HzECkDYMyFsO7Rfjv0hisXyhljoqO1I8ia0lpW7Kzmtc0HqGlqp6a5nbaOEIdiIc7nIS3BT3qij/z0RIZlJzEsO5mU+B7+OlCF57/lXDNw0e9BhMqGNn73yofMHpXDOWMH9OznxUB6kp9JQzJYtq2S75w/Gqbe4FwP8eESOO0zbpfX4ywgjOnjmtsDvLypnKfX7OP9HdW0B0J4BLKS48hKjmNC3H6SvUE8ongEGgNeqtt9VDX5eb8qnuXbnYaEouQWPpVZzxmZDWQdbchtbxYU33jswtY+DB+9CBf8yjmpC/zyhc20dgT5+aWn9ZlzD0eaU5TL7W9so665g/SR50BaAax+wALCGNN77KxqYvHSj3l27T6a24MUZCRy/RnDOHNkNsWFWbyw3umjP7L0H+7oe1hHSNjRnMDmhiTeO5jK/WV5PFA2gGnpjVyYd5DTUpo5oe/xiq2w5FYonAMzvwbAU6vLeHbtPm45r4iRuSknssu9wtyiHG57fRvvfFzFwgn5MOVaePs/obYUMoa6XV6PsoAwpo/Zsr+e/31jGy9uLMfv9XDZ5AKumDaY4mGZx92m7/coY1JaGJPSwmX51ZS1xLGsJo3XKzMoqUtlaGIrl+TVcGZWPb7ubrqtEf5yPcQlweV3gsfDlv31/OSZjcwYnsU3zh51/Dvdi0wekkFGkp9XNpX/fUCseQjO/pHb5fUoCwhj+ojKhjZ+/+qHPL5yD8lxPr521khunDWc3NSeuwp5cGI71xRUcUV+Nctr0njhQBa37xrEY/tyuTivhnNyao++gWAAnrwJqrfB9c9CWj67qpq47u4PSEvwc9vVU/B5+3bfGJ/XwwXjB/LChv20dgRJyBgKI89xAuKsfwGP1+0Se0zf/kkZcwoIhZR739nJ2b97iydKyvjip4az7F/O5tYFY3s0HDqL8yjn5NTx2/E7uXXkHrL9Hdy3J4+bN4zkts1J1DZH6LWjCku+Bx+9BAt/B8Pn8v6Oaq7487uEVHnoSzMYmJ4QlXpjbeHEfBrbAs7YTABTr4f6vbDtVXcL62F2BGGMyx5ZUdrlvMqGNp5aXcbummZG56Vw0YRB5KbGs2RDeUxq8whMy2hiWkYTWxsTebY8m99vTuHPv36DhRPyuWTyIM4YkUW81wMv3gqr7oXZ32b3iKu546n1PL5yD4U5ySy+rphRA/rueYcjfWpkNhlJfl5Yv4/zx+fB2IucCwFX3AFjFrhdXo+xgDCmF1JVVuysYcmG/fi9Hq6cNpjJQzJc7fkzNqWFsaPKSM/K5f66KTy/bh9Pri4jxRfif5Lv5dy213k57Up+u24e2197C79XuP7MQr47fzSpCX7X6o4G/5HNTH6/MwjhG/8GFVtgwDi3S+wRFhDG9DLtgRDPrN3L2j21jMlL5fKpBb3qC3ZsepBfnzuBn356PKs2bGLIG//M0Ma1PJjweR6WqxmWlcTVM4Zy4YR8CjIS3S43ahZOzOfxkj0s21blHEVMuxGW/hbe/zNccpvb5fWImAeEiAwBHsC57agCi1X1f45YZh7wLLAzPOkpVf1FDMs0xhVVDW08tGI3lQ1tnD8+j7NG5+LpjdcLqJKw9Wlmvfp950K4y+/iuolXcp3bdcXQoWamJRv2OwGRnA2TroZ1j8G5P3Xe93FuHEEEgO+q
6moRSQVWicirqrr5iOWWqerFLtRnjCs27q3jydVleD3CF2cVUjQg1e2SIju4Cx64xLkfwqApTlfW8IVwp5JDzUxLDjczeZ1rPlbd55yLmfs9t0s8aTHvxaSq+1V1dfh1A7AFKIh1Hcb0FsGQ8uKG/TzyQSm5qfF84+xRvS4cPKEOsuo2Mm7n/fDOf8OBTU5PpS+9fkqGwyELJ+bT0BZg6UeVzoQBY50uryvv6hfjM7l6DkJECoEpwIoIs88UkXXAPuB7qroplrUZEwsVDa3cvXwnu6qbmDk8i4sm5PeK6wQkFCCptZy05t2kNpWS1rQLr3bQ6s+AcZfAZ/4M8f2nV9KJ+tTIbHJS4nlydRnzD41Me8bX4eHPOjcTmnyNuwWeJNcCQkRSgCeBW1S1/ojZq4FhqtooIguBZ4CIf6aIyCJgEcDQof3rMnfTv32ws4abH1lNbXM7V04bzJShMbwvsyq+YDMJ7QeJDz8SOsLP7QeJCzQcXrQlLoeqjEnUpI+jPmkYM0fkWDiE+b0eLp9awD3Ld1LV2EZOSjyMPBfyJsDS/3RuTertu32BXKlcRPw44fCwqj515PzOgaGqS0TkTyKSo6pVEZZdDCwGKC4ujuE4xsacGFXl7uU7+fWLWxmalcQ104dG7QIyX0cjyc17SWyvIrGtioS26sNh4A39fRNIuy+V1rhM6lJG0ObPoCU+l/rkoQR8FgZHc+W0wSxeuoNn1uzlS3NGgMcDZ/8QHvu8c8e5KV9wu8QT5kYvJgHuBrao6u+7WGYgcEBVVURm4JwrqY5hmcZERWNbgFv/uo4lG8q54LQ8fnvlJP62rmdufO8JtpFVv4Wc2nVk164np3YDya2fbDuEh7a4LFrjMmlIHkZrXCZtcZm0+TNpjctAPb2nK21fUpSXyuQhGTy+cg83zR7uXKsyZiHkT4a3/8O5D4a3b/7bunEEMQu4DtggImvD034EDAVQ1TuAzwJfE5EA0AJcrTG9y4kxPe+jAw189aFV7K5u5kcLx/LlOSNO7sI3DZFZv5X8qnfIr3yHnNp1eNUZprsxsYCqzElsS/0cSS3ltMRn0xaXiUr/GSeoN/n8zKHc+tf1vL+jhjNHZoMInP1jeORKWPsITLvB7RJPSMwDQlWXA0f9rVDV24HbY1ORMdH37Nq9/ODJDSTH+3j4SzM5Y8SJ9ZH3BNvIr3qXIQdeI79yOYntNQDUpI3jw8LrqMycTHXGRFrjcw6vM7L0iR7ZB9O1SyYN4ldLtvDAe7ucgAAoOh8Kip2L5yZdA744d4s8AX337IkxfUBLe5CfPbeJx0v2ML0wk9s/P5W8tOM73+AJtjGoajlD979CQcXb+INNtPnT2J8zm325synPOfPvAsHEXoLfy1XTh3DXsp3sq21hUEZi+CjiR/DQ5c51ETO/4naZx80Cwpgo+ehAAzc/vJrtlY184+xR3HJeUfe7sKqSU7uO4XufY9j+l4gLNNDmT2d3/gXsGTif8uwZds6gl7l25jDuWraTe5bv5CcXj3cmjjzHuR/3m79yejQlZblb5HGygDAnr+Tent9md25p2UupKo+t3MPPn99ESryPB/5pBnOKcru17riP7yKndj05tetJbK8mKD4Opo2jKmMC9cnDUfGS1LKPEWXPRHcnzHEbkpXEpyfm88gHpdx89igyk+Oco4gF/wF3zII3/x0u+i+3yzwuFhDG9KDyulZ++NR63vywklmjsvnDVZMZkHr0JiVvsIUh5a8zfO+zDKx+HwHqk4ayL2cWNWnjCHmjc88H0/O+Nm8Uz6zdx/3v7eKW80Y7E/PGOyO9rrwLJn8eCqa5W+RxsIAwpgeoKk+sKuPf/raZjmCIn356PDecWdj1LUBDISh9D9Y9wuXrn8YfbKIxsYC9uXOpyphEW1wML5ozPWbMwFTOH5/H3ct3csOZhc5RBMA5P4Etz8Nz34RFb/WZbq/uX9NvTB9XdrCZG+9bya1/Xc+4/DRe+tZcbpw1PHI41Oxw
2qNvmwz3LYRNz1A68Dxem3EPz521hL0D5lk49HHfmz+GxrYAf3pr+ycTE9Lhot/DgY2wrO80M9kRhDHddOSd39oDIZZuq2TpR5WIwMUT8zljRDbvflzNux9/cl2nv6OBoeUvM3zv8ww4uBpFKM+eyc6JX2FP3rkEfUmx3hUTRWMGpnL5lMHc/95urj+zkCFZ4Z/v2IUw8Sp4+z+dk9dDZrhbaDdYQBhznFSVDXvreHFjOXUtHUwcnM6C0waSkfRJP3dvoJmCyqUMLX+FQRVL8YXaqEseztrR32LnoItpSRzo4h6YaPvu/NEs2bCfnz+/mbtuKP5kxsLfOk2LT94Ei97u9b2aLCCM6SZV5ePKJl7bcoDSmmYKE1v59ugDjEttgSrwhNrJaNhGVv1mMhq24dUA7b5kqjImUpUxkaaEQSDCoMplbu/KyevhnmsjS2t6dHtuG5SRyC3nFfHrF7fyyqbyT0Z6TUiHz94L9yxwQuILfwVP77263QLCmG549+Mq7ly2g13VzaQn+rlscgGf87xOfKCejJrtZDRuI73x48OhUJk5hZq0cTQkDQWxU32non+aPZyn1+zlx89sZNqwTLJTwr3RBhc73V2f/ya8/CNY8BunO2wvZAFhTBeCIeWNrRXcuWwHH+ysIS3Bx+UTsliQVkrBwZcp3Pc8ya0HAGjzp1somL/j93r4w1WTufT2d/iXJ9dz5/XFn4y9Ne0GqNwK7/8JUvJgznfcLbYLFhDm5AXboeEANFdD60FoqYXWWmith0ALBNrCd9c6NN6iOOPS+BLBn+A8xyVDYoZzCJ6QCeUbIWOI8z7G6ls7eKKkjPvf3UVjTTlnpZbx7LhyBh4sIWf7RrwaICReGhMLKM07l9qUIlric3vtX4HGPePy0/jBhWP5xd8288c3t/ONczrd1mb+v0NTJbz+c/D4YNY33Su0CxYQ5vg0HIC9JbBvLVRsdh41O/nkyx/wxkFChvPlnjwAfPHgjf/kC1QVgm3Q0eI8WmuhvswJlEPbWbnYeY5Ph4yhTlhkDIX0IZ3eD4PEzB75Yg611LN583rWrVtFza6NjNGPecq/m5yEKugAdnmpSjuNrcOvpyKrmMqMKQzb/+JJf25ftGJn/zpfEG03zipkw946fvfKRxTmJHPxxEHODI/HuTNfKACv/j9orXNGgPX0nqNPCwjTtUAb7F8PZSvDjxKoC3f1FA9kj4KBEyBnNKTmQ3KuEwr+pBP70g4Foa3eCYz8yVBXBrWlULcHDu6CnUuhvfHv1/EnfxIYKQMgPg3iU51HXDIgoEHnwjQNOUc0zdXQXIM2V9FWV0mwZhfJHTWcDpwO4IG29BHED5kHgybDoCmQP5lX1toXozl+IsKvL5/AnppmbnlsLXFezycnrb1+uOJuiEuBZb+Dmo/hkv91/v/2AhYQxqEKtbudEDgUCPvXQ6jDmZ8+xBkiYOZXYPB0yJ8I/kRnXk/1aPF4nSOCxEw4/fLINbYcdAKjthRq93wSILW7nWaptgZob/jHdTsJeuNp9KRTEUymvCOZfUyErBEUjj6diROmkphXRHzEW2paQJgTk+D3cu+N07nu7g/4+sOr+ffLTueq6eFbJHu8TijkFMFrP4P96+Dyu2Cw+0NyWECcqlrrYf/aT44MylY67aHgHAEMmgJn3uz0uCgohrR8V8sFnKOSpCznkT+p6+VCIedIo72Rlo4QG/c3sHZPA2vK6vlgTyNVrX7ifB5mjczmnHF5zB+fd9xDcBtzvFIT/Dx40wxufmQN//LkBjburedHC8eRGOd1/m/P+pbzu/bUIrjrXJjxZZh7K6R0b6DHaLCA6O9Unb+wyzc4f2GXr3cu9z+465Nlsotg1PlOGAyeDgPG97kbrasqNU3tfFjewOb99WzZ38CW/fV8dKCBQEgRgTF5qSyYOpJ5owfwqVHZJMX1rX00fV9qgp97bijmP17ayp3LdrJ8exX/+unxnD1mgLNA4Sz4+rvwxi+dwf1WPwjF
/+ScwE6N/cWV9hvSHxxqeqnZCdXbnXbM6u1Q/bHzONzkIpA1wvnre8q1Tjt/wbRefzXnIS3tQcrrWymva2VPTTO7a5rYVd1MaXUzu6qbaGgNHF42JyWecfmpfHnMCKYXZjJtaBbpSX1jgDTTv/m8Hn580XjmjRnAT57ZyI33rmRGYRY3zRnOvDG5xCekO1dcz1gEy34PK+6AlXdC0Xyn6XX0gvD5tegTN271LCILgP8BvMBdqvqbI+bHAw8A04Bq4CpV3XWs7RYXF2tJSUnPFxxDqkpze5DGtgANzW0011fRXl9JoKGSYGMVNFXhbyonqbWclLYDpLQfIK29grhQ6+FthPBQ6R3AAd9gDvgL2Bc3jPLEIiqTRjKgYTOJnhAJ3hAJ4edET4hEr/PwdnFueebwng+RFTtrCCk0BLzUB7zUB3zOc4eXXckTqW/poL61g/qWAHUtHbR0BP9ufY9ARlIc2clxZKfEkZUcT15aPF+fN4rc1AhDZJ/kuRLrvdP3zLzyu26XcFTtgRCPrNjN4qU72FfXSmqCj/PH5zF//ECmF4YvrqvZCR8sho1PQWO50y18yAwYeiYMPcP5g+8k/sgTkVWqWhxxXqwDQkS8wEfA+UAZsBK4RlU3d1rm68BEVf2qiFwNXKaqVx1r2z0aEKpoKEgwGCQYCqKhUKfXSig8LxQKEgqG6OjooL29lY62VtrbWwi0txE4/NxKqKOVtjZnfrC9GdoakfZGvB0N+ALN+AJNxIeaSAi1kEEjmdJABo145R9/PiEVKsignGwOkEOFZFPpyWW/ZyAfdgxgtw6gTX2EFEIhJRhSuvtT9kuIJO8/BsfgNB8pfiXRq/hE8XvA51H8Aj4PeEQJhISgQkCdGgMhaAkKTYFOjw6hMSA0BTwcbFUag140wi3KBUiO95Ge6CctwUdaoj/82k9aop/MJD8ZSXF4I4yY+vmZQyPvnAXEKae3B8QhHcEQ72yv4m/r9/PypvLDR8N//sJULpwQPv8XCjrjOG1+DkrfdZqMD/1mZxbCN9eeUO/BowWEG01MM4DtqroDQEQeAy4FNnda5lLgZ+HXfwVuFxHRaKXZfxRCexMaChEKBQ9/KQvOP1C0/pFCCK2SSJsniTZfMgFfMkF/OsG4IdQkZFOVlI0kZ+NLycGXmkt8Wi6J6XkkZuUzMC6eSC2SR444Cs5RSUdQaQsEGbjnBVqDHlpDHlqCHlqDHlrCr1s6TW8JemgJeWkJeqhp91Fz0Edjh4fmAATCX/6hCF/snfnECZQkn5LsV1J8SrJPKUgKkewL0tzaQpovSJovEH4OkuZ33lcMvyzil78x/ZHf62HemAHMGzOAX102gfVltazcdZDJQzM+WcjjhcLZzgOc6ybKSpxrkdoaonKhphsBUQDs6fS+DJjZ1TKqGhCROiAbqDpyYyKyCFgUftsoIh92o4acSNtyR12sP7AX7fvRfPuE1/xC17P6yL73uFN1v4Hv9el9//pxLf3jIyd0d9+HdTWjz5+kVtXFwOLjWUdESro6pOrvbN9PvX0/VfcbbN9Pdt/duKZ7LzCk0/vB4WkRlxERH5COc7LaGGNMjLgRECuBIhEZLiJxwNXAc0cs8xxwQ/j1Z4E3onb+wRhjTEQxb2IKn1P4BvAyTjfXe1R1k4j8AihR1eeAu4EHRWQ7zvgGV/dwGcfVJNXP2L6fek7V/Qbb95PiynUQxhhjer/eM66sMcaYXsUCwhhjTET9NiBEZIGIfCgi20XkB10s8zkR2Swim0TkkVjXGC3H2ncR+YOIrA0/PhKRWhfKjIpu7PtQEXlTRNaIyHoRWehGndHQjX0fJiKvh/f7LREZ7EadPU1E7hGRChHZ2MV8EZHbwv8u60VkaqxrjJZu7PtYEXlPRNpE5HvH/QGq2u8eOCe/PwZGAHHAOmD8EcsUAWuAzPD7AW7XHat9P2L5f8bpKOB67TH6uS8GvhZ+PR7Y5XbdMdz3J4Abwq/PAR50u+4e2ve5wFRg
YxfzFwIv4gyOcAawwu2aY7jvA4DpwL8D3zve7ffXI4jDw3moajtwaDiPzr4M/FFVDwKoakWMa4yW7ux7Z9cAj8aksujrzr4rkBZ+nQ7si2F90dSdfR8PvBF+/WaE+X2Sqi7l6HdzuhR4QB3vAxki0gtucHLyjrXvqlqhqitxbpx73PprQEQazqPgiGVGA6NF5B0ReT88wmx/0J19B5wmB2A4n3xp9HXd2fefAdeKSBmwBOcIqj/ozr6vAw7dqu8yIFVEsmNQm9u6/Tth/l5/DYju8OE0M83D+Sv6ThHJcLMgF1wN/FVVg8dcsv+4BrhPVQfjND08KCKnyu/B94CzRGQNcBbOiAWn0s/eHKc+PxZTF7oznEcZTltkB7BTRD7CCYyVsSkxarqz74dcDdwc9Ypipzv7fhOwAEBV3xORBJxBzfp6E+Mx911V9xE+ghCRFOAKVa2NVYEuOp7fCdNJf/3LqTvDeTyDc/SAiOTgNDntiGGN0dKdfUdExgKZwHsxri+aurPvpcC5ACIyDkgAKmNaZXQcc99FJKfT0dIPgXtiXKNbngOuD/dmOgOoU9X9bhfVF/TLIwjt3nAeLwPzRWQzzmH291W1zw8I2M19B+cL5DENd3XoD7q579/FaU78Ns4J6y/2h3+Dbu77PODXIqLAUvrJ0aOIPIqzbznhc0s/BfwAqnoHzrmmhcB2oBm40Z1Ke96x9l1EBgIlOB0zQiJyC07vtvpubb8f/G4YY4yJgv7axGSMMeYkWUAYY4yJyALCGGNMRBYQxhhjIrKAMMYYE5EFhDHGmIgsIIwxxkT0/wHhCQRWCW/5pwAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- }
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "source": [],
- "outputs": [],
- "metadata": {}
- }
- ],
- "metadata": {
- "orig_nbformat": 4,
- "language_info": {
- "name": "python",
- "version": "3.8.2",
- "mimetype": "text/x-python",
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "pygments_lexer": "ipython3",
- "nbconvert_exporter": "python",
- "file_extension": ".py"
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3.8.2 64-bit ('r-py-test': conda)"
- },
- "interpreter": {
- "hash": "7508a6b53ffb04362d156591e4bfb20c197555e37f3cce3b1ec90fd899bbfe63"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
\ No newline at end of file
diff --git a/notebook/notebook_archive/Jun09262021/feature_test_with_DEG.ipynb b/notebook/notebook_archive/Jun09262021/feature_test_with_DEG.ipynb
new file mode 100644
index 0000000..156d784
--- /dev/null
+++ b/notebook/notebook_archive/Jun09262021/feature_test_with_DEG.ipynb
@@ -0,0 +1 @@
+import pandas as padj
\ No newline at end of file
diff --git a/pipelines/deg_pipeline/README.md b/pipelines/deg_pipeline/README.md
new file mode 100644
index 0000000..1eb1fcc
--- /dev/null
+++ b/pipelines/deg_pipeline/README.md
@@ -0,0 +1,19 @@
+## DEG pipeline(DESeq2) by Jun
+* This workflow generates DEG results using DESeq2; it currently works only with GEO-style datasets
+
+#### Version history
+* It has a memory issue when run in Docker
+* v1.0.0 is on the pipeline workflow
+
+#### Requirements
+```shell
+pip install -r requirements.txt
+Rscript installer_Rpackage.R
+```
+
+#### Usage
+* Please change config.yaml for standalone usage
+
+```shell
+snakemake --cores 3
+```
\ No newline at end of file
diff --git a/pipelines/deg_pipeline/Snakefile b/pipelines/deg_pipeline/Snakefile
index de8d611..391429e 100644
--- a/pipelines/deg_pipeline/Snakefile
+++ b/pipelines/deg_pipeline/Snakefile
@@ -6,9 +6,12 @@ __email__ = "swiri021@gmail.com"
# Base DEG pipeline by using DESeq2, it could expand to more functions by using this workflow
# For manual running, please use this one
-#configfile: "config.yaml"
+# configfile: "config.yaml"
+# pipeline_path = '/Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines/deg_pipeline/'
+#
pipeline_path = '/pipelines/deg_pipeline/'
+
SAMPLES = ['CD4','CD8','CD14']
rule all:
diff --git a/pipelines/deg_pipeline/import_utils/lib/externalHandler.py b/pipelines/deg_pipeline/import_utils/lib/externalHandler.py
index b0c73b1..939ea65 100644
--- a/pipelines/deg_pipeline/import_utils/lib/externalHandler.py
+++ b/pipelines/deg_pipeline/import_utils/lib/externalHandler.py
@@ -2,14 +2,31 @@
import itertools
class handlers(object):
- def get_column(filename_with_path, ext_value, annot='gene_id', sep="\t"):
+ def get_column(filename_with_path, ext_value, annot='gene_id', header_line=0, sep="\t"):
"""
filename_with_path = filepath + basename
ext_value = column name of file
sep = separator
"""
- temp = pd.read_csv(filename_with_path, sep=sep).set_index(annot) # temp loading
- return temp[[ext_value]]
+
+ # Don't use pandas.read_csv because of memory usage
+ index_list = []
+ value_list = []
+ with open(filename_with_path, 'r') as infile:
+ for i, line in enumerate(infile):
+ line = line.strip()
+ if i==header_line: # found header
+ header_info = line.split(sep)
+ value_ext_location = header_info.index(ext_value) # location of value extraction point
+ index_ext_location = header_info.index(annot) # location of index extraction point
+
+ elif i!=header_line:
+ line_list = line.split(sep)
+ index_list.append(str(line_list[index_ext_location])) # Index list
+ value_list.append(float(line_list[value_ext_location])) # Value list
+
+ result_df = pd.DataFrame(data={ext_value: value_list}, index=index_list)
+ return result_df
def get_samplename(filelist):
"""
diff --git a/pipelines/deg_pipeline/import_utils/step2_DESeq2_calculator.R b/pipelines/deg_pipeline/import_utils/step2_DESeq2_calculator.R
index c01651c..ee2bb11 100644
--- a/pipelines/deg_pipeline/import_utils/step2_DESeq2_calculator.R
+++ b/pipelines/deg_pipeline/import_utils/step2_DESeq2_calculator.R
@@ -6,7 +6,7 @@
# metafile = "./sample_CD4_meta.csv"
# outputfile = "./CD4_DEG.csv"
-library(tidyverse)
+#library(tidyverse)
library(DESeq2)
library(tximport)
diff --git a/pipelines/feature_extraction_pipeline/Snakefile b/pipelines/feature_extraction_pipeline/Snakefile
index db96798..2279726 100644
--- a/pipelines/feature_extraction_pipeline/Snakefile
+++ b/pipelines/feature_extraction_pipeline/Snakefile
@@ -7,8 +7,11 @@ __email__ = "swiri021@gmail.com"
# For manual running, please use this one
# configfile: "config.yaml"
+# pipeline_path = '/Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines/feature_extraction_pipeline/'
+#
pipeline_path = '/pipelines/feature_extraction_pipeline/'
+
SAMPLES = ['CD4','CD8','CD14']
rule all:
diff --git a/pipelines/feature_extraction_pipeline/import_ML/lib/statFunction.py b/pipelines/feature_extraction_pipeline/import_ML/lib/statFunction.py
index a94f68a..7266fb8 100644
--- a/pipelines/feature_extraction_pipeline/import_ML/lib/statFunction.py
+++ b/pipelines/feature_extraction_pipeline/import_ML/lib/statFunction.py
@@ -6,7 +6,6 @@
"""
Description: Repeative functions in notebook
"""
-import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
diff --git a/pipelines/pipeline_controller/app.py b/pipelines/pipeline_controller/app.py
index 9889382..e32ba38 100644
--- a/pipelines/pipeline_controller/app.py
+++ b/pipelines/pipeline_controller/app.py
@@ -24,10 +24,33 @@
from flask_wtf import Form
from wtforms import TextField, SubmitField
+# Celery running
+import json
+from celery import Celery, current_task
+from celery.result import AsyncResult
+from subprocess import Popen, PIPE
+
app = Flask(__name__)
app.config['SECRET_KEY'] = 'swiri021swiri021' # CSRF key
-Bootstrap(app) # set Bootstrap
+
+## Celery setting
+app.config.update(
+ CELERY_BROKER_URL='redis://localhost:6379', # Redis docker
+ CELERY_RESULT_BACKEND='redis://localhost:6379'
+)
+def make_celery(app):
+ celery = Celery(
+ app.import_name,
+ backend=app.config['CELERY_RESULT_BACKEND'],
+ broker=app.config['CELERY_BROKER_URL']
+ )
+ celery.conf.update(app.config)
+ return celery
+celery = make_celery(app)
+
+# set Bootstrap
+Bootstrap(app)
# setting Navigation Bar
nav = Nav(app)
@@ -91,26 +114,43 @@ class SnakeMakeForm(Form):
return render_template('config_yaml_creator.html', form=form)
+@celery.task()
+def workflow_running(pipeline_path, yaml_file):
+ print(pipeline_path, yaml_file)
+
+ proc = Popen(['snakemake', '--snakefile', pipeline_path+'Snakefile', '--cores', str(3), '--configfile', yaml_file], stdin=PIPE, stdout=PIPE, stderr=PIPE)
+ # It is not working with snakemake
+ while True:
+ line = proc.stdout.readline()
+ if not line:
+ break
+ print(str(line))
+ current_task.update_state(state='PROGRESS', meta={'msg': str(line)})
+ return 999
+
+@app.route("/workflow_progress")
+def workflow_progress():
+ print("WORKFLOW RETURN")
+ jobid = request.values.get('jobid')
+ if jobid:
+ job = AsyncResult(jobid, app=celery)
+ print(job.state)
+ if job.state == 'PROGRESS':
+ return json.dumps(dict( state=job.state, msg=job.result['msg'],))
+ elif job.state == 'SUCCESS':
+ return json.dumps(dict( state=job.state, msg="done",))
+ return '{}'
+
@app.route("/status")
def workflow_status():
-
pipeline_path = session.get('selected_pipeline', None) # Pipeline path
yaml_file = session.get('yaml_output', None) # yaml file
- ## Running snakemake
- cmd = 'snakemake --snakefile %s --cores 3 --configfile %s'%(pipeline_path+"Snakefile",yaml_file)
- print(cmd)
- try:
- p = subprocess.check_output([cmd], shell=True)
- msg = "Workflow has been completed"
- except subprocess.CalledProcessError as e:
- msg = "Error occur in snakemake, please check log files in pipelines folder"
-
- return render_template('status.html', msg=msg)
+ job = workflow_running.delay(pipeline_path, yaml_file)
+ return render_template('progress.html', JOBID=job.id)
#########Route###########
-
# Parsing function for yaml data, only work 2 layer nested yaml file
def _parsing_yamlFile(workflow_path):
"""
diff --git a/pipelines/pipeline_controller/requirements.txt b/pipelines/pipeline_controller/requirements.txt
index 32409a6..6032c06 100644
--- a/pipelines/pipeline_controller/requirements.txt
+++ b/pipelines/pipeline_controller/requirements.txt
@@ -4,4 +4,6 @@ PyYAML==5.4.1
flask==2.0.1
Flask-WTF==0.15.1
Flask-Bootstrap==3.3.7.1
-flask-nav==0.6
\ No newline at end of file
+flask-nav==0.6
+celery==5.1.2
+redis==3.5.3
\ No newline at end of file
diff --git a/pipelines/pipeline_controller/static/spinning-loading.gif b/pipelines/pipeline_controller/static/spinning-loading.gif
new file mode 100644
index 0000000..e3b78dd
Binary files /dev/null and b/pipelines/pipeline_controller/static/spinning-loading.gif differ
diff --git a/pipelines/pipeline_controller/templates/progress.html b/pipelines/pipeline_controller/templates/progress.html
new file mode 100644
index 0000000..3f99f8b
--- /dev/null
+++ b/pipelines/pipeline_controller/templates/progress.html
@@ -0,0 +1,40 @@
+{% extends "bootstrap/base.html" %}
+{% import "bootstrap/wtf.html" as wtf %}
+
+{% block navbar %}
+ {{nav.mynavbar.render()}}
+{% endblock %}
+
+{% block content %}
+
+
Workflow controller
+
This controller generates proper snakemake config file to run your samples
+
+
+
+
+
+
+
Copyright 2021 OpenKBC repository
+
+{% endblock %}
\ No newline at end of file
diff --git a/utils/lib/externalHandler.py b/utils/lib/externalHandler.py
index b0c73b1..3de4a3f 100644
--- a/utils/lib/externalHandler.py
+++ b/utils/lib/externalHandler.py
@@ -1,15 +1,33 @@
import pandas as pd
+import numpy as np
import itertools
class handlers(object):
- def get_column(filename_with_path, ext_value, annot='gene_id', sep="\t"):
+ def get_column(filename_with_path, ext_value, annot='gene_id', header_line=0, sep="\t", opt=0):
"""
filename_with_path = filepath + basename
ext_value = column name of file
sep = separator
"""
- temp = pd.read_csv(filename_with_path, sep=sep).set_index(annot) # temp loading
- return temp[[ext_value]]
+
+ # Don't use pandas.read_csv because of memory usage
+ index_list = []
+ value_list = []
+ with open(filename_with_path, 'r') as infile:
+ for i, line in enumerate(infile):
+ line = line.strip()
+ if i==header_line: # found header
+ header_info = line.split(sep)
+ value_ext_location = header_info.index(ext_value) # location of value extraction point
+ index_ext_location = header_info.index(annot) # location of index extraction point
+
+ elif i!=header_line:
+ line_list = line.split(sep)
+ index_list.append(str(line_list[index_ext_location])) # Index list
+ value_list.append(float(line_list[value_ext_location])) # Value list
+
+ result_df = pd.DataFrame(data={ext_value: value_list}, index=index_list)
+ return result_df
def get_samplename(filelist):
"""