From b1eed8061dcd3155d100fdf7751d4a28d48019ef Mon Sep 17 00:00:00 2001
From: raynardj <raynard@rasenn.com>
Date: Tue, 9 Feb 2021 23:14:51 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=81=20improve=20download=20result=20an?=
 =?UTF-8?q?d=20iteration=20speed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 langhuan/progress.py        |  17 +--
 langhuan/tasks.py           |   2 +-
 langhuan/version.py         |   2 +-
 settings.ini                |   2 +-
 tests/dispatcher_test.ipynb | 229 +++++++-----------------------------
 5 files changed, 49 insertions(+), 203 deletions(-)

diff --git a/langhuan/progress.py b/langhuan/progress.py
index b978366..1b692bd 100644
--- a/langhuan/progress.py
+++ b/langhuan/progress.py
@@ -1,5 +1,5 @@
 from typing import List, Callable, Union
-
+import logging
 
 class Dispatcher:
     def __init__(self, n, v):
@@ -30,7 +30,6 @@ def __getitem__(self, user_id):
         """
         if user_id in self.busy_by_user:
             # read cache
-            print(f"caching user {user_id}: idx{self.busy_by_user[user_id]}")
             return self.busy_by_user[user_id]
 
         self.user_clear_progress(user_id)
@@ -76,17 +75,11 @@ def finish_update(
     def user_clear_progress(self, user_id):
         user_progress = self.user_progress(user_id)
 
-        # new_progress = []
-        # for i in user_progress:
-        #     if i > self.sent:
-        #         new_progress.append(i)
-        # self.by_user[user_id] = new_progress
-        # print(f"user_progress:{self.by_user[user_id]}")
+        new_progress = []
         for i in user_progress:
-            if i <= self.sent:
-                user_progress.remove(i)
-        print(f"user_progress:{self.by_user[user_id]}")
-        print(f"user_progress:{user_progress}")
+            if i > self.sent:
+                new_progress.append(i)
+        self.by_user[user_id] = new_progress
 
     def tick_sent(self, index):
         self.sent = index
diff --git a/langhuan/tasks.py b/langhuan/tasks.py
index 51916c8..9507f9b 100644
--- a/langhuan/tasks.py
+++ b/langhuan/tasks.py
@@ -177,7 +177,7 @@ def show_history_log(self, history):
                     "label": history["label"]}
 
     def append_text_to_data(self, text_dict, data):
-        text_dict[data["index"]] = self.df.loc[data["pandas"], self.text_col]
+        text_dict[data["pandas"]] = self.df.loc[data["pandas"], self.text_col]
         return data
 
     def register_functions(self):
diff --git a/langhuan/version.py b/langhuan/version.py
index 00ec2dc..9b36b86 100644
--- a/langhuan/version.py
+++ b/langhuan/version.py
@@ -1 +1 @@
-__version__ = "0.0.9"
+__version__ = "0.0.10"
diff --git a/settings.ini b/settings.ini
index 6b22d04..b99db52 100644
--- a/settings.ini
+++ b/settings.ini
@@ -6,7 +6,7 @@ keywords = python pandas label data science
 author = xiaochen(ray) zhang
 author_email = b2ray2c@gmail.com
 branch = main
-version = 0.0.9
+version = 0.0.10
 min_python = 3.6
 audience = Developers
 language = English
diff --git a/tests/dispatcher_test.ipynb b/tests/dispatcher_test.ipynb
index f8e4f7f..b8a6233 100644
--- a/tests/dispatcher_test.ipynb
+++ b/tests/dispatcher_test.ipynb
@@ -5,6 +5,25 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import logging"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.DEBUG)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import unittest\n",
     "from langhuan.progress import Dispatcher"
@@ -12,7 +31,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,7 +64,7 @@
     "            result.update({f\"step_{i}\": [i1, i2, i3]})\n",
     "            dispatcher.finish_update(1, index=i1)\n",
     "            dispatcher.finish_update(3, index=i3)\n",
-    "        \n",
+    "\n",
     "        expected = {'step_0': [0, 0, 0],\n",
     "                    'step_1': [1, 0, 1],\n",
     "                    'step_2': [2, 0, 2],\n",
@@ -60,6 +79,7 @@
     "        result = dict()\n",
     "        for i in range(12):\n",
     "            i1, i2, i3 = dispatcher[1], dispatcher[2], dispatcher[3]\n",
+    "#             logging.debug(f\"{i1}, {i2}, {i3}\")\n",
     "            result.update({f\"step_{i}\": [i1, i2, i3]})\n",
     "            if i % 3 != 0:\n",
     "                dispatcher.finish_update(1, index=i1)\n",
@@ -67,127 +87,38 @@
     "                dispatcher.finish_update(2, index=i2)\n",
     "            if i % 3 != 2:\n",
     "                dispatcher.finish_update(3, index=i3)\n",
-    "        print(result)\n",
-    "        expected = {'step_0': [0, 0, 1],\n",
-    "                    'step_1': [0, 1, 2],\n",
-    "                    'step_2': [2, 1, 3],\n",
-    "                    'step_3': [3, 3, 3],\n",
-    "                    'step_4': [3, 4, 4],\n",
-    "                    'step_5': [5, 4, 5],\n",
-    "                    'step_6': [6, 6, 5],\n",
-    "                    'step_7': [6, 7, 7],\n",
-    "                    'step_8': [8, 7, 8],\n",
-    "                    'step_9': [9, 9, 8],\n",
-    "                    'step_10': [9, -1, -1],\n",
-    "                    'step_11': [-1, -1, -1]}\n",
+    "\n",
+    "        expected = {\n",
+    "            'step_0': [0, 0, 1],\n",
+    "            'step_1': [0, 1, 2],\n",
+    "            'step_2': [2, 1, 3],\n",
+    "            'step_3': [3, 4, 3],\n",
+    "            'step_4': [3, 5, 4],\n",
+    "            'step_5': [5, 5, 6],\n",
+    "            'step_6': [6, 7, 6],\n",
+    "            'step_7': [6, 8, 7],\n",
+    "            'step_8': [8, 8, 9],\n",
+    "            'step_9': [9, -1, 9],\n",
+    "            'step_10': [9, -1, -1],\n",
+    "            'step_11': [-1, -1, -1]}\n",
+    "\n",
     "        self.assertEqual(result, expected)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      ".F."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 1: idx0\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx1\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "caching user 3: idx3\n",
-      "caching user 1: idx3\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "caching user 2: idx5\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "caching user 3: idx6\n",
-      "caching user 1: idx6\n",
-      "user_progress:[8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "user_progress:[8, 9]\n",
-      "caching user 2: idx8\n",
-      "user_progress:[9]\n",
-      "user_progress:[9]\n",
-      "user_progress:[]\n",
-      "caching user 3: idx9\n",
-      "caching user 1: idx9\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "user_progress:[8, 9]\n",
-      "user_progress:[9]\n",
-      "user_progress:[]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "======================================================================\n",
-      "FAIL: test_nb_user_lg_than_v (__main__.DispatcherTest)\n",
-      "----------------------------------------------------------------------\n",
-      "Traceback (most recent call last):\n",
-      "  File \"<ipython-input-2-4ee9ee4ab55a>\", line 65, in test_nb_user_lg_than_v\n",
-      "    self.assertEqual(result, expected)\n",
-      "AssertionError: {'ste[68 chars] [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5,[136 chars] -1]} != {'ste[68 chars] [3, 3, 3], 'step_4': [3, 4, 4], 'step_5': [5,[135 chars] -1]}\n",
-      "Diff is 767 characters long. Set self.maxDiff to None to see it.\n",
-      "\n",
+      "...\n",
       "----------------------------------------------------------------------\n",
-      "Ran 3 tests in 0.010s\n",
+      "Ran 3 tests in 0.004s\n",
       "\n",
-      "FAILED (failures=1)\n"
+      "OK\n"
      ]
     }
    ],
@@ -195,84 +126,6 @@
     "ran = unittest.main(argv=['first-arg-is-ignored'], exit=False)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "```\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 1: idx0\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx1\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "caching user 3: idx3\n",
-    "caching user 1: idx3\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "caching user 2: idx5\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "caching user 3: idx6\n",
-    "caching user 1: idx6\n",
-    "user_progress:[8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "user_progress:[8, 9]\n",
-    "caching user 2: idx8\n",
-    "user_progress:[9]\n",
-    "user_progress:[9]\n",
-    "user_progress:[]\n",
-    "caching user 3: idx9\n",
-    "caching user 1: idx9\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "user_progress:[8, 9]\n",
-    "user_progress:[9]\n",
-    "user_progress:[]\n",
-    "\n",
-    "```"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,