🚁 improve download result and iteration speed

raynardj · Feb 9, 2021 · b1eed80 · b1eed80
1 parent f67606d
commit b1eed80
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 203 deletions.
diff --git a/langhuan/progress.py b/langhuan/progress.py
@@ -1,5 +1,5 @@
 from typing import List, Callable, Union
-
+import logging
 
 class Dispatcher:
     def __init__(self, n, v):
@@ -30,7 +30,6 @@ def __getitem__(self, user_id):
         """
         if user_id in self.busy_by_user:
             # read cache
-            print(f"caching user {user_id}: idx{self.busy_by_user[user_id]}")
             return self.busy_by_user[user_id]
 
         self.user_clear_progress(user_id)
@@ -76,17 +75,11 @@ def finish_update(
     def user_clear_progress(self, user_id):
         user_progress = self.user_progress(user_id)
 
-        # new_progress = []
-        # for i in user_progress:
-        #     if i > self.sent:
-        #         new_progress.append(i)
-        # self.by_user[user_id] = new_progress
-        # print(f"user_progress:{self.by_user[user_id]}")
+        new_progress = []
         for i in user_progress:
-            if i <= self.sent:
-                user_progress.remove(i)
-        print(f"user_progress:{self.by_user[user_id]}")
-        print(f"user_progress:{user_progress}")
+            if i > self.sent:
+                new_progress.append(i)
+        self.by_user[user_id] = new_progress
 
     def tick_sent(self, index):
         self.sent = index

diff --git a/langhuan/tasks.py b/langhuan/tasks.py
@@ -177,7 +177,7 @@ def show_history_log(self, history):
                     "label": history["label"]}
 
     def append_text_to_data(self, text_dict, data):
-        text_dict[data["index"]] = self.df.loc[data["pandas"], self.text_col]
+        text_dict[data["pandas"]] = self.df.loc[data["pandas"], self.text_col]
         return data
 
     def register_functions(self):

diff --git a/langhuan/version.py b/langhuan/version.py
@@ -1 +1 @@
-__version__ = "0.0.9"
+__version__ = "0.0.10"
diff --git a/settings.ini b/settings.ini
@@ -6,7 +6,7 @@ keywords = python pandas label data science
 author = xiaochen(ray) zhang
 author_email = [email protected]
 branch = main
-version = 0.0.9
+version = 0.0.10
 min_python = 3.6
 audience = Developers
 language = English

diff --git a/tests/dispatcher_test.ipynb b/tests/dispatcher_test.ipynb
@@ -5,14 +5,33 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import logging"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.DEBUG)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import unittest\n",
     "from langhuan.progress import Dispatcher"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,7 +64,7 @@
     "            result.update({f\"step_{i}\": [i1, i2, i3]})\n",
     "            dispatcher.finish_update(1, index=i1)\n",
     "            dispatcher.finish_update(3, index=i3)\n",
-    "        \n",
+    "\n",
     "        expected = {'step_0': [0, 0, 0],\n",
     "                    'step_1': [1, 0, 1],\n",
     "                    'step_2': [2, 0, 2],\n",
@@ -60,219 +79,53 @@
     "        result = dict()\n",
     "        for i in range(12):\n",
     "            i1, i2, i3 = dispatcher[1], dispatcher[2], dispatcher[3]\n",
+    "#             logging.debug(f\"{i1}, {i2}, {i3}\")\n",
     "            result.update({f\"step_{i}\": [i1, i2, i3]})\n",
     "            if i % 3 != 0:\n",
     "                dispatcher.finish_update(1, index=i1)\n",
     "            if i % 3 != 1:\n",
     "                dispatcher.finish_update(2, index=i2)\n",
     "            if i % 3 != 2:\n",
     "                dispatcher.finish_update(3, index=i3)\n",
-    "        print(result)\n",
-    "        expected = {'step_0': [0, 0, 1],\n",
-    "                    'step_1': [0, 1, 2],\n",
-    "                    'step_2': [2, 1, 3],\n",
-    "                    'step_3': [3, 3, 3],\n",
-    "                    'step_4': [3, 4, 4],\n",
-    "                    'step_5': [5, 4, 5],\n",
-    "                    'step_6': [6, 6, 5],\n",
-    "                    'step_7': [6, 7, 7],\n",
-    "                    'step_8': [8, 7, 8],\n",
-    "                    'step_9': [9, 9, 8],\n",
-    "                    'step_10': [9, -1, -1],\n",
-    "                    'step_11': [-1, -1, -1]}\n",
+    "\n",
+    "        expected = {\n",
+    "            'step_0': [0, 0, 1],\n",
+    "            'step_1': [0, 1, 2],\n",
+    "            'step_2': [2, 1, 3],\n",
+    "            'step_3': [3, 4, 3],\n",
+    "            'step_4': [3, 5, 4],\n",
+    "            'step_5': [5, 5, 6],\n",
+    "            'step_6': [6, 7, 6],\n",
+    "            'step_7': [6, 8, 7],\n",
+    "            'step_8': [8, 8, 9],\n",
+    "            'step_9': [9, -1, 9],\n",
+    "            'step_10': [9, -1, -1],\n",
+    "            'step_11': [-1, -1, -1]}\n",
+    "\n",
     "        self.assertEqual(result, expected)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      ".F."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "caching user 2: idx0\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 1: idx0\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "caching user 2: idx1\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "caching user 3: idx3\n",
-      "caching user 1: idx3\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "caching user 2: idx5\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "caching user 3: idx6\n",
-      "caching user 1: idx6\n",
-      "user_progress:[8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "user_progress:[8, 9]\n",
-      "caching user 2: idx8\n",
-      "user_progress:[9]\n",
-      "user_progress:[9]\n",
-      "user_progress:[]\n",
-      "caching user 3: idx9\n",
-      "caching user 1: idx9\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "user_progress:[]\n",
-      "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n",
-      "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[4, 5, 6, 7, 8, 9]\n",
-      "user_progress:[5, 6, 7, 8, 9]\n",
-      "user_progress:[6, 7, 8, 9]\n",
-      "user_progress:[7, 8, 9]\n",
-      "user_progress:[8, 9]\n",
-      "user_progress:[9]\n",
-      "user_progress:[]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "======================================================================\n",
-      "FAIL: test_nb_user_lg_than_v (__main__.DispatcherTest)\n",
-      "----------------------------------------------------------------------\n",
-      "Traceback (most recent call last):\n",
-      "  File \"<ipython-input-2-4ee9ee4ab55a>\", line 65, in test_nb_user_lg_than_v\n",
-      "    self.assertEqual(result, expected)\n",
-      "AssertionError: {'ste[68 chars] [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5,[136 chars] -1]} != {'ste[68 chars] [3, 3, 3], 'step_4': [3, 4, 4], 'step_5': [5,[135 chars] -1]}\n",
-      "Diff is 767 characters long. Set self.maxDiff to None to see it.\n",
-      "\n",
+      "...\n",
       "----------------------------------------------------------------------\n",
-      "Ran 3 tests in 0.010s\n",
+      "Ran 3 tests in 0.004s\n",
       "\n",
-      "FAILED (failures=1)\n"
+      "OK\n"
      ]
     }
    ],
    "source": [
     "ran = unittest.main(argv=['first-arg-is-ignored'], exit=False)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "```\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "caching user 2: idx0\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 1: idx0\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "caching user 2: idx1\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "caching user 3: idx3\n",
-    "caching user 1: idx3\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "caching user 2: idx5\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "caching user 3: idx6\n",
-    "caching user 1: idx6\n",
-    "user_progress:[8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "user_progress:[8, 9]\n",
-    "caching user 2: idx8\n",
-    "user_progress:[9]\n",
-    "user_progress:[9]\n",
-    "user_progress:[]\n",
-    "caching user 3: idx9\n",
-    "caching user 1: idx9\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "user_progress:[]\n",
-    "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n",
-    "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[3, 4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[4, 5, 6, 7, 8, 9]\n",
-    "user_progress:[5, 6, 7, 8, 9]\n",
-    "user_progress:[6, 7, 8, 9]\n",
-    "user_progress:[7, 8, 9]\n",
-    "user_progress:[8, 9]\n",
-    "user_progress:[9]\n",
-    "user_progress:[]\n",
-    "\n",
-    "```"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,