From b1eed8061dcd3155d100fdf7751d4a28d48019ef Mon Sep 17 00:00:00 2001 From: raynardj Date: Tue, 9 Feb 2021 23:14:51 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=81=20improve=20download=20result=20an?= =?UTF-8?q?d=20iteration=20speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- langhuan/progress.py | 17 +-- langhuan/tasks.py | 2 +- langhuan/version.py | 2 +- settings.ini | 2 +- tests/dispatcher_test.ipynb | 229 +++++++----------------------------- 5 files changed, 49 insertions(+), 203 deletions(-) diff --git a/langhuan/progress.py b/langhuan/progress.py index b978366..1b692bd 100644 --- a/langhuan/progress.py +++ b/langhuan/progress.py @@ -1,5 +1,5 @@ from typing import List, Callable, Union - +import logging class Dispatcher: def __init__(self, n, v): @@ -30,7 +30,6 @@ def __getitem__(self, user_id): """ if user_id in self.busy_by_user: # read cache - print(f"caching user {user_id}: idx{self.busy_by_user[user_id]}") return self.busy_by_user[user_id] self.user_clear_progress(user_id) @@ -76,17 +75,11 @@ def finish_update( def user_clear_progress(self, user_id): user_progress = self.user_progress(user_id) - # new_progress = [] - # for i in user_progress: - # if i > self.sent: - # new_progress.append(i) - # self.by_user[user_id] = new_progress - # print(f"user_progress:{self.by_user[user_id]}") + new_progress = [] for i in user_progress: - if i <= self.sent: - user_progress.remove(i) - print(f"user_progress:{self.by_user[user_id]}") - print(f"user_progress:{user_progress}") + if i > self.sent: + new_progress.append(i) + self.by_user[user_id] = new_progress def tick_sent(self, index): self.sent = index diff --git a/langhuan/tasks.py b/langhuan/tasks.py index 51916c8..9507f9b 100644 --- a/langhuan/tasks.py +++ b/langhuan/tasks.py @@ -177,7 +177,7 @@ def show_history_log(self, history): "label": history["label"]} def append_text_to_data(self, text_dict, data): - text_dict[data["index"]] = self.df.loc[data["pandas"], self.text_col] + text_dict[data["pandas"]] = self.df.loc[data["pandas"], self.text_col] return data def register_functions(self): diff --git a/langhuan/version.py b/langhuan/version.py index 00ec2dc..9b36b86 100644 --- a/langhuan/version.py +++ b/langhuan/version.py @@ -1 +1 @@ -__version__ = "0.0.9" +__version__ = "0.0.10" diff --git a/settings.ini b/settings.ini index 6b22d04..b99db52 100644 --- a/settings.ini +++ b/settings.ini @@ -6,7 +6,7 @@ keywords = python pandas label data science author = xiaochen(ray) zhang author_email = b2ray2c@gmail.com branch = main -version = 0.0.9 +version = 0.0.10 min_python = 3.6 audience = Developers language = English diff --git a/tests/dispatcher_test.ipynb b/tests/dispatcher_test.ipynb index f8e4f7f..b8a6233 100644 --- a/tests/dispatcher_test.ipynb +++ b/tests/dispatcher_test.ipynb @@ -5,6 +5,25 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "import logging" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "logger = logging.getLogger()\n", + "logger.setLevel(logging.DEBUG)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], "source": [ "import unittest\n", "from langhuan.progress import Dispatcher" @@ -12,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +64,7 @@ " result.update({f\"step_{i}\": [i1, i2, i3]})\n", " dispatcher.finish_update(1, index=i1)\n", " dispatcher.finish_update(3, index=i3)\n", - " \n", + "\n", " expected = {'step_0': [0, 0, 0],\n", " 'step_1': [1, 0, 1],\n", " 'step_2': [2, 0, 2],\n", @@ -60,6 +79,7 @@ " result = dict()\n", " for i in range(12):\n", " i1, i2, i3 = dispatcher[1], dispatcher[2], dispatcher[3]\n", + "# logging.debug(f\"{i1}, {i2}, {i3}\")\n", " result.update({f\"step_{i}\": [i1, i2, i3]})\n", " if i % 3 != 0:\n", " dispatcher.finish_update(1, index=i1)\n", @@ -67,127 +87,38 @@ " dispatcher.finish_update(2, index=i2)\n", " if i % 3 != 2:\n", " dispatcher.finish_update(3, index=i3)\n", - " print(result)\n", - " expected = {'step_0': [0, 0, 1],\n", - " 'step_1': [0, 1, 2],\n", - " 'step_2': [2, 1, 3],\n", - " 'step_3': [3, 3, 3],\n", - " 'step_4': [3, 4, 4],\n", - " 'step_5': [5, 4, 5],\n", - " 'step_6': [6, 6, 5],\n", - " 'step_7': [6, 7, 7],\n", - " 'step_8': [8, 7, 8],\n", - " 'step_9': [9, 9, 8],\n", - " 'step_10': [9, -1, -1],\n", - " 'step_11': [-1, -1, -1]}\n", + "\n", + " expected = {\n", + " 'step_0': [0, 0, 1],\n", + " 'step_1': [0, 1, 2],\n", + " 'step_2': [2, 1, 3],\n", + " 'step_3': [3, 4, 3],\n", + " 'step_4': [3, 5, 4],\n", + " 'step_5': [5, 5, 6],\n", + " 'step_6': [6, 7, 6],\n", + " 'step_7': [6, 8, 7],\n", + " 'step_8': [8, 8, 9],\n", + " 'step_9': [9, -1, 9],\n", + " 'step_10': [9, -1, -1],\n", + " 'step_11': [-1, -1, -1]}\n", + "\n", " self.assertEqual(result, expected)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - ".F." - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 1: idx0\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx1\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "caching user 3: idx3\n", - "caching user 1: idx3\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "caching user 2: idx5\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[7, 8, 9]\n", - "caching user 3: idx6\n", - "caching user 1: idx6\n", - "user_progress:[8, 9]\n", - "user_progress:[7, 8, 9]\n", - "user_progress:[8, 9]\n", - "caching user 2: idx8\n", - "user_progress:[9]\n", - "user_progress:[9]\n", - "user_progress:[]\n", - "caching user 3: idx9\n", - "caching user 1: idx9\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[7, 8, 9]\n", - "user_progress:[8, 9]\n", - "user_progress:[9]\n", - "user_progress:[]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "======================================================================\n", - "FAIL: test_nb_user_lg_than_v (__main__.DispatcherTest)\n", - "----------------------------------------------------------------------\n", - "Traceback (most recent call last):\n", - " File \"\", line 65, in test_nb_user_lg_than_v\n", - " self.assertEqual(result, expected)\n", - "AssertionError: {'ste[68 chars] [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5,[136 chars] -1]} != {'ste[68 chars] [3, 3, 3], 'step_4': [3, 4, 4], 'step_5': [5,[135 chars] -1]}\n", - "Diff is 767 characters long. Set self.maxDiff to None to see it.\n", - "\n", + "...\n", "----------------------------------------------------------------------\n", - "Ran 3 tests in 0.010s\n", + "Ran 3 tests in 0.004s\n", "\n", - "FAILED (failures=1)\n" + "OK\n" ] } ], @@ -195,84 +126,6 @@ "ran = unittest.main(argv=['first-arg-is-ignored'], exit=False)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "caching user 2: idx0\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 1: idx0\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "caching user 2: idx1\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "caching user 3: idx3\n", - "caching user 1: idx3\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "caching user 2: idx5\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[7, 8, 9]\n", - "caching user 3: idx6\n", - "caching user 1: idx6\n", - "user_progress:[8, 9]\n", - "user_progress:[7, 8, 9]\n", - "user_progress:[8, 9]\n", - "caching user 2: idx8\n", - "user_progress:[9]\n", - "user_progress:[9]\n", - "user_progress:[]\n", - "caching user 3: idx9\n", - "caching user 1: idx9\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "user_progress:[]\n", - "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n", - "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", - "user_progress:[4, 5, 6, 7, 8, 9]\n", - "user_progress:[5, 6, 7, 8, 9]\n", - "user_progress:[6, 7, 8, 9]\n", - "user_progress:[7, 8, 9]\n", - "user_progress:[8, 9]\n", - "user_progress:[9]\n", - "user_progress:[]\n", - "\n", - "```" - ] - }, { "cell_type": "code", "execution_count": null,