diff --git a/langhuan/progress.py b/langhuan/progress.py index 2936d10..b978366 100644 --- a/langhuan/progress.py +++ b/langhuan/progress.py @@ -30,10 +30,11 @@ def __getitem__(self, user_id): """ if user_id in self.busy_by_user: # read cache + print(f"caching user {user_id}: idx{self.busy_by_user[user_id]}") return self.busy_by_user[user_id] - user = self.user_progress(user_id) self.user_clear_progress(user_id) + user = self.user_progress(user_id) try: index = user[0] self.after_get_update(user_id, index) @@ -74,11 +75,18 @@ def finish_update( def user_clear_progress(self, user_id): user_progress = self.user_progress(user_id) + + # new_progress = [] + # for i in user_progress: + # if i > self.sent: + # new_progress.append(i) + # self.by_user[user_id] = new_progress + # print(f"user_progress:{self.by_user[user_id]}") for i in user_progress: if i <= self.sent: user_progress.remove(i) - else: - break + print(f"user_progress:{self.by_user[user_id]}") + print(f"user_progress:{user_progress}") def tick_sent(self, index): self.sent = index @@ -91,14 +99,20 @@ class Progress: allowing multiple but limited number of users working a the same progress, with limited tags per entry of raw data + index is a generated incremental integer series + idx is the pandas index """ def __init__( self, - progress_list: List[int], + progress_list: List[Union[int, str]], cross_verify_num: int = 1, history_length: int = 20, ): + """ + progress_list: List[Union[int, str]], a list of pandas index (idx) + reordered by order strategy + """ self.progress_list = progress_list self.history_length = history_length self.v_num = cross_verify_num @@ -140,6 +154,8 @@ def tagging(self, data): def update_personal(self, data): """ update data to personal history + This history is for showing history lines on + the left side of web browser """ user_id = data["user_id"] personal_history = self.personal_history.get(user_id) @@ -153,4 +169,4 @@ def update_personal(self, data): len(personal_history) - 
self.history_length:] else: self.personal_history[user_id] = [] - self.update_personal(data) \ No newline at end of file + self.update_personal(data) diff --git a/ner_test.ipynb b/ner_test.ipynb index 543cbd9..7b3f938 100644 --- a/ner_test.ipynb +++ b/ner_test.ipynb @@ -130,6 +130,95 @@ "app.run(\"0.0.0.0\", port=5000)" ] }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Corporation\\njgreen@csd.harris.com\\t\\t\\tComputer Systems Division\\n\"The only thing that really scares me is a person with no sense of humor.\"\\n\\t\\t\\t\\t\\t\\t-- Jonathan Winters\\n'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[3][\"text\"][652:]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from langhuan.utility import cleanup_tags" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(\"˃˃˃\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(\"˂˂˂\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Harris Corporation\\njgreen@csd.harris.com\\t\\t\\tComputer Systems Division\\n\"The only thing that really scares me is a person with no sense of humor.\"\\n\\t\\t\\t\\t\\t\\t-- Jonathan Winters\\n'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleanup_tags(df.loc[3][\"text\"])[652:]" + ] + }, { "cell_type": "markdown", "metadata": {}, 
diff --git a/tests/dispatcher_test.ipynb b/tests/dispatcher_test.ipynb index 63887d4..f8e4f7f 100644 --- a/tests/dispatcher_test.ipynb +++ b/tests/dispatcher_test.ipynb @@ -2,18 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import unittest\n", - "\n", "from langhuan.progress import Dispatcher" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -46,6 +45,7 @@ " result.update({f\"step_{i}\": [i1, i2, i3]})\n", " dispatcher.finish_update(1, index=i1)\n", " dispatcher.finish_update(3, index=i3)\n", + " \n", " expected = {'step_0': [0, 0, 0],\n", " 'step_1': [1, 0, 1],\n", " 'step_2': [2, 0, 2],\n", @@ -67,6 +67,7 @@ " dispatcher.finish_update(2, index=i2)\n", " if i % 3 != 2:\n", " dispatcher.finish_update(3, index=i3)\n", + " print(result)\n", " expected = {'step_0': [0, 0, 1],\n", " 'step_1': [0, 1, 2],\n", " 'step_2': [2, 1, 3],\n", @@ -84,18 +85,109 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "...\n", + ".F." 
+ ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 1: idx0\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx1\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "caching user 3: idx3\n", + "caching user 1: idx3\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "caching user 2: idx5\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[7, 8, 9]\n", + "caching user 3: idx6\n", + "caching user 1: idx6\n", + "user_progress:[8, 9]\n", + "user_progress:[7, 8, 9]\n", + "user_progress:[8, 9]\n", + "caching user 2: idx8\n", + "user_progress:[9]\n", + "user_progress:[9]\n", + "user_progress:[]\n", + "caching user 3: idx9\n", + "caching user 1: idx9\n", + "user_progress:[]\n", + 
"user_progress:[]\n", + "user_progress:[]\n", + "user_progress:[]\n", + "user_progress:[]\n", + "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[7, 8, 9]\n", + "user_progress:[8, 9]\n", + "user_progress:[9]\n", + "user_progress:[]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "FAIL: test_nb_user_lg_than_v (__main__.DispatcherTest)\n", "----------------------------------------------------------------------\n", - "Ran 3 tests in 0.003s\n", + "Traceback (most recent call last):\n", + " File \"\", line 65, in test_nb_user_lg_than_v\n", + " self.assertEqual(result, expected)\n", + "AssertionError: {'ste[68 chars] [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5,[136 chars] -1]} != {'ste[68 chars] [3, 3, 3], 'step_4': [3, 4, 4], 'step_5': [5,[135 chars] -1]}\n", + "Diff is 767 characters long. 
Set self.maxDiff to None to see it.\n", "\n", - "OK\n" + "----------------------------------------------------------------------\n", + "Ran 3 tests in 0.010s\n", + "\n", + "FAILED (failures=1)\n" ] } ], @@ -103,6 +195,84 @@ "ran = unittest.main(argv=['first-arg-is-ignored'], exit=False)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "caching user 2: idx0\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 1: idx0\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "caching user 2: idx1\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "caching user 3: idx3\n", + "caching user 1: idx3\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "caching user 2: idx5\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[7, 8, 9]\n", + "caching user 3: idx6\n", + 
"caching user 1: idx6\n", + "user_progress:[8, 9]\n", + "user_progress:[7, 8, 9]\n", + "user_progress:[8, 9]\n", + "caching user 2: idx8\n", + "user_progress:[9]\n", + "user_progress:[9]\n", + "user_progress:[]\n", + "caching user 3: idx9\n", + "caching user 1: idx9\n", + "user_progress:[]\n", + "user_progress:[]\n", + "user_progress:[]\n", + "user_progress:[]\n", + "user_progress:[]\n", + "{'step_0': [0, 0, 1], 'step_1': [0, 1, 2], 'step_2': [2, 1, 3], 'step_3': [3, 4, 3], 'step_4': [3, 5, 4], 'step_5': [5, 5, 6], 'step_6': [6, 7, 6], 'step_7': [6, 8, 7], 'step_8': [8, 8, 9], 'step_9': [9, -1, 9], 'step_10': [9, -1, -1], 'step_11': [-1, -1, -1]}\n", + "user_progress:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[2, 3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[3, 4, 5, 6, 7, 8, 9]\n", + "user_progress:[4, 5, 6, 7, 8, 9]\n", + "user_progress:[5, 6, 7, 8, 9]\n", + "user_progress:[6, 7, 8, 9]\n", + "user_progress:[7, 8, 9]\n", + "user_progress:[8, 9]\n", + "user_progress:[9]\n", + "user_progress:[]\n", + "\n", + "```" + ] + }, { "cell_type": "code", "execution_count": null,