Skip to content

Commit

Permalink
🛥 improve iteration speed
Browse files Browse the repository at this point in the history
  • Loading branch information
raynardj committed Feb 9, 2021
1 parent bff941f commit f67606d
Show file tree
Hide file tree
Showing 3 changed files with 287 additions and 12 deletions.
26 changes: 21 additions & 5 deletions langhuan/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ def __getitem__(self, user_id):
"""
if user_id in self.busy_by_user:
# read cache
print(f"caching user {user_id}: idx{self.busy_by_user[user_id]}")
return self.busy_by_user[user_id]

user = self.user_progress(user_id)
self.user_clear_progress(user_id)
user = self.user_progress(user_id)
try:
index = user[0]
self.after_get_update(user_id, index)
Expand Down Expand Up @@ -74,11 +75,18 @@ def finish_update(

def user_clear_progress(self, user_id):
user_progress = self.user_progress(user_id)

# new_progress = []
# for i in user_progress:
# if i > self.sent:
# new_progress.append(i)
# self.by_user[user_id] = new_progress
# print(f"user_progress:{self.by_user[user_id]}")
for i in user_progress:
if i <= self.sent:
user_progress.remove(i)
else:
break
print(f"user_progress:{self.by_user[user_id]}")
print(f"user_progress:{user_progress}")

def tick_sent(self, index):
self.sent = index
Expand All @@ -91,14 +99,20 @@ class Progress:
allowing multiple but limited number of users
working at the same progress, with limited tags
per entry of raw data
index is a generated incremental integer series
idx is the pandas index
"""

def __init__(
self,
progress_list: List[int],
progress_list: List[Union[int, str]],
cross_verify_num: int = 1,
history_length: int = 20,
):
"""
progress_list: List[int], a list of pandas index (idx)
reordered by order strategy
"""
self.progress_list = progress_list
self.history_length = history_length
self.v_num = cross_verify_num
Expand Down Expand Up @@ -140,6 +154,8 @@ def tagging(self, data):
def update_personal(self, data):
"""
update data to personal history
This history is for showing history lines on
the left side of the web browser
"""
user_id = data["user_id"]
personal_history = self.personal_history.get(user_id)
Expand All @@ -153,4 +169,4 @@ def update_personal(self, data):
len(personal_history) - self.history_length:]
else:
self.personal_history[user_id] = []
self.update_personal(data)
self.update_personal(data)
89 changes: 89 additions & 0 deletions ner_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,95 @@
"app.run(\"0.0.0.0\", port=5000)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Corporation\\[email protected]\\t\\t\\tComputer Systems Division\\n\"The only thing that really scares me is a person with no sense of humor.\"\\n\\t\\t\\t\\t\\t\\t-- Jonathan Winters\\n'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[3][\"text\"][652:]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"from langhuan.utility import cleanup_tags"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(\"˃˃˃\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(\"˂˂˂\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Harris Corporation\\[email protected]\\t\\t\\tComputer Systems Division\\n\"The only thing that really scares me is a person with no sense of humor.\"\\n\\t\\t\\t\\t\\t\\t-- Jonathan Winters\\n'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cleanup_tags(df.loc[3][\"text\"])[652:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
Loading

0 comments on commit f67606d

Please sign in to comment.