-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathiter_edit_history_analysis.py
122 lines (105 loc) · 4.43 KB
/
iter_edit_history_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import json
# Location of the results file; it is read as one JSON document per line.
RESULTS_PATH = "edit_history/iter/results.json"

# Parse every non-blank line into a record.  Blank lines (for example a
# trailing newline at the end of the file) are skipped because json.loads("")
# raises JSONDecodeError, which would abort the script before any analysis.
# The file is streamed line by line instead of being materialised with
# readlines(), and decoded explicitly as UTF-8 so the result does not depend
# on the platform's default locale encoding.
with open(RESULTS_PATH, "r", encoding="utf-8") as f:
    all_results = [json.loads(line) for line in f if line.strip()]
# Lower-case substrings that is_bot() searches for (plain substring match)
# to decide whether a username or edit comment looks automated.
# NOTE(review): very short entries such as "ng" and "mos" also occur inside
# ordinary words and names -- confirm that this breadth is intended.
BOT_KEYWORDS = [
    "bot",
    "script",
    "wp:",
    "ng",
    "auto",
    "maintenance tag update",
    "awb",
    "mos",
    "assist",
]
def is_bot(text, keywords=None):
    """Return True if *text* contains any bot-indicating keyword.

    Matching is a case-insensitive substring test: *text* is lower-cased and
    each keyword is looked up inside it as-is, so keywords are expected to be
    lower-case already.

    Args:
        text: Username or edit comment to inspect.  ``None`` or any other
            non-string value is treated as "no match" instead of raising, so
            a JSON ``null`` field does not abort the caller.
        keywords: Optional iterable of lower-case substrings to look for.
            Defaults to the module-level ``BOT_KEYWORDS``.

    Returns:
        bool: True when at least one keyword occurs in *text*.
    """
    if not isinstance(text, str):
        return False
    if keywords is None:
        # Resolved at call time so later changes to BOT_KEYWORDS are seen.
        keywords = BOT_KEYWORDS
    lowered = text.lower()
    return any(keyword in lowered for keyword in keywords)
# Running tallies for the whole analysis; the script prints this dict as JSON
# once every record has been processed.
STATS = {
    # Incremented once for every record taken from the results list.
    "total_count": 0,
    # Records with an empty augmentation URL and no purposeful removals.
    # Besides "count", one extra key per distinct reason is added at run time
    # (key: reason the link is broken, value: number of records).
    "no-actions": {"count": 0},
    # Records that have an augmentation URL and a truthy "first" flag.
    "augmented_already": {
        "count": 0,
        # Last augmentation removal entry has "edit_meta_to" set to None.
        "aug-removed": 0,
    },
    # Records that have an augmentation URL and no truthy "first" flag,
    # split by who made the edit according to is_bot().
    "augmented": {
        "count": 0,
        "bot": 0,  # username matches a bot keyword
        "human": 0,  # neither username nor comment matches
        "mixed": 0,  # only the edit comment matches
        "aug-removed": 0,
    },
    # Removal/revert events: "link-count" is per record, "occurance-count"
    # is per entry of the record's "remove-revert" list.
    "remove-revert": {
        "link-count": 0,
        "occurance-count": 0,
        "bot": 0,
        "human": 0,
        "mixed": 0,
    },
    # Records with a non-empty "remove-purposely" list.
    "remove-purposely": {
        "count": 0,
        # Subset whose "==External links==" value is truthy.
        "==External links==": 0,
        # Disabled finer-grained breakdown, kept for reference:
        #   "content"  -> edit_history/iter/remove_purposely_content.json
        #   "format"   -> edit_history/iter/remove_purposely_format.json
        #   "link-rot" -> edit_history/iter/remove_purposely_link_rot.json,
        #                 itself split into "==External links==" and "other"
    },
}
# Start every run with empty per-category dump files: the processing loop
# only ever appends to them, so content from a previous run would otherwise
# accumulate.
for _dump_path in (
    "edit_history/iter/_no_actions.json",
    "edit_history/iter/_augmentation_by_mixed.json",
    "edit_history/iter/_augmentation_by_human.json",
    "edit_history/iter/_remove_purposely.json",
):
    # Opening in "w" mode creates the file or truncates it to zero length.
    with open(_dump_path, "w") as f:
        pass
def _append_record(path, record):
    """Append *record* to the file at *path* as a single JSON line."""
    with open(path, "a") as out:
        out.write(f"{json.dumps(record)}\n")


def _classify_editor(edit_meta):
    """Label an edit as "bot", "mixed" or "human" using is_bot().

    "bot" when the username matches a bot keyword; otherwise "mixed" when the
    edit comment matches; otherwise "human".
    """
    if is_bot(edit_meta.get("username", "")):
        return "bot"
    if is_bot(edit_meta.get("comment", "")):
        return "mixed"
    return "human"


def _augmentation_was_removed(augmentation):
    """Return True if the last augmentation removal has no "edit_meta_to"."""
    removals = augmentation["remove-purposely"]
    return removals != [] and removals[-1]["edit_meta_to"] is None


def _report_failure(index, error):
    """Write a one-line warning about a failed record to stderr."""
    import sys  # local import: only needed on the error path

    print(
        f"warning: processing of record {index} aborted: {error!r}",
        file=sys.stderr,
    )


# Walk over every record, update STATS and dump selected records to the
# per-category files.  A record may fall into several categories.
for record_index, link_obj in enumerate(all_results):
    try:
        STATS["total_count"] += 1
        augmentation = link_obj["augmentation"]
        aug_url = augmentation["augmentation_url"]

        # 1. Nothing happened to the link: tally the reason it is broken.
        if aug_url == "" and link_obj["remove-purposely"] == []:
            reason = link_obj["reason"]
            if isinstance(reason, list):
                # Only the first listed reason is counted.
                reason = reason[0]
            no_actions = STATS["no-actions"]
            no_actions[reason] = no_actions.get(reason, 0) + 1
            no_actions["count"] += 1
            _append_record("edit_history/iter/_no_actions.json", link_obj)

        if aug_url != "":
            if augmentation.get("first", False):
                # 2. Augmentation flagged "first".
                STATS["augmented_already"]["count"] += 1
                if _augmentation_was_removed(augmentation):
                    STATS["augmented_already"]["aug-removed"] += 1
            else:
                # 3. Augmentation added later: attribute it to an editor kind.
                editor_kind = _classify_editor(augmentation["edit_meta"])
                STATS["augmented"][editor_kind] += 1
                if editor_kind == "mixed":
                    _append_record(
                        "edit_history/iter/_augmentation_by_mixed.json",
                        link_obj,
                    )
                elif editor_kind == "human":
                    _append_record(
                        "edit_history/iter/_augmentation_by_human.json",
                        link_obj,
                    )
                STATS["augmented"]["count"] += 1
                if _augmentation_was_removed(augmentation):
                    STATS["augmented"]["aug-removed"] += 1

        # 4. Removals that were reverted: one tally per occurrence, plus one
        #    per link that has at least one occurrence.
        if link_obj["remove-revert"] != []:
            for rev_obj in link_obj["remove-revert"]:
                STATS["remove-revert"]["occurance-count"] += 1
                editor_kind = _classify_editor(rev_obj["edit_meta_to"])
                STATS["remove-revert"][editor_kind] += 1
            STATS["remove-revert"]["link-count"] += 1

        # 5. Purposeful removals.
        if link_obj["remove-purposely"] != []:
            STATS["remove-purposely"]["count"] += 1
            if link_obj["==External links=="]:
                STATS["remove-purposely"]["==External links=="] += 1
            # NOTE(review): assumed to apply to every purposely-removed
            # record, not only those under "==External links==" -- confirm.
            _append_record("edit_history/iter/_remove_purposely.json", link_obj)
    except Exception as e:
        # Best effort: a malformed record must not stop the whole run, but it
        # is reported instead of being silently dropped.  Counters updated
        # before the failure keep their new values.
        _report_failure(record_index, e)

# Final report on stdout; warnings above go to stderr so this stays pure JSON.
print(json.dumps(STATS, indent=4))