-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSummarizing_NMF_Top_Abstract.py
47 lines (34 loc) · 1.36 KB
/
Summarizing_NMF_Top_Abstract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pickle
from transformers import pipeline
import pandas as pd
from tqdm import tqdm
import numpy as np
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Summarize, for every NMF topic, the single abstract that carries the largest
# weight for that topic, and write the numbered summaries to a text file.
summarizer = pipeline("summarization", model="google/bigbird-pegasus-large-pubmed")

# NOTE(security): unpickling can execute arbitrary code. Only load a file that
# was produced by a trusted run of this project.
raw_text_file = "models/raw_texts.sav"
with open(raw_text_file, 'rb') as raw_text_handle:  # close the handle deterministically
    abstracts = pickle.load(raw_text_handle)

result_arrays = 'models/bi_trained_NMF_model_sklearn_res_array.npy'
topic_weights = np.load(result_arrays)
df_weights = pd.DataFrame(topic_weights)

# idxmax() works column by column: for each column it yields the row label
# holding the largest value. The loop below treats every column as one topic
# and uses the row label as a position in `abstracts`.
# NOTE(review): this assumes the rows of the saved array are in the same order
# as `abstracts` -- confirm against the script that produced both files.
maxValueIndex = df_weights.idxmax()
print(maxValueIndex)

summary_per_topic = []
for topic_number, top_abs_topic in enumerate(tqdm(maxValueIndex)):
    abstract = abstracts[top_abs_topic]
    # The pipeline returns a list with one dict per input; take the text of the first.
    summary_text = summarizer(abstract, max_length=100, min_length=5, do_sample=False)[0]['summary_text']
    summary_per_topic.append(f"{topic_number}: {summary_text}")
print(summary_per_topic)

# Explicit UTF-8 so a non-ASCII character in a generated summary cannot fail
# the write on platforms whose default locale encoding is narrower.
with open('results/NMF_topics.txt', 'w', encoding='utf-8') as f:
    # One summary per entry, each followed by a blank line.
    f.writelines("%s\n\n" % item for item in summary_per_topic)
# @misc{zaheer2021big,
# title={Big Bird: Transformers for Longer Sequences},
# author={Manzil Zaheer and Guru Guruganesh and Avinava Dubey and Joshua Ainslie and Chris Alberti and Santiago Ontanon and Philip Pham and Anirudh Ravula and Qifan Wang and Li Yang and Amr Ahmed},
# year={2021},
# eprint={2007.14062},
# archivePrefix={arXiv},
# primaryClass={cs.LG}
# }