-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdissertation.py
94 lines (73 loc) · 2.7 KB
/
dissertation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import tweepy
import re
import json
import random
from time import sleep
import sqlite3 as lite
import datetime, time, os, sys
import argparse, ConfigParser
# Load the Twitter credentials from the [twitterdissertation] section of
# config.cnf so that no secret is hard-coded in this script.
Config = ConfigParser.ConfigParser()
Config.read('config.cnf')
(consumer_key,
 consumer_secret,
 access_token,
 access_token_secret) = [
    Config.get('twitterdissertation', option)
    for option in ('consumer_key', 'consumer_secret',
                   'access_token', 'access_token_secret')
]

# Build the OAuth handler from the consumer pair, then attach the access pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Client used for every status update below; both of tweepy's rate-limit
# options (wait and notify) are switched on.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)
# Read the whole dissertation, trim surrounding whitespace and decode the
# bytes as UTF-8 so the tokenizer receives unicode text.
with open('tweetabledissertation.md') as f:
    raw_text = f.read()
diss = raw_text.strip().decode('utf8')
import nltk.data
from nltk import word_tokenize
# Sentence splitter loaded from NLTK's English Punkt tokenizer pickle.
sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
# <codecell>
# One entry per sentence; the posting loop walks this list in order.
diss_sent = sent_detector.tokenize(diss)
# <codecell>
#tw = "The Public Impact of Latin America's Approach to Open Access, by Juan Pablo Alperin"
#api.update_status(tw)
# <codecell>
# Longest tweet body that is ever posted. TWEET_SUFFIX is appended to every
# body, so a posted status is at most 138 characters long (the original code
# kept bodies under 137 characters, presumably to stay inside Twitter's
# 140-character limit -- confirm if the limit changes).
MAX_TWEET_BODY = 136
TWEET_SUFFIX = "*^"


def split_into_tweets(sentence, max_len=MAX_TWEET_BODY):
    """Greedily pack the space-separated tokens of *sentence* into chunks.

    Tokens are joined with single spaces and a new chunk is started whenever
    adding the next token would push the current chunk past *max_len*.

    Compared with the inline loop this replaces:
      * a chunk never starts with a stray space (the old code built
        "%s %s" from an empty string for the first token of each sentence);
      * an empty chunk is never produced, so a bare suffix is never posted;
      * a token longer than *max_len* is skipped up front instead of being
        accepted and then lost later.

    Args:
        sentence: the text to split (byte string or unicode string).
        max_len: maximum length of each returned chunk, measured with len().

    Returns:
        A list of chunks in order; empty when nothing in *sentence* fits.
    """
    chunks = []
    current = ''
    for token in sentence.split(' '):
        if len(token) > max_len:
            # A single token that cannot fit in any tweet is dropped.
            continue
        if current:
            candidate = current + ' ' + token
        else:
            candidate = token
        if len(candidate) <= max_len:
            current = candidate
        else:
            # `current` is non-empty here: an empty `current` makes
            # candidate == token, which always fits.
            chunks.append(current)
            current = token
    if current:
        chunks.append(current)
    return chunks


def post_tweet(text, failure_note):
    """Post *text* plus TWEET_SUFFIX through the module-level `api` client.

    A tweepy.TweepError is reported (the error, *failure_note* and a
    timestamp are printed) and then swallowed so the run carries on with the
    next tweet. The one exception is api_code 261, which ends the script with
    exit status 1.
    NOTE(review): 261 looks like Twitter's "application cannot perform write
    actions" code -- confirm against the API error-code list.
    """
    try:
        api.update_status(text + TWEET_SUFFIX)
    except tweepy.TweepError as error:
        print(error)
        print(failure_note)
        print(datetime.datetime.now().isoformat())
        if error.api_code == 261:
            # sys.exit rather than the interactive-only exit() helper.
            sys.exit(1)


def pause_between_tweets():
    """Sleep for a random 2-4 minutes so the posts are spaced out."""
    time.sleep(random.randrange(2*60, 4*60))


# Guarded so the helpers above can be imported without posting anything;
# running this file as a script behaves as before.
if __name__ == '__main__':
    # Index of the first sentence to post; raise it to resume an interrupted
    # run from the value printed by the KeyboardInterrupt handler.
    i = 0
    try:
        for sent in diss_sent[i:]:
            chunks = split_into_tweets(sent.strip().encode('utf8'))
            # NOTE(review): the source had lost its indentation; the pause is
            # assumed to follow every post attempt, not only failed ones.
            for chunk in chunks[:-1]:
                post_tweet(chunk, "at sentence: %s" % i)
                pause_between_tweets()
            # Counted before the final chunk goes out, as in the original, so
            # an interrupt during the following pause resumes at the next
            # sentence.
            i = i + 1
            if chunks:
                post_tweet(chunks[-1], "at sentence (end of loop): %s" % i)
            pause_between_tweets()
            if datetime.datetime.now().hour == 4:
                # even bots go to bed
                api.update_status('Time for bed! See you in 6 hours folks: %s' % datetime.datetime.now().isoformat())
                time.sleep(60*60*6)
    except KeyboardInterrupt:
        print(datetime.datetime.now().isoformat())
        print("interrupted at: " + str(i))
        raise