# gdeltr2.R
# Example queries against the GDELT full-text API using the gdeltr2 package.
# (Web-page residue that preceded the script -- notification/fork banner,
# "Copy path" header, file-size summary and the 1-169 line-number gutter --
# was not part of the program and has been dropped.)
library(gdeltr2)
# Search terms ----
# Free-text search terms, grouped by topic. An inner pair of double quotes
# is part of the term text itself (the terms are passed on verbatim).
sports_terms <- c(
  '"Brooklyn Nets"',
  "Caris LeVert",
  '"Kyrie Irving" Trade',
  '"Luka Doncic"',
  'NBA "Draft Prospect"',
  '"Jarrett Allen"'
)

# NOTE(review): "City Counsel" may be a typo for "City Council" -- confirm
# with the author before changing, since it alters what is searched for.
political_terms <- c(
  '"Bill Perkins"',
  '"New York City" "City Counsel"'
)

finance_real_estate_terms <- c(
  "Eastdil",
  "Condo Bubble",
  '"JBG Smith"',
  '"CPPIB"',
  "Anbang",
  "WeWork",
  '"Goldman Sachs"',
  'Blackstone "Real Estate"'
)

other_terms <- c(
  "Supergoop",
  '"LNG"',
  'Maryland "High School Football"',
  '"Jared Kushner"',
  '"Eddie Huang"'
)

# Every topic group concatenated, in the order sports, politics, finance /
# real estate, other.
my_terms <- c(
  sports_terms,
  political_terms,
  finance_real_estate_terms,
  other_terms
)
# Domains ----
# Web domains of interest, grouped by topic.
news_domains <- c(
  "nypost.com",
  "washingtonpost.com",
  "wsj.com",
  "gothamgazette.com"
)

sports_domains <- c(
  "espn.com",
  "netsdaily.com"
)

finance_real_estate_domains <- c(
  "realdeal.com",
  "zerohedge.com",
  "institutionalinvestor.com",
  "pionline.com",
  "curbed.com",
  "archdaily.com"
)

random_domains <- c(
  "tmz.com",
  "snopes.com",
  "alphr.com",
  "oilprice.com"
)

# Every domain group concatenated, in the order news, sports, finance /
# real estate, random.
my_domains <- c(
  news_domains,
  sports_domains,
  finance_real_estate_domains,
  random_domains
)
# GKG codebook ----
# Fetch the "gkg" codebook once. The original script requested the same
# codebook twice in a row (once unqualified, once as gdeltr2::...), which
# only repeated the download and overwrote df_gkg with the same request.
df_gkg <- get_gdelt_codebook_ft_api(code_book = "gkg")

# View() opens a data viewer, so only call it when someone is there to look;
# this keeps non-interactive runs (e.g. Rscript) from stopping here.
if (interactive()) {
  View(df_gkg)
}
# GKG themes ----
# Hand-picked theme codes; the trailing notes are the author's description
# of what each code matches.
my_themes <- c(
  # stories about China's currency -- a good way to find stories about
  # China's economy
  "ECON_WORLDCURRENCIES_CHINESE_YUAN",
  "ECON_BUBBLE", # articles about economic bubbles
  "TAX_FNCACT_BROKER", # articles about brokers of things
  "ECON_HOUSING_PRICES", # articles about housing prices
  "ECON_BITCOIN", # articles about bitcoin
  "ELECTION_FRAUD", # articles about election fraud
  # articles about government buildings
  "SOC_POINTSOFINTEREST_GOVERNMENT_BUILDINGS",
  "WB_1277_BANKRUPTCY_AND_LIQUIDATION", # articles about bankruptcy
  # articles about pregnancy and child health
  "WB_639_REPRODUCTIVE_MATERNAL_AND_CHILD_HEALTH",
  "WB_2151_CHILD_DEVELOPMENT", # articles about child development
  "TAX_FNCACT_BUILDER" # articles about builders
)

# Add three theme codes drawn at random from the codebook's idGKGTheme
# column. The fixed seed makes the draw repeatable for a given codebook.
set.seed(1234)

# Base-R extraction replaces `df_gkg %>% pull(idGKGTheme) %>% sample(3)`:
# this script only attaches gdeltr2, so it should not depend on `%>%` and
# `pull()` happening to be on the search path. The sampled values are the
# same because sample() receives the same vector and size.
random_themes <- sample(df_gkg[["idGKGTheme"]], 3)

my_themes <- c(my_themes, random_themes)
# Image OCR terms ----
# Strings passed as `images_ocr` in the API calls further down.
my_ocr <- c(
  "Brooklyn Nets", "Panerai", "Four Seasons", "NBA", "Goldman Sachs",
  "Philadelphia Eagles", "Supergoop", "Boston Celtics", "Big Baller Brand",
  "BBB", "Boston Properties"
)
# Image-tag codebook ----
# Fetch the "imagetags" codebook for browsing the available tag values.
df_imagetags <- get_gdelt_codebook_ft_api(code_book = "imagetags")

# View() opens a data viewer, so only call it when someone is there to look;
# this keeps non-interactive runs (e.g. Rscript) from stopping here.
if (interactive()) {
  View(df_imagetags)
}
# Image tags ----
# Tags passed as `images_tag` in the API calls further down; the notes are
# the author's description of what each tag looks for.
my_image_tags <- c(
  "Toy Poodle",           # toy poodles specifically
  "poodle",               # any form of poodle
  "commercial building",  # an office building
  "basketball player",    # a professional basketball player
  "supermodel"            # take a guess :)
)
# Image-web codebook ----
# Fetch the "imageweb" codebook for browsing the available tag values.
df_imageweb <- get_gdelt_codebook_ft_api(code_book = "imageweb")

# View() opens a data viewer, so only call it when someone is there to look;
# this keeps non-interactive runs (e.g. Rscript) from stopping here.
if (interactive()) {
  View(df_imageweb)
}
# Image web tags ----
# Strings passed as `images_web_tag` in the API calls further down.
my_image_web <- c(
  "Jared Kushner", "Empire State Building", "Serena Williams",
  "New York City", "Ivanka Trump", "Tesla Model 3", "Jeremy Lin", "NBA",
  "Brooklyn Nets"
)
# Timespan string passed as `timespans` in the "Artlist" call further down.
my_timespan <- "5 days"
# Country codebook ----
# Fetch the "countries" codebook. It is only inspected here; nothing later
# in this script reads df_countries.
df_countries <- get_gdelt_codebook_ft_api(code_book = "countries")

# View() opens a data viewer, so only call it when someone is there to look;
# this keeps non-interactive runs (e.g. Rscript) from stopping here.
if (interactive()) {
  View(df_countries)
}
# Layout settings handed to the API calls as `trelliscope_parameters`:
# one row, two columns, and no explicit path.
my_trelliscope_parameters <- list(rows = 1, columns = 2, path = NULL)
# Article list ----
# Query every term, domain, image tag, OCR string and theme defined above
# in "Artlist" mode, using the `my_timespan` window and the shared layout.
# NOTE(review): the result is not assigned, so it is only auto-printed when
# run at top level -- presumably it renders a trelliscope display; confirm
# against the gdeltr2 documentation.
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = "Artlist",
  timespans = my_timespan,
  trelliscope_parameters = my_trelliscope_parameters
)
# A bare `trelliscopeImage` token used to follow this call. No object of
# that name is defined in this script, so evaluating it stopped the run with
# an "object not found" error; it has been removed.
# Timeline volume ----
# Same inputs as the article-list query, but in "TimelineVolInfo" mode with
# a "12 weeks" timespan (note: this does not use `my_timespan`).
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = "TimelineVolInfo",
  timespans = "12 weeks",
  trelliscope_parameters = my_trelliscope_parameters
)
# A bare `trelliscopeHighcharter` token used to follow this call. No object
# of that name is defined in this script, so evaluating it stopped the run
# with an "object not found" error; it has been removed.
# Word clouds ----
# Same inputs again, requesting the four word-cloud modes with a "2 weeks"
# timespan and a one-row, one-column layout instead of the shared one.
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = c(
    "WordCloudEnglish",
    "WordCloudTheme",
    "WordCloudImageTags",
    "WordCloudImageWebTags"
  ),
  timespans = "2 weeks",
  trelliscope_parameters = list(rows = 1, columns = 1, path = NULL)
)
# A bare `trelliscopeWordcloud` token used to follow this call. No object of
# that name is defined in this script, so evaluating it stopped the run with
# an "object not found" error; it has been removed.

# Clear the session's `viewer` option.
# NOTE(review): this runs after all three queries above, so it can only
# affect output rendered later in the session -- confirm whether it was
# meant to come before the calls.
options(viewer = NULL)