---
title: "location_summary"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
### from Automate.R:
```{r}
library(tidyverse)
library(rtweet)
library(countrycode)
library(lubridate)
library(magrittr)
library(jsonlite)
library(streamR)
library(httr)
source("text_analysis_functions.R")
## DO NOT run the write.csv lines when making edits to the script ----
# (make a separate path in your own home folder on Aurora)
path <- '/home/shares/soilcarbon/Twitter' # LOCATION OF MASTER FILES
## READ PREVIOUS (MASTER) DATA ----
# twitter_merged.master <- read.csv(file.path(path, 'Merged_v3/twitter_merged_v3.csv'), stringsAsFactors = FALSE)
# twitter_merged_noRT.master <- read.csv(file.path(path, 'Merged_v3/twitter_merged_noRT_v3.csv'), stringsAsFactors = FALSE)
#
# # twitter_merged.master <- flag_india(twitter_merged.master) # one time fix (used 2019/09/06)
# # twitter_merged_noRT.master <- flag_india(twitter_merged_noRT.master) # one time fix (used 2019/09/06)
#
#
# ## **QUERY** TWITTER API FOR LAST 6-9 DAYS OF TWEET DATA ----
#
# # Create token
# twitter_token <- readRDS('twitter_token.rds')
#
# # Import tag_list.csv (this contains the words used in the Twitter search query)
# tag_file <- read.csv('tag_list.csv', stringsAsFactors = FALSE)
#
# # Create a character vector of tags from tag_list.csv
# tag_list <- as.character(tag_file$tag_list)
#
# # Put quotes around each element of tag_list for the Twitter API search below
# q <- unname(sapply(tag_list, function(x) toString(dQuote(x))))
#
# # Search tweets with the query above (THIS CODE SEARCHES TWITTER FOR TERMS LISTED IN tag_list OVER THE LAST 6-9 DAYS)
# twitterAPI_new <- search_tweets2(q, n = 100000, retryonratelimit = TRUE)
#
#
#
#
# ## CLEAN NEW DATA ----
#
# # Make it a data frame
# twitterAPI_new <- as.data.frame(twitterAPI_new, stringsAsFactors = FALSE)
```
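The quoting step above wraps each tag in double quotes so that multi-word tags are matched as exact phrases by `search_tweets2()`. A minimal sketch with hypothetical stand-in tags (the real ones live in `tag_list.csv`); `q = FALSE` is used here only so `dQuote()` emits straight quotes regardless of the session's `useFancyQuotes` option:
```{r}
# Hypothetical stand-ins for the tags in tag_list.csv
tag_list_demo <- c("soil carbon", "regenerative agriculture", "soil health")
# Same transformation as the commented query-building step above
q_demo <- unname(sapply(tag_list_demo, function(x) toString(dQuote(x, q = FALSE))))
q_demo
```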
These chunks inspect the location-related fields of `twitterAPI_new`; because that object is only created by the commented-out query above, they are not evaluated when the document is knit.
```{r, eval=FALSE}
unique(twitterAPI_new$geo_coords)
```
```{r, eval=FALSE}
unique(twitterAPI_new$coords_coords)
```
```{r, eval=FALSE}
length(unique(twitterAPI_new$bbox_coords))
```
```{r, eval=FALSE}
unique(twitterAPI_new$location)
```
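Broadly, these four fields carry progressively coarser location signals: `geo_coords` is an exact point from the tweet's geo object, `coords_coords` a point from the coordinates object, `bbox_coords` the bounding box of the tagged place, and `location` the free-text location from the user's profile. A hedged sketch for counting how many tweets carry each signal, assuming `twitterAPI_new` exists with rtweet's standard list columns (not evaluated here):
```{r, eval=FALSE}
# Count tweets carrying each location signal; the coordinate list columns
# hold NA-filled numeric vectors when no coordinates are attached
twitterAPI_new %>%
  summarise(
    n_tweets      = n(),
    n_geo         = sum(purrr::map_lgl(geo_coords,    ~ any(!is.na(.x)))),
    n_coords      = sum(purrr::map_lgl(coords_coords, ~ any(!is.na(.x)))),
    n_bbox        = sum(purrr::map_lgl(bbox_coords,   ~ any(!is.na(.x)))),
    n_profile_loc = sum(!is.na(location) & location != "")
  )
```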
### from rw_data_proccessing.R:
```{r}
library(tidyverse)
library(jsonlite)
library(streamR)
library(devtools)
library(googledrive)
library(ndjson)
library(dplyr)
library(tidytext)
library(stringr)
library(rtweet)
library(rjson)
library(skimr)
library(janitor)
library(gtools)
library(magrittr)
library(data.table)
library(lubridate)
library(ids)
library(countrycode)
#
# ##### CONSTANTS ####
#
# # folder containing the data downloaded from the API
# dir_fix_tweet <- "./API_csv"
#
# ## Aurora path
# main_path <- "/home/shares/soilcarbon/Twitter"
#
#
#
# ########### I. READING_DATA #############################
#
# # 1/ Reading json(ARC)/API twitter archival datasets ####
#
# # a. Read in ARC #####
# snapp_twitterdata_raw <- stream_in(file.path(main_path,"twitter.json"))
#
# #' The \code{stream_in} and \code{stream_out} functions implement line-by-line processing
# #' of JSON data over a \code{\link{connection}}, such as a socket, url, file or pipe. JSON
# #' streaming requires the \href{http://ndjson.org}{ndjson} format, which slightly differs
# #' from \code{\link{fromJSON}} and \code{\link{toJSON}}, see details.
# #'
# #' Notes: twitter.json is referred to as the Archived (ARC) dataset in this script
# #' (1) Ensure the path points to the soil-carbon Twitter file
# #' (2) VERY LARGE DF: avoid viewing - 3480 columns, 96553 obs.
# #' (3) With these libraries attached, stream_in() resolves to ndjson::stream_in(),
# #' which accepts a file path (jsonlite::stream_in() requires a connection)
#
# ## remove rows where body is NA
# snapp_twitterdata <- snapp_twitterdata_raw %>%
# filter(!is.na(body))
```
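For reference, `stream_in()` expects ndjson: one complete JSON object per line. A minimal runnable sketch of the same read-then-filter pattern, calling `jsonlite::stream_in()` explicitly on a two-line in-memory sample (hypothetical records, not the project's twitter.json):
```{r}
# Hypothetical two-record ndjson sample
demo_lines <- c('{"id": 1, "body": "soil carbon tweet"}',
                '{"id": 2, "body": null}')
demo_raw <- jsonlite::stream_in(textConnection(demo_lines), verbose = FALSE)
# Same cleaning step as above: keep only rows with a non-missing body
demo_raw %>% filter(!is.na(body))
```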
Like the API inspection chunks above, this depends on the commented-out read, so it is not evaluated on knit.
```{r, eval=FALSE}
names(snapp_twitterdata)
```