# English ----
# Load the tweet export, lower-case the tweet text, keep the rows tagged as
# English, and read the stop-word table used later for token filtering.
# The relative file names below are resolved against this working directory.
setwd("F:/Coronavirus_Tweets")
library(data.table)
library(dplyr)

mm <- fread("03282020_Corona_Clean.csv")
dim(mm)
## [1] 770375 34

# Lower-casing up front lets the later keyword search use a lower-case pattern.
mm[, text := tolower(text)]

# which() discards NA comparisons, so rows with a missing `lang` are dropped.
mm <- mm[which(mm$lang == "en"), ]
dim(mm)
## [1] 422421 34

stopwords1 <- fread("stop-word-list.csv")
#### subset
# Keep the tweets whose text contains the literal substring "mental", then
# derive hour / minute / date columns from the `created_at` stamp.
# `fixed = TRUE` because the pattern is a plain substring, not a regex.
mm1 <- mm[grepl("mental", mm$text, fixed = TRUE), ]
mm2 <- mm1[, c("created_at", "user_screen_name", "text", "hashtags", "retweet_count", "favorite_count", "lang")]
dim(mm1)
## [1] 2449 34

# The format string hard-codes a "+0000" offset, i.e. the stamps are UTC.
# Parsing with tz = "UTC" keeps them from being read as local wall-clock
# times (which yields NA inside a local DST gap) and keeps `hour`, `minute`
# and `Date` in the same zone.
# NOTE(review): %a / %b are matched against the session's LC_TIME locale;
# this assumes an English locale -- confirm on the machine that runs it.
mm1$time1 <- as.POSIXct(
  mm1$created_at,
  format = "%a %b %d %H:%M:%S +0000 %Y",
  tz = "UTC"
)
mm1$hour <- hour(mm1$time1)
mm1$minute <- minute(mm1$time1)
mm1$Date <- as.Date(mm1$time1, tz = "UTC")

# Only the text and its hour are needed for the per-hour word counts.
mm1a <- mm1[, c("text", "hour")]
library(tidytext)

# Break each tweet's text into tokens, one row per token in a `word` column,
# with the tweet's hour carried along.
mm1b <- unnest_tokens(mm1a, word, text)

# Remove tokens that have a match in the stop-word table. No `by` is given,
# so the join uses whichever column names the two tables have in common.
mm1c <- anti_join(mm1b, stopwords1)

# Number of occurrences of every word within every hour.
mm1d <- summarize(group_by(mm1c, hour, word), count = n())

# Running total of each word's counts, accumulated in row order within the
# word (presumably ascending hour, as produced by the summary above).
mm1f <- mutate(group_by(mm1d, word), cum = cumsum(count))

# The search term itself and its companion "health" are excluded.
mm1g <- subset(mm1f, word != "mental" & word != "health")

# Keep only the rows whose running total has passed 150.
mm1e <- subset(mm1g, cum > 150)
library(dplyr)
library(streamgraph)

# Relabel the first three columns of mm1e by position to the names that the
# streamgraph() call below refers to.
colnames(mm1e)[1:3] <- c("year", "name", "n")

### https://rud.is/b/2015/03/07/streamgraph-package-now-supports-continuous-x-axis-scale/
# Draw one stream per `name`, sized by `n`, along a continuous (non-date)
# x axis taken from `year`; the x ticks use the "d" format.
mm1e %>%
  streamgraph("name", "n", "year", scale = "continuous") %>%
  sg_axis_x(tick_format = "d")