R graphics

Corona Tweets 03282020

English

# Load the cleaned coronavirus tweet export and the stop-word list.
# NOTE(review): the working directory is hard-coded to a local drive; both
# fread() calls below resolve their relative paths against it.
setwd("F:/Coronavirus_Tweets")

library(data.table)
library(dplyr)

mm <- fread("03282020_Corona_Clean.csv")
dim(mm)
## [1] 770375     34

# Normalise the tweet text to lower case (updates the column in place).
mm[, text := tolower(text)]

# Keep only the rows whose language tag is English.
mm <- mm[lang == "en"]
dim(mm)
## [1] 422421     34

stopwords1 <- fread("stop-word-list.csv")
#### subset
# Keep the tweets whose text contains the substring "mental".
mm1 <- mm[grep("mental", mm$text), ]

# Narrow copy with a handful of columns; it is not referenced again in this
# section.
mm2 <- mm1[, c("created_at", "user_screen_name", "text", "hashtags",
               "retweet_count", "favorite_count", "lang")]
dim(mm1)
## [1] 2449   34

# The timestamp format contains a literal "+0000" offset, so the stamps are
# UTC. Parse them as UTC explicitly: without `tz`, as.POSIXct() interprets the
# clock time in the session's local zone, which shifts the instants and can
# yield NA for clock times that fall in a local DST gap.
# NOTE(review): %a and %b are matched against the current locale's day/month
# names; a non-English LC_TIME would produce NA here.
mm1$time1 <- as.POSIXct(mm1$created_at,
                        format = "%a %b %d %H:%M:%S +0000 %Y",
                        tz = "UTC")
mm1$hour <- hour(mm1$time1)
mm1$minute <- minute(mm1$time1)
# Take the calendar date in the same zone the stamps were parsed in.
mm1$Date <- as.Date(mm1$time1, tz = "UTC")

# Text plus hour of day is all the token analysis needs.
mm1a <- mm1[, c("text", "hour")]
library(tidytext)

# Split each tweet into one row per token, keeping the tweet's hour.
mm1b <- mm1a %>%
  unnest_tokens(word, text)

# Remove stop words. The join key is spelled out so the join cannot silently
# pick up any other column the two tables happen to share.
# NOTE(review): assumes the stop-word file's column is named "word" -- the
# implicit join this replaces needed that as well; confirm against the CSV.
mm1c <- mm1b %>%
  anti_join(stopwords1, by = "word")

# Number of occurrences of each token within each hour.
mm1d <- mm1c %>%
  group_by(hour, word) %>%
  summarize(count = n())

# Running total of each token's count across hours.
mm1f <- mm1d %>%
  group_by(word) %>%
  mutate(cum = cumsum(count))

# Drop the two search terms themselves.
mm1g <- subset(mm1f, word != "mental" & word != "health")

# NOTE(review): this keeps only the (hour, word) rows from the hour at which a
# token's running total exceeds 150 onwards, not every row of tokens whose
# overall total exceeds 150 -- confirm that this is the intended filter.
mm1e <- subset(mm1g, cum > 150)
library(dplyr)
library(streamgraph)

# Give the columns the names used in the streamgraph call below. Renaming by
# name (rather than by position) does not depend on column order and keeps the
# tibble's grouping metadata consistent with the new names.
# Note that "year" actually holds the hour of day.
mm1e <- mm1e %>%
  rename(year = hour, name = word, n = count)

### https://rud.is/b/2015/03/07/streamgraph-package-now-supports-continuous-x-axis-scale/
# Continuous x scale because the x values are plain integers, not dates;
# tick_format = "d" prints them as integers.
mm1e %>%
  streamgraph("name", "n", "year", scale = "continuous") %>%
  sg_axis_x(tick_format = "d")