R graphics

Corona Tweets 03282020

English

# Point the session at the folder holding the tweet data. The CSV files read
# later in this script are given as bare file names, so they resolve against
# this directory.
# NOTE(review): this is a hard-coded absolute path on drive F:, so the script
# only runs unchanged on a machine with the same layout.
setwd("F:/Coronavirus_Tweets")

library(data.table)
library(tidytext)
library(ggplot2)
library(dplyr)


# Load the cleaned tweet export (one row per tweet) and inspect its shape.
mm <- fread("03282020_Corona_Clean.csv")
dim(mm)
## [1] 770375     34
names(mm)
##  [1] "coordinates"                "created_at"                
##  [3] "hashtags"                   "media"                     
##  [5] "urls"                       "favorite_count"            
##  [7] "id"                         "in_reply_to_screen_name"   
##  [9] "in_reply_to_status_id"      "in_reply_to_user_id"       
## [11] "lang"                       "place"                     
## [13] "possibly_sensitive"         "retweet_count"             
## [15] "reweet_id"                  "retweet_screen_name"       
## [17] "source"                     "text"                      
## [19] "tweet_url"                  "user_created_at"           
## [21] "user_screen_name"           "user_default_profile_image"
## [23] "user_description"           "user_favourites_count"     
## [25] "user_followers_count"       "user_friends_count"        
## [27] "user_listed_count"          "user_location"             
## [29] "user_name"                  "user_screen_name"          
## [31] "user_statuses_count"        "user_time_zone"            
## [33] "user_urls"                  "user_verified"
# NOTE(review): the listing above shows `user_screen_name` twice (positions 21
# and 30); duplicate column names may trip up some dplyr verbs -- confirm.

# Keep only tweets tagged as English. Filtering first means the lowercasing
# below touches only the rows that are retained, not the full table.
mm <- subset(mm, lang == "en")

# Normalise case of the tweet text on the filtered table.
mm$text <- tolower(mm$text)
dim(mm)
## [1] 422421     34

# Split each tweet into one row per token; the token goes in column `word`.
mm1 <- mm %>%
  unnest_tokens(word, text)


# Read the stop-word list used to drop uninformative tokens.
stopwords1 <- fread("stop-word-list.csv")


# Remove every token that appears in the stop-word list. The join key is
# spelled out so the match is always on the token column and never on some
# other column the two tables might happen to share; it also silences the
# "Joining, by = ..." message.
# NOTE(review): assumes the stop-word file exposes its terms in a column
# named `word` -- confirm against the CSV header.
mm1a <- mm1 %>%
  anti_join(stopwords1, by = "word")

# Tally how often each remaining token occurs, most frequent first. The table
# is computed once and reused for both the printed summary and the chart.
word_counts <- mm1a %>%
  count(word, sort = TRUE)

# Print the frequency table.
word_counts
## # A tibble: 633,794 x 2
##    word       n
##    <chr>  <int>
##  1 people 33543
##  2 amp    26183
##  3 cases  24058
##  4 new    23271
##  5 now    22806
##  6 more   21679
##  7 via    21305
##  8 trump  21018
##  9 out    19402
## 10 up     16656
## # ... with 633,784 more rows
# NOTE(review): `amp` is presumably residue of the HTML entity `&amp;` in the
# tweet text rather than a real word -- confirm and consider stripping it.

# Horizontal bar chart of tokens occurring more than 3000 times. Reordering
# `word` by `n` sorts the bars by frequency instead of alphabetically.
word_counts %>%
  filter(n > 3000) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  theme_bw() +
  xlab(NULL) +
  coord_flip()