Load the required packages

library(twitteR); library(tidyverse)
## -- Attaching packages ------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()   masks stats::filter()
## x dplyr::id()       masks twitteR::id()
## x dplyr::lag()      masks stats::lag()
## x dplyr::location() masks twitteR::location()
library(lubridate);library(tidytext)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## [1] "Using direct authentication"

Search #GE2020

# Query Twitter for up to 2000 English-language tweets tagged #GE2020,
# restricted by the geocode string passed to searchTwitter().
tw <- searchTwitter(
  "#GE2020",
  n = 2000,
  lang = "en",
  geocode = "1.290270,103.851959,30km"
)
# Flatten the returned status list into a data frame and derive a
# calendar-date column from each tweet's `created` timestamp.
ge <- mutate(twListToDF(tw), Date = date(created))

Break the data into media and public

# Screen names treated as media outlets for this analysis.
media <- c(
  "sgelection", "STcom", "asiaonecom", "BusinessTimes", "ChannelNewsAsia"
)
# "Not in" operator: the logical negation of %in%.
`%ni%` <- Negate(`%in%`)
# Split the tweets by whether the author is one of the listed media accounts.
ge_media <- filter(ge, screenName %in% media)
ge_public <- filter(ge, screenName %ni% media)

Visualize count of tweets by screenName (top 10)

# Rank non-media accounts by number of tweets and keep the most active ones.
# count(sort = TRUE) orders rows by descending n; top_n(10) without `wt`
# selects on the last column (n) and keeps ties, so more than ten rows can
# be returned when accounts tie at the cutoff.
top10users <- ge_public %>%
  count(screenName, sort = TRUE) %>%
  top_n(10) %>%
  ungroup()
## Selecting by n
# Bar chart of tweet counts per account. fct_inorder() keeps the bars in the
# row order of top10users instead of alphabetical order.
ggplot(top10users, aes(x = fct_inorder(screenName), y = n)) +
  geom_col() +
  labs(title = "#GE2020 Top 10 tweeters as at 25 June") +
  theme(axis.text.x = element_text(angle = 45))

Convert ge_public tweets into a tibble

# Raw tweet text of the non-media accounts.
tweets_text <- ge_public$text
length(tweets_text)
## [1] 1476
# One row per tweet. The line id is derived from the data rather than a
# hard-coded count, so the tibble still builds when the search returns a
# different number of tweets.
tweets_tbl <- tibble(line = seq_along(tweets_text), text = tweets_text)
# Custom stop-word list for Twitter text, tagged with its own lexicon label.
twitter_stop_words <- tibble(
  word = c("https", "t.co", "rt", "amp", "rstats", "gt"),
  lexicon = "twitter"
)

The public opinion of GE2020 (trigram)

# Tokenise every tweet into overlapping three-word sequences.
tri_po <- tweets_tbl %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3)
# Split each trigram into one column per word so words can be filtered
# individually.
trigrams_separated <- tri_po %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ")
# Drop any trigram containing a general stop word or a Twitter-specific one.
# Both filters are applied in a single chain: starting the second filter from
# trigrams_separated again would throw away the stop_words filtering.
trigrams_filtered <- trigrams_separated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  filter(
    !word1 %in% twitter_stop_words$word,
    !word2 %in% twitter_stop_words$word,
    !word3 %in% twitter_stop_words$word
  )
# Count the filtered trigrams and show the 50 most frequent
trigrams_filtered %>%
  count(word1, word2, word3, sort = TRUE) %>%
  print(n = 50)
## # A tibble: 3,991 x 4
##    word1           word2      word3          n
##    <chr>           <chr>      <chr>      <int>
##  1 lee             hsien      yang         280
##  2 progress        singapore  party        262
##  3 channelnewsasia just       in           259
##  4 opposition      progress   singapore    250
##  5 singapore       pm         lee          240
##  6 lee             hsien      loong        220
##  7 pm              lee        hsien        217
##  8 brother         of         singapore    216
##  9 hsien           loong      joins        216
## 10 hsien           yang       brother      216
## 11 in              lee        hsien        216
## 12 joins           opposition progress     216
## 13 just            in         lee          216
## 14 loong           joins      opposition   216
## 15 of              singapore  pm           216
## 16 yang            brother    of           216
## 17 day             will       be           162
## 18 on              july       10           113
## 19 continues       this       general      111
## 20 election        make       your         111
## 21 general         election   make         111
## 22 how             our        walk         111
## 23 make            your       vote         111
## 24 members         share      how          111
## 25 our             party      members      111
## 26 our             walk       with         111
## 27 party           members    share        111
## 28 share           how        our          111
## 29 singapore       continues  this         111
## 30 this            general    election     111
## 31 vote            count      ge2020       111
## 32 walk            with       singapore    111
## 33 with            singapore  continues    111
## 34 your            vote       count        111
## 35 wpsg            our        party        110
## 36 day             on         july          98
## 37 polling         day        on            98
## 38 10              as         general       95
## 39 30              ge2020     singapore     95
## 40 as              general    election      95
## 41 be              june       30            95
## 42 breaking        polling    day           95
## 43 called          in         singapore     95
## 44 election        is         called        95
## 45 general         election   is            95
## 46 in              singapore  nomination    95
## 47 is              called     in            95
## 48 july            10         as            95
## 49 june            30         ge2020        95
## 50 nomination      day        will          95
## # ... with 3,941 more rows

Mr Lee Hsien Yang has 60 more mentions than PM Lee Hsien Loong (280 vs 220)

Progress Singapore Party is the most mentioned political party