Team:
Shyam Kumar Voleti
Sudhanshu Sharma
Nikhil Dandapanthula
Xueqing Wang
Priyanka Bodepudi

Load packages

#rm(list = ls())

# load twitter library
library(rtweet)
#Plotting library
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
#data processing/cleaning/pipelining library
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# text mining library
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.5.1
# plotting packages
library(igraph)
## Warning: package 'igraph' was built under R version 3.5.1
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(ggraph)
## Warning: package 'ggraph' was built under R version 3.5.1
library("NLP")
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library("syuzhet")
## Warning: package 'syuzhet' was built under R version 3.5.1
## 
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
## 
##     get_tokens
library("tm")
## Warning: package 'tm' was built under R version 3.5.1
library("SnowballC")
library("stringi")
library("topicmodels")
## Warning: package 'topicmodels' was built under R version 3.5.1
library("ROAuth")
## Warning: package 'ROAuth' was built under R version 3.5.1
library(widyr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
## 
##     crossing
# Twitter app credentials ----
# SECURITY: API keys and access tokens must not be stored in source code.
# They are read from environment variables (e.g. set in ~/.Renviron):
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
# Any credentials that were previously committed in this file should be
# revoked and regenerated in the Twitter developer console.
appname <- "ANLY 545 Twitter data analytics"

## api key
key <- Sys.getenv("TWITTER_CONSUMER_KEY")

## api secret
secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")

# create token
twitter_token <- local({
  access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
  access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

  # Sys.getenv() returns "" for an unset variable, so fail early with a
  # message naming exactly which variables still need to be defined.
  credentials <- c(
    TWITTER_CONSUMER_KEY = key,
    TWITTER_CONSUMER_SECRET = secret,
    TWITTER_ACCESS_TOKEN = access_token,
    TWITTER_ACCESS_SECRET = access_secret
  )
  missing_vars <- names(credentials)[!nzchar(credentials)]
  if (length(missing_vars) > 0) {
    stop(
      "Missing Twitter credentials; set environment variable(s): ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  create_token(
    app = appname,
    consumer_key = key,
    consumer_secret = secret,
    access_token = access_token,
    access_secret = access_secret
  )
})

Read Tweets

library(dplyr)
library(tidyr)
# Request up to 500 tweets matching the #job hashtag, then preview three rows
job_tweets <- search_tweets(q = "#job", n = 500)
head(job_tweets, 3)
## # A tibble: 3 x 88
##   user_id status_id created_at          screen_name text  source
##   <chr>   <chr>     <dttm>              <chr>       <chr> <chr> 
## 1 791258~ 10524169~ 2018-10-17 04:31:56 RHFAJobs    This~ Caree~
## 2 464681~ 10524168~ 2018-10-17 04:31:47 weareteamt~ We'r~ Caree~
## 3 464681~ 10524023~ 2018-10-17 03:34:03 weareteamt~ Can ~ Caree~
## # ... with 82 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, hashtags <list>,
## #   symbols <list>, urls_url <list>, urls_t.co <list>,
## #   urls_expanded_url <list>, media_url <list>, media_t.co <list>,
## #   media_expanded_url <list>, media_type <list>, ext_media_url <list>,
## #   ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>,
## #   mentions_screen_name <list>, lang <chr>, quoted_status_id <chr>,
## #   quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## #   quoted_favorite_count <int>, quoted_retweet_count <int>,
## #   quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## #   quoted_followers_count <int>, quoted_friends_count <int>,
## #   quoted_statuses_count <int>, quoted_location <chr>,
## #   quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>,
## #   retweet_created_at <dttm>, retweet_source <chr>,
## #   retweet_favorite_count <int>, retweet_retweet_count <int>,
## #   retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>,
## #   country <chr>, country_code <chr>, geo_coords <list>,
## #   coords_coords <list>, bbox_coords <list>, status_url <chr>,
## #   name <chr>, location <chr>, description <chr>, url <chr>,
## #   protected <lgl>, followers_count <int>, friends_count <int>,
## #   listed_count <int>, statuses_count <int>, favourites_count <int>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <chr>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>
# Repeat the #job search with retweets excluded (overwrites job_tweets)
job_tweets <- search_tweets(q = "#job", include_rts = FALSE, n = 500)
# Preview the first two rows of the result
head(job_tweets, 2)
## # A tibble: 2 x 88
##   user_id status_id created_at          screen_name text  source
##   <chr>   <chr>     <dttm>              <chr>       <chr> <chr> 
## 1 791258~ 10524169~ 2018-10-17 04:31:56 RHFAJobs    This~ Caree~
## 2 464681~ 10524168~ 2018-10-17 04:31:47 weareteamt~ We'r~ Caree~
## # ... with 82 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, hashtags <list>,
## #   symbols <list>, urls_url <list>, urls_t.co <list>,
## #   urls_expanded_url <list>, media_url <list>, media_t.co <list>,
## #   media_expanded_url <list>, media_type <list>, ext_media_url <list>,
## #   ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>,
## #   mentions_screen_name <list>, lang <chr>, quoted_status_id <chr>,
## #   quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## #   quoted_favorite_count <int>, quoted_retweet_count <int>,
## #   quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## #   quoted_followers_count <int>, quoted_friends_count <int>,
## #   quoted_statuses_count <int>, quoted_location <chr>,
## #   quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>,
## #   retweet_created_at <dttm>, retweet_source <chr>,
## #   retweet_favorite_count <int>, retweet_retweet_count <int>,
## #   retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>,
## #   country <chr>, country_code <chr>, geo_coords <list>,
## #   coords_coords <list>, bbox_coords <list>, status_url <chr>,
## #   name <chr>, location <chr>, description <chr>, url <chr>,
## #   protected <lgl>, followers_count <int>, friends_count <int>,
## #   listed_count <int>, statuses_count <int>, favourites_count <int>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <chr>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>
# Peek at the first few account handles behind the collected tweets
head(job_tweets[["screen_name"]])
## [1] "RHFAJobs"       "weareteamtrump" "weareteamtrump" "weareteamtrump"
## [5] "USSANews"       "SanofiUS_Jobs"
# List each distinct account handle once
unique(job_tweets[["screen_name"]])
##   [1] "RHFAJobs"        "weareteamtrump"  "USSANews"       
##   [4] "SanofiUS_Jobs"   "dsgnjbs"         "EmploiAngouleme"
##   [7] "SpectrumCommLLC" "MultiCare_Jobs"  "ThermoFisherJob"
##  [10] "MercyJobs"       "EmploiLille"     "Emploi59"       
##  [13] "selly_zest"      "rgiscareers"     "ictjob_be"      
##  [16] "TNG_jobs"        "ishnae"          "luckysmktjobs"  
##  [19] "WalserCareers"   "jobely"          "JoeDRSHR"       
##  [22] "HVMGCareers"     "Trad_Spirits"    "joinphilipslght"
##  [25] "E2EJobs"         "MemorialCareJob" "PSJHCareers"    
##  [28] "ePlusJobs"       "MobileMiniJobs"  "dalgsllc"       
##  [31] "RRDJobs"         "SCLocumsJobs"    "GodivaJobs"     
##  [34] "careercast_jobs" "jcrew_hiring"    "CareerCastHlth" 
##  [37] "ChicosCareers"   "WHBMCareers"     "PacDenCareers"  
##  [40] "ChristinaFo"     "DTEEnergyJobs"   "CaspersCareers" 
##  [43] "WestmorelandCO"  "CROSSMARKJobs"   "OfficeTeamJobs" 
##  [46] "scoularcareers"  "3leads"          "CompassJobBoard"
##  [49] "HobbyLobbyJobs"  "julesdebene"     "ITJob_Columbus" 
##  [52] "JobSeineMarne"   "MatthewCraven12" "tmj_tha_jobs"   
##  [55] "PlastipakJobs"   "alleyagee"       "JoinCellular"   
##  [58] "tmj_inh_finance" "TheGioCetina"    "ngchunchit"     
##  [61] "KSV1870"         "wendymanganaro"  "AccountempsJobs"
##  [64] "RHTechJobs"      "PSJHCareersRN"   "tmj_rus_it"     
##  [67] "tmj_sgp_banking" "tmj_ich_manuf"   "tmj_hkg_banking"
##  [70] "TrojanRecruit"   "tmj_lka_jobs"    "tmj_mng_jobs"   
##  [73] "mcguirl"         "tmj_ide_green"   "EmploiLimoges"  
##  [76] "StanChartJobs"   "tmj_ich_banking" "tmj_inm_it"     
##  [79] "tmj_mys_banking" "PTGTCareers"     "tmj_pak_banking"
##  [82] "tmj_chs_jobs"    "tmj_inh_adv"     "tmj_inp_jobs"   
##  [85] "tmj_inm_itpm"    "tmj_mmr_jobs"    "tmj_ide_mgmt"   
##  [88] "lets_ace"        "weareddstep"     "tmj_uzb_jobs1"  
##  [91] "EmploiCoteArmor" "tmj_inh_eng"     "tmj_ide_jobs"   
##  [94] "franknaval"      "HlthcareJobsite" "tmj_rus_jobs"   
##  [97] "tmj_ich_finance" "tmj_vnm_banking" "PGJobs"         
## [100] "tmj_asi_jobs"    "tmj_inc_banking" "anselmbradford" 
## [103] "PMPConnect"      "tmj_inc_itpm"    "tmj_inb_jobs"   
## [106] "seanchiggins"    "tmj_inh_jobs"    "tmj_inm_jobs"   
## [109] "tmj_ich_eng"     "tmj_ich_legal"   "tmj_ich_jobs"   
## [112] "tmj_chs_pharm"   "tmj_hkg_jobs"    "tmj_ide_it"     
## [115] "tmj_inm_cler"    "tmj_inc_jobs"    "Rengineeringjob"
## [118] "tmj_ich_itjava"  "tmj_inb_finance" "tmj_inh_itdb"   
## [121] "WorkWithSHC"     "tmj_inm_legal"   "tmj_inb_eng"    
## [124] "tmj_inn_jobs"    "tmj_ing_jobs"    "tmj_ide_itqa"   
## [127] "tmj_inm_adv"     "tmj_phi_jobs"    "tmj_inh_mgmt"   
## [130] "tmj_inm_manuf"   "tmj_inh_banking" "tmj_ich_cler"   
## [133] "tmj_mys_jobs"    "JoeFranscella"   "tmj_inc_it"     
## [136] "tmj_tha_banking" "tmj_inh_itpm"    "EGonzalezHaas"  
## [139] "JJamin"          "tmj_ich_it"      "BarrazaChico"   
## [142] "winklerdaniel"   "tmj_vnm_jobs"    "tmj_ide_itdb"   
## [145] "tmj_inh_cstsrv"  "tmj_inm_mgmt"    "tmj_uae_green"  
## [148] "JCI_Jobs"        "JaySangra"       "nature_careers" 
## [151] "recruiter_sj"    "tmj_ide_acct"    "tmj_inm_hr"     
## [154] "tmj_ich_sales"   "tmj_brn_jobs"    "tmj_ich_mgmt"   
## [157] "tmj_kaz_jobs1"   "tmj_twn_jobs"    "tmj_rus_itqa"   
## [160] "tmj_inm_finance" "tmj_ide_media"   "tmj_inb_mgmt"   
## [163] "tmj_ich_hr"      "tmj_inh_sales"   "tmj_ide_hr"     
## [166] "tmj_inh_it"      "phogg96"         "tmj_ide_itpm"   
## [169] "tmj_chn_banking" "BakerHughesJobs" "tmj_ast_jobs"   
## [172] "tmj_chs_recruit" "tmj_inb_hr"      "MPajemolin"     
## [175] "cititrendsjobs"  "tmj_inm_banking" "tmj_ide_sales"  
## [178] "tmj_inm_writing" "MikeP_Reed"      "farmwork23"     
## [181] "tmj_inm_itqa"    "design2perform"  "EmploiMontpel"  
## [184] "EmploiHerault"   "JobsCaithness"   "tmj_nzw_jobs"   
## [187] "PNPersonnel"     "EmploiAisne"     "MarketSourceJob"
## [190] "Tyrex18943336"   "EmploiEpinal"    "tombdugan"      
## [193] "EmploiPau"       "ajstravlin"      "emploibelfort"  
## [196] "EmploiVendee"    "DelNorthCareers" "DanielWaite_NOW"
## [199] "EmploiAquitaine" "pherlha_aguilar" "AAIHS"          
## [202] "EmploiBourgogne" "masterartisantz" "RICHMEGAJOBS"   
## [205] "0crat"           "LiveRecruitment" "LauraGLiveRec"  
## [208] "FYTEasily"       "MortimerBell"    "NadiaLiveRec"   
## [211] "LipsonLloydJ"    "ManpowerSG"      "BarefootStudent"
## [214] "denisempratt"    "FourSeasonsJobs" "tmj_nzc_jobs"   
## [217] "karotex1"        "kunalism"        "immiultimate"   
## [220] "ADSWCareers"     "tmj_hon_green"   "blue_red_orange"
## [223] "RSchrishuhn"     "WDCourse"        "LocalWorkCa"    
## [226] "PSIPax"          "WayneArmstrong"  "tmj_syd_mgmt"   
## [229] "MH_Pigis"        "tmj_syd_sales"   "RivCoJobs"      
## [232] "tmj_mel_sales"   "tmj_mel_it"      "JobYamamon"     
## [235] "tmj_syd_cstsrv"  "Mariah_Ismail"   "tmj_inm_realest"
## [238] "RMSI_jobs"       "tmj_syd_finance" "tmj_mel_itdb"   
## [241] "voltsgjobs"      "ULG_Trades"      "tmj_mel_itpm"   
## [244] "tmj_syd_manuf"   "ultabeautyjobs"  "tmj_syd_it"     
## [247] "mashia6"         "DriveMelton"     "GoldenCorralJob"
## [250] "tmj_syd_legal"   "CoxPurtellJobs"  "BarSpire"       
## [253] "RAHomesJobs"     "tmj_mel_itqa"    "AssignRecruit"  
## [256] "ZurichNACareers" "tmj_mel_jobs"    "VanaVana2200000"
## [259] "LLU_Careers"     "tmj_mel_cstsrv"  "tmj_auc_jobs"   
## [262] "JimGiammatteo"   "CSGICareers"     "MyHubIntranet"  
## [265] "tmj_mnp_jobs"    "tmj_syd_recruit" "COC_Careers"    
## [268] "SimoneMahedy"    "gmtpeople"       "Job2Grow"       
## [271] "artisanupdates"  "tmj_syd_hr"      "tmj_syd_itpm"   
## [274] "Beth_Finger"     "tmj_mel_pharm"   "IQVIAcareers"   
## [277] "smcb03"          "tmj_syd_retail"  "tmj_syd_itdb"   
## [280] "tmj_syd_hrta"    "inzejob"         "t_rendezvous"   
## [283] "tmj_gum_jobs"    "PandaCareers"    "tmj_kor_jobs"   
## [286] "Bluisooner"      "WorkHendersonNV" "EdHuntr"        
## [289] "tmj_syd_cler"    "tmj_inh_itqa"    "ThePiagentini"  
## [292] "peoplebankjobs"  "attCAREERS"      "JobWindow_Jobs" 
## [295] "snaphuntjobs"    "musStiforp"      "shah_saharsh"   
## [298] "TicknerRobin"    "rachelebitte"    "danridesharleys"
## [301] "StuartGHazell"   "tmj_jpn_jobs"    "danispeck"      
## [304] "LMartin_TX"      "2CHRONICLES_714" "Ben_ServiceNow" 
## [307] "Lasvegasacs"     "SatansXwife"     "tmj_ide_itjava" 
## [310] "IamEdzM"         "majawashington"  "TinaLOwens"     
## [313] "NursempJobs"     "Zhaopin_com"     "langly511"      
## [316] "LisaJRamos"      "RitaBrue"        "Lee_James_FL"   
## [319] "ChristaMcCabe"   "EbohAjeroh"      "flowmotor"      
## [322] "mkhan004_"       "CCSF_Cyber_Club" "RecruitByMark"  
## [325] "fuzirbarry"      "c_cs"            "mj_kernan"      
## [328] "find_me_in_cali" "interviewgig"    "ZWDcom"         
## [331] "MktgJobForce"    "tmj_ndo_jobs1"   "IIS_Delivers"   
## [334] "ITJobs_IL"       "Mike_McCown"     "aciperski"      
## [337] "JesusValdesMX"   "matoysumayao"    "Kryokelt"       
## [340] "MarraBeppe"      "tmj_mdv_jobs"    "carpalwatch"    
## [343] "parityconsult"   "EssityCareers"   "JennBennetSE"   
## [346] "hopeobaker"      "thebeebles"      "lauraholliday"  
## [349] "h0v1k"           "tmj_inb_it"      "gym_guide"      
## [352] "alg_talent"      "vmorgangipson"   "p2pBianca"      
## [355] "tmj_inm_sales"   "tmj_HI_EDU"      "tmj_HI_ACCT"    
## [358] "tmj_mau_acct"    "Labs83"          "tmj_mau_retail"
# Look up accounts related to the #job topic. The query has to be a search
# string: "job_tweets" (the quoted name of the tweets data frame) would be
# sent to Twitter as literal text and match unrelated accounts.
users <- search_users("#job", n = 500)
## Searching for users...
## Finished collecting users!
# Bar chart of how many returned users report each profile location.
# `location` is mapped to the x aesthetic, so it takes the x label and the
# computed count takes the y label; coord_flip() only changes where each
# axis is drawn, not which label belongs to which aesthetic.
users %>%
  ggplot(aes(x = location)) +
  geom_bar() +
  coord_flip() +
  labs(
    x = "Location",
    y = "Count",
    title = "Twitter users "
  )

# Top 20 most common user locations, drawn as a horizontal bar chart.
# Missing locations are dropped BEFORE counting so they cannot occupy one of
# the top-20 slots. Blank strings are removed as well as NA, since na.omit()
# alone would keep them.
# NOTE(review): assumes unset profile locations arrive as "" rather than
# NA - confirm against the rtweet output.
users %>%
  dplyr::filter(!is.na(location), trimws(location) != "") %>%
  dplyr::count(location, sort = TRUE) %>%
  top_n(20) %>%
  # order the bars by frequency instead of alphabetically
  mutate(location = reorder(location, n)) %>%
  ggplot(aes(x = location, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Location",
    y = "Count",
    title = "Twitter users by City/Region  "
  )
## Selecting by n

# Strip URLs from the tweet text. The pattern matches only the link itself
# (http:// or https:// up to the next whitespace), so any words that follow a
# URL in the tweet are kept instead of being deleted along with it.
job_tweets$stripped_text <- gsub(
  "https?://[^[:space:]]+", "", job_tweets$text
)

# Split the cleaned text into one token (word) per row
job_tweets_clean <- job_tweets %>%
  dplyr::select(stripped_text) %>%
  unnest_tokens(word, stripped_text)

Sentiment Analysis

# Score every token with syuzhet's NRC sentiment function (one row per word)
mysentiment_job <- get_nrc_sentiment(job_tweets_clean$word)

# Total score per sentiment, stored as a two-column table
# (sentiment name, Score) with default row names
Sentimentscores_job <- data.frame(
  sentiment = colnames(mysentiment_job),
  Score = unname(colSums(mysentiment_job))
)

# Bar chart of the totals, one colour per sentiment, legend hidden
ggplot(
  data = Sentimentscores_job,
  mapping = aes(x = sentiment, y = Score, fill = sentiment)
) +
  geom_col() +
  labs(
    x = "Sentiments",
    y = "scores",
    title = "Sentiments of people behind the tweets on job"
  ) +
  theme(legend.position = "none")

From the above sentiment-analysis plot of tweets tagged #job, we can observe that people have a positive attitude towards their jobs and feel secure.

BossDay Twitter Sentiments, Oct 16th, 2018

# Request up to 500 English-language #bossday tweets, retweets excluded
bossDay_tweets <- search_tweets(
  q = "#bossday",
  n = 500,
  lang = "en",
  include_rts = FALSE
)

# Preview the raw text of the first few tweets
head(bossDay_tweets[["text"]])
## [1] "Who run the world? MOMS <U+0001F30D> Happy #BossDay Mamas! <U+0001F495><U+0001F931><U+0001F495> @Regran_ed from @itsAmandaAcosta <U+0001F4F8> #momlife #mommyblogger @ San Diego, California https://t.co/AWrGjn6Xla"       
## [2] "WHERE MY REAL FRIENDS AT?? I didn’t get any love on this #bossDay. Y’all know I’m a BOSS <U+0001F644>"                                                                                                                      
## [3] "Today is #NationalBossDay. We'd like to recognize our boss, Jim Marshall, the Director of @_911TI_. His leadership makes it a joy to serve at #911TI. #BossDay #boss #911dispatcher #911dispatchers https://t.co/APCey30vOa"
## [4] "You may find it hard to sort your boss, for everything else there is Kangaro. Bosses can be inspiring, charming, encouraging...Tag your most memorable boss till date.#BossDay https://t.co/6CR3Qqw8SW"                     
## [5] "Yo happy boss day to @maggiej_5! Thanks for believing in us and empowering us with the tools necessary to empower our people. You the champ that runs the camp! #bossday"                                                   
## [6] "#BossDay Thankful to be part of the Macy Family!  @MacyEagles #MacyEagles #LJSD #JoyfulLeaders https://t.co/BMIB7Jidae"

Data Clean Up

# Strip URLs from the tweet text. The pattern matches only the link itself
# (http:// or https:// up to the next whitespace), so any words that follow a
# URL in the tweet are kept instead of being deleted along with it.
bossDay_tweets$stripped_text <- gsub(
  "https?://[^[:space:]]+", "", bossDay_tweets$text
)

# Split the cleaned text into one token (word) per row
bossDay_tweets_clean <- bossDay_tweets %>%
  dplyr::select(stripped_text) %>%
  unnest_tokens(word, stripped_text)

Count of unique words

# Plot the 15 most frequent tokens (stop words still included).
# `word` is the x aesthetic and `n` the y aesthetic, so the labels are
# assigned accordingly; coord_flip() only turns the bars horizontal.
bossDay_tweets_clean %>%
  dplyr::count(word, sort = TRUE) %>%
  top_n(15) %>%
  # order the bars by frequency instead of alphabetically
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Unique words",
    y = "Count",
    title = "Count of unique words found in tweets"
  )
## Selecting by n

Count of Unique words after further cleaning

# Keep only the tokens that have no match in the stop_words table
bossDay_tweets_clean <- dplyr::anti_join(bossDay_tweets_clean, stop_words)
## Joining, by = "word"
# Horizontal bar chart of the 15 most frequent remaining tokens
bossDay_tweets_clean %>%
  dplyr::count(word, sort = TRUE) %>%
  top_n(15) %>%
  # order the bars by frequency instead of alphabetically
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  coord_flip() +
  xlab("Unique words") +
  ylab("Count") +
  ggtitle(
    "Count of unique words found in tweets",
    subtitle = "Stop words removed from the list"
  )
## Selecting by n

Network of Words

# Tokenise the URL-stripped tweet text into bigrams (pairs of adjacent words)
bossDay_tweets_paired_words <- unnest_tokens(
  dplyr::select(bossDay_tweets, stripped_text),
  paired_words,
  stripped_text,
  token = "ngrams",
  n = 2
)

# Show how often each bigram occurs, most frequent first
dplyr::count(bossDay_tweets_paired_words, paired_words, sort = TRUE)
## # A tibble: 7,838 x 2
##    paired_words      n
##    <chr>         <int>
##  1 thank you        78
##  2 day to           77
##  3 happy bossday    69
##  4 boss's day       64
##  5 to our           57
##  6 boss day         53
##  7 boss’s day       52
##  8 bossday to       46
##  9 you for          42
## 10 to all           40
## # ... with 7,828 more rows
# Split each bigram into its two component words
bossDay_tweets_separated_words <- tidyr::separate(
  bossDay_tweets_paired_words,
  paired_words,
  into = c("word1", "word2"),
  sep = " "
)

# Keep only the bigrams in which neither word is a stop word
bossDay_tweets_filtered <- dplyr::filter(
  bossDay_tweets_separated_words,
  !(word1 %in% stop_words$word | word2 %in% stop_words$word)
)

# Bigram frequencies after stop-word removal, most frequent first
bossDay_words_counts <- dplyr::count(
  bossDay_tweets_filtered,
  word1, word2,
  sort = TRUE
)

head(bossDay_words_counts)
## # A tibble: 6 x 3
##   word1   word2       n
##   <chr>   <chr>   <int>
## 1 happy   bossday    69
## 2 boss's  day        64
## 3 boss    day        53
## 4 boss’s  day        52
## 5 bossday happy      36
## 6 happy   boss’s     36

Plot BossDay word network

# Draw the bigram network: words are nodes, and an edge joins two words
# whose pair was counted at least 24 times. Edge opacity and width both
# scale with that count.
bossDay_words_counts %>%
  dplyr::filter(n >= 24) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = n, edge_width = n)) +
  geom_node_point(size = 3, color = "darkslategray4") +
  geom_node_text(aes(label = name), size = 3, vjust = 1.8) +
  labs(
    x = "",
    y = "",
    title = "Word Network: #bossday",
    subtitle = "Text mining twitter data "
  )