Team:

Shyam Kumar Voleti

Sudhanshu Sharma

Nikhil Dandapanthula

Xueqing Wang

Priyanka Bodepudi

Load packages

#rm(list = ls())

# load twitter library
library(rtweet)
#Plotting library
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.1

#data processing/cleaning/pipelining library
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.1

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# text mining library
library(tidytext)

## Warning: package 'tidytext' was built under R version 3.5.1

# plotting packages
library(igraph)

## Warning: package 'igraph' was built under R version 3.5.1

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(ggraph)

## Warning: package 'ggraph' was built under R version 3.5.1

library("NLP")

## 
## Attaching package: 'NLP'

## The following object is masked from 'package:ggplot2':
## 
##     annotate

library("syuzhet")

## Warning: package 'syuzhet' was built under R version 3.5.1

## 
## Attaching package: 'syuzhet'

## The following object is masked from 'package:rtweet':
## 
##     get_tokens

library("tm")

## Warning: package 'tm' was built under R version 3.5.1

library("SnowballC")
library("stringi")
library("topicmodels")

## Warning: package 'topicmodels' was built under R version 3.5.1

library("ROAuth")

## Warning: package 'ROAuth' was built under R version 3.5.1

library(widyr)
library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:igraph':
## 
##     crossing

# Twitter app credentials ----
# Secrets are read from environment variables (for example, set in
# ~/.Renviron) instead of being hard-coded, so they are not published together
# with the report. Credentials that were previously stored in this file in
# plain text should be revoked and regenerated.
appname <- "ANLY 545 Twitter data analytics"

## api key
key <- Sys.getenv("TWITTER_CONSUMER_KEY")

## api secret
secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")

## access token and access secret
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Stop early with a clear message when a credential is missing; Sys.getenv()
# returns "" for a variable that is not set.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = key,
  TWITTER_CONSUMER_SECRET = secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credentials; set environment variable(s): ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

# create token
twitter_token <- create_token(
  app = appname,
  consumer_key = key,
  consumer_secret = secret,
  access_token = access_token,
  access_secret = access_secret
)

Read Tweets

# Data-wrangling packages used by the steps below
library(dplyr)
library(tidyr)

# Request up to 500 tweets matching the "#job" query
job_tweets <- search_tweets(q = "#job", n = 500)

# Preview the first three rows
head(job_tweets, n = 3)

## # A tibble: 3 x 88
##   user_id status_id created_at          screen_name text  source
##   <chr>   <chr>     <dttm>              <chr>       <chr> <chr> 
## 1 791258~ 10524169~ 2018-10-17 04:31:56 RHFAJobs    This~ Caree~
## 2 464681~ 10524168~ 2018-10-17 04:31:47 weareteamt~ We'r~ Caree~
## 3 464681~ 10524023~ 2018-10-17 03:34:03 weareteamt~ Can ~ Caree~
## # ... with 82 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, hashtags <list>,
## #   symbols <list>, urls_url <list>, urls_t.co <list>,
## #   urls_expanded_url <list>, media_url <list>, media_t.co <list>,
## #   media_expanded_url <list>, media_type <list>, ext_media_url <list>,
## #   ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>,
## #   mentions_screen_name <list>, lang <chr>, quoted_status_id <chr>,
## #   quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## #   quoted_favorite_count <int>, quoted_retweet_count <int>,
## #   quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## #   quoted_followers_count <int>, quoted_friends_count <int>,
## #   quoted_statuses_count <int>, quoted_location <chr>,
## #   quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>,
## #   retweet_created_at <dttm>, retweet_source <chr>,
## #   retweet_favorite_count <int>, retweet_retweet_count <int>,
## #   retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>,
## #   country <chr>, country_code <chr>, geo_coords <list>,
## #   coords_coords <list>, bbox_coords <list>, status_url <chr>,
## #   name <chr>, location <chr>, description <chr>, url <chr>,
## #   protected <lgl>, followers_count <int>, friends_count <int>,
## #   listed_count <int>, statuses_count <int>, favourites_count <int>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <chr>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>

# Repeat the "#job" search, this time excluding retweets
# (include_rts = FALSE); this replaces the earlier job_tweets result
job_tweets <- search_tweets(
  "#job",
  n = 500,
  include_rts = FALSE
)

# Preview the first two rows
head(job_tweets, n = 2)

## # A tibble: 2 x 88
##   user_id status_id created_at          screen_name text  source
##   <chr>   <chr>     <dttm>              <chr>       <chr> <chr> 
## 1 791258~ 10524169~ 2018-10-17 04:31:56 RHFAJobs    This~ Caree~
## 2 464681~ 10524168~ 2018-10-17 04:31:47 weareteamt~ We'r~ Caree~
## # ... with 82 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <int>, retweet_count <int>, hashtags <list>,
## #   symbols <list>, urls_url <list>, urls_t.co <list>,
## #   urls_expanded_url <list>, media_url <list>, media_t.co <list>,
## #   media_expanded_url <list>, media_type <list>, ext_media_url <list>,
## #   ext_media_t.co <list>, ext_media_expanded_url <list>,
## #   ext_media_type <chr>, mentions_user_id <list>,
## #   mentions_screen_name <list>, lang <chr>, quoted_status_id <chr>,
## #   quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## #   quoted_favorite_count <int>, quoted_retweet_count <int>,
## #   quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## #   quoted_followers_count <int>, quoted_friends_count <int>,
## #   quoted_statuses_count <int>, quoted_location <chr>,
## #   quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>,
## #   retweet_created_at <dttm>, retweet_source <chr>,
## #   retweet_favorite_count <int>, retweet_retweet_count <int>,
## #   retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <int>, retweet_friends_count <int>,
## #   retweet_statuses_count <int>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>,
## #   country <chr>, country_code <chr>, geo_coords <list>,
## #   coords_coords <list>, bbox_coords <list>, status_url <chr>,
## #   name <chr>, location <chr>, description <chr>, url <chr>,
## #   protected <lgl>, followers_count <int>, friends_count <int>,
## #   listed_count <int>, statuses_count <int>, favourites_count <int>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <chr>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>

# Show the first few screen names of the accounts behind the tweets
head(job_tweets[["screen_name"]])

## [1] "RHFAJobs"       "weareteamtrump" "weareteamtrump" "weareteamtrump"
## [5] "USSANews"       "SanofiUS_Jobs"

# List every distinct screen name, in order of first appearance
unique(job_tweets[["screen_name"]])

##   [1] "RHFAJobs"        "weareteamtrump"  "USSANews"       
##   [4] "SanofiUS_Jobs"   "dsgnjbs"         "EmploiAngouleme"
##   [7] "SpectrumCommLLC" "MultiCare_Jobs"  "ThermoFisherJob"
##  [10] "MercyJobs"       "EmploiLille"     "Emploi59"       
##  [13] "selly_zest"      "rgiscareers"     "ictjob_be"      
##  [16] "TNG_jobs"        "ishnae"          "luckysmktjobs"  
##  [19] "WalserCareers"   "jobely"          "JoeDRSHR"       
##  [22] "HVMGCareers"     "Trad_Spirits"    "joinphilipslght"
##  [25] "E2EJobs"         "MemorialCareJob" "PSJHCareers"    
##  [28] "ePlusJobs"       "MobileMiniJobs"  "dalgsllc"       
##  [31] "RRDJobs"         "SCLocumsJobs"    "GodivaJobs"     
##  [34] "careercast_jobs" "jcrew_hiring"    "CareerCastHlth" 
##  [37] "ChicosCareers"   "WHBMCareers"     "PacDenCareers"  
##  [40] "ChristinaFo"     "DTEEnergyJobs"   "CaspersCareers" 
##  [43] "WestmorelandCO"  "CROSSMARKJobs"   "OfficeTeamJobs" 
##  [46] "scoularcareers"  "3leads"          "CompassJobBoard"
##  [49] "HobbyLobbyJobs"  "julesdebene"     "ITJob_Columbus" 
##  [52] "JobSeineMarne"   "MatthewCraven12" "tmj_tha_jobs"   
##  [55] "PlastipakJobs"   "alleyagee"       "JoinCellular"   
##  [58] "tmj_inh_finance" "TheGioCetina"    "ngchunchit"     
##  [61] "KSV1870"         "wendymanganaro"  "AccountempsJobs"
##  [64] "RHTechJobs"      "PSJHCareersRN"   "tmj_rus_it"     
##  [67] "tmj_sgp_banking" "tmj_ich_manuf"   "tmj_hkg_banking"
##  [70] "TrojanRecruit"   "tmj_lka_jobs"    "tmj_mng_jobs"   
##  [73] "mcguirl"         "tmj_ide_green"   "EmploiLimoges"  
##  [76] "StanChartJobs"   "tmj_ich_banking" "tmj_inm_it"     
##  [79] "tmj_mys_banking" "PTGTCareers"     "tmj_pak_banking"
##  [82] "tmj_chs_jobs"    "tmj_inh_adv"     "tmj_inp_jobs"   
##  [85] "tmj_inm_itpm"    "tmj_mmr_jobs"    "tmj_ide_mgmt"   
##  [88] "lets_ace"        "weareddstep"     "tmj_uzb_jobs1"  
##  [91] "EmploiCoteArmor" "tmj_inh_eng"     "tmj_ide_jobs"   
##  [94] "franknaval"      "HlthcareJobsite" "tmj_rus_jobs"   
##  [97] "tmj_ich_finance" "tmj_vnm_banking" "PGJobs"         
## [100] "tmj_asi_jobs"    "tmj_inc_banking" "anselmbradford" 
## [103] "PMPConnect"      "tmj_inc_itpm"    "tmj_inb_jobs"   
## [106] "seanchiggins"    "tmj_inh_jobs"    "tmj_inm_jobs"   
## [109] "tmj_ich_eng"     "tmj_ich_legal"   "tmj_ich_jobs"   
## [112] "tmj_chs_pharm"   "tmj_hkg_jobs"    "tmj_ide_it"     
## [115] "tmj_inm_cler"    "tmj_inc_jobs"    "Rengineeringjob"
## [118] "tmj_ich_itjava"  "tmj_inb_finance" "tmj_inh_itdb"   
## [121] "WorkWithSHC"     "tmj_inm_legal"   "tmj_inb_eng"    
## [124] "tmj_inn_jobs"    "tmj_ing_jobs"    "tmj_ide_itqa"   
## [127] "tmj_inm_adv"     "tmj_phi_jobs"    "tmj_inh_mgmt"   
## [130] "tmj_inm_manuf"   "tmj_inh_banking" "tmj_ich_cler"   
## [133] "tmj_mys_jobs"    "JoeFranscella"   "tmj_inc_it"     
## [136] "tmj_tha_banking" "tmj_inh_itpm"    "EGonzalezHaas"  
## [139] "JJamin"          "tmj_ich_it"      "BarrazaChico"   
## [142] "winklerdaniel"   "tmj_vnm_jobs"    "tmj_ide_itdb"   
## [145] "tmj_inh_cstsrv"  "tmj_inm_mgmt"    "tmj_uae_green"  
## [148] "JCI_Jobs"        "JaySangra"       "nature_careers" 
## [151] "recruiter_sj"    "tmj_ide_acct"    "tmj_inm_hr"     
## [154] "tmj_ich_sales"   "tmj_brn_jobs"    "tmj_ich_mgmt"   
## [157] "tmj_kaz_jobs1"   "tmj_twn_jobs"    "tmj_rus_itqa"   
## [160] "tmj_inm_finance" "tmj_ide_media"   "tmj_inb_mgmt"   
## [163] "tmj_ich_hr"      "tmj_inh_sales"   "tmj_ide_hr"     
## [166] "tmj_inh_it"      "phogg96"         "tmj_ide_itpm"   
## [169] "tmj_chn_banking" "BakerHughesJobs" "tmj_ast_jobs"   
## [172] "tmj_chs_recruit" "tmj_inb_hr"      "MPajemolin"     
## [175] "cititrendsjobs"  "tmj_inm_banking" "tmj_ide_sales"  
## [178] "tmj_inm_writing" "MikeP_Reed"      "farmwork23"     
## [181] "tmj_inm_itqa"    "design2perform"  "EmploiMontpel"  
## [184] "EmploiHerault"   "JobsCaithness"   "tmj_nzw_jobs"   
## [187] "PNPersonnel"     "EmploiAisne"     "MarketSourceJob"
## [190] "Tyrex18943336"   "EmploiEpinal"    "tombdugan"      
## [193] "EmploiPau"       "ajstravlin"      "emploibelfort"  
## [196] "EmploiVendee"    "DelNorthCareers" "DanielWaite_NOW"
## [199] "EmploiAquitaine" "pherlha_aguilar" "AAIHS"          
## [202] "EmploiBourgogne" "masterartisantz" "RICHMEGAJOBS"   
## [205] "0crat"           "LiveRecruitment" "LauraGLiveRec"  
## [208] "FYTEasily"       "MortimerBell"    "NadiaLiveRec"   
## [211] "LipsonLloydJ"    "ManpowerSG"      "BarefootStudent"
## [214] "denisempratt"    "FourSeasonsJobs" "tmj_nzc_jobs"   
## [217] "karotex1"        "kunalism"        "immiultimate"   
## [220] "ADSWCareers"     "tmj_hon_green"   "blue_red_orange"
## [223] "RSchrishuhn"     "WDCourse"        "LocalWorkCa"    
## [226] "PSIPax"          "WayneArmstrong"  "tmj_syd_mgmt"   
## [229] "MH_Pigis"        "tmj_syd_sales"   "RivCoJobs"      
## [232] "tmj_mel_sales"   "tmj_mel_it"      "JobYamamon"     
## [235] "tmj_syd_cstsrv"  "Mariah_Ismail"   "tmj_inm_realest"
## [238] "RMSI_jobs"       "tmj_syd_finance" "tmj_mel_itdb"   
## [241] "voltsgjobs"      "ULG_Trades"      "tmj_mel_itpm"   
## [244] "tmj_syd_manuf"   "ultabeautyjobs"  "tmj_syd_it"     
## [247] "mashia6"         "DriveMelton"     "GoldenCorralJob"
## [250] "tmj_syd_legal"   "CoxPurtellJobs"  "BarSpire"       
## [253] "RAHomesJobs"     "tmj_mel_itqa"    "AssignRecruit"  
## [256] "ZurichNACareers" "tmj_mel_jobs"    "VanaVana2200000"
## [259] "LLU_Careers"     "tmj_mel_cstsrv"  "tmj_auc_jobs"   
## [262] "JimGiammatteo"   "CSGICareers"     "MyHubIntranet"  
## [265] "tmj_mnp_jobs"    "tmj_syd_recruit" "COC_Careers"    
## [268] "SimoneMahedy"    "gmtpeople"       "Job2Grow"       
## [271] "artisanupdates"  "tmj_syd_hr"      "tmj_syd_itpm"   
## [274] "Beth_Finger"     "tmj_mel_pharm"   "IQVIAcareers"   
## [277] "smcb03"          "tmj_syd_retail"  "tmj_syd_itdb"   
## [280] "tmj_syd_hrta"    "inzejob"         "t_rendezvous"   
## [283] "tmj_gum_jobs"    "PandaCareers"    "tmj_kor_jobs"   
## [286] "Bluisooner"      "WorkHendersonNV" "EdHuntr"        
## [289] "tmj_syd_cler"    "tmj_inh_itqa"    "ThePiagentini"  
## [292] "peoplebankjobs"  "attCAREERS"      "JobWindow_Jobs" 
## [295] "snaphuntjobs"    "musStiforp"      "shah_saharsh"   
## [298] "TicknerRobin"    "rachelebitte"    "danridesharleys"
## [301] "StuartGHazell"   "tmj_jpn_jobs"    "danispeck"      
## [304] "LMartin_TX"      "2CHRONICLES_714" "Ben_ServiceNow" 
## [307] "Lasvegasacs"     "SatansXwife"     "tmj_ide_itjava" 
## [310] "IamEdzM"         "majawashington"  "TinaLOwens"     
## [313] "NursempJobs"     "Zhaopin_com"     "langly511"      
## [316] "LisaJRamos"      "RitaBrue"        "Lee_James_FL"   
## [319] "ChristaMcCabe"   "EbohAjeroh"      "flowmotor"      
## [322] "mkhan004_"       "CCSF_Cyber_Club" "RecruitByMark"  
## [325] "fuzirbarry"      "c_cs"            "mj_kernan"      
## [328] "find_me_in_cali" "interviewgig"    "ZWDcom"         
## [331] "MktgJobForce"    "tmj_ndo_jobs1"   "IIS_Delivers"   
## [334] "ITJobs_IL"       "Mike_McCown"     "aciperski"      
## [337] "JesusValdesMX"   "matoysumayao"    "Kryokelt"       
## [340] "MarraBeppe"      "tmj_mdv_jobs"    "carpalwatch"    
## [343] "parityconsult"   "EssityCareers"   "JennBennetSE"   
## [346] "hopeobaker"      "thebeebles"      "lauraholliday"  
## [349] "h0v1k"           "tmj_inb_it"      "gym_guide"      
## [352] "alg_talent"      "vmorgangipson"   "p2pBianca"      
## [355] "tmj_inm_sales"   "tmj_HI_EDU"      "tmj_HI_ACCT"    
## [358] "tmj_mau_acct"    "Labs83"          "tmj_mau_retail"

# Search for up to 500 user accounts matching "#job". The first argument is
# the search text itself, so it must be the query string rather than the
# quoted name of an R object.
users <- search_users("#job", n = 500)

## Searching for users...

## Finished collecting users!

# Bar chart of how many of the returned users report each location.
users %>%
  ggplot(aes(location)) +
  geom_bar() +
  coord_flip() +
  # labs() names aesthetics, not screen positions: x is the location and y is
  # the bar count, even though coord_flip() draws locations on the vertical axis
  labs(
    x = "Location",
    y = "Count",
    title = "Twitter users "
  )

# Bar chart of the 20 most common user locations (ties included).
users %>%
  dplyr::count(location, sort = TRUE) %>%
  mutate(location = reorder(location, n)) %>%
  na.omit() %>%
  # rank explicitly by the count column instead of relying on top_n()'s
  # default of using the last column of the table
  top_n(20, n) %>%
  ggplot(aes(x = location, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Location",
    y = "Count",
    title = "Twitter users by City/Region  "
  )

## Selecting by n

# Strip URLs from the tweet text. Only the link itself (up to the next
# whitespace) is removed, so words and hashtags that follow a link are kept;
# a greedy "http.*" pattern would delete the rest of the tweet. The optional
# "s" covers both http and https links in a single pass.
job_tweets$stripped_text <- gsub("https?://\\S+", "", job_tweets$text)

# Split the cleaned text into one word per row; unnest_tokens() lowercases the
# words and strips punctuation by default.
job_tweets_clean <- job_tweets %>%
  dplyr::select(stripped_text) %>%
  unnest_tokens(word, stripped_text)

Sentiment Analysis

# Score every word token against the NRC lexicon (one row per word, one column
# per sentiment category)
mysentiment_job <- get_nrc_sentiment(job_tweets_clean$word)

# Total score per sentiment category, as a two-column data frame
Sentimentscores_job <- data.frame(
  sentiment = colnames(mysentiment_job),
  Score = unname(colSums(mysentiment_job))
)

# Bar chart of the total score for each sentiment, one colour per bar
ggplot(data = Sentimentscores_job, aes(x = sentiment, y = Score)) +
  geom_col(aes(fill = sentiment)) +
  theme(legend.position = "none") +
  labs(
    x = "Sentiments",
    y = "scores",
    title = "Sentiments of people behind the tweets on job"
  )

The sentiment plot above, built from tweets tagged #job, suggests that people generally have a positive attitude toward their jobs and feel secure.

BossDay Twitter Sentiments Oct 16th, 2018

# Request up to 500 English-language tweets matching "#bossday", excluding
# retweets
bossDay_tweets <- search_tweets(
  q = "#bossday",
  n = 500,
  lang = "en",
  include_rts = FALSE
)

# Peek at the text of the first few tweets
head(bossDay_tweets[["text"]])

## [1] "Who run the world? MOMS <U+0001F30D> Happy #BossDay Mamas! <U+0001F495><U+0001F931><U+0001F495> @Regran_ed from @itsAmandaAcosta <U+0001F4F8> #momlife #mommyblogger @ San Diego, California https://t.co/AWrGjn6Xla"       
## [2] "WHERE MY REAL FRIENDS AT?? I didnt get any love on this #bossDay. Yall know Im a BOSS <U+0001F644>"                                                                                                                      
## [3] "Today is #NationalBossDay. We'd like to recognize our boss, Jim Marshall, the Director of @_911TI_. His leadership makes it a joy to serve at #911TI. #BossDay #boss #911dispatcher #911dispatchers https://t.co/APCey30vOa"
## [4] "You may find it hard to sort your boss, for everything else there is Kangaro. Bosses can be inspiring, charming, encouraging...Tag your most memorable boss till date.#BossDay https://t.co/6CR3Qqw8SW"                     
## [5] "Yo happy boss day to @maggiej_5! Thanks for believing in us and empowering us with the tools necessary to empower our people. You the champ that runs the camp! #bossday"                                                   
## [6] "#BossDay Thankful to be part of the Macy Family!  @MacyEagles #MacyEagles #LJSD #JoyfulLeaders https://t.co/BMIB7Jidae"

Data Clean Up

# Strip URLs from the tweet text. Only the link itself (up to the next
# whitespace) is removed, so words and hashtags that follow a link are kept;
# a greedy "http.*" pattern would delete the rest of the tweet. The optional
# "s" covers both http and https links in a single pass.
bossDay_tweets$stripped_text <- gsub("https?://\\S+", "", bossDay_tweets$text)

# Split the cleaned text into one word per row; unnest_tokens() lowercases the
# words and strips punctuation by default.
bossDay_tweets_clean <- bossDay_tweets %>%
  dplyr::select(stripped_text) %>%
  unnest_tokens(word, stripped_text)

Count of unique words

# Plot the 15 most frequent words (ties included; stop words not yet removed).
bossDay_tweets_clean %>%
  dplyr::count(word, sort = TRUE) %>%
  # rank explicitly by the count column instead of relying on top_n()'s
  # default of using the last column of the table
  top_n(15, n) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  # labs() names aesthetics, not screen positions: x is the word and y is its
  # count, even though coord_flip() draws the words on the vertical axis
  labs(
    x = "Unique words",
    y = "Count",
    title = "Count of unique words found in tweets"
  )

## Selecting by n

Count of Unique words after further cleaning

# Drop every token that appears in the stop_words table. The join key is given
# explicitly so the match cannot silently change if either table gains another
# shared column.
bossDay_tweets_clean <- bossDay_tweets_clean %>%
  anti_join(stop_words, by = "word")

## Joining, by = "word"

# Plot the 15 most frequent words (ties included) after stop-word removal.
bossDay_tweets_clean %>%
  dplyr::count(word, sort = TRUE) %>%
  # rank explicitly by the count column instead of relying on top_n()'s
  # default of using the last column of the table
  top_n(15, n) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  # x is the word aesthetic and y its count; coord_flip() only changes where
  # each axis is drawn, not which label belongs to it
  labs(
    x = "Unique words",
    y = "Count",
    title = "Count of unique words found in tweets",
    subtitle = "Stop words removed from the list"
  )

## Selecting by n

Network of Words

# Split each URL-stripped tweet into bigrams (two-word n-grams), one per row
bossDay_tweets_paired_words <- unnest_tokens(
  dplyr::select(bossDay_tweets, stripped_text),
  paired_words,
  stripped_text,
  token = "ngrams",
  n = 2
)

# Frequency of each bigram, most common first
dplyr::count(bossDay_tweets_paired_words, paired_words, sort = TRUE)

## # A tibble: 7,838 x 2
##    paired_words      n
##    <chr>         <int>
##  1 thank you        78
##  2 day to           77
##  3 happy bossday    69
##  4 boss's day       64
##  5 to our           57
##  6 boss day         53
##  7 bosss day       52
##  8 bossday to       46
##  9 you for          42
## 10 to all           40
## # ... with 7,828 more rows

# Break every bigram into its two component words
bossDay_tweets_separated_words <- tidyr::separate(
  bossDay_tweets_paired_words,
  paired_words,
  c("word1", "word2"),
  sep = " "
)

# Keep only the bigrams in which neither word is a stop word
bossDay_tweets_filtered <- dplyr::filter(
  bossDay_tweets_separated_words,
  !(word1 %in% stop_words$word),
  !(word2 %in% stop_words$word)
)

# Count the remaining bigrams, most common first
bossDay_words_counts <- dplyr::count(
  bossDay_tweets_filtered,
  word1,
  word2,
  sort = TRUE
)

head(bossDay_words_counts)

## # A tibble: 6 x 3
##   word1   word2       n
##   <chr>   <chr>   <int>
## 1 happy   bossday    69
## 2 boss's  day        64
## 3 boss    day        53
## 4 bosss  day        52
## 5 bossday happy      36
## 6 happy   bosss     36

Plot BossDay word network

# Draw the network of bigrams seen at least 24 times: the bigram table is
# turned into a graph, and each edge's width and opacity follow its count n.
bossDay_words_counts %>%
  dplyr::filter(n >= 24) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = n, edge_width = n)) +
  geom_node_point(size = 3, color = "darkslategray4") +
  geom_node_text(aes(label = name), size = 3, vjust = 1.8) +
  labs(
    title = "Word Network: #bossday",
    subtitle = "Text mining twitter data ",
    x = "",
    y = ""
  )

Final Project–Twitter Analysis

2018-10-16

Team:

Shyam Kumar Voleti

Sudhanshu Sharma

Nikhil Dandapanthula

Xueqing Wang

Priyanka Bodepudi

Load packages

Read Tweets

Sentiment Analysis

The sentiment plot above, built from tweets tagged #job, suggests that people generally have a positive attitude toward their jobs and feel secure.

BossDay Twitter Sentiments Oct 16th, 2018

Data Clean Up

Count of unique words

Count of Unique words after further cleaning

Network of Words

Plot BossDay word network