The program uses a function for sentiment analysis of words based on the AFINN lexicon. This “dictionary” maps each word to a “valence,” an integer score between -5 and +5 that rates sentiment from negative to positive.
The program downloads up to 750 tweets tagged #Texas and up to 750 tagged #Maine. Words from the text of the tweets are matched to the lexicon, and each matched word is assigned a sentiment valence. Scores are aggregated by the minute, averaged, and graphed.
TEXAS
Texas (TX) tweets are downloaded, converted to a data frame, and their timestamps are converted to a uniform date-time format.
# Download up to `num_tweets` tweets tagged #Texas and keep only the tweet text
# and creation timestamp, ordered oldest first.
num_tweets <- 750
Tt <- searchTwitter("#Texas", n = num_tweets)
Tt_df <- twListToDF(Tt) %>%
  select(text, created) %>%
  arrange(created)
# Normalise `created` to POSIXct. The format needs the ":" separators and the
# seconds field; it is only consulted when `created` arrives as text (a value
# that is already POSIXct is returned unchanged).
# NOTE(review): assumes the timestamps are reported in UTC -- confirm.
Tt_df$created <- as.POSIXct(
  Tt_df$created,
  tz = "UTC",
  format = "%Y-%m-%d %H:%M:%S"
)
Reference times to US Central time (the America/Chicago zone) and create separate hour, minute, and second columns
# Express each timestamp as America/Chicago wall-clock text, then split out the
# hour, minute and second components as their own columns.
Tt_df <- Tt_df %>%
  mutate(
    created = format(created, tz = "America/Chicago"),
    hour = as.POSIXlt(created)$hour,
    min = as.POSIXlt(created)$min,
    sec = as.POSIXlt(created)$sec
  )
Individual words are isolated from the TX tweets
# Split each tweet into words. A separator is any character other than a
# letter, digit, "#", "@" or apostrophe, plus any apostrophe that is not
# immediately followed by a letter, digit, "#" or "@".
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"
Tt_sep_words <- Tt_df %>%
  # Drop tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and HTML entities. The entity has to be removed whole:
  # deleting only the "&" would leave "amp", "lt" or "gt" behind as words.
  mutate(text = str_replace_all(
    text,
    "https://t.co/[A-Za-z\\d]+|&(?:amp|lt|gt);|&",
    ""
  )) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Keep tokens that are not stop words and contain at least one a-z letter.
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  ) %>%
  arrange(created)
Create a data frame of sentiment-scored words by inner-joining the TX words with the lexicon word scores
# Load the AFINN lexicon and inner-join it to the TX words on `word`, so only
# words present in the lexicon remain; rows are then ordered by `created`.
TaFinn <- get_sentiments("afinn")
Tscore_clock <- arrange(
  inner_join(Tt_sep_words, TaFinn, by = "word"),
  created
)
A summary data frame is created in which the TX word scores are aggregated and averaged by the minute
# One row per (hour, min) group: N counts the scored words in the group and
# TX_sentiment is their mean score.
# NOTE(review): `score` must match the value column supplied by the lexicon
# join in the installed tidytext version -- confirm.
Tscore_clock_summary <- ddply(
  .data = Tscore_clock,
  .variables = c("hour", "min"),
  .fun = summarise,
  N = length(score),
  TX_sentiment = mean(score)
)
A clock time (hour:minute) is attached to each averaged score
# Build an "hour:min" label for every summary row and parse it into a POSIXct
# clock time. The format carries no date fields, so the date part is whatever
# the parser fills in by default.
Tscore_clock_summary$time <- as.POSIXct(
  factor(with(Tscore_clock_summary, paste0(hour, ":", min))),
  format = "%H:%M"
)
MAINE
The same process is performed on the Maine tweets; their times are referenced to the same America/Chicago time zone as the Texas tweets.
# Download up to `num_tweets` tweets tagged #Maine and keep only the tweet text
# and creation timestamp, ordered oldest first.
Mt <- searchTwitter("#Maine", n = num_tweets)
Mt_df <- twListToDF(Mt) %>%
  select(text, created) %>%
  arrange(created)
# Normalise `created` to POSIXct. The format needs the ":" separators and the
# seconds field; it is only consulted when `created` arrives as text (a value
# that is already POSIXct is returned unchanged).
# NOTE(review): assumes the timestamps are reported in UTC -- confirm.
Mt_df$created <- as.POSIXct(
  Mt_df$created,
  tz = "UTC",
  format = "%Y-%m-%d %H:%M:%S"
)
# Express each timestamp as America/Chicago wall-clock text, then split out the
# hour, minute and second components as their own columns.
Mt_df <- Mt_df %>%
  mutate(
    created = format(created, tz = "America/Chicago"),
    hour = as.POSIXlt(created)$hour,
    min = as.POSIXlt(created)$min,
    sec = as.POSIXlt(created)$sec
  )
# Split each tweet into words. A separator is any character other than a
# letter, digit, "#", "@" or apostrophe, plus any apostrophe that is not
# immediately followed by a letter, digit, "#" or "@".
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"
Mt_sep_words <- Mt_df %>%
  # Drop tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and HTML entities. The entity has to be removed whole:
  # deleting only the "&" would leave "amp", "lt" or "gt" behind as words.
  mutate(text = str_replace_all(
    text,
    "https://t.co/[A-Za-z\\d]+|&(?:amp|lt|gt);|&",
    ""
  )) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Keep tokens that are not stop words and contain at least one a-z letter.
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  ) %>%
  arrange(created)
# Load the AFINN lexicon and inner-join it to the ME words on `word`, so only
# words present in the lexicon remain; rows are then ordered by `created`.
MaFinn <- get_sentiments("afinn")
Mscore_clock <- arrange(
  inner_join(Mt_sep_words, MaFinn, by = "word"),
  created
)
# One row per (hour, min) group: N counts the scored words in the group and
# ME_sentiment is their mean score.
# NOTE(review): `score` must match the value column supplied by the lexicon
# join in the installed tidytext version -- confirm.
Mscore_clock_summary <- ddply(
  .data = Mscore_clock,
  .variables = c("hour", "min"),
  .fun = summarise,
  N = length(score),
  ME_sentiment = mean(score)
)
# Build an "hour:min" label for every summary row and parse it into a POSIXct
# clock time. The format carries no date fields, so the date part is whatever
# the parser fills in by default.
Mscore_clock_summary$time <- as.POSIXct(
  factor(with(Mscore_clock_summary, paste0(hour, ":", min))),
  format = "%H:%M"
)
# Line chart of the per-minute mean TX sentiment against clock time.
ggvis(Tscore_clock_summary, x = ~time, y = ~TX_sentiment) %>%
  layer_lines()
Texas (TX) Sentiment Scores from -5 to 5 (from negative to positive) -Versus- Time
Lexicon words are matched to the text of tweets from Texas and Maine. Each matched word is assigned a “valence,” an integer score between -5 and +5 that rates sentiment from negative to positive. Scores are aggregated by the minute, averaged, and graphed.
# Line chart of the per-minute mean ME sentiment against clock time.
ggvis(Mscore_clock_summary, x = ~time, y = ~ME_sentiment) %>%
  layer_lines()
Maine (ME) Sentiment Score -Versus- Time