Starbucks Coffee Sentiment Analysis

Load the required packages

library(twitteR); library(ROAuth)
library(tidyverse); library(lubridate)
library(tidytext)

Search Twitter for “starbucks coffee”

I could not search for “starbucks logo 2011” because the Twitter search only returns tweets from the last 9 days or so.

# Request up to 100,000 tweets matching the query, flatten the returned
# list of statuses into a data frame, and add a `Date` column holding the
# date part of each tweet's `created` timestamp.
tw <- searchTwitter("starbucks coffee", n = 100000)
df <- twListToDF(tw) %>%
  mutate(Date = date(created))

Read the extracted file

# Load the previously extracted tweets from disk, letting readr guess the
# column types.
# NOTE(review): per the printed column spec, id / replyToSID / replyToUID are
# parsed as doubles; values around 1.58e18 exceed 2^53, so exact IDs are not
# preserved. Read them as character if the IDs are ever needed.
df <- read_csv(file = "starbucks_tweets.csv")

## Rows: 16745 Columns: 17
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): text, replyToSN, statusSource, screenName
## dbl  (7): favoriteCount, replyToSID, id, replyToUID, retweetCount, longitude...
## lgl  (4): favorited, truncated, isRetweet, retweeted
## dttm (1): created
## date (1): Date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Auto-print the tibble to inspect its first rows and column types.
df

## # A tibble: 16,745 x 17
##    text     favor~1 favor~2 reply~3 created             trunc~4 replyT~5      id
##    <chr>    <lgl>     <dbl> <chr>   <dttm>              <lgl>      <dbl>   <dbl>
##  1 "@peele~ FALSE         0 peeler~ 2022-10-01 05:45:31 FALSE    1.58e18 1.58e18
##  2 "coffee~ FALSE         0 <NA>    2022-10-01 05:44:22 FALSE   NA       1.58e18
##  3 "RT @Mo~ FALSE         0 <NA>    2022-10-01 05:43:38 FALSE   NA       1.58e18
##  4 "RT @sk~ FALSE         0 <NA>    2022-10-01 05:41:57 FALSE   NA       1.58e18
##  5 "@theho~ FALSE         0 thehof~ 2022-10-01 05:40:29 FALSE    1.58e18 1.58e18
##  6 "RT @th~ FALSE         0 <NA>    2022-10-01 05:40:27 FALSE   NA       1.58e18
##  7 "@Xia_L~ FALSE         0 Xia_La~ 2022-10-01 05:38:19 TRUE     1.58e18 1.58e18
##  8 "RT @Kr~ FALSE         0 <NA>    2022-10-01 05:37:42 FALSE   NA       1.58e18
##  9 "RT @Ke~ FALSE         0 <NA>    2022-10-01 05:36:34 FALSE   NA       1.58e18
## 10 "We wil~ FALSE         0 <NA>    2022-10-01 05:36:26 TRUE    NA       1.58e18
## # ... with 16,735 more rows, 9 more variables: replyToUID <dbl>,
## #   statusSource <chr>, screenName <chr>, retweetCount <dbl>, isRetweet <lgl>,
## #   retweeted <lgl>, longitude <dbl>, latitude <dbl>, Date <date>, and
## #   abbreviated variable names 1: favorited, 2: favoriteCount, 3: replyToSN,
## #   4: truncated, 5: replyToSID

Extract the tweet text from df into a tibble with one row per tweet

# Keep only the raw tweet text as a character vector and report how many
# tweets there are.
tweets_text <- df[["text"]]
length(tweets_text)

## [1] 16745

# One row per tweet: `line` is the tweet's position in `tweets_text` and
# `text` is the tweet itself. The index is derived from the data rather than
# hard-coded, so this keeps working if the CSV gains or loses rows.
tweets_tbl <- tibble(line = seq_along(tweets_text), text = tweets_text)

Create twitter stopwords

# Twitter-specific noise tokens (URL fragments, retweet marker, HTML entity
# remnants), tagged with a custom lexicon name. `lexicon` is recycled to
# every row.
# NOTE(review): this table is not applied anywhere in the visible analysis;
# "rt", "https" and "t.co" still appear in the word counts.
my_stop_words <- tibble(
  word = c("https", "t.co", "rt", "amp", "rstats", "gt"),
  lexicon = "twitter"
)

Convert tweets_tbl to one token per row

# Split each tweet's `text` into word tokens, producing one row per word in
# a new `word` column.
tidy_words <- unnest_tokens(tweets_tbl, output = word, input = text)

# Show the most frequent tokens first.
count(tidy_words, word, sort = TRUE)

## # A tibble: 22,086 x 2
##    word          n
##    <chr>     <int>
##  1 u         72066
##  2 starbucks 14091
##  3 coffee    13594
##  4 rt         9151
##  5 https      7070
##  6 t.co       6993
##  7 the        6728
##  8 i          5972
##  9 to         5767
## 10 a          5335
## # ... with 22,076 more rows

Add the BuzCom positive words to the bing positive lexicon

# Custom BuzCom words to be treated as positive in addition to the bing
# lexicon.
buzcom <- c("simple", "minimalist", "visible", "direct")

# One "positive" label per custom word; the length follows `buzcom`, so
# editing the word list cannot leave the two vectors out of sync.
pos <- rep("positive", length(buzcom))

# bing lexicon plus the custom words, restricted to the positive entries.
positive <- get_sentiments("bing") %>%
  add_row(word = buzcom, sentiment = pos) %>%
  filter(sentiment == "positive")

Look at the positive words with the highest count

# Keep only the tokens that appear in the positive lexicon (joined on the
# shared `word` column), then rank them by frequency.
count(
  semi_join(tidy_words, positive),
  word,
  sort = TRUE
)

## Joining, by = "word"

## # A tibble: 446 x 2
##    word         n
##    <chr>    <int>
##  1 like       945
##  2 support    793
##  3 better     660
##  4 premier    596
##  5 greatest   591
##  6 sweet      570
##  7 free       373
##  8 led        363
##  9 happy      360
## 10 win        340
## # ... with 436 more rows

Create a new bing with BuzCom positive words for Starbucks

# Full bing lexicon (positive and negative) with the custom BuzCom words
# appended as extra rows.
bing <- add_row(get_sentiments("bing"), word = buzcom, sentiment = pos)

Look at the most common positive and negative words

# Attach a sentiment to every token found in the extended bing lexicon
# (joined on the shared `word` column), then count each word/sentiment pair,
# most frequent first.
bing_word_counts <- count(
  inner_join(tidy_words, bing),
  word,
  sentiment,
  sort = TRUE
)

## Joining, by = "word"

# Diverging bar chart of the words that occur more than 150 times.
# Negative words get a negated count so they extend to the opposite side of
# zero, and words are ordered by that signed count.
bing_word_counts %>%
  filter(n > 150) %>%
  mutate(
    n = ifelse(sentiment == "negative", -n, n),
    word = reorder(word, n)  # uses the signed n computed just above
  ) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col() +
  coord_flip() +  # horizontal bars so the word labels stay readable
  labs(y = "Contribution to sentiment")

Starbucks Coffee Sentiment Analysis

CK LIM

10/1/2022

Load the required packages

Search Twitter for “starbucks coffee”

I could not search for “starbucks logo 2011” because the Twitter search only returns tweets from the last 9 days or so.

Read the extracted file

Extract the tweet text from df into a tibble with one row per tweet

Create twitter stopwords

Convert tweets_tbl to one token per row

Add the BuzCom positive words to the bing positive lexicon

Look at the positive words with the highest count

Create a new bing with BuzCom positive words for Starbucks

Look at the most common positive and negative words