Read in Data

In a previous file, I used the Twitter API to collect the tweets posted by the accounts “JoeBiden” and “PeteButtigieg” from November 3rd, 2019 to March 1st, 2020 (excluding retweets).

# Load Biden's tweets with every column type declared explicitly, so readr has
# nothing to guess.
# `id` is read as text rather than as a number: assuming it holds the Twitter
# status ID, it is a 64-bit integer, and a double cannot represent integers
# above 2^53 exactly, so a numeric `id` would be silently rounded.
BidTweets <- read_csv(
  "../BidenTweets.csv",
  col_types = cols(
    id = col_character(),
    timestamp = col_datetime(),
    tweet = col_character(),
    retweets = col_double(),
    likes = col_double()
  )
)

# Load Buttigieg's tweets with every column type declared explicitly, so readr
# has nothing to guess.
# `id` is read as text rather than as a number: assuming it holds the Twitter
# status ID, it is a 64-bit integer, and a double cannot represent integers
# above 2^53 exactly, so a numeric `id` would be silently rounded.
ButTweets <- read_csv(
  "../ButtigiegTweets.csv",
  col_types = cols(
    id = col_character(),
    timestamp = col_datetime(),
    tweet = col_character(),
    retweets = col_double(),
    likes = col_double()
  )
)

Explaining the time range:

- Begins November 3rd, 2019, exactly one year before Election Day for the 2020 presidential general election
- Ends March 1st, 2020, when Buttigieg withdrew from the race

Important dates:

- Buttigieg announced his campaign on April 14, 2019
- Biden announced his campaign on April 25, 2019
- The first primaries/caucuses took place in February 2020

During this time:

- Buttigieg sent 1,040 tweets
- Biden sent 913 tweets

Create Document-Feature Matrices and Wordclouds

# Build a corpus with one document per Biden tweet
biden_corpus <- corpus(BidTweets$tweet)

# Tokenise (dropping punctuation and numbers), lowercase, strip English
# stopwords, then tabulate the tokens into a document-feature matrix
biden_tokens <- tokens(biden_corpus, remove_punct = TRUE, remove_numbers = TRUE)
biden_tokens <- tokens_tolower(biden_tokens)
biden_tokens <- tokens_select(
  biden_tokens,
  pattern = stopwords("en"),
  selection = "remove"
)
dfm_biden <- dfm(biden_tokens)

head(dfm_biden)

## Document-feature matrix of: 6 documents, 4,052 features (99.54% sparse) and 0 docvars.
##        features
## docs    one year determine mark donald trump leave country much stake
##   text1   1    2         1    1      1     1     1       1    1     1
##   text2   1    1         0    0      1     2     0       0    0     0
##   text3   0    0         0    0      0     0     0       0    0     0
##   text4   0    0         0    0      0     0     0       0    0     0
##   text5   0    0         0    0      0     0     0       0    0     0
##   text6   1    1         0    0      1     1     0       0    0     0
## [ reached max_nfeat ... 4,042 more features ]

# Word cloud of Biden's 60 most frequent features. Stopwords, punctuation and
# numbers were already removed when the DFM was built.
# Word placement is randomised, so fix the RNG seed to get the same figure on
# every knit.
set.seed(2020)
textplot_wordcloud(dfm_biden, max_words = 60, min_size = 0.5, max_size = 5.0)

# Build a corpus with one document per Buttigieg tweet
buttigieg_corpus <- corpus(ButTweets$tweet)

# Tokenise (dropping punctuation and numbers), lowercase, strip English
# stopwords, then tabulate the tokens into a document-feature matrix
buttigieg_tokens <- tokens(
  buttigieg_corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
buttigieg_tokens <- tokens_tolower(buttigieg_tokens)
buttigieg_tokens <- tokens_select(
  buttigieg_tokens,
  pattern = stopwords("en"),
  selection = "remove"
)
dfm_buttigieg <- dfm(buttigieg_tokens)

head(dfm_buttigieg)

## Document-feature matrix of: 6 documents, 4,673 features (99.46% sparse) and 0 docvars.
##        features
## docs    thank inviting homes sharing stories putting trust launched campaign
##   text1     1        1     1       1       1       1     1        1        1
##   text2     0        0     0       0       0       0     0        0        0
##   text3     1        0     0       0       0       0     0        0        0
##   text4     0        0     0       0       0       0     0        0        0
##   text5     1        0     0       0       0       0     0        0        0
##   text6     0        0     0       0       0       0     0        0        0
##        features
## docs    americans
##   text1         1
##   text2         0
##   text3         0
##   text4         1
##   text5         1
##   text6         0
## [ reached max_nfeat ... 4,663 more features ]

# Word cloud of Buttigieg's 60 most frequent features (stopwords, punctuation
# and numbers were already removed when the DFM was built).
# Word placement is randomised, so fix the RNG seed to get the same figure on
# every knit.
set.seed(2020)
textplot_wordcloud(dfm_buttigieg, max_words = 60, min_size = 0.5, max_size = 5.0)

Top 20 Features for Each Candidate

# Biden: the 20 highest-frequency features in the DFM
dfm_biden %>%
  textstat_frequency(n = 20)

##       feature frequency rank docfreq group
## 1       trump       230    1     218   all
## 2   president       217    2     198   all
## 3      donald       195    3     193   all
## 4        need       129    4     125   all
## 5     country       125    5     124   all
## 6        make       109    6     103   all
## 7       every       105    7      96   all
## 8         get       104    8     100   all
## 9      nation       102    9      99   all
## 10        one       101   10      98   all
## 11        day       101   10      90   all
## 12        can        93   12      78   all
## 13       just        89   13      85   all
## 14   american        84   14      80   all
## 15       help        82   15      70   all
## 16         us        82   15      75   all
## 17       time        82   15      80   all
## 18       take        81   18      76   all
## 19      today        80   19      76   all
## 20 #demdebate        78   20      78   all

# Buttigieg: the 20 highest-frequency features in the DFM
dfm_buttigieg %>%
  textstat_frequency(n = 20)

##       feature frequency rank docfreq group
## 1   president       181    1     168   all
## 2          us       161    2     143   all
## 3         can       152    3     127   all
## 4         new       138    4     118   all
## 5     country       134    5     127   all
## 6  #demdebate       129    6     129   all
## 7      people       128    7     114   all
## 8    american       124    8     115   all
## 9   americans       119    9     112   all
## 10       need       108   10      98   all
## 11       time        98   11      92   all
## 12       make        98   11      87   all
## 13       chip        97   13      97   all
## 14      trump        91   14      87   all
## 15   together        87   15      81   all
## 16       turn        87   15      86   all
## 17       page        85   17      85   all
## 18        one        79   18      74   all
## 19       just        78   19      77   all
## 20     future        77   20      73   all

Biden’s 10 Most Retweeted Tweets

# Sort Biden's tweets from most to least retweeted and keep the first ten rows
BidenTopTen <- arrange(BidTweets, desc(retweets))[seq_len(10), ]

# Show tweet text with its retweet and like counts as a styled, scrollable table
BidenTopTen %>%
  select(tweet, retweets, likes) %>%
  kable(col.names = c("Tweet", "Retweets", "Likes")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "100%")

Tweet	Retweets	Likes
The world is laughing at President Trump. They see him for what he really is: dangerously incompetent and incapable of world leadership. We cannot give him four more years as commander in chief. https://t.co/IR8K2k54YQ	68099	253158
Two elections. Zero criminal convictions. https://t.co/fyy1DLOac9	17043	119498
What kind of president bullies a teenager? @realDonaldTrump, you could learn a few things from Greta on what it means to be a leader. https://t.co/18Y6uZexMC	16894	110731
Donald Trump should release his taxes or shut up about corruption. https://t.co/5G8VwWsyx0	15212	57807
Wanted to make sure you saw this, @realDonaldTrump: “Trump’s First 3 Years Created 1.5 Million Fewer Jobs Than Obama’s Last 3.” https://t.co/KWoHe1d4ku	13961	40497
This same poll has you losing to me by 7 points. https://t.co/cbwKoCbW7b	13753	97617
.@realDonaldTrump, release your taxes or shut up about corruption. https://t.co/u3czWLj5ju	13605	76813
This is a crisis of Donald Trump’s own making. He claimed pulling out of the Iran deal would deter Iranian aggression and result in a better deal. He has failed on both counts. He is the most erratic and incompetent commander in chief we’ve ever had. https://t.co/wHhWJA1Zhm	12729	50500
Donald Trump is cutting food assistance for 700,000 people, but somehow found $1.4 billion for his sham of a border wall. This administration is morally bankrupt. https://t.co/1CPQhgNUdx	12467	47248
I’ve released 21 years of my tax returns — why hasn’t President Trump? He should release his taxes or shut up about corruption. https://t.co/7NrKkagZlt	11351	65420

# Five-number summary plus mean of Biden's per-tweet retweet counts
# (the matching summary for likes follows)
summary(BidTweets[["retweets"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      73     467     994    1938    2285   68099

# Five-number summary plus mean of Biden's per-tweet like counts
summary(BidTweets[["likes"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     338    1986    4209    9636    9960  253158

Buttigieg’s 10 Most Retweeted Tweets

# Sort Buttigieg's tweets from most to least retweeted and keep the first ten rows
ButtigiegTopTen <- arrange(ButTweets, desc(retweets))[seq_len(10), ]

# Show tweet text with its retweet and like counts as a styled, scrollable table
ButtigiegTopTen %>%
  select(tweet, retweets, likes) %>%
  kable(col.names = c("Tweet", "Retweets", "Likes")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "100%")

Tweet	Retweets	Likes
Fellow veterans take note: the President of the United States has formally confessed to illegally misdirecting charitable funds from an event he claimed was a benefit for veterans. How many more affronts to flag and uniform will we tolerate? https://t.co/ZyvEYoBV8D	20841	64880
Thank you for inviting me into your homes, sharing your stories, and putting your trust in me. We launched our campaign because Americans are hungry for a new kind of politics that brings us together. And together we’ll beat this president and build the era that must come next. https://t.co/QDajvx1lpL	15648	191212
God does not belong to a political party.	15227	121178
My marriage has never involved me sending hush money to a porn star. I’m ready to have a debate with Donald Trump on family values. #CNNTownHall https://t.co/ZVjpPjVUcb	15155	96491
There’s nothing pro-military about overruling our military justice system to prevent it from delivering accountability for war crimes. The president has again dishonored our armed services.	10236	59557
I appreciate this reporter’s swift and honest correction of a misquote on my views of the Obama presidency. From health care to DADT repeal to the rescue of the auto industry, my appreciation of the great leadership of Barack Obama comes from a very personal place. https://t.co/eWvSDtcpTQ	8862	44085
Trayvon Martin would have been 25 today. How many 25th birthdays have been stolen from us by white supremacy, gun violence, prejudice, and fear? #BlackLivesMatter	6947	49994
We don’t yet have all the details of the horrifying events in Santa Clarita. But we do know that in America today, children are scared to go to school. Parents and teachers are terrified. And that can’t stand. It is time to hold the NRA and our leaders in Washington accountable.	5305	35462
China is waging a shocking, merciless campaign to erase the religious and ethnic identity of millions. The United States has a responsibility to speak out. This president’s silence has a cost. https://t.co/y7tPxKhBjN	4884	15514
Tonight, Americans in Iraq are under fire. My prayers are with them, their loved ones, and their families.	4560	48105

# Five-number summary plus mean of Buttigieg's per-tweet retweet counts
# (the matching summary for likes follows)
summary(ButTweets[["retweets"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.0   184.8   350.5   650.6   638.2 20841.0

# Five-number summary plus mean of Buttigieg's per-tweet like counts
summary(ButTweets[["likes"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      43    1200    2239    4717    4560  191212

Each Candidate’s Highest Retweet Count per Day

# Split a tweet table's `timestamp` into separate date and time columns.
#
# tweets: data frame with a date-time `timestamp` column.
# Returns `tweets` with these columns added: date, hour, minute, second,
# format_date ("mm/dd/YYYY") and format_hour ("HH:MM:SS").
add_date_parts <- function(tweets) {
  tweets %>%
    mutate(
      # as.Date() on a date-time takes the UTC calendar day by default.
      # NOTE(review): if tweets should be grouped by a US-local day instead,
      # pass an explicit `tz` here -- confirm the intended time zone.
      date = as.Date(timestamp),
      hour = hour(timestamp),
      minute = minute(timestamp),
      second = second(timestamp),
      format_date = format(date, "%m/%d/%Y"),
      # format() zero-pads every field ("09:05:03"); pasting the numeric
      # hour/minute/second together would give "9:5:3"
      format_hour = format(timestamp, "%H:%M:%S")
    )
}

# Keep, for each calendar date, the single tweet with the most retweets.
#
# dated_tweets: data frame with `date` and `retweets` columns.
# Returns one row per date; which.max() keeps the first row when several
# tweets tie for the daily maximum.
top_tweet_per_day <- function(dated_tweets) {
  dated_tweets %>%
    group_by(date) %>%
    slice(which.max(retweets)) %>%
    # drop the grouping so later verbs are not silently applied per date
    ungroup()
}

# Biden: add the date/time columns, then pick each day's top tweet
dateBiden <- add_date_parts(BidTweets)
TopDateBiden <- top_tweet_per_day(dateBiden)

# Buttigieg: same two steps
dateButtigieg <- add_date_parts(ButTweets)
TopDateButtigieg <- top_tweet_per_day(dateButtigieg)

# Common y-axis range for both candidates, taken from the data, so the two
# charts can be compared directly. Previously only the Buttigieg chart had a
# fixed 0-60000 axis while the Biden chart auto-scaled to its own maximum.
retweet_range <- c(0, max(TopDateBiden$retweets, TopDateButtigieg$retweets))

# Line chart of the retweet count of a candidate's most-retweeted tweet per day.
#
# daily_top: data frame with one row per day and `date` / `retweets` columns.
# candidate: name used as the title prefix.
# y_range:   length-2 numeric vector giving the y-axis limits.
# Returns the ggplot object (auto-printed when called at top level).
plot_daily_top_retweets <- function(daily_top, candidate, y_range) {
  ggplot(data = daily_top) +
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
    # `linewidth`; switch once the minimum ggplot2 version allows it.
    geom_line(mapping = aes(x = date, y = retweets), size = 1.2) +
    theme_bw() +
    labs(title = paste0(candidate, ": Retweet Count for Most Popular Tweet per Day"),
         x = "Date (Nov 2019 - Mar 2020)",
         y = "Retweet Count") +
    ylim(y_range)
}

plot_daily_top_retweets(TopDateBiden, "Biden", retweet_range)

plot_daily_top_retweets(TopDateButtigieg, "Buttigieg", retweet_range)

Sentiment Analysis

# Count, per Biden tweet, how many tokens fall under each category of the NRC
# dictionary.
NRC_Biden <- dfm_lookup(dfm_biden, dictionary = data_dictionary_NRC)

# convert() is the supported replacement for the deprecated as.data.frame()
# method on a dfm; the result has a `doc_id` column plus one column per
# dictionary category.
Biden_Sent <- convert(NRC_Biden, to = "data.frame")
Biden_Sent <- subset(Biden_Sent, select = -c(doc_id))

# Total each category over all tweets. The labels are taken from the column
# names, so every count is paired with its own category rather than relying
# on a hand-typed label vector being in the same order as the columns.
Biden_Totals <- colSums(Biden_Sent)
BidSentSummary <- data.frame(
  sentiment = names(Biden_Totals),
  value = unname(Biden_Totals)
)

# Count, per Buttigieg tweet, how many tokens fall under each category of the
# NRC dictionary.
NRC_Buttigieg <- dfm_lookup(dfm_buttigieg, dictionary = data_dictionary_NRC)

# convert() is the supported replacement for the deprecated as.data.frame()
# method on a dfm; the result has a `doc_id` column plus one column per
# dictionary category.
Buttigieg_Sent <- convert(NRC_Buttigieg, to = "data.frame")
Buttigieg_Sent <- subset(Buttigieg_Sent, select = -c(doc_id))

# Total each category over all tweets. The labels are taken from the column
# names, so every count is paired with its own category rather than relying
# on a hand-typed label vector being in the same order as the columns.
Buttigieg_Totals <- colSums(Buttigieg_Sent)
ButSentSummary <- data.frame(
  sentiment = names(Buttigieg_Totals),
  value = unname(Buttigieg_Totals)
)

# Put the two candidates' totals side by side, one row per sentiment category.
# Rows are paired by position, so both summaries must list the categories in
# the same order; the labels are taken from the Biden summary.
BothSentiment <- data.frame(
  Sentiment = BidSentSummary$sentiment,
  Biden = BidSentSummary$value,
  Buttigieg = ButSentSummary$value
)
BothSentiment

##       Sentiment Biden Buttigieg
## 1         anger   713       497
## 2  anticipation   948       943
## 3       disgust   287       211
## 4          fear   792       606
## 5           joy   642       657
## 6      negative  1137       941
## 7      positive  2072      2223
## 8       sadness   525       431
## 9      surprise   553       430
## 10        trust  1485      1392

# Reshape to long form: one row per (sentiment, candidate) pair, which is the
# layout ggplot needs for a grouped bar chart
BothSent <- pivot_longer(
  BothSentiment,
  cols = c("Biden", "Buttigieg"),
  names_to = "Candidate",
  values_to = "Value"
)

# Side-by-side bars comparing the two candidates within each sentiment category
ggplot(BothSent, aes(x = Sentiment, y = Value, fill = Candidate)) +
  geom_col(position = position_dodge()) +
  theme_bw(base_size = 11) +
  # tilt the category labels so they do not overlap
  theme(axis.text.x = element_text(angle = 30, vjust = 0.7, size = 11)) +
  scale_fill_manual(
    values = c("Biden" = "darkblue", "Buttigieg" = "lightblue")
  ) +
  labs(
    title = "Comparing Sentiment between Biden and Buttigieg",
    subtitle = "Using the NRC Data Dictionary",
    x = "Sentiment",
    y = "Sentiment Occurrence Count"
  )

Buttigieg and Biden Tweet Analysis

Megan Georges

2022-08-12