In a previous file, I used the Twitter API to collect the Tweets (excluding Retweets) posted by the accounts “JoeBiden” and “PeteButtigieg” between November 3rd, 2019 and March 1st, 2020.
# Load Biden's tweets from CSV (one row per tweet: id, tweet, timestamp, retweets, likes)
# NOTE(review): `id` is parsed as double; if it holds 64-bit tweet IDs, values
# above 2^53 lose precision -- consider reading it as character. TODO confirm.
BidTweets <- read_csv("../BidenTweets.csv")
## Rows: 913 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): tweet
## dbl (3): id, retweets, likes
## dttm (1): timestamp
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load Buttigieg's tweets from CSV (one row per tweet: id, tweet, timestamp, retweets, likes)
# NOTE(review): `id` is parsed as double; if it holds 64-bit tweet IDs, values
# above 2^53 lose precision -- consider reading it as character. TODO confirm.
ButTweets <- read_csv("../ButtigiegTweets.csv")
## Rows: 1040 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): tweet
## dbl (3): id, retweets, likes
## dttm (1): timestamp
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Explaining the time range:

- Begins November 3rd, 2019, exactly one year before Election Day for the 2020 presidential general election
- Ends March 1st, 2020, when Buttigieg withdrew from the race

Important dates:

- Buttigieg announced his campaign on April 12, 2019
- Biden announced his campaign on April 25, 2019
- The first primaries and caucuses took place in February 2020

During this time:

- Buttigieg sent 1,040 tweets
- Biden sent 913 tweets

# Biden: build a corpus from the tweet text
biden_corpus <- corpus(BidTweets$tweet)
# Tokenise, dropping punctuation and numbers
biden_tokens <- tokens(
  biden_corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
# Lowercase the tokens, then strip English stopwords
biden_tokens <- tokens_select(
  tokens_tolower(biden_tokens),
  pattern = stopwords("en"),
  selection = "remove"
)
# Document-feature matrix, with a preview of the first documents
dfm_biden <- dfm(biden_tokens)
head(dfm_biden)
## Document-feature matrix of: 6 documents, 4,052 features (99.54% sparse) and 0 docvars.
## features
## docs one year determine mark donald trump leave country much stake
## text1 1 2 1 1 1 1 1 1 1 1
## text2 1 1 0 0 1 2 0 0 0 0
## text3 0 0 0 0 0 0 0 0 0 0
## text4 0 0 0 0 0 0 0 0 0 0
## text5 0 0 0 0 0 0 0 0 0 0
## text6 1 1 0 0 1 1 0 0 0 0
## [ reached max_nfeat ... 4,042 more features ]
# Word cloud of the 60 most frequent features in Biden's tweets.
# Stopwords, punctuation and numbers were already dropped when the DFM was built.
textplot_wordcloud(
  dfm_biden,
  max_words = 60,
  min_size = 0.5,
  max_size = 5
)
# Buttigieg: build a corpus from the tweet text
buttigieg_corpus <- corpus(ButTweets$tweet)
# Tokenise, dropping punctuation and numbers
buttigieg_tokens <- tokens(
  buttigieg_corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
# Lowercase the tokens, then strip English stopwords
buttigieg_tokens <- tokens_select(
  tokens_tolower(buttigieg_tokens),
  pattern = stopwords("en"),
  selection = "remove"
)
# Document-feature matrix, with a preview of the first documents
dfm_buttigieg <- dfm(buttigieg_tokens)
head(dfm_buttigieg)
## Document-feature matrix of: 6 documents, 4,673 features (99.46% sparse) and 0 docvars.
## features
## docs thank inviting homes sharing stories putting trust launched campaign
## text1 1 1 1 1 1 1 1 1 1
## text2 0 0 0 0 0 0 0 0 0
## text3 1 0 0 0 0 0 0 0 0
## text4 0 0 0 0 0 0 0 0 0
## text5 1 0 0 0 0 0 0 0 0
## text6 0 0 0 0 0 0 0 0 0
## features
## docs americans
## text1 1
## text2 0
## text3 0
## text4 1
## text5 1
## text6 0
## [ reached max_nfeat ... 4,663 more features ]
# Word cloud of the 60 most frequent features in Buttigieg's tweets
textplot_wordcloud(
  dfm_buttigieg,
  max_words = 60,
  min_size = 0.5,
  max_size = 5
)
# Biden: the 20 most frequent features across all tweets
textstat_frequency(
  dfm_biden,
  n = 20
)
## feature frequency rank docfreq group
## 1 trump 230 1 218 all
## 2 president 217 2 198 all
## 3 donald 195 3 193 all
## 4 need 129 4 125 all
## 5 country 125 5 124 all
## 6 make 109 6 103 all
## 7 every 105 7 96 all
## 8 get 104 8 100 all
## 9 nation 102 9 99 all
## 10 one 101 10 98 all
## 11 day 101 10 90 all
## 12 can 93 12 78 all
## 13 just 89 13 85 all
## 14 american 84 14 80 all
## 15 help 82 15 70 all
## 16 us 82 15 75 all
## 17 time 82 15 80 all
## 18 take 81 18 76 all
## 19 today 80 19 76 all
## 20 #demdebate 78 20 78 all
# Buttigieg: the 20 most frequent features across all tweets
textstat_frequency(
  dfm_buttigieg,
  n = 20
)
## feature frequency rank docfreq group
## 1 president 181 1 168 all
## 2 us 161 2 143 all
## 3 can 152 3 127 all
## 4 new 138 4 118 all
## 5 country 134 5 127 all
## 6 #demdebate 129 6 129 all
## 7 people 128 7 114 all
## 8 american 124 8 115 all
## 9 americans 119 9 112 all
## 10 need 108 10 98 all
## 11 time 98 11 92 all
## 12 make 98 11 87 all
## 13 chip 97 13 97 all
## 14 trump 91 14 87 all
## 15 together 87 15 81 all
## 16 turn 87 15 86 all
## 17 page 85 17 85 all
## 18 one 79 18 74 all
## 19 just 78 19 77 all
## 20 future 77 20 73 all
# Biden: the ten most-retweeted tweets
# head() returns at most 10 rows, so a shorter table is never padded with
# all-NA rows the way `[1:10, ]` indexing would pad it
BidenTopTen <- BidTweets %>%
  arrange(desc(retweets)) %>%
  head(10)
# Render tweet text with its retweet and like counts as a styled HTML table
BidenTopTen %>%
  select("tweet", "retweets", "likes") %>%
  kable(col.names = c("Tweet", "Retweets", "Likes")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "100%")
| Tweet | Retweets | Likes |
|---|---|---|
| The world is laughing at President Trump. They see him for what he really is: dangerously incompetent and incapable of world leadership. We cannot give him four more years as commander in chief. https://t.co/IR8K2k54YQ | 68099 | 253158 |
| Two elections. Zero criminal convictions. https://t.co/fyy1DLOac9 | 17043 | 119498 |
| What kind of president bullies a teenager? @realDonaldTrump, you could learn a few things from Greta on what it means to be a leader. https://t.co/18Y6uZexMC | 16894 | 110731 |
| Donald Trump should release his taxes or shut up about corruption. https://t.co/5G8VwWsyx0 | 15212 | 57807 |
| Wanted to make sure you saw this, @realDonaldTrump: “Trump’s First 3 Years Created 1.5 Million Fewer Jobs Than Obama’s Last 3.” https://t.co/KWoHe1d4ku | 13961 | 40497 |
| This same poll has you losing to me by 7 points. https://t.co/cbwKoCbW7b | 13753 | 97617 |
| .@realDonaldTrump, release your taxes or shut up about corruption. https://t.co/u3czWLj5ju | 13605 | 76813 |
| This is a crisis of Donald Trump’s own making. He claimed pulling out of the Iran deal would deter Iranian aggression and result in a better deal. He has failed on both counts. He is the most erratic and incompetent commander in chief we’ve ever had. https://t.co/wHhWJA1Zhm | 12729 | 50500 |
| Donald Trump is cutting food assistance for 700,000 people, but somehow found $1.4 billion for his sham of a border wall. This administration is morally bankrupt. https://t.co/1CPQhgNUdx | 12467 | 47248 |
| I’ve released 21 years of my tax returns — why hasn’t President Trump? He should release his taxes or shut up about corruption. https://t.co/7NrKkagZlt | 11351 | 65420 |
# Biden: distribution (min, quartiles, mean, max) of retweets per tweet
summary(BidTweets[["retweets"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 73 467 994 1938 2285 68099
# Biden: distribution (min, quartiles, mean, max) of likes per tweet
summary(BidTweets[["likes"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 338 1986 4209 9636 9960 253158
# Buttigieg: the ten most-retweeted tweets
# head() returns at most 10 rows, so a shorter table is never padded with
# all-NA rows the way `[1:10, ]` indexing would pad it
ButtigiegTopTen <- ButTweets %>%
  arrange(desc(retweets)) %>%
  head(10)
# Render tweet text with its retweet and like counts as a styled HTML table
ButtigiegTopTen %>%
  select("tweet", "retweets", "likes") %>%
  kable(col.names = c("Tweet", "Retweets", "Likes")) %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "100%")
| Tweet | Retweets | Likes |
|---|---|---|
| Fellow veterans take note: the President of the United States has formally confessed to illegally misdirecting charitable funds from an event he claimed was a benefit for veterans. How many more affronts to flag and uniform will we tolerate? https://t.co/ZyvEYoBV8D | 20841 | 64880 |
| Thank you for inviting me into your homes, sharing your stories, and putting your trust in me. We launched our campaign because Americans are hungry for a new kind of politics that brings us together. And together we’ll beat this president and build the era that must come next. https://t.co/QDajvx1lpL | 15648 | 191212 |
| God does not belong to a political party. | 15227 | 121178 |
| My marriage has never involved me sending hush money to a porn star. I’m ready to have a debate with Donald Trump on family values. #CNNTownHall https://t.co/ZVjpPjVUcb | 15155 | 96491 |
| There’s nothing pro-military about overruling our military justice system to prevent it from delivering accountability for war crimes. The president has again dishonored our armed services. | 10236 | 59557 |
| I appreciate this reporter’s swift and honest correction of a misquote on my views of the Obama presidency. From health care to DADT repeal to the rescue of the auto industry, my appreciation of the great leadership of Barack Obama comes from a very personal place. https://t.co/eWvSDtcpTQ | 8862 | 44085 |
| Trayvon Martin would have been 25 today. How many 25th birthdays have been stolen from us by white supremacy, gun violence, prejudice, and fear? #BlackLivesMatter | 6947 | 49994 |
| We don’t yet have all the details of the horrifying events in Santa Clarita. But we do know that in America today, children are scared to go to school. Parents and teachers are terrified. And that can’t stand. It is time to hold the NRA and our leaders in Washington accountable. | 5305 | 35462 |
| China is waging a shocking, merciless campaign to erase the religious and ethnic identity of millions. The United States has a responsibility to speak out. This president’s silence has a cost. https://t.co/y7tPxKhBjN | 4884 | 15514 |
| Tonight, Americans in Iraq are under fire. My prayers are with them, their loved ones, and their families. | 4560 | 48105 |
# Buttigieg: distribution (min, quartiles, mean, max) of retweets per tweet
summary(ButTweets[["retweets"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.0 184.8 350.5 650.6 638.2 20841.0
# Buttigieg: distribution (min, quartiles, mean, max) of likes per tweet
summary(ButTweets[["likes"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 43 1200 2239 4717 4560 191212
# Split `timestamp` into a calendar date plus time-of-day parts.
# Adds the columns date, hour, minute, second, format_date (mm/dd/YYYY) and
# format_hour (zero-padded HH:MM:SS) to `tweets` and returns the result.
# NOTE(review): the date is taken from the timestamp as parsed; if timestamps
# are stored in UTC, a "day" here is a UTC day, not a US calendar day -- confirm.
add_date_parts <- function(tweets) {
  tweets %>%
    mutate(
      date = as.Date(timestamp),
      hour = hour(timestamp),
      minute = minute(timestamp),
      second = second(timestamp),
      format_date = format(date, "%m/%d/%Y"),
      # format() zero-pads each field; pasting the raw numbers produced
      # strings such as "9:5:3"
      format_hour = format(timestamp, "%H:%M:%S")
    )
}

# Keep, for every distinct date, the single row with the most retweets.
# The result is ungrouped so later verbs do not silently operate per date.
top_tweet_per_day <- function(dated_tweets) {
  dated_tweets %>%
    group_by(date) %>%
    slice(which.max(retweets)) %>%
    ungroup()
}

# Biden: dated tweets and the most-retweeted tweet of each day
dateBiden <- add_date_parts(BidTweets)
TopDateBiden <- top_tweet_per_day(dateBiden)

# Buttigieg: dated tweets and the most-retweeted tweet of each day
dateButtigieg <- add_date_parts(ButTweets)
TopDateButtigieg <- top_tweet_per_day(dateButtigieg)
# Upper y-axis bound shared by both charts, taken from the data, so the two
# candidates are drawn on the same scale. Previously only the Buttigieg chart
# had a fixed limit (60000) while the Biden chart was auto-scaled.
retweet_axis_max <- max(
  TopDateBiden$retweets,
  TopDateButtigieg$retweets,
  na.rm = TRUE
)

# Line chart of the daily top tweet's retweet count for one candidate.
#   daily_top: one row per date, with `date` and `retweets` columns
#   candidate: name used as the title prefix
#   y_max:     upper limit of the y axis (the lower limit is 0)
plot_daily_top_retweets <- function(daily_top, candidate, y_max) {
  ggplot(data = daily_top, mapping = aes(x = date, y = retweets)) +
    geom_line(size = 1.2) +
    theme_bw() +
    labs(
      title = paste0(
        candidate,
        ": Retweet Count for Most Popular Tweet per Day"
      ),
      x = "Date (Nov 2019 - Mar 2020)",
      y = "Retweet Count"
    ) +
    ylim(0, y_max)
}

plot_daily_top_retweets(TopDateBiden, "Biden", retweet_axis_max)
plot_daily_top_retweets(TopDateButtigieg, "Buttigieg", retweet_axis_max)
# Total each sentiment category over all documents of a dictionary-lookup dfm.
# Returns a data frame with one row per category: `sentiment` (the category
# label) and `value` (its summed count). Labels are read from the dfm's own
# feature names, so they cannot drift out of step with the counts.
nrc_totals <- function(nrc_dfm) {
  # convert() replaces the deprecated as.data.frame() method for dfm objects
  per_doc <- convert(nrc_dfm, to = "data.frame")
  counts <- colSums(per_doc[, names(per_doc) != "doc_id", drop = FALSE])
  data.frame(
    sentiment = names(counts),
    value = unname(counts),
    stringsAsFactors = FALSE
  )
}

# Biden: map DFM features onto NRC categories, then total per category
NRC_Biden <- dfm_lookup(dfm_biden, dictionary = data_dictionary_NRC)
BidSentSummary <- nrc_totals(NRC_Biden)

# Buttigieg: same lookup and totals
NRC_Buttigieg <- dfm_lookup(dfm_buttigieg, dictionary = data_dictionary_NRC)
ButSentSummary <- nrc_totals(NRC_Buttigieg)

# The counts are placed side by side by row, so both summaries must list the
# same categories in the same order
stopifnot(identical(BidSentSummary$sentiment, ButSentSummary$sentiment))
BothSentiment <- data.frame(
  Sentiment = BidSentSummary$sentiment,
  Biden = BidSentSummary$value,
  Buttigieg = ButSentSummary$value,
  stringsAsFactors = FALSE
)
BothSentiment
## Sentiment Biden Buttigieg
## 1 anger 713 497
## 2 anticipation 948 943
## 3 disgust 287 211
## 4 fear 792 606
## 5 joy 642 657
## 6 negative 1137 941
## 7 positive 2072 2223
## 8 sadness 525 431
## 9 surprise 553 430
## 10 trust 1485 1392
# Reshape to long form: one row per sentiment/candidate pair
BothSent <- pivot_longer(
  BothSentiment,
  cols = c("Biden", "Buttigieg"),
  names_to = "Candidate",
  values_to = "Value"
)
# Side-by-side bars of the raw NRC category counts for the two candidates
ggplot(BothSent, aes(x = Sentiment, y = Value, fill = Candidate)) +
  geom_col(position = "dodge") +
  theme_bw(base_size = 11) +
  theme(
    axis.text.x = element_text(angle = 30, vjust = 0.7, size = 11)
  ) +
  scale_fill_manual(
    values = c("Biden" = "darkblue", "Buttigieg" = "lightblue")
  ) +
  labs(
    title = "Comparing Sentiment between Biden and Buttigieg",
    subtitle = "Using the NRC Data Dictionary",
    x = "Sentiment",
    y = "Sentiment Occurrence Count"
  )