# Project setup ----
# Every relative CSV path in this script is resolved against this folder.
# NOTE(review): the absolute path is machine-specific; the script will stop
# here on any machine where this folder does not exist.
setwd(dir = "/Users/isaiahmireles/Desktop/Trump folder")

# `population`: contents of trump_tweets_dataset.csv.
# `df`: contents of trump_sample_labeled.csv, the frame used by the
# analysis below (presumably a labeled sample of `population` - confirm).
population <- read.csv(file = "trump_tweets_dataset.csv")
df <- read.csv(file = "trump_sample_labeled.csv")
How does Donald Trump’s public discourse distribute across core themes (e.g., immigration, religion, education, economics), and how does this thematic composition evolve over time (2009–2026)?
What are Trump’s beliefs about each topic?
Which themes are most strongly associated with viral engagement (likes, retweets, replies), and does this relationship change over time?
Does emotionally charged rhetoric within certain themes (e.g., immigration + negative sentiment) disproportionately drive engagement? Provide examples.
# theme_label
# (The stray token above was fused onto the library() call, turning it into a
# call to the undefined function `theme_labellibrary`; kept here as a comment.)
# dplyr supplies select(), filter(), arrange(), mutate(), group_by() and
# summarize(), which the pipelines in this script rely on.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Education posts ----
# Rows whose theme_label is "education", reduced to the columns needed for
# reading them, ordered from highest to lowest `confidence`.
edu <- df |>
  filter(theme_label == "education") |>
  select(date, theme_label, confidence, text, post_url) |>
  arrange(desc(confidence))

# Same rows, restricted to those with `confidence` of at least 0.3.
edu_conf <- filter(edu, confidence >= 0.3)

# Print the column names of the full labeled frame.
colnames(df)
## [1] "date" "platform" "handle" "text"
## [5] "favorite_count" "repost_count" "deleted_flag" "word_count"
## [9] "hashtags" "urls" "user_mentions" "media_count"
## [13] "media_urls" "post_url" "text_lwr" "text_clean"
## [17] "theme_label" "confidence"
Visualization
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Monthly theme aggregation ----

# Minimum `confidence` a post needs to enter the high-confidence subset.
min_confidence <- 0.3

# Convert `date` to Date class so floor_date() can bucket posts by month.
df <- df |>
  mutate(date = as.Date(date))

# One row per post, with `month` = first day of that post's calendar month.
df_monthly <- df |>
  mutate(month = floor_date(date, unit = "month"))

# Posts per (month, theme) over every row of `df`, whatever its confidence.
# Kept under its own name: this result used to be assigned to
# `topic_monthly_counts` and was then overwritten by the high-confidence
# counts further down, so it was never available afterwards.
topic_monthly_counts_all <- df_monthly |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop")

# Share of each month's posts that falls under each theme (all posts).
# `prop` sums to 1 within a month because the denominator is that month's
# total across themes.
topic_monthly_prop <- topic_monthly_counts_all |>
  group_by(month) |>
  mutate(prop = n_posts / sum(n_posts)) |>
  ungroup()

# Posts whose `confidence` reaches the cut-off. Built from `df`, so it does
# not carry the `month` column.
df_high_conf <- df |>
  filter(confidence >= min_confidence)

# Posts per (month, theme) among the high-confidence posts only.
topic_monthly_counts <- df_high_conf |>
  mutate(month = floor_date(date, unit = "month")) |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop")
library(ggplot2)
# Line chart of the monthly post counts held in `topic_monthly_counts`:
# month on the x-axis, number of posts on the y-axis, one coloured line per
# theme_label value.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, colour = theme_label)
) +
  geom_line() +
  labs(
    x = "Month",
    y = "Number of Tweets",
    title = "Tweets by Theme Over Time"
  )
# GDP series ----
# Read GDP.csv, convert `observation_date` to Date class, and order the rows
# from the most recent observation to the oldest.
GDP <- read.csv(file = "GDP.csv") |>
  mutate(observation_date = as.Date(observation_date)) |>
  arrange(desc(observation_date))

# Put the posts in the same newest-first order.
df <- arrange(df, desc(date))

# Print the distinct theme labels present in the labeled posts.
unique(df[["theme_label"]])
## [1] "economics" "education" "immigration" "religion" "homelessness"
Government Data:
BEA: Quarterly GDP
BLS: Monthly Unemployment Rate, CPI
Census: Annual Median Income, Poverty Rate
HUD: Annual Homelessness Counts
DHS: Monthly Border Encounters
NCES: Annual Education Statistics
General Questions
Does tweet sentiment shift significantly in response to changes in official economic indicators (GDP, unemployment, CPI)?
Are tweets posted during periods of worsening government indicators associated with higher mean engagement?
Is emotionally extreme rhetoric (high absolute sentiment score) consistently associated with higher virality across themes?
Economics
Immigration
Homelessness
Education
Religion