# Make the project folder the working directory so the relative file names
# used for reading and writing resolve against it.
# NOTE(review): this absolute path only exists on one machine.
setwd(dir = "/Users/isaiahmireles/Desktop/Trump folder")

# Tweet dataset (presumably the full archive - confirm); it is only loaded in
# the lines shown here.
population <- read.csv(file = "trump_tweets_dataset.csv")

# Labeled sample; this is the data frame the rest of the script works on.
df <- read.csv(file = "trump_sample_labeled.csv")
How does Donald Trump’s public discourse distribute across core themes (e.g., immigration, religion, education, economics), and how does this thematic composition evolve over time (2009–2026)?
What are Trump’s beliefs about each topic?
Which themes are most strongly associated with viral engagement (likes, retweets, replies), and does this relationship change over time?
Does emotionally charged rhetoric within certain themes (e.g., immigration + negative sentiment) disproportionately drive engagement? Provide examples.
# theme_label
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Posts labeled "education", reduced to the columns needed for manual review
# and ordered from most to least confident classification.
edu <- df |>
  filter(theme_label == "education") |>
  select(date, theme_label, confidence, text, post_url) |>
  arrange(desc(confidence))

# Education posts whose classification confidence is at least 0.3.
edu_conf <- filter(edu, confidence >= 0.3)

# Show which columns the labeled sample provides.
colnames(df)
## [1] "date" "platform" "handle" "text"
## [5] "favorite_count" "repost_count" "deleted_flag" "word_count"
## [9] "hashtags" "urls" "user_mentions" "media_count"
## [13] "media_urls" "post_url" "text_lwr" "text_clean"
## [17] "theme_label" "confidence"
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Convert the date column to Date values.
df <- mutate(df, date = as.Date(date))

# Tag every post with the first day of its calendar month.
df_monthly <- mutate(df, month = floor_date(date, unit = "month"))

# Posts per month and theme, using every post regardless of confidence.
topic_monthly_counts <- df_monthly |>
  group_by(month, theme_label) |>
  summarise(n_posts = n(), .groups = "drop")

# Each theme's share of that month's posts.
# NOTE(review): these shares are built from all posts, whereas
# topic_monthly_counts is rebuilt below from confidence >= 0.3 posts only -
# confirm the two are meant to differ.
topic_monthly_prop <- topic_monthly_counts |>
  group_by(month) |>
  mutate(prop = n_posts / sum(n_posts)) |>
  ungroup()

# Posts whose classification confidence is at least 0.3.
df_high_conf <- filter(df, confidence >= 0.3)

# Replace the monthly counts with counts of the high-confidence posts only.
topic_monthly_counts <- df_high_conf |>
  mutate(month = floor_date(date, "month")) |>
  group_by(month, theme_label) |>
  summarise(n_posts = n(), .groups = "drop")
library(ggplot2)
# Eleven-colour palette. The values are unnamed, so the manual scales below
# hand them out by position.
muted_dark <- c(
  "#8B2F2F", # dark red
  "#2C5AA0", # dark blue
  "#2E6F3E", # dark green
  "#B9A3D6", # light purple
  "#BFBFBF", # light grey
  "#000000", # black
  "#C46210", # dark orange
  "#2F7F7F", # teal
  "#B59A00", # dark yellow
  "#5A3E8C", # dark purple
  "#6B4F3A" # brown
)

# Monthly post counts, one coloured line per theme.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.2) +
  scale_color_manual(values = muted_dark) +
  theme_minimal() +
  labs(
    title = "Tweets by Theme Over Time",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  )

# Same series, one panel per theme.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.1) +
  facet_wrap(vars(theme_label)) +
  scale_color_manual(values = muted_dark) +
  theme_minimal()

# Stacked area of monthly counts.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, fill = theme_label)
) +
  geom_area(alpha = 0.85) +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Stacked area of monthly theme shares.
ggplot(
  data = topic_monthly_prop,
  mapping = aes(x = month, y = prop, fill = theme_label)
) +
  geom_area() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Number of high-confidence posts per theme.
ggplot(
  data = df_high_conf,
  mapping = aes(x = theme_label, fill = theme_label)
) +
  geom_bar() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# The same bar chart is drawn a second time.
ggplot(
  data = df_high_conf,
  mapping = aes(x = theme_label, fill = theme_label)
) +
  geom_bar() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Month-by-theme heatmap; darker tiles mean more posts.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = theme_label, fill = n_posts)
) +
  geom_tile() +
  scale_fill_gradient(low = "#E5E5E5", high = "#2F4F4F") +
  theme_minimal()
# Palette, declared again with the same eleven colours in the same order.
muted_dark <- c(
  "#8B2F2F", # dark red
  "#2C5AA0", # dark blue
  "#2E6F3E", # dark green
  "#B9A3D6", # light purple
  "#BFBFBF", # light grey
  "#000000", # black
  "#C46210", # dark orange
  "#2F7F7F", # teal
  "#B59A00", # dark yellow
  "#5A3E8C", # dark purple
  "#6B4F3A" # brown
)

# Monthly counts per theme; line opacity also follows the post count, so
# low-volume stretches fade (alpha runs from 0.3 to 1).
ggplot(
  data = topic_monthly_counts,
  mapping = aes(
    x = month,
    y = n_posts,
    color = theme_label,
    alpha = n_posts
  )
) +
  geom_line(linewidth = 1.3) +
  scale_alpha(range = c(0.3, 1)) +
  scale_color_manual(values = muted_dark) +
  theme_minimal() +
  labs(
    title = "Tweets by Theme Over Time",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  )
# Earliest and latest post dates in the sample.
range(df$date)
## [1] "2009-05-05" "2026-01-07"

# Load the GDP series, parse its observation dates, and sort newest first.
GDP <- read.csv(file = "GDP.csv") |>
  mutate(observation_date = as.Date(observation_date)) |>
  arrange(desc(observation_date))

# Sort the posts newest first as well.
df <- arrange(df, desc(date))

# Distinct theme labels present in the sample.
unique(df$theme_label)
## [1] "war" "none"
## [3] "jobs" "crime"
## [5] "healthcare" "democrats"
## [7] "immigration" "religion"
## [9] "Government assistance programs" "education"
## [11] "poverty"
Government Data:
BEA: Quarterly GDP
BLS: Monthly Unemployment Rate, CPI
Census: Annual Median Income, Poverty Rate
HUD: Annual Homelessness Counts
DHS: Monthly Border Encounters
NCES: Annual Education Statistics
General Questions
Does tweet sentiment shift significantly in response to changes in official economic indicators (GDP, unemployment, CPI)?
Are tweets posted during periods of worsening government indicators associated with higher mean engagement?
Is emotionally extreme rhetoric (high absolute sentiment score) consistently associated with higher virality across themes?
Economics
Immigration
Homelessness
Education
Religion
# Era indicator flags: TRUE when the post date lies inside the window, with
# both end dates included.
# NOTE(review): trump_pres_1 starts on 2016-11-08 rather than 2017-01-20,
# post_pres and campaign_2024 overlap, and 2024-11-05 through 2025-01-19 is
# covered by none of these flags - confirm the boundaries are intended.
df$pre_campaign <- df$date >= "2009-05-05" & df$date <= "2015-06-15"
df$campaign_2016 <- df$date >= "2015-06-16" & df$date <= "2016-11-07"
df$trump_pres_1 <- df$date >= "2016-11-08" & df$date <= "2021-01-20"
df$post_pres <- df$date >= "2021-01-21" & df$date <= "2024-11-04"
df$campaign_2024 <- df$date >= "2022-11-15" & df$date <= "2024-11-04"
df$trump_pres_2 <- df$date >= "2025-01-20" & df$date <= "2026-01-07"

# One mutually exclusive era label per post. cut() on Dates builds left-closed
# intervals [break, next break), so:
#  - the campaign_2024 era starts at 2022-11-15, the same start date as the
#    campaign_2024 flag above;
#  - include.lowest = TRUE also closes the final interval, so posts dated
#    exactly 2026-01-07 are labeled "pres2" instead of NA.
df$era <- cut(
  df$date,
  breaks = as.Date(c(
    "2009-05-05", "2015-06-16", "2016-11-08",
    "2021-01-21", "2022-11-15", "2025-01-20", "2026-01-07"
  )),
  labels = c(
    "pre_campaign", "campaign_2016", "pres1",
    "post_pres", "campaign_2024", "pres2"
  ),
  include.lowest = TRUE
)
| Short Name | Meaning |
|---|---|
| pre16 | pre-2016 campaign |
| camp16 | 2016 campaign |
| pres1 | first presidency |
| post21 | post presidency |
| camp24 | 2024 campaign |
| pres2 | second presidency |
##################################################
# EVENT WINDOW FLAGS
# Every entry is c(first day, last day), both included. One logical column
# per entry is added to df, in the order listed here.
# NOTE(review): shock_war_iran and war_iran start on 2026-02-01, after the
# last post date reported by range(df$date) (2026-01-07), so both columns
# come out all FALSE on this sample.
##################################################
df <- local({
  window_bounds <- list(
    # Campaign cycle windows
    camp16 = c("2015-06-16", "2016-11-08"),
    camp20 = c("2019-06-18", "2020-11-03"),
    camp24 = c("2022-11-15", "2024-11-05"),
    # Policy shock windows
    shock_travelban = c("2017-01-27", "2017-03-31"),
    shock_taxcuts = c("2017-12-01", "2018-03-01"),
    shock_covid = c("2020-03-01", "2020-06-01"),
    shock_election = c("2020-11-03", "2021-01-06"),
    shock_war_iran = c("2026-02-01", "2026-06-01"),
    # Immigration
    immig_wall_campaign = c("2015-06-16", "2016-11-08"),
    immig_travel_ban = c("2017-01-27", "2017-12-31"),
    immig_mass_deport = c("2025-01-20", "2026-01-07"),
    # Democrats
    dem_attack_2016 = c("2015-06-16", "2016-11-08"),
    dem_russia_probe = c("2017-01-01", "2019-03-31"),
    dem_post_pres = c("2021-01-21", "2024-11-05"),
    # Crime
    crime_law_order_2016 = c("2016-06-01", "2016-11-08"),
    crime_blm_protests = c("2020-05-25", "2020-09-01"),
    crime_city_policy = c("2025-01-20", "2026-01-07"),
    # Poverty
    poverty_campaign_focus = c("2016-01-01", "2016-11-08"),
    poverty_opportunity = c("2018-01-01", "2019-12-31"),
    poverty_post_covid = c("2021-01-01", "2022-12-31"),
    # Religion
    religion_evangelical = c("2015-06-16", "2016-11-08"),
    religion_liberty_exec = c("2017-05-01", "2018-12-31"),
    religion_campaign24 = c("2024-01-01", "2024-11-05"),
    # Government assistance programs
    assist_welfare_reform = c("2017-01-20", "2018-12-31"),
    assist_covid_relief = c("2020-03-01", "2020-12-31"),
    assist_post_covid = c("2021-01-01", "2022-12-31"),
    # Healthcare
    healthcare_repeal_aca = c("2016-01-01", "2017-12-31"),
    healthcare_covid = c("2020-03-01", "2020-12-31"),
    healthcare_policy24 = c("2024-01-01", "2024-11-05"),
    # Jobs
    jobs_trade_policy = c("2017-01-20", "2019-12-31"),
    jobs_tariffs = c("2018-03-01", "2019-12-31"),
    jobs_campaign24 = c("2024-01-01", "2024-11-05"),
    # War
    war_syria_strike = c("2017-04-01", "2017-06-01"),
    war_isis = c("2017-01-20", "2019-12-31"),
    war_iran = c("2026-02-01", "2026-06-01"),
    # Education
    edu_campaign_focus = c("2016-01-01", "2016-11-08"),
    edu_free_speech = c("2019-01-01", "2019-12-31"),
    edu_anti_woke = c("2023-01-01", "2024-11-05"),
    # None (general commentary)
    none_precampaign = c("2009-05-05", "2015-06-15"),
    none_presidency = c("2017-01-20", "2021-01-20"),
    none_postpres = c("2021-01-21", "2026-01-07")
  )

  flagged <- df
  for (flag_name in names(window_bounds)) {
    bounds <- as.Date(window_bounds[[flag_name]])
    flagged[[flag_name]] <- flagged$date >= bounds[1] & flagged$date <= bounds[2]
  }
  flagged
})
# Assign every post one mutually exclusive era label, replacing the era
# column built earlier in the script.
# cut() on Dates builds left-closed intervals [break, next break);
# include.lowest = TRUE additionally closes the final interval so that posts
# dated exactly 2026-01-07 (the last break) are labeled "pres2" instead of NA.
# NOTE(review): "campaign16" here runs through 2017-01-19, i.e. it also
# covers the post-election transition - confirm that is intended.
df$era <- cut(
  df$date,
  breaks = as.Date(c(
    "2009-05-05",
    "2015-06-16",
    "2017-01-20",
    "2021-01-21",
    "2025-01-20",
    "2026-01-07"
  )),
  labels = c("pre_campaign", "campaign16", "pres1", "post_pres", "pres2"),
  include.lowest = TRUE
)
########################################################
# Dataset Documentation / Data Dictionary
########################################################
# Two-column lookup table (feature, description) with one row per column of
# df. Each feature is written next to its description so the two cannot drift
# out of alignment.
data_dictionary <- local({
  entries <- c(
    date = "date the post was created",
    platform = "social media platform where the post appeared (twitter or truth social)",
    handle = "account handle that published the post",
    text = "original post text content",
    favorite_count = "number of likes or favorites the post received",
    repost_count = "number of reposts or retweets",
    deleted_flag = "flag indicating if the post was later deleted",
    word_count = "number of words contained in the post",
    hashtags = "hashtags extracted from the post",
    urls = "urls contained in the post",
    user_mentions = "user accounts mentioned in the post",
    media_count = "number of media items attached to the post",
    media_urls = "urls linking to attached media",
    post_url = "direct link to the original post",
    text_lwr = "lowercase version of the text field",
    text_clean = "cleaned version of the text with punctuation and noise removed",
    theme_label = "predicted topic category assigned by classification model",
    confidence = "classification confidence score produced by the model",
    pre_campaign = "flag indicating posts before trump announced his 2016 presidential campaign",
    campaign_2016 = "flag indicating posts during the 2016 presidential campaign",
    trump_pres_1 = "flag indicating posts during trump's first presidency",
    post_pres = "flag indicating posts after his first presidency",
    campaign_2024 = "flag indicating posts during the 2024 campaign cycle",
    trump_pres_2 = "flag indicating posts during trump's second presidency",
    era = "categorical variable describing the broader political era",
    camp16 = "flag for posts during the 2016 campaign cycle",
    camp20 = "flag for posts during the 2020 campaign cycle",
    camp24 = "flag for posts during the 2024 campaign cycle",
    shock_travelban = "policy shock window around the 2017 travel ban policy",
    shock_taxcuts = "policy shock window around the 2017 tax cuts legislation",
    shock_covid = "policy shock window during the early covid pandemic period",
    shock_election = "policy shock window around the 2020 election dispute period",
    shock_war_iran = "policy shock window associated with military conflict involving iran",
    immig_wall_campaign = "immigration theme window during the border wall campaign",
    immig_travel_ban = "immigration theme window during the travel ban policy period",
    immig_mass_deport = "immigration theme window during renewed deportation policy discussions",
    dem_attack_2016 = "democrats theme window during the 2016 campaign attacks",
    dem_russia_probe = "democrats theme window during the russia investigation period",
    dem_post_pres = "democrats theme window during the post presidency political period",
    crime_law_order_2016 = "crime theme window emphasizing law and order messaging in 2016",
    crime_blm_protests = "crime theme window during 2020 protest and policing debates",
    crime_city_policy = "crime theme window focusing on urban crime discussions",
    poverty_campaign_focus = "poverty theme window during campaign rhetoric about economic hardship",
    poverty_opportunity = "poverty theme window during opportunity zone economic initiatives",
    poverty_post_covid = "poverty theme window during post covid economic recovery debates",
    religion_evangelical = "religion theme window during evangelical outreach in the 2016 campaign",
    religion_liberty_exec = "religion theme window during religious liberty policy initiatives",
    religion_campaign24 = "religion theme window during the 2024 campaign",
    assist_welfare_reform = "government assistance theme window during welfare reform discussions",
    assist_covid_relief = "government assistance theme window during pandemic relief programs",
    assist_post_covid = "government assistance theme window during post pandemic policy debates",
    healthcare_repeal_aca = "healthcare theme window during efforts to repeal the affordable care act",
    healthcare_covid = "healthcare theme window during covid health policy debates",
    healthcare_policy24 = "healthcare theme window during the 2024 campaign policy messaging",
    jobs_trade_policy = "jobs theme window during trade and manufacturing policy discussions",
    jobs_tariffs = "jobs theme window during tariff and trade war messaging",
    jobs_campaign24 = "jobs theme window during economic messaging in the 2024 campaign",
    war_syria_strike = "war theme window around military strikes in syria",
    war_isis = "war theme window related to messaging about the fight against isis",
    war_iran = "war theme window related to military escalation involving iran",
    edu_campaign_focus = "education theme window during education reform messaging in campaigns",
    edu_free_speech = "education theme window during campus free speech policy debates",
    edu_anti_woke = "education theme window criticizing ideological trends in schools",
    none_precampaign = "general commentary window before the 2016 campaign",
    none_presidency = "general commentary window during the first presidency",
    none_postpres = "general commentary window during the post presidency period"
  )

  # unname() keeps the feature names from becoming the data frame's row names.
  data.frame(
    feature = names(entries),
    description = unname(entries),
    stringsAsFactors = FALSE
  )
})
########################################################
# View documentation
########################################################
data_dictionary
## feature
## 1 date
## 2 platform
## 3 handle
## 4 text
## 5 favorite_count
## 6 repost_count
## 7 deleted_flag
## 8 word_count
## 9 hashtags
## 10 urls
## 11 user_mentions
## 12 media_count
## 13 media_urls
## 14 post_url
## 15 text_lwr
## 16 text_clean
## 17 theme_label
## 18 confidence
## 19 pre_campaign
## 20 campaign_2016
## 21 trump_pres_1
## 22 post_pres
## 23 campaign_2024
## 24 trump_pres_2
## 25 era
## 26 camp16
## 27 camp20
## 28 camp24
## 29 shock_travelban
## 30 shock_taxcuts
## 31 shock_covid
## 32 shock_election
## 33 shock_war_iran
## 34 immig_wall_campaign
## 35 immig_travel_ban
## 36 immig_mass_deport
## 37 dem_attack_2016
## 38 dem_russia_probe
## 39 dem_post_pres
## 40 crime_law_order_2016
## 41 crime_blm_protests
## 42 crime_city_policy
## 43 poverty_campaign_focus
## 44 poverty_opportunity
## 45 poverty_post_covid
## 46 religion_evangelical
## 47 religion_liberty_exec
## 48 religion_campaign24
## 49 assist_welfare_reform
## 50 assist_covid_relief
## 51 assist_post_covid
## 52 healthcare_repeal_aca
## 53 healthcare_covid
## 54 healthcare_policy24
## 55 jobs_trade_policy
## 56 jobs_tariffs
## 57 jobs_campaign24
## 58 war_syria_strike
## 59 war_isis
## 60 war_iran
## 61 edu_campaign_focus
## 62 edu_free_speech
## 63 edu_anti_woke
## 64 none_precampaign
## 65 none_presidency
## 66 none_postpres
## description
## 1 date the post was created
## 2 social media platform where the post appeared (twitter or truth social)
## 3 account handle that published the post
## 4 original post text content
## 5 number of likes or favorites the post received
## 6 number of reposts or retweets
## 7 flag indicating if the post was later deleted
## 8 number of words contained in the post
## 9 hashtags extracted from the post
## 10 urls contained in the post
## 11 user accounts mentioned in the post
## 12 number of media items attached to the post
## 13 urls linking to attached media
## 14 direct link to the original post
## 15 lowercase version of the text field
## 16 cleaned version of the text with punctuation and noise removed
## 17 predicted topic category assigned by classification model
## 18 classification confidence score produced by the model
## 19 flag indicating posts before trump announced his 2016 presidential campaign
## 20 flag indicating posts during the 2016 presidential campaign
## 21 flag indicating posts during trump's first presidency
## 22 flag indicating posts after his first presidency
## 23 flag indicating posts during the 2024 campaign cycle
## 24 flag indicating posts during trump's second presidency
## 25 categorical variable describing the broader political era
## 26 flag for posts during the 2016 campaign cycle
## 27 flag for posts during the 2020 campaign cycle
## 28 flag for posts during the 2024 campaign cycle
## 29 policy shock window around the 2017 travel ban policy
## 30 policy shock window around the 2017 tax cuts legislation
## 31 policy shock window during the early covid pandemic period
## 32 policy shock window around the 2020 election dispute period
## 33 policy shock window associated with military conflict involving iran
## 34 immigration theme window during the border wall campaign
## 35 immigration theme window during the travel ban policy period
## 36 immigration theme window during renewed deportation policy discussions
## 37 democrats theme window during the 2016 campaign attacks
## 38 democrats theme window during the russia investigation period
## 39 democrats theme window during the post presidency political period
## 40 crime theme window emphasizing law and order messaging in 2016
## 41 crime theme window during 2020 protest and policing debates
## 42 crime theme window focusing on urban crime discussions
## 43 poverty theme window during campaign rhetoric about economic hardship
## 44 poverty theme window during opportunity zone economic initiatives
## 45 poverty theme window during post covid economic recovery debates
## 46 religion theme window during evangelical outreach in the 2016 campaign
## 47 religion theme window during religious liberty policy initiatives
## 48 religion theme window during the 2024 campaign
## 49 government assistance theme window during welfare reform discussions
## 50 government assistance theme window during pandemic relief programs
## 51 government assistance theme window during post pandemic policy debates
## 52 healthcare theme window during efforts to repeal the affordable care act
## 53 healthcare theme window during covid health policy debates
## 54 healthcare theme window during the 2024 campaign policy messaging
## 55 jobs theme window during trade and manufacturing policy discussions
## 56 jobs theme window during tariff and trade war messaging
## 57 jobs theme window during economic messaging in the 2024 campaign
## 58 war theme window around military strikes in syria
## 59 war theme window related to messaging about the fight against isis
## 60 war theme window related to military escalation involving iran
## 61 education theme window during education reform messaging in campaigns
## 62 education theme window during campus free speech policy debates
## 63 education theme window criticizing ideological trends in schools
## 64 general commentary window before the 2016 campaign
## 65 general commentary window during the first presidency
## 66 general commentary window during the post presidency period
########################################################
# Optional: export documentation
########################################################
# Save the dictionary next to the data, without a row-index column.
write.csv(
  data_dictionary,
  file = "dataset_data_dictionary.csv",
  row.names = FALSE
)

# Columns of df after all flags have been added.
names(df)
## [1] "date" "platform" "handle"
## [4] "text" "favorite_count" "repost_count"
## [7] "deleted_flag" "word_count" "hashtags"
## [10] "urls" "user_mentions" "media_count"
## [13] "media_urls" "post_url" "text_lwr"
## [16] "text_clean" "theme_label" "confidence"
## [19] "pre_campaign" "campaign_2016" "trump_pres_1"
## [22] "post_pres" "campaign_2024" "trump_pres_2"
## [25] "era" "camp16" "camp20"
## [28] "camp24" "shock_travelban" "shock_taxcuts"
## [31] "shock_covid" "shock_election" "shock_war_iran"
## [34] "immig_wall_campaign" "immig_travel_ban" "immig_mass_deport"
## [37] "dem_attack_2016" "dem_russia_probe" "dem_post_pres"
## [40] "crime_law_order_2016" "crime_blm_protests" "crime_city_policy"
## [43] "poverty_campaign_focus" "poverty_opportunity" "poverty_post_covid"
## [46] "religion_evangelical" "religion_liberty_exec" "religion_campaign24"
## [49] "assist_welfare_reform" "assist_covid_relief" "assist_post_covid"
## [52] "healthcare_repeal_aca" "healthcare_covid" "healthcare_policy24"
## [55] "jobs_trade_policy" "jobs_tariffs" "jobs_campaign24"
## [58] "war_syria_strike" "war_isis" "war_iran"
## [61] "edu_campaign_focus" "edu_free_speech" "edu_anti_woke"
## [64] "none_precampaign" "none_presidency" "none_postpres"
| Column | Description |
|---|---|
| pre_campaign | Posts before the 2016 campaign announcement. |
| campaign_2016 | Posts during the 2016 presidential campaign. |
| trump_pres_1 | Posts during Trump’s first presidency (2017–2021). |
| post_pres | Posts during the post-presidency political period. |
| campaign_2024 | Posts during the 2024 campaign period. |
| trump_pres_2 | Posts during Trump’s second presidency period. |
| era | Categorical variable summarizing the political era for each post. |
| Column | Description |
|---|---|
| camp16 | Posts during the 2016 presidential campaign cycle. |
| camp20 | Posts during the 2020 reelection campaign cycle. |
| camp24 | Posts during the 2024 presidential campaign cycle. |
| Column | Description |
|---|---|
| shock_travelban | Period surrounding the 2017 travel restriction executive order. |
| shock_taxcuts | Period around the passage of the 2017 Tax Cuts and Jobs Act. |
| shock_covid | Early COVID-19 pandemic period when healthcare and government assistance messaging increased. |
| shock_election | Period between the 2020 election and early 2021 election dispute events. |
| shock_war_iran | Period surrounding military escalation involving Iran. |
| Column | Description |
|---|---|
| immig_wall_campaign | Immigration messaging during the 2016 campaign emphasizing border wall construction. |
| immig_travel_ban | Period of immigration restrictions and travel ban policy discussion. |
| immig_mass_deport | Period of increased messaging about deportation policies. |
| Column | Description |
|---|---|
| dem_attack_2016 | Criticism of Democratic candidates during the 2016 campaign. |
| dem_russia_probe | Messaging during investigations and political disputes between 2017–2019. |
| dem_post_pres | Continued criticism of Democrats during the post-presidency political period. |
| Column | Description |
|---|---|
| crime_law_order_2016 | Law-and-order messaging during the 2016 campaign. |
| crime_blm_protests | Crime and policing discussions during the 2020 protest period. |
| crime_city_policy | Posts focusing on crime in major U.S. cities and policing policy. |
| Column | Description |
|---|---|
| poverty_campaign_focus | Campaign messaging referencing struggling communities and economic hardship. |
| poverty_opportunity | Messaging about economic development initiatives such as opportunity zones. |
| poverty_post_covid | Discussion of poverty and inequality following the COVID-19 economic disruption. |
| Column | Description |
|---|---|
| religion_evangelical | Campaign messaging targeting evangelical voters. |
| religion_liberty_exec | Posts referencing religious liberty initiatives during the presidency. |
| religion_campaign24 | Religion-related messaging during the 2024 campaign period. |
| Column | Description |
|---|---|
| assist_welfare_reform | Posts discussing reform of government welfare or assistance programs. |
| assist_covid_relief | Messaging related to pandemic stimulus and relief programs. |
| assist_post_covid | Discussions of federal spending and assistance programs following the pandemic. |
| Column | Description |
|---|---|
| healthcare_repeal_aca | Messaging around attempts to repeal or replace the Affordable Care Act. |
| healthcare_covid | Healthcare discussions during the COVID-19 pandemic. |
| healthcare_policy24 | Healthcare messaging during the 2024 campaign cycle. |
| Column | Description |
|---|---|
| jobs_trade_policy | Messaging related to trade policy and domestic manufacturing. |
| jobs_tariffs | Period of messaging about tariffs and international trade disputes. |
| jobs_campaign24 | Jobs and economic messaging during the 2024 campaign cycle. |
| Column | Description |
|---|---|
| war_syria_strike | Period surrounding U.S. military strikes in Syria. |
| war_isis | Messaging related to military operations against ISIS. |
| war_iran | Messaging during escalations involving Iran or Middle East conflict. |
| Column | Description |
|---|---|
| edu_campaign_focus | Education policy messaging during the 2016 campaign. |
| edu_free_speech | Messaging related to campus free speech debates. |
| edu_anti_woke | Messaging criticizing ideological trends in education institutions. |
| Column | Description |
|---|---|
| none_precampaign | General commentary before the 2016 campaign. |
| none_presidency | Non-policy commentary during the presidency. |
| none_postpres | General commentary during the post-presidency period. |
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked _by_ '.GlobalEnv':
##
## population
library(dplyr)
library(ggplot2)
library(lubridate)
# For each theme_label value, the three event-flag columns that belong to it.
# The list order also fixes which palette colour each per-theme plot gets.
theme_flags <- list(
  immigration = c(
    "immig_wall_campaign", "immig_travel_ban", "immig_mass_deport"
  ),
  democrats = c(
    "dem_attack_2016", "dem_russia_probe", "dem_post_pres"
  ),
  crime = c(
    "crime_law_order_2016", "crime_blm_protests", "crime_city_policy"
  ),
  poverty = c(
    "poverty_campaign_focus", "poverty_opportunity", "poverty_post_covid"
  ),
  religion = c(
    "religion_evangelical", "religion_liberty_exec", "religion_campaign24"
  ),
  `Government assistance programs` = c(
    "assist_welfare_reform", "assist_covid_relief", "assist_post_covid"
  ),
  healthcare = c(
    "healthcare_repeal_aca", "healthcare_covid", "healthcare_policy24"
  ),
  jobs = c(
    "jobs_trade_policy", "jobs_tariffs", "jobs_campaign24"
  ),
  war = c(
    "war_syria_strike", "war_isis", "war_iran"
  ),
  education = c(
    "edu_campaign_focus", "edu_free_speech", "edu_anti_woke"
  ),
  none = c(
    "none_precampaign", "none_presidency", "none_postpres"
  )
)

# Posts per month and theme, counted over every post in df (no confidence
# filter).
topic_monthly_counts <- count(
  mutate(df, month = lubridate::floor_date(date, "month")),
  month, theme_label,
  name = "n_posts"
)
#' Build +/- 15 day shading windows around flagged post dates
#'
#' For every post date on which one of the given flag columns is TRUE, emits
#' one window centred on that date. Each flagged date gets its own row, so a
#' long flag period produces many overlapping windows.
#'
#' @param flag_cols Character vector naming logical flag columns of `data`.
#' @param data Data frame with a `date` column and the flag columns. Defaults
#'   to the global `df`, so existing one-argument calls behave as before.
#' @return One row per distinct (date, flag) pair with an active flag, with
#'   columns `date`, `flag`, `start` (date - 15) and `end` (date + 15).
get_flag_windows <- function(flag_cols, data = df) {
  data |>
    select(date, all_of(flag_cols)) |>
    pivot_longer(-date, names_to = "flag", values_to = "active") |>
    # filter() also drops rows where the flag is NA.
    filter(active) |>
    distinct(date, flag) |>
    mutate(
      start = date - 15,
      end = date + 15
    )
}
# One trend chart per theme: the monthly post count as a line, drawn over
# translucent bands marking the dates on which the theme's flags are active.
# The i-th theme in theme_flags uses the i-th palette colour.
plots <- lapply(seq_along(theme_flags), function(idx) {
  theme_name <- names(theme_flags)[idx]
  theme_colour <- muted_dark[idx]
  shaded <- get_flag_windows(theme_flags[[idx]])
  theme_series <- filter(topic_monthly_counts, theme_label == theme_name)

  ggplot(theme_series, aes(x = month, y = n_posts)) +
    geom_rect(
      data = shaded,
      mapping = aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
      inherit.aes = FALSE,
      fill = theme_colour,
      alpha = 0.12
    ) +
    geom_line(linewidth = 1.2, color = theme_colour) +
    labs(
      title = paste("Topic Trend:", theme_name),
      x = "Date",
      y = "Posts per Month"
    ) +
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold")
    )
})

# Render every chart.
for (p in plots) {
  print(p)
}
# Bucket each month into a four-year window; anything that matches none of
# the first three ranges falls into "2021–2026".
topic_monthly_counts <- mutate(
  topic_monthly_counts,
  period_4yr = case_when(
    month >= as.Date("2009-01-01") & month <= as.Date("2012-12-31") ~ "2009–2012",
    month >= as.Date("2013-01-01") & month <= as.Date("2016-12-31") ~ "2013–2016",
    month >= as.Date("2017-01-01") & month <= as.Date("2020-12-31") ~ "2017–2020",
    TRUE ~ "2021–2026"
  )
)

# Theme lines stacked in one panel per window, each with its own x range.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.2) +
  facet_wrap(vars(period_4yr), ncol = 1, scales = "free_x") +
  scale_color_manual(values = muted_dark) +
  theme_minimal() +
  labs(
    title = "Tweets by Theme Over Time (4-Year Windows)",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  )
library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)
############################################
# Theme -> Flag mapping
############################################
# Each theme_label value paired with its three event-flag columns; the list
# order decides which palette colour a theme's plot receives.
theme_flags <- list(
  immigration = c(
    "immig_wall_campaign", "immig_travel_ban", "immig_mass_deport"
  ),
  democrats = c(
    "dem_attack_2016", "dem_russia_probe", "dem_post_pres"
  ),
  crime = c(
    "crime_law_order_2016", "crime_blm_protests", "crime_city_policy"
  ),
  poverty = c(
    "poverty_campaign_focus", "poverty_opportunity", "poverty_post_covid"
  ),
  religion = c(
    "religion_evangelical", "religion_liberty_exec", "religion_campaign24"
  ),
  `Government assistance programs` = c(
    "assist_welfare_reform", "assist_covid_relief", "assist_post_covid"
  ),
  healthcare = c(
    "healthcare_repeal_aca", "healthcare_covid", "healthcare_policy24"
  ),
  jobs = c(
    "jobs_trade_policy", "jobs_tariffs", "jobs_campaign24"
  ),
  war = c(
    "war_syria_strike", "war_isis", "war_iran"
  ),
  education = c(
    "edu_campaign_focus", "edu_free_speech", "edu_anti_woke"
  ),
  none = c(
    "none_precampaign", "none_presidency", "none_postpres"
  )
)
############################################
# Monthly topic counts
############################################
# Rebuilt from every post in df; this drops the period_4yr column added to
# the previous topic_monthly_counts.
topic_monthly_counts <- count(
  mutate(df, month = floor_date(date, "month")),
  month, theme_label,
  name = "n_posts"
)
############################################
# Create flag windows
############################################
#' Summarise each event flag as a single labeled shading window
#'
#' For every flag column, the window runs from 15 days before the earliest
#' post with the flag set to 15 days after the latest such post. The window
#' is therefore derived from observed post dates, not from the dates used to
#' define the flag, and flags that are never TRUE produce no row.
#'
#' @param flag_cols Character vector naming logical flag columns of `data`.
#' @param data Data frame with a `date` column and the flag columns. Defaults
#'   to the global `df`, so existing one-argument calls behave as before.
#' @return One row per flag with at least one active post: `flag`, `start`,
#'   `end`, and `label` (flag name plus the window dates on a second line).
get_flag_windows <- function(flag_cols, data = df) {
  data |>
    select(date, all_of(flag_cols)) |>
    pivot_longer(-date, names_to = "flag", values_to = "active") |>
    # filter() also drops rows where the flag is NA.
    filter(active) |>
    group_by(flag) |>
    # min/max of (date -/+ 15) equals (min/max date) -/+ 15, so the per-date
    # windows never need to be materialised.
    summarise(
      start = min(date) - 15,
      end = max(date) + 15,
      .groups = "drop"
    ) |>
    mutate(
      label = paste0(
        flag,
        "\n(",
        format(start, "%Y-%m-%d"),
        " → ",
        format(end, "%Y-%m-%d"),
        ")"
      )
    )
}
############################################
# Generate plots
############################################
# One trend chart per theme. Each flag contributes a single translucent band
# and a legend entry; every band uses the theme's own palette colour, so the
# legend separates flags by label rather than by colour.
plots <- lapply(seq_along(theme_flags), function(idx) {
  theme_name <- names(theme_flags)[idx]
  theme_colour <- muted_dark[idx]
  bands <- get_flag_windows(theme_flags[[idx]])
  theme_series <- filter(topic_monthly_counts, theme_label == theme_name)

  ggplot(theme_series, aes(x = month, y = n_posts)) +
    geom_rect(
      data = bands,
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = -Inf,
        ymax = Inf,
        fill = label
      ),
      inherit.aes = FALSE,
      alpha = 0.06
    ) +
    geom_line(linewidth = 1.2, color = theme_colour) +
    scale_fill_manual(values = rep(theme_colour, nrow(bands))) +
    labs(
      title = paste("Topic Trend:", theme_name),
      x = "Date",
      y = "Posts per Month",
      fill = "Event Flags"
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold")
    )
})
############################################
# Print plots
############################################
for (p in plots) {
  print(p)
}
# Draw each theme's timeline once more and save it as
# theme_plots/<theme>_timeline.png (theme name lower-cased, spaces -> "_").
dir.create("theme_plots", showWarnings = FALSE)
for (i in seq_along(theme_flags)) {
  # Named theme_name / theme_series rather than theme / data so the loop does
  # not leave globals that shadow ggplot2::theme() and utils::data().
  theme_name <- names(theme_flags)[i]
  flag_windows <- get_flag_windows(theme_flags[[i]])
  theme_series <- topic_monthly_counts |>
    filter(theme_label == theme_name)

  p <- ggplot(theme_series, aes(x = month, y = n_posts)) +
    geom_rect(
      data = flag_windows,
      aes(
        xmin = start,
        xmax = end,
        ymin = -Inf,
        ymax = Inf,
        fill = label
      ),
      alpha = 0.06,
      inherit.aes = FALSE
    ) +
    geom_line(
      color = muted_dark[i],
      linewidth = 1.2
    ) +
    scale_fill_manual(
      values = rep(muted_dark[i], nrow(flag_windows))
    ) +
    labs(
      title = paste("Topic Trend:", theme_name),
      x = "Date",
      y = "Posts per Month",
      fill = "Event Flags"
    ) +
    theme_minimal()
  print(p)

  ################################
  # Save plot
  ################################
  filename <- paste0(
    "theme_plots/",
    gsub(" ", "_", tolower(theme_name)),
    "_timeline.png"
  )
  # theme_minimal() may leave the plot background blank, in which case
  # ggsave() would write a transparent PNG; bg = "white" keeps the saved
  # image opaque either way.
  ggsave(
    filename,
    plot = p,
    width = 10,
    height = 6,
    dpi = 300,
    bg = "white"
  )
}
# Export the flagged dataset. row.names = FALSE keeps R's row index out of
# the file (otherwise it is written as an unnamed first column), matching how
# the data dictionary is exported.
write.csv(df, "daytuh.csv", row.names = FALSE)