# Point the session at the project folder; every relative path used by the
# read.csv()/write.csv() calls in this script resolves against it.
# NOTE(review): absolute, machine-specific path -- the script only runs as
# written on a machine that has this folder.
setwd(dir = "/Users/isaiahmireles/Desktop/Trump folder")

# Tweets dataset (presumably the full population the sample was drawn from).
# NOTE: the name `population` masks tidyr's bundled `population` dataset once
# tidyr is attached.
population <- read.csv(file = "trump_tweets_dataset.csv")

# Labeled sample: one row per post, including theme_label and confidence.
df <- read.csv(file = "trump_sample_labeled.csv")

Research Questions

Brief

Education posts (by theme_label), ordered from most to least confident

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Education-labelled posts, highest classifier confidence first, keeping only
# the columns needed to read a post and trace it back to its source.
edu <- df |>
  filter(theme_label == "education") |>
  select(date, theme_label, confidence, text, post_url) |>
  arrange(desc(confidence))

# Education posts whose confidence is at least 0.3.
edu_conf <- filter(edu, confidence >= 0.3)

# List the columns available in the labeled sample.
names(df)
##  [1] "date"           "platform"       "handle"         "text"          
##  [5] "favorite_count" "repost_count"   "deleted_flag"   "word_count"    
##  [9] "hashtags"       "urls"           "user_mentions"  "media_count"   
## [13] "media_urls"     "post_url"       "text_lwr"       "text_clean"    
## [17] "theme_label"    "confidence"
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Parse the date column into Date objects so it can be floored and compared.
df <- mutate(df, date = as.Date(date))

# Same rows as df plus `month`: each post's date floored to the first day of
# its month.
df_monthly <- mutate(df, month = floor_date(date, unit = "month"))

# Posts per (month, theme) over ALL posts. This is only an intermediate value
# of `topic_monthly_counts`; the name is reassigned further down.
topic_monthly_counts <- df_monthly |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop")

# Share of each month's posts that belongs to each theme (all posts).
topic_monthly_prop <- topic_monthly_counts |>
  group_by(month) |>
  mutate(prop = n_posts / sum(n_posts)) |>
  ungroup()

# Posts whose classification confidence is at least 0.3.
df_high_conf <- filter(df, confidence >= 0.3)

# Final value of `topic_monthly_counts`: posts per (month, theme) restricted to
# the high-confidence subset.
topic_monthly_counts <- df_high_conf |>
  mutate(month = floor_date(date, "month")) |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop")

library(ggplot2)
# Eleven-colour palette fed to the manual colour/fill scales of the plots
# below. The vector is unnamed, so colours are assigned to themes by position.
muted_dark <- c(
  "#8B2F2F", "#2C5AA0", "#2E6F3E",  # dark red, dark blue, dark green
  "#B9A3D6", "#BFBFBF", "#000000",  # light purple, light grey, black
  "#C46210", "#2F7F7F", "#B59A00",  # dark orange, teal, dark yellow
  "#5A3E8C", "#6B4F3A"              # dark purple, brown
)


# All themes on one panel: monthly post counts, one coloured line per theme.
ggplot(
  topic_monthly_counts,
  aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.2) +
  scale_color_manual(values = muted_dark) +
  labs(
    title = "Tweets by Theme Over Time",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  ) +
  theme_minimal()

# Small multiples: the same monthly series, one facet per theme.
ggplot(
  topic_monthly_counts,
  aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.1) +
  facet_wrap(~ theme_label) +
  scale_color_manual(values = muted_dark) +
  theme_minimal()

# Stacked area of monthly post counts, filled by theme.
ggplot(
  topic_monthly_counts,
  aes(x = month, y = n_posts, fill = theme_label)
) +
  geom_area(alpha = 0.85) +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Stacked area of each theme's share of the month's posts. Note that
# `topic_monthly_prop` is a different table from the counts plotted above.
ggplot(
  topic_monthly_prop,
  aes(x = month, y = prop, fill = theme_label)
) +
  geom_area() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Number of high-confidence posts per theme.
ggplot(df_high_conf, aes(x = theme_label, fill = theme_label)) +
  geom_bar() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Same bar chart rendered a second time (identical specification).
ggplot(df_high_conf, aes(x = theme_label, fill = theme_label)) +
  geom_bar() +
  scale_fill_manual(values = muted_dark) +
  theme_minimal()

# Heat map: months on the x axis, themes on the y axis, tile shade encodes the
# number of posts in that month for that theme.
ggplot(
  topic_monthly_counts,
  aes(x = month, y = theme_label, fill = n_posts)
) +
  geom_tile() +
  scale_fill_gradient(low = "#E5E5E5", high = "#2F4F4F") +
  theme_minimal()

# Eleven-colour palette for the manual colour scale below (unnamed, so colours
# are assigned to themes by position).
muted_dark <- c(
  "#8B2F2F", "#2C5AA0", "#2E6F3E",  # dark red, dark blue, dark green
  "#B9A3D6", "#BFBFBF", "#000000",  # light purple, light grey, black
  "#C46210", "#2F7F7F", "#B59A00",  # dark orange, teal, dark yellow
  "#5A3E8C", "#6B4F3A"              # dark purple, brown
)

# Monthly post counts per theme; line opacity also tracks the count, so
# low-volume months fade (alpha scaled between 0.3 and 1).
ggplot(
  topic_monthly_counts,
  aes(x = month, y = n_posts, color = theme_label, alpha = n_posts)
) +
  geom_line(linewidth = 1.3) +
  scale_color_manual(values = muted_dark) +
  scale_alpha(range = c(0.3, 1)) +
  labs(
    title = "Tweets by Theme Over Time",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  ) +
  theme_minimal()

# Earliest and latest post date in the labeled sample.
range(df[["date"]])
## [1] "2009-05-05" "2026-01-07"

Data were obtained from the Federal Reserve Bank of St. Louis.

# Load the GDP series, parse its observation dates, and sort newest first.
GDP <- read.csv("GDP.csv") |>
  mutate(observation_date = as.Date(observation_date)) |>
  arrange(desc(observation_date))

# Sort the posts newest first as well.
df <- arrange(df, desc(date))

# Distinct theme labels, in order of first appearance in the sorted posts.
unique(df[["theme_label"]])
##  [1] "war"                            "none"                          
##  [3] "jobs"                           "crime"                         
##  [5] "healthcare"                     "democrats"                     
##  [7] "immigration"                    "religion"                      
##  [9] "Government assistance programs" "education"                     
## [11] "poverty"

Updated Research Questions

Government Data

General Questions

Economics

Immigration

Homelessness

Education

Religion

# Era indicator flags: one logical column per political era, TRUE when the post
# date lies inside the inclusive window. Comparing the Date column with a
# "YYYY-MM-DD" string works because the string is coerced to Date.
# NOTE(review): the windows are not mutually exclusive (post_pres and
# campaign_2024 overlap from 2022-11-15 on), and trump_pres_1 starts on
# 2016-11-08 rather than on inauguration day -- confirm both are intended.
df$pre_campaign  <- df$date >= "2009-05-05" & df$date <= "2015-06-15"

df$campaign_2016 <- df$date >= "2015-06-16" & df$date <= "2016-11-07"

df$trump_pres_1  <- df$date >= "2016-11-08" & df$date <= "2021-01-20"

df$post_pres     <- df$date >= "2021-01-21" & df$date <= "2024-11-04"

df$campaign_2024 <- df$date >= "2022-11-15" & df$date <= "2024-11-04"

df$trump_pres_2  <- df$date >= "2025-01-20" & df$date <= "2026-01-07"


# Single categorical era per post. cut() on Dates builds left-closed intervals
# [break_i, break_i+1), which drives two choices here:
#   * the campaign_2024 era starts at 2022-11-15, the same start date the
#     campaign_2024 flag uses (a 2024-11-15 break would leave that era covering
#     only the weeks after the 2024 election);
#   * include.lowest = TRUE closes the last interval on the right, so posts
#     dated exactly 2026-01-07 are labelled "pres2" instead of NA.
df$era <- cut(
  df$date,
  breaks = as.Date(c(
    "2009-05-05", "2015-06-16", "2016-11-08",
    "2021-01-21", "2022-11-15", "2025-01-20", "2026-01-07"
  )),
  labels = c(
    "pre_campaign", "campaign_2016", "pres1",
    "post_pres", "campaign_2024", "pres2"
  ),
  include.lowest = TRUE
)
| Short Name | Meaning |
|---|---|
| pre16 | pre-2016 campaign |
| camp16 | 2016 campaign |
| pres1 | first presidency |
| post21 | post presidency |
| camp24 | 2024 campaign |
| pres2 | second presidency |
##################################################
# CAMPAIGN CYCLE WINDOWS / POLICY SHOCK WINDOWS
##################################################
# Each new column is a logical flag: TRUE when the post date lies inside the
# inclusive window. Columns are appended to df in the order written here.
df <- df |>
  mutate(
    # Campaign cycles
    camp16 = date >= "2015-06-16" & date <= "2016-11-08",
    camp20 = date >= "2019-06-18" & date <= "2020-11-03",
    camp24 = date >= "2022-11-15" & date <= "2024-11-05",

    # Policy shocks
    shock_travelban = date >= "2017-01-27" & date <= "2017-03-31",
    shock_taxcuts   = date >= "2017-12-01" & date <= "2018-03-01",
    shock_covid     = date >= "2020-03-01" & date <= "2020-06-01",
    shock_election  = date >= "2020-11-03" & date <= "2021-01-06",
    # NOTE(review): this window starts after 2026-01-07, the latest post date
    # reported for df in this document, so the flag is never TRUE there.
    shock_war_iran  = date >= "2026-02-01" & date <= "2026-06-01"
  )

##################################################
# THEME EVENT WINDOWS
##################################################
# Three logical flags per theme; each is TRUE when the post date lies inside
# the inclusive window. The flags depend on the date only, not on the post's
# theme_label. Columns are appended to df in the order written here.
df <- df |>
  mutate(
    # IMMIGRATION
    immig_wall_campaign = date >= "2015-06-16" & date <= "2016-11-08",
    immig_travel_ban    = date >= "2017-01-27" & date <= "2017-12-31",
    immig_mass_deport   = date >= "2025-01-20" & date <= "2026-01-07",

    # DEMOCRATS
    dem_attack_2016  = date >= "2015-06-16" & date <= "2016-11-08",
    dem_russia_probe = date >= "2017-01-01" & date <= "2019-03-31",
    dem_post_pres    = date >= "2021-01-21" & date <= "2024-11-05",

    # CRIME
    crime_law_order_2016 = date >= "2016-06-01" & date <= "2016-11-08",
    crime_blm_protests   = date >= "2020-05-25" & date <= "2020-09-01",
    crime_city_policy    = date >= "2025-01-20" & date <= "2026-01-07",

    # POVERTY
    poverty_campaign_focus = date >= "2016-01-01" & date <= "2016-11-08",
    poverty_opportunity    = date >= "2018-01-01" & date <= "2019-12-31",
    poverty_post_covid     = date >= "2021-01-01" & date <= "2022-12-31",

    # RELIGION
    religion_evangelical  = date >= "2015-06-16" & date <= "2016-11-08",
    religion_liberty_exec = date >= "2017-05-01" & date <= "2018-12-31",
    religion_campaign24   = date >= "2024-01-01" & date <= "2024-11-05",

    # GOVERNMENT ASSISTANCE PROGRAMS
    assist_welfare_reform = date >= "2017-01-20" & date <= "2018-12-31",
    assist_covid_relief   = date >= "2020-03-01" & date <= "2020-12-31",
    assist_post_covid     = date >= "2021-01-01" & date <= "2022-12-31",

    # HEALTHCARE
    healthcare_repeal_aca = date >= "2016-01-01" & date <= "2017-12-31",
    healthcare_covid      = date >= "2020-03-01" & date <= "2020-12-31",
    healthcare_policy24   = date >= "2024-01-01" & date <= "2024-11-05",

    # JOBS
    jobs_trade_policy = date >= "2017-01-20" & date <= "2019-12-31",
    jobs_tariffs      = date >= "2018-03-01" & date <= "2019-12-31",
    jobs_campaign24   = date >= "2024-01-01" & date <= "2024-11-05",

    # WAR
    war_syria_strike = date >= "2017-04-01" & date <= "2017-06-01",
    war_isis         = date >= "2017-01-20" & date <= "2019-12-31",
    # NOTE(review): this window starts after 2026-01-07, the latest post date
    # reported for df in this document, so the flag is never TRUE there.
    war_iran         = date >= "2026-02-01" & date <= "2026-06-01",

    # EDUCATION
    edu_campaign_focus = date >= "2016-01-01" & date <= "2016-11-08",
    edu_free_speech    = date >= "2019-01-01" & date <= "2019-12-31",
    edu_anti_woke      = date >= "2023-01-01" & date <= "2024-11-05",

    # NONE (GENERAL COMMENTARY)
    none_precampaign = date >= "2009-05-05" & date <= "2015-06-15",
    none_presidency  = date >= "2017-01-20" & date <= "2021-01-20",
    none_postpres    = date >= "2021-01-21" & date <= "2026-01-07"
  )
# Categorical political era for every post (replaces any earlier `era` column).
# cut() on Dates builds left-closed intervals [break_i, break_i+1), so without
# include.lowest = TRUE a post dated exactly on the final break (2026-01-07)
# falls outside every interval and gets NA. Closing the last interval on the
# right labels those posts "pres2".
df$era <- cut(
  df$date,
  breaks = as.Date(c(
    "2009-05-05",
    "2015-06-16",
    "2017-01-20",
    "2021-01-21",
    "2025-01-20",
    "2026-01-07"
  )),
  labels = c("pre_campaign", "campaign16", "pres1", "post_pres", "pres2"),
  include.lowest = TRUE
)
########################################################
# Dataset Documentation / Data Dictionary
########################################################

# Two-column lookup table (feature, description) with one row per column of
# df. Each entry below pairs a column name with its description so the two can
# never drift out of alignment; the helper vector stays local to this block.
data_dictionary <- local({
  entries <- c(
    # Original columns
    date = "date the post was created",
    platform = "social media platform where the post appeared (twitter or truth social)",
    handle = "account handle that published the post",
    text = "original post text content",
    favorite_count = "number of likes or favorites the post received",
    repost_count = "number of reposts or retweets",
    deleted_flag = "flag indicating if the post was later deleted",
    word_count = "number of words contained in the post",
    hashtags = "hashtags extracted from the post",
    urls = "urls contained in the post",
    user_mentions = "user accounts mentioned in the post",
    media_count = "number of media items attached to the post",
    media_urls = "urls linking to attached media",
    post_url = "direct link to the original post",
    text_lwr = "lowercase version of the text field",
    text_clean = "cleaned version of the text with punctuation and noise removed",
    theme_label = "predicted topic category assigned by classification model",
    confidence = "classification confidence score produced by the model",

    # Era indicators
    pre_campaign = "flag indicating posts before trump announced his 2016 presidential campaign",
    campaign_2016 = "flag indicating posts during the 2016 presidential campaign",
    trump_pres_1 = "flag indicating posts during trump's first presidency",
    post_pres = "flag indicating posts after his first presidency",
    campaign_2024 = "flag indicating posts during the 2024 campaign cycle",
    trump_pres_2 = "flag indicating posts during trump's second presidency",
    era = "categorical variable describing the broader political era",

    # Campaign cycle windows
    camp16 = "flag for posts during the 2016 campaign cycle",
    camp20 = "flag for posts during the 2020 campaign cycle",
    camp24 = "flag for posts during the 2024 campaign cycle",

    # Policy shock windows
    shock_travelban = "policy shock window around the 2017 travel ban policy",
    shock_taxcuts = "policy shock window around the 2017 tax cuts legislation",
    shock_covid = "policy shock window during the early covid pandemic period",
    shock_election = "policy shock window around the 2020 election dispute period",
    shock_war_iran = "policy shock window associated with military conflict involving iran",

    # Immigration
    immig_wall_campaign = "immigration theme window during the border wall campaign",
    immig_travel_ban = "immigration theme window during the travel ban policy period",
    immig_mass_deport = "immigration theme window during renewed deportation policy discussions",

    # Democrats
    dem_attack_2016 = "democrats theme window during the 2016 campaign attacks",
    dem_russia_probe = "democrats theme window during the russia investigation period",
    dem_post_pres = "democrats theme window during the post presidency political period",

    # Crime
    crime_law_order_2016 = "crime theme window emphasizing law and order messaging in 2016",
    crime_blm_protests = "crime theme window during 2020 protest and policing debates",
    crime_city_policy = "crime theme window focusing on urban crime discussions",

    # Poverty
    poverty_campaign_focus = "poverty theme window during campaign rhetoric about economic hardship",
    poverty_opportunity = "poverty theme window during opportunity zone economic initiatives",
    poverty_post_covid = "poverty theme window during post covid economic recovery debates",

    # Religion
    religion_evangelical = "religion theme window during evangelical outreach in the 2016 campaign",
    religion_liberty_exec = "religion theme window during religious liberty policy initiatives",
    religion_campaign24 = "religion theme window during the 2024 campaign",

    # Government assistance programs
    assist_welfare_reform = "government assistance theme window during welfare reform discussions",
    assist_covid_relief = "government assistance theme window during pandemic relief programs",
    assist_post_covid = "government assistance theme window during post pandemic policy debates",

    # Healthcare
    healthcare_repeal_aca = "healthcare theme window during efforts to repeal the affordable care act",
    healthcare_covid = "healthcare theme window during covid health policy debates",
    healthcare_policy24 = "healthcare theme window during the 2024 campaign policy messaging",

    # Jobs
    jobs_trade_policy = "jobs theme window during trade and manufacturing policy discussions",
    jobs_tariffs = "jobs theme window during tariff and trade war messaging",
    jobs_campaign24 = "jobs theme window during economic messaging in the 2024 campaign",

    # War
    war_syria_strike = "war theme window around military strikes in syria",
    war_isis = "war theme window related to messaging about the fight against isis",
    war_iran = "war theme window related to military escalation involving iran",

    # Education
    edu_campaign_focus = "education theme window during education reform messaging in campaigns",
    edu_free_speech = "education theme window during campus free speech policy debates",
    edu_anti_woke = "education theme window criticizing ideological trends in schools",

    # None (general commentary)
    none_precampaign = "general commentary window before the 2016 campaign",
    none_presidency = "general commentary window during the first presidency",
    none_postpres = "general commentary window during the post presidency period"
  )

  data.frame(
    feature = names(entries),
    description = unname(entries),
    stringsAsFactors = FALSE
  )
})

########################################################
# View documentation
########################################################

# Print the full dictionary (one row per column of df).
print(data_dictionary)
##                   feature
## 1                    date
## 2                platform
## 3                  handle
## 4                    text
## 5          favorite_count
## 6            repost_count
## 7            deleted_flag
## 8              word_count
## 9                hashtags
## 10                   urls
## 11          user_mentions
## 12            media_count
## 13             media_urls
## 14               post_url
## 15               text_lwr
## 16             text_clean
## 17            theme_label
## 18             confidence
## 19           pre_campaign
## 20          campaign_2016
## 21           trump_pres_1
## 22              post_pres
## 23          campaign_2024
## 24           trump_pres_2
## 25                    era
## 26                 camp16
## 27                 camp20
## 28                 camp24
## 29        shock_travelban
## 30          shock_taxcuts
## 31            shock_covid
## 32         shock_election
## 33         shock_war_iran
## 34    immig_wall_campaign
## 35       immig_travel_ban
## 36      immig_mass_deport
## 37        dem_attack_2016
## 38       dem_russia_probe
## 39          dem_post_pres
## 40   crime_law_order_2016
## 41     crime_blm_protests
## 42      crime_city_policy
## 43 poverty_campaign_focus
## 44    poverty_opportunity
## 45     poverty_post_covid
## 46   religion_evangelical
## 47  religion_liberty_exec
## 48    religion_campaign24
## 49  assist_welfare_reform
## 50    assist_covid_relief
## 51      assist_post_covid
## 52  healthcare_repeal_aca
## 53       healthcare_covid
## 54    healthcare_policy24
## 55      jobs_trade_policy
## 56           jobs_tariffs
## 57        jobs_campaign24
## 58       war_syria_strike
## 59               war_isis
## 60               war_iran
## 61     edu_campaign_focus
## 62        edu_free_speech
## 63          edu_anti_woke
## 64       none_precampaign
## 65        none_presidency
## 66          none_postpres
##                                                                    description
## 1                                                    date the post was created
## 2      social media platform where the post appeared (twitter or truth social)
## 3                                       account handle that published the post
## 4                                                   original post text content
## 5                               number of likes or favorites the post received
## 6                                                number of reposts or retweets
## 7                                flag indicating if the post was later deleted
## 8                                        number of words contained in the post
## 9                                             hashtags extracted from the post
## 10                                                  urls contained in the post
## 11                                         user accounts mentioned in the post
## 12                                  number of media items attached to the post
## 13                                              urls linking to attached media
## 14                                            direct link to the original post
## 15                                         lowercase version of the text field
## 16              cleaned version of the text with punctuation and noise removed
## 17                   predicted topic category assigned by classification model
## 18                       classification confidence score produced by the model
## 19 flag indicating posts before trump announced his 2016 presidential campaign
## 20                 flag indicating posts during the 2016 presidential campaign
## 21                       flag indicating posts during trump's first presidency
## 22                            flag indicating posts after his first presidency
## 23                        flag indicating posts during the 2024 campaign cycle
## 24                      flag indicating posts during trump's second presidency
## 25                   categorical variable describing the broader political era
## 26                               flag for posts during the 2016 campaign cycle
## 27                               flag for posts during the 2020 campaign cycle
## 28                               flag for posts during the 2024 campaign cycle
## 29                       policy shock window around the 2017 travel ban policy
## 30                    policy shock window around the 2017 tax cuts legislation
## 31                  policy shock window during the early covid pandemic period
## 32                 policy shock window around the 2020 election dispute period
## 33        policy shock window associated with military conflict involving iran
## 34                    immigration theme window during the border wall campaign
## 35                immigration theme window during the travel ban policy period
## 36      immigration theme window during renewed deportation policy discussions
## 37                     democrats theme window during the 2016 campaign attacks
## 38               democrats theme window during the russia investigation period
## 39          democrats theme window during the post presidency political period
## 40              crime theme window emphasizing law and order messaging in 2016
## 41                 crime theme window during 2020 protest and policing debates
## 42                      crime theme window focusing on urban crime discussions
## 43       poverty theme window during campaign rhetoric about economic hardship
## 44           poverty theme window during opportunity zone economic initiatives
## 45            poverty theme window during post covid economic recovery debates
## 46      religion theme window during evangelical outreach in the 2016 campaign
## 47           religion theme window during religious liberty policy initiatives
## 48                              religion theme window during the 2024 campaign
## 49        government assistance theme window during welfare reform discussions
## 50          government assistance theme window during pandemic relief programs
## 51      government assistance theme window during post pandemic policy debates
## 52    healthcare theme window during efforts to repeal the affordable care act
## 53                  healthcare theme window during covid health policy debates
## 54           healthcare theme window during the 2024 campaign policy messaging
## 55         jobs theme window during trade and manufacturing policy discussions
## 56                     jobs theme window during tariff and trade war messaging
## 57            jobs theme window during economic messaging in the 2024 campaign
## 58                           war theme window around military strikes in syria
## 59          war theme window related to messaging about the fight against isis
## 60              war theme window related to military escalation involving iran
## 61       education theme window during education reform messaging in campaigns
## 62             education theme window during campus free speech policy debates
## 63            education theme window criticizing ideological trends in schools
## 64                          general commentary window before the 2016 campaign
## 65                       general commentary window during the first presidency
## 66                 general commentary window during the post presidency period
########################################################
# Optional: export documentation
########################################################

# Write the dictionary next to the data (path is relative to the working
# directory), without the row-number column.
write.csv(
  data_dictionary,
  file = "dataset_data_dictionary.csv",
  row.names = FALSE
)

# Column names of df, for comparison with the dictionary's feature list.
names(df)
##  [1] "date"                   "platform"               "handle"                
##  [4] "text"                   "favorite_count"         "repost_count"          
##  [7] "deleted_flag"           "word_count"             "hashtags"              
## [10] "urls"                   "user_mentions"          "media_count"           
## [13] "media_urls"             "post_url"               "text_lwr"              
## [16] "text_clean"             "theme_label"            "confidence"            
## [19] "pre_campaign"           "campaign_2016"          "trump_pres_1"          
## [22] "post_pres"              "campaign_2024"          "trump_pres_2"          
## [25] "era"                    "camp16"                 "camp20"                
## [28] "camp24"                 "shock_travelban"        "shock_taxcuts"         
## [31] "shock_covid"            "shock_election"         "shock_war_iran"        
## [34] "immig_wall_campaign"    "immig_travel_ban"       "immig_mass_deport"     
## [37] "dem_attack_2016"        "dem_russia_probe"       "dem_post_pres"         
## [40] "crime_law_order_2016"   "crime_blm_protests"     "crime_city_policy"     
## [43] "poverty_campaign_focus" "poverty_opportunity"    "poverty_post_covid"    
## [46] "religion_evangelical"   "religion_liberty_exec"  "religion_campaign24"   
## [49] "assist_welfare_reform"  "assist_covid_relief"    "assist_post_covid"     
## [52] "healthcare_repeal_aca"  "healthcare_covid"       "healthcare_policy24"   
## [55] "jobs_trade_policy"      "jobs_tariffs"           "jobs_campaign24"       
## [58] "war_syria_strike"       "war_isis"               "war_iran"              
## [61] "edu_campaign_focus"     "edu_free_speech"        "edu_anti_woke"         
## [64] "none_precampaign"       "none_presidency"        "none_postpres"

Era Indicators

Column Description
pre_campaign Posts before the 2016 campaign announcement.
campaign_2016 Posts during the 2016 presidential campaign.
trump_pres_1 Posts during Trump’s first presidency (2017–2021).
post_pres Posts during the post-presidency political period.
campaign_2024 Posts during the 2024 campaign period.
trump_pres_2 Posts during Trump’s second presidency period.
era Categorical variable summarizing the political era for each post.

Campaign Cycle Windows

Column Description
camp16 Posts during the 2016 presidential campaign cycle.
camp20 Posts during the 2020 reelection campaign cycle.
camp24 Posts during the 2024 presidential campaign cycle.

Policy Shock Windows

Column Description
shock_travelban Period surrounding the 2017 travel restriction executive order.
shock_taxcuts Period around the passage of the 2017 Tax Cuts and Jobs Act.
shock_covid Early COVID-19 pandemic period when healthcare and government assistance messaging increased.
shock_election Period between the 2020 election and early 2021 election dispute events.
shock_war_iran Period surrounding military escalation involving Iran.

Theme Event Windows

Immigration

Column Description
immig_wall_campaign Immigration messaging during the 2016 campaign emphasizing border wall construction.
immig_travel_ban Period of immigration restrictions and travel ban policy discussion.
immig_mass_deport Period of increased messaging about deportation policies.

Democrats

Column Description
dem_attack_2016 Criticism of Democratic candidates during the 2016 campaign.
dem_russia_probe Messaging during investigations and political disputes between 2017–2019.
dem_post_pres Continued criticism of Democrats during the post-presidency political period.

Crime

Column Description
crime_law_order_2016 Law-and-order messaging during the 2016 campaign.
crime_blm_protests Crime and policing discussions during the 2020 protest period.
crime_city_policy Posts focusing on crime in major U.S. cities and policing policy.

Poverty

Column Description
poverty_campaign_focus Campaign messaging referencing struggling communities and economic hardship.
poverty_opportunity Messaging about economic development initiatives such as opportunity zones.
poverty_post_covid Discussion of poverty and inequality following the COVID-19 economic disruption.

Religion

Column Description
religion_evangelical Campaign messaging targeting evangelical voters.
religion_liberty_exec Posts referencing religious liberty initiatives during the presidency.
religion_campaign24 Religion-related messaging during the 2024 campaign period.

Government Assistance Programs

Column Description
assist_welfare_reform Posts discussing reform of government welfare or assistance programs.
assist_covid_relief Messaging related to pandemic stimulus and relief programs.
assist_post_covid Discussions of federal spending and assistance programs following the pandemic.

Healthcare

Column Description
healthcare_repeal_aca Messaging around attempts to repeal or replace the Affordable Care Act.
healthcare_covid Healthcare discussions during the COVID-19 pandemic.
healthcare_policy24 Healthcare messaging during the 2024 campaign cycle.

Jobs

Column Description
jobs_trade_policy Messaging related to trade policy and domestic manufacturing.
jobs_tariffs Period of messaging about tariffs and international trade disputes.
jobs_campaign24 Jobs and economic messaging during the 2024 campaign cycle.

War / Foreign Policy

Column Description
war_syria_strike Period surrounding U.S. military strikes in Syria.
war_isis Messaging related to military operations against ISIS.
war_iran Messaging during escalations involving Iran or Middle East conflict.

Education

Column Description
edu_campaign_focus Education policy messaging during the 2016 campaign.
edu_free_speech Messaging related to campus free speech debates.
edu_anti_woke Messaging criticizing ideological trends in education institutions.

None (General Commentary)

Column Description
none_precampaign General commentary before the 2016 campaign.
none_presidency Non-policy commentary during the presidency.
none_postpres General commentary during the post-presidency period.
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked _by_ '.GlobalEnv':
## 
##     population
library(dplyr)
library(ggplot2)
library(lubridate)

# Maps each theme_label value to the three window-flag columns of df that
# belong to it. Element order matters: the plotting code below picks the
# colour for a theme by its position in this list.
theme_flags <- list(
  "immigration" = c(
    "immig_wall_campaign", "immig_travel_ban", "immig_mass_deport"
  ),
  "democrats" = c(
    "dem_attack_2016", "dem_russia_probe", "dem_post_pres"
  ),
  "crime" = c(
    "crime_law_order_2016", "crime_blm_protests", "crime_city_policy"
  ),
  "poverty" = c(
    "poverty_campaign_focus", "poverty_opportunity", "poverty_post_covid"
  ),
  "religion" = c(
    "religion_evangelical", "religion_liberty_exec", "religion_campaign24"
  ),
  "Government assistance programs" = c(
    "assist_welfare_reform", "assist_covid_relief", "assist_post_covid"
  ),
  "healthcare" = c(
    "healthcare_repeal_aca", "healthcare_covid", "healthcare_policy24"
  ),
  "jobs" = c(
    "jobs_trade_policy", "jobs_tariffs", "jobs_campaign24"
  ),
  "war" = c(
    "war_syria_strike", "war_isis", "war_iran"
  ),
  "education" = c(
    "edu_campaign_focus", "edu_free_speech", "edu_anti_woke"
  ),
  "none" = c(
    "none_precampaign", "none_presidency", "none_postpres"
  )
)

# Posts per (month, theme) over all rows of df; `month` is the post date
# floored to the first day of its month. Replaces any earlier value of
# `topic_monthly_counts`.
topic_monthly_counts <- count(
  mutate(df, month = lubridate::floor_date(date, "month")),
  month, theme_label,
  name = "n_posts"
)

#' Shading windows for a set of flag columns
#'
#' Returns one row per flag that is TRUE for at least one post, giving the span
#' from the first to the last flagged post date, widened by `pad_days` on each
#' side. A single rectangle per flag keeps translucent shading translucent:
#' emitting one padded rectangle per posting date would stack hundreds of
#' overlapping rectangles, which renders as a near-solid fill.
#'
#' @param flag_cols Character vector naming logical flag columns of `data`.
#' @param data Data frame with a `date` column (class Date) and the flag
#'   columns. Defaults to the script-level `df`.
#' @param pad_days Number of days added before the first and after the last
#'   flagged date. Defaults to 15.
#' @return A data frame with columns `flag`, `start` and `end`. Flags that are
#'   never TRUE contribute no row.
get_flag_windows <- function(flag_cols, data = df, pad_days = 15) {
  data |>
    select(date, all_of(flag_cols)) |>
    pivot_longer(-date, names_to = "flag", values_to = "active") |>
    # Keep only (date, flag) pairs where the flag is TRUE; NA is dropped too.
    filter(active) |>
    group_by(flag) |>
    summarise(
      start = min(date) - pad_days,
      end = max(date) + pad_days,
      .groups = "drop"
    )
}

# One trend plot per theme: the theme's monthly post count drawn as a line over
# translucent bands marking that theme's flag windows. Line and bands share the
# palette colour at the theme's position in `theme_flags`.
plots <- lapply(seq_along(theme_flags), function(idx) {
  theme_name <- names(theme_flags)[idx]
  theme_colour <- muted_dark[idx]

  windows <- get_flag_windows(theme_flags[[idx]])
  monthly <- filter(topic_monthly_counts, theme_label == theme_name)

  # Bands span the full panel height; they use their own data, so the plot's
  # x/y mapping must not be inherited.
  window_bands <- geom_rect(
    data = windows,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
    fill = theme_colour,
    alpha = 0.12,
    inherit.aes = FALSE
  )
  trend_line <- geom_line(color = theme_colour, linewidth = 1.2)

  ggplot(monthly, aes(x = month, y = n_posts)) +
    window_bands +
    trend_line +
    labs(
      title = paste("Topic Trend:", theme_name),
      x = "Date",
      y = "Posts per Month"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold"),
      panel.grid.minor = element_blank()
    )
})

# Render every plot in list order.
for (p in plots) {
  print(p)
}

# Tag each month with its four-year window. Every bucket is bounded
# explicitly as a half-open range [start, next start). A month that is NA or
# falls outside 2009-2026 matches no branch and gets NA, instead of being
# swept into the last window by a catch-all branch.
topic_monthly_counts <- topic_monthly_counts |>
  mutate(
    period_4yr = case_when(
      month >= as.Date("2009-01-01") & month < as.Date("2013-01-01") ~
        "2009–2012",
      month >= as.Date("2013-01-01") & month < as.Date("2017-01-01") ~
        "2013–2016",
      month >= as.Date("2017-01-01") & month < as.Date("2021-01-01") ~
        "2017–2020",
      month >= as.Date("2021-01-01") & month < as.Date("2027-01-01") ~
        "2021–2026"
    )
  )

# Overview figure: one line per theme, coloured from `muted_dark`, split into
# a single column of panels by four-year window. Only the x axis is freed per
# panel, so each panel shows just its own years.
ggplot(
  topic_monthly_counts,
  aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line(linewidth = 1.2) +
  scale_color_manual(values = muted_dark) +
  facet_wrap(~period_4yr, ncol = 1, scales = "free_x") +
  labs(
    title = "Tweets by Theme Over Time (4-Year Windows)",
    x = "Month",
    y = "Number of Tweets",
    color = "Theme"
  ) +
  theme_minimal()

library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)

############################################
# Theme -> Flag mapping
############################################

# Each list name is a theme label; its value names the three flag columns
# associated with that theme. Order matters: code that walks this list by
# index relies on each entry's position.
theme_flags <- list(
  immigration = c(
    "immig_wall_campaign", "immig_travel_ban", "immig_mass_deport"
  ),
  democrats = c(
    "dem_attack_2016", "dem_russia_probe", "dem_post_pres"
  ),
  crime = c(
    "crime_law_order_2016", "crime_blm_protests", "crime_city_policy"
  ),
  poverty = c(
    "poverty_campaign_focus", "poverty_opportunity", "poverty_post_covid"
  ),
  religion = c(
    "religion_evangelical", "religion_liberty_exec", "religion_campaign24"
  ),
  `Government assistance programs` = c(
    "assist_welfare_reform", "assist_covid_relief", "assist_post_covid"
  ),
  healthcare = c(
    "healthcare_repeal_aca", "healthcare_covid", "healthcare_policy24"
  ),
  jobs = c(
    "jobs_trade_policy", "jobs_tariffs", "jobs_campaign24"
  ),
  war = c(
    "war_syria_strike", "war_isis", "war_iran"
  ),
  education = c(
    "edu_campaign_focus", "edu_free_speech", "edu_anti_woke"
  ),
  none = c(
    "none_precampaign", "none_presidency", "none_postpres"
  )
)

############################################
# Monthly topic counts
############################################

# Tally posts by (month, theme). The month is computed inside count() as a
# named grouping expression: each date floored to the first of its month.
topic_monthly_counts <- count(
  df,
  month = floor_date(date, unit = "month"),
  theme_label,
  name = "n_posts"
)

############################################
# Create flag windows
############################################

#' Summarise each event flag as a single padded date window.
#'
#' For every flag column, finds the earliest and latest dated rows on which
#' the flag is TRUE and widens that span by `pad_days` on both sides.
#'
#' @param flag_cols Character vector naming the flag columns to use;
#'   `all_of()` raises an error if any of them is missing from `data`.
#' @param data Data frame holding a `date` column plus the flag columns.
#'   Defaults to the script-level `df`, so existing one-argument calls behave
#'   as before.
#' @param pad_days Number of days added before the first and after the last
#'   active date.
#' @return One row per flag that is TRUE on at least one dated row, with
#'   columns `flag`, `start`, `end`, and `label` (the flag name followed by
#'   the window's date range on a second line).
get_flag_windows <- function(flag_cols, data = df, pad_days = 15) {
  data |>
    select(date, all_of(flag_cols)) |>
    pivot_longer(-date, names_to = "flag", values_to = "active") |>
    # Undated rows are removed here: a single NA date would otherwise turn
    # min()/max() into NA and wipe out the flag's whole window.
    filter(active, !is.na(date)) |>
    group_by(flag) |>
    # Padding the extremes equals taking the extremes of per-date padded
    # windows, so no per-date distinct()/mutate() pass is needed.
    summarise(
      start = min(date) - pad_days,
      end   = max(date) + pad_days,
      .groups = "drop"
    ) |>
    mutate(
      label = paste0(
        flag,
        "\n(",
        format(start, "%Y-%m-%d"),
        " → ",
        format(end, "%Y-%m-%d"),
        ")"
      )
    )
}

############################################
# Generate plots
############################################

# One figure per theme: monthly post counts as a line over translucent bands
# marking that theme's flag windows. Each band is mapped to its window label
# so it gets a legend entry, while every band keeps the theme's own colour
# (picked from `muted_dark` by the theme's position in `theme_flags`).
plots <- lapply(seq_along(theme_flags), function(idx) {
  topic <- names(theme_flags)[idx]
  shade <- muted_dark[idx]

  windows <- get_flag_windows(theme_flags[[idx]])
  series <- filter(topic_monthly_counts, theme_label == topic)

  ggplot(series, aes(x = month, y = n_posts)) +
    # Bands first, so the line is drawn on top; they use their own data and
    # span the full panel height.
    geom_rect(
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = -Inf,
        ymax = Inf,
        fill = label
      ),
      data = windows,
      inherit.aes = FALSE,
      alpha = 0.06
    ) +
    geom_line(linewidth = 1.2, color = shade) +
    # The same colour repeated once per window: the legend separates windows
    # by label only.
    scale_fill_manual(values = rep(shade, times = nrow(windows))) +
    labs(
      title = paste("Topic Trend:", topic),
      x = "Date",
      y = "Posts per Month",
      fill = "Event Flags"
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold")
    )
})

############################################
# Print plots
############################################

for (p in plots) {
  print(p)
}

# Output folder for the saved figures; showWarnings = FALSE keeps the call
# quiet when the folder is already there.
dir.create("theme_plots", showWarnings = FALSE)

# For each theme: build its timeline, display it, and write it to a PNG.
for (i in seq_along(theme_flags)) {
  theme <- names(theme_flags)[i]
  flags <- theme_flags[[i]]

  flag_windows <- get_flag_windows(flags)
  data <- filter(topic_monthly_counts, theme_label == theme)

  p <- ggplot(data, aes(x = month, y = n_posts)) +
    # Flag windows as full-height bands behind the line, one legend entry
    # per window label.
    geom_rect(
      mapping = aes(
        xmin = start,
        xmax = end,
        ymin = -Inf,
        ymax = Inf,
        fill = label
      ),
      data = flag_windows,
      inherit.aes = FALSE,
      alpha = 0.06
    ) +
    geom_line(linewidth = 1.2, color = muted_dark[i]) +
    # Every band shares the theme colour.
    scale_fill_manual(
      values = rep(muted_dark[i], times = nrow(flag_windows))
    ) +
    labs(
      title = paste("Topic Trend:", theme),
      x = "Date",
      y = "Posts per Month",
      fill = "Event Flags"
    ) +
    theme_minimal()

  print(p)

  # File name: the theme in lower case with spaces turned into underscores.
  filename <- paste0(
    "theme_plots/",
    gsub(" ", "_", tolower(theme)),
    "_timeline.png"
  )

  ggsave(filename, plot = p, width = 10, height = 6, dpi = 300)
}

# Export `df`. row.names = FALSE keeps R's automatic row index out of the
# file, so reading it back does not add an extra unnamed first column.
write.csv(df, "daytuh.csv", row.names = FALSE)