# Work from the project folder so the bare CSV file names used in this
# document resolve.
# NOTE(review): this absolute path exists on one machine only; a
# project-relative location would make the analysis portable.
setwd(dir = "/Users/isaiahmireles/Desktop/Trump folder")

# `df` holds the labeled sample; `population` holds the full dataset file.
df <- read.csv(file = "trump_sample_labeled.csv")
population <- read.csv(file = "trump_tweets_dataset.csv")

Research Questions

Brief

Posts labeled "education" (theme_label), ordered from most to least confident

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Posts labeled "education", highest `confidence` first, reduced to the
# columns needed to read each post in context.
edu <- df |>
  filter(theme_label == "education") |>
  arrange(desc(confidence)) |>
  select(date, theme_label, confidence, text, post_url)

# Education posts whose `confidence` is at least 0.3.
edu_conf <- filter(edu, confidence >= 0.3)

# Column names of the labeled sample.
names(df)
##  [1] "date"           "platform"       "handle"         "text"          
##  [5] "favorite_count" "repost_count"   "deleted_flag"   "word_count"    
##  [9] "hashtags"       "urls"           "user_mentions"  "media_count"   
## [13] "media_urls"     "post_url"       "text_lwr"       "text_clean"    
## [17] "theme_label"    "confidence"

Visualization

library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Parse `date` to class Date so it can be floored to calendar months.
df <- df |>
  mutate(date = as.Date(date))

# Every post tagged with the first day of its calendar month. The all-posts
# summary below reuses this instead of re-deriving the month.
df_monthly <- df |>
  mutate(month = floor_date(date, unit = "month"))

# Posts per month and theme across ALL posts (`n_posts`), plus each theme's
# share of that month's posts (`prop`, which sums to 1 within a month).
topic_monthly_prop <- df_monthly |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop") |>
  group_by(month) |>
  mutate(prop = n_posts / sum(n_posts)) |>
  ungroup()

# Posts whose `confidence` is at least 0.3.
df_high_conf <- df |>
  filter(confidence >= 0.3)

# Posts per month and theme, restricted to the high-confidence posts.
# This name is assigned exactly once so its meaning is unambiguous; the
# all-posts counts are available as the `n_posts` column of
# `topic_monthly_prop`.
topic_monthly_counts <- df_high_conf |>
  mutate(month = floor_date(date, unit = "month")) |>
  group_by(month, theme_label) |>
  summarize(n_posts = n(), .groups = "drop")

library(ggplot2)

# Line chart of monthly post counts from `topic_monthly_counts`, with one
# colored line per theme.
ggplot(
  data = topic_monthly_counts,
  mapping = aes(x = month, y = n_posts, color = theme_label)
) +
  geom_line() +
  ggtitle("Tweets by Theme Over Time") +
  xlab("Month") +
  ylab("Number of Tweets")

The GDP data were obtained from the

Federal Reserve Bank of St. Louis

# Load the GDP series, parse its observation dates, and order it newest-first.
GDP <- read.csv("GDP.csv") |>
  mutate(observation_date = as.Date(observation_date)) |>
  arrange(desc(observation_date))

# Put the posts in the same newest-first order.
df <- arrange(df, desc(date))

# Distinct theme labels present in the posts.
unique(df[["theme_label"]])
## [1] "economics"    "education"    "immigration"  "religion"     "homelessness"

Updated Research Questions

Government Data

General Questions

Economics

Immigration

Homelessness

Education

Religion