library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)

# CSV inputs to scan, given as paths relative to the working directory.
# NOTE(review): the shared "10092025" suffix looks like an export date and the
# stems look like topic/community names -- confirm against the data source.
files <- c(
  "finance_10092025.csv",
  "money_10092025.csv",
  "investing_10092025.csv",
  "GenZ_10092025.csv",
  "financial_advice_from_AI_10092025.csv"
)

# Read a CSV file on a best-effort basis.
#
# Tries readr's default encoding first and retries with latin1 if that read
# raises an error. If both attempts fail, an empty tibble is returned so the
# rest of the pipeline keeps running, but a warning naming the file and the
# underlying error is emitted so a missing/unreadable input is not silently
# dropped from the counts.
#
# p: path to the CSV file.
# Returns: a tibble with the file's contents, or a 0-row, 0-column tibble.
read_safe <- function(p) {
  tryCatch(
    readr::read_csv(p, show_col_types = FALSE),
    error = function(e) {
      tryCatch(
        readr::read_csv(
          p,
          locale = readr::locale(encoding = "latin1"),
          show_col_types = FALSE
        ),
        error = function(e2) {
          warning(
            "Could not read '", p, "': ", conditionMessage(e2),
            call. = FALSE
          )
          tibble::tibble()
        }
      )
    }
  )
}

# Collapse the text-bearing columns of a data frame into one lower-cased
# string per row.
#
# Columns whose names contain title/selftext/text/comment/snippet/body are
# used; if none match, every character column is used instead. Each selected
# column is coerced to character, lower-cased, and NA is replaced by "".
#
# The result has exactly one element per row, so counting matches downstream
# counts rows (posts), not individual cells: a row that mentions a term in
# both its title and its body is counted once.
#
# df: a data frame / tibble (may be empty).
# Returns: a character vector of length nrow(df), or character(0) when `df`
#   has no rows or no usable text column.
build_text <- function(df) {
  if (nrow(df) == 0) return(character(0))

  text_cols <- names(df)[
    grepl("(title|selftext|text|comment|snippet|body)", names(df),
          ignore.case = TRUE)
  ]
  if (length(text_cols) == 0) {
    # vapply keeps the result a logical vector even for zero-column input.
    text_cols <- names(df)[vapply(df, is.character, logical(1))]
  }
  # Nothing to search in: return no text instead of failing further down.
  if (length(text_cols) == 0) return(character(0))

  cleaned <- lapply(text_cols, function(col) {
    txt <- tolower(as.character(df[[col]]))
    txt[is.na(txt)] <- ""
    txt
  })

  # Join the fields of a row with a newline: word-boundary patterns still work
  # and a multi-word phrase cannot match across two different fields.
  do.call(paste, c(cleaned, sep = "\n"))
}

# Keyword patterns used to tally mentions; has_any() applies them
# case-insensitively.

# AI tools: "ai", "chatgpt", "robo advisor" / "robo-advisor" / "roboadvisor",
# and "gpt" optionally followed by a version number ("gpt-4", "gpt 4").
rx_ai  <- "(\\bai\\b|\\bchatgpt\\b|\\brobo[-\\s]*advisor\\b|\\broboadvisor\\b|\\bgpt[-\\s]*\\d*\\b)"

# Human advisors. The lookbehind stops "robo-advisor" / "robo advisor", which
# rx_ai already counts as an AI mention, from also counting as a human advisor.
rx_adv <- "(\\bfinancial advisor(s)?\\b|(?<!robo[-\\s])\\badvisor(s)?\\b|\\bfinancial planner(s)?\\b|\\bcfp\\b|\\bfiduciary\\b)"

# Positive trust signals. The lookbehinds exclude exactly the negated phrases
# that rx_tn counts ("don't trust", "don\u2019t trust", "dont trust",
# "do not trust") so a negative statement is not also tallied as positive.
rx_tp  <- "((?<!don't )(?<!don\u2019t )(?<!dont )(?<!do not )\\btrust\\b|\\bfiduciary\\b|\\breliable\\b)"

# Negative trust signals. The apostrophe in "don't" may be straight, the
# typographic U+2019, or missing.
rx_tn  <- "(don['\u2019]?t trust|do not trust|\\bscam(s)?\\b|misinfo(rmation)?)"

# Count how many elements of `v` contain a case-insensitive match for the
# regular expression `pattern`. NA elements are not counted.
#
# v: character vector to search.
# pattern: regular expression as a single string.
# Returns: the number of matching elements (integer).
has_any <- function(v, pattern) {
  matcher <- regex(pattern, ignore_case = TRUE)
  is_hit <- str_detect(v, matcher)
  sum(is_hit, na.rm = TRUE)
}

# Load text: read each input file, turn it into lower-cased text snippets, and
# flatten everything into a single unnamed character vector.
all_txt <- unlist(
  map(map(files, read_safe), build_text),
  use.names = FALSE
)
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
# One row per metric: how many text snippets match each keyword pattern.
totals <- tibble(
  metric = c("AI", "Advisor", "Trust_Positive", "Trust_Negative"),
  value = map_int(
    list(rx_ai, rx_adv, rx_tp, rx_tn),
    function(rx) has_any(all_txt, rx)
  )
)

# Chart 1: AI vs Advisor
# Bar chart comparing the "AI" and "Advisor" mention counts from `totals`.
ggplot(
  data = dplyr::filter(totals, metric %in% c("AI", "Advisor")),
  mapping = aes(x = metric, y = value, fill = metric)
) +
  geom_col(width = 0.6) +
  labs(
    title = "Mentions: AI vs Financial Advisor (Reddit sample)",
    x = "Category",
    y = "Number of posts mentioning"
  ) +
  theme_minimal()

# Chart 2: Trust signals
# Bar chart comparing the positive and negative trust-signal counts from
# `totals`.
ggplot(
  data = dplyr::filter(
    totals,
    metric %in% c("Trust_Positive", "Trust_Negative")
  ),
  mapping = aes(x = metric, y = value, fill = metric)
) +
  geom_col(width = 0.6) +
  labs(
    title = "Trust Signals in Posts",
    x = "Trust sentiment",
    y = "Number of posts mentioning"
  ) +
  theme_minimal()