library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
# Input CSV files to scan; every name is a stem followed by the same
# "_10092025.csv" suffix.
files <- paste0(
  c(
    "finance",
    "money",
    "investing",
    "GenZ",
    "financial_advice_from_AI"
  ),
  "_10092025.csv"
)
# Read one CSV file without ever raising an error.
#
# The file is first parsed with readr's defaults; if that attempt errors, it
# is retried with a Latin-1 locale. If the retry also errors, a warning naming
# the file and the reason is raised and an empty tibble is returned, so the
# caller can carry on with the remaining files.
#
# p: path of the CSV file to read.
# Returns: a tibble with the file's contents, or an empty tibble (zero rows,
#   zero columns) when the file could not be read.
read_safe <- function(p) {
  tryCatch(
    readr::read_csv(p, show_col_types = FALSE),
    error = function(e) {
      tryCatch(
        readr::read_csv(
          p,
          locale = readr::locale(encoding = "latin1"),
          show_col_types = FALSE
        ),
        error = function(e2) {
          # Report the failure: a missing or unreadable file would otherwise
          # silently contribute nothing to every downstream count.
          warning(
            "Could not read '", p, "': ", conditionMessage(e2),
            call. = FALSE
          )
          tibble::tibble()
        }
      )
    }
  )
}
# Collapse the text-bearing columns of a data frame into one lowercase string
# per row.
#
# Columns are chosen by name (anything containing title, selftext, text,
# comment, snippet or body, case-insensitively); when no name matches, every
# character column is used instead. The chosen cells of a row are lowercased,
# missing values are treated as empty, and the pieces are joined with single
# spaces. Returning one string per row (rather than one per cell) means a
# pattern that occurs in both the title and the body of the same row is
# counted once by downstream per-element matching.
#
# Written with base R only, so it works on plain data frames as well as
# tibbles.
#
# df: a data frame / tibble.
# Returns: a character vector with one element per row of `df`, or
#   character(0) when `df` has no rows or no usable text column.
build_text <- function(df) {
  if (nrow(df) == 0) {
    return(character(0))
  }

  col_names <- names(df)
  text_cols <- col_names[
    grepl("(title|selftext|text|comment|snippet|body)", tolower(col_names))
  ]
  if (length(text_cols) == 0) {
    # No recognisable text column: fall back to every character column.
    text_cols <- col_names[vapply(df, is.character, logical(1))]
  }
  if (length(text_cols) == 0) {
    # Nothing to scan; avoid failing on an empty column selection.
    return(character(0))
  }

  pieces <- lapply(text_cols, function(nm) {
    cell <- tolower(as.character(df[[nm]]))
    cell[is.na(cell)] <- ""
    cell
  })

  # paste() is vectorised across the columns, giving one string per row;
  # trimws() drops the padding left behind by empty leading/trailing cells.
  trimws(do.call(paste, c(pieces, sep = " ")))
}
# Regular expressions for the four mention categories. They are written for
# lowercase / case-insensitive matching.

# AI tools: "ai", "chatgpt", "robo-advisor" / "robo advisor" / "roboadvisor",
# and "gpt" optionally followed by a version number.
rx_ai <- "(\\bai\\b|\\bchatgpt\\b|\\brobo[-\\s]*advisor\\b|\\broboadvisor\\b|\\bgpt[-\\s]*\\d*\\b)"

# Human advisors. The look-behind keeps the "advisor" in "robo-advisor" /
# "robo advisor" from being counted here as well as under rx_ai.
rx_adv <- "(\\bfinancial advisor(s)?\\b|(?<!robo[-\\s])\\badvisor(s)?\\b|\\bfinancial planner(s)?\\b|\\bcfp\\b|\\bfiduciary\\b)"

# Positive trust signals. The look-behinds skip the "trust" in
# "don't trust" / "do not trust", which rx_tn counts as negative.
rx_tp <- "((?<!don['\u2019]t )(?<!do not )\\btrust\\b|\\bfiduciary\\b|\\breliable\\b)"

# Negative trust signals; accepts both the straight and the typographic
# apostrophe in "don't".
rx_tn <- "(don['\u2019]t trust|do not trust|\\bscam(s)?\\b|misinfo(rmation)?)"
# Count how many elements of `v` match `pattern`, ignoring case.
# Elements for which the match is NA are left out of the count.
#
# v: character vector to search.
# pattern: regular expression, as a string.
# Returns: the number of matching elements (integer).
has_any <- function(v, pattern) {
  matcher <- regex(pattern, ignore_case = TRUE)
  hits <- str_detect(v, matcher)
  sum(hits, na.rm = TRUE)
}
# Read every input file, extract its text, and pool everything into a single
# unnamed character vector.
all_txt <- unlist(
  map(map(files, read_safe), build_text),
  use.names = FALSE
)
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
# One row per category: the label and the number of elements of `all_txt`
# that match that category's pattern.
totals <- tribble(
  ~metric,          ~value,
  "AI",             has_any(all_txt, rx_ai),
  "Advisor",        has_any(all_txt, rx_adv),
  "Trust_Positive", has_any(all_txt, rx_tp),
  "Trust_Negative", has_any(all_txt, rx_tn)
)
# Chart 1: bar chart comparing the AI and Advisor counts
ggplot(
  data = filter(totals, metric %in% c("AI", "Advisor")),
  mapping = aes(x = metric, y = value, fill = metric)
) +
  geom_col(width = 0.6) +
  labs(
    x = "Category",
    y = "Number of posts mentioning",
    title = "Mentions: AI vs Financial Advisor (Reddit sample)"
  ) +
  theme_minimal()

# Chart 2: bar chart comparing the positive and negative trust counts
ggplot(
  data = filter(totals, metric %in% c("Trust_Positive", "Trust_Negative")),
  mapping = aes(x = metric, y = value, fill = metric)
) +
  geom_col(width = 0.6) +
  labs(
    x = "Trust sentiment",
    y = "Number of posts mentioning",
    title = "Trust Signals in Posts"
  ) +
  theme_minimal()
