Data Loading and Preparation
# ========================================
# CHANGE THIS LINE TO YOUR FILE LOCATION
# ========================================
# Option 1: File in same folder as this .Rmd file (just filename)
file_path <- "Marketing Analytics Survey_October 29, 2025_18.57.csv"
# Option 2: File in a specific folder (uncomment and edit one of these)
# file_path <- "~/Downloads/Marketing Analytics Survey_October 29, 2025_18.57.csv" # Mac/Linux
# file_path <- "C:/Users/YourName/Downloads/Marketing Analytics Survey_October 29, 2025_18.57.csv" # Windows
# Option 3: Let R find it (uncomment this to search for any matching CSV)
# csv_files <- list.files(pattern = "Marketing.*Survey.*\\.csv$", full.names = TRUE, recursive = FALSE)
# if(length(csv_files) > 0) { file_path <- csv_files[1] }
# ========================================
# Check if file exists.
# When it does not, print where R looked and which CSV files sit in the
# working directory, then abort so nothing below runs on missing data.
if (!file.exists(file_path)) {
  cat("ERROR: Cannot find file!\n")
  cat("Looking for:", file_path, "\n")
  cat("Current directory:", getwd(), "\n")
  cat("\nCSV files in current directory:\n")
  csv_list <- list.files(pattern = "\\.csv$")
  if (length(csv_list) > 0) {
    # One numbered line per candidate file (vectorised, no index loop)
    cat(sprintf(" %d. %s\n", seq_along(csv_list), csv_list), sep = "")
    # Refer to the variable by name: a hard-coded line number goes stale as
    # soon as the document is edited
    cat("\nSet `file_path` (top of this chunk) to one of these filenames.\n")
  } else {
    cat(" No CSV files found!\n")
    cat("\nPossible solutions:\n")
    cat(" 1. Move your CSV to:", getwd(), "\n")
    cat(" 2. Or set `file_path` to the full path of the file\n")
  }
  stop("File not found - see messages above", call. = FALSE)
}
# Probe pass: parse a single record with read.csv()'s default header handling,
# only to report how many columns that first pass sees.
data_test <- read.csv(file = file_path, stringsAsFactors = FALSE, nrows = 1)
actual_cols <- ncol(data_test)
cat("Actual columns in file:", actual_cols, "\n")
## Actual columns in file: 27
# Full load: drop the first two lines of the file and treat everything after
# them as data (header = FALSE, so columns are unnamed until renamed later).
# Empty strings and the literal "NA" are both read as missing.
# NOTE(review): the recorded preview further down shows a {"ImportId":...} row
# surviving this skip, so it must be excluded before responses are counted.
data_raw <- read.csv(
  file = file_path,
  header = FALSE,
  skip = 2,
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)
cat("✓ File loaded:", file_path, "\n")
## ✓ File loaded: Marketing Analytics Survey_October 29, 2025_18.57.csv
cat(" Rows:", nrow(data_raw), "\n")
## Rows: 27
cat(" Columns:", ncol(data_raw), "\n\n")
## Columns: 29
# Assign column names by position.
# The first 17 names are survey-platform metadata fields, the remaining 10 are
# the survey questions. Names are matched to columns purely by position.
col_names <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress",
  "Duration", "Finished", "RecordedDate", "ResponseId",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail",
  "ExternalReference", "LocationLatitude", "LocationLongitude",
  "DistributionChannel", "UserLanguage", "AgeGroup",
  "DiscouragingFactors", "AIvsProfessionalTrust",
  "AdviceSource", "WhyThisSource", "AIExperience",
  "RecognizeRisks", "FactCheckConfidence", "AlgorithmInfluence",
  "ConnectionSuggestions"
)
# Reconcile the expected names with the number of columns actually loaded:
# surplus columns are named Extra1, Extra2, ...; if the file has fewer columns,
# only the leading names are used. seq_len() is used instead of 1:n so a zero
# count can never expand to c(1, 0).
if (ncol(data_raw) > length(col_names)) {
  extra_cols <- ncol(data_raw) - length(col_names)
  col_names <- c(col_names, paste0("Extra", seq_len(extra_cols)))
  cat("Note: Found", extra_cols, "extra columns\n")
} else if (ncol(data_raw) < length(col_names)) {
  col_names <- col_names[seq_len(ncol(data_raw))]
  cat("Note: Using first", ncol(data_raw), "column names\n")
}
## Note: Found 2 extra columns
colnames(data_raw) <- col_names
cat("✓ Column names assigned\n")
## ✓ Column names assigned
# Clean the data - keep only real respondents who answered the age question.
# Besides missing/empty ages, this also drops the survey platform's metadata
# row, whose cells look like {"ImportId":"QID12"}. That row survives the load
# step and would otherwise be counted as a respondent in every table and chart.
# NOTE: the recorded outputs in this document were produced before this extra
# condition was added, so they still include that row.
data_clean <- data_raw %>%
  filter(
    !is.na(AgeGroup),
    AgeGroup != "",
    AgeGroup != "NA",
    !grepl("{\"ImportId\"", AgeGroup, fixed = TRUE)
  )
cat("✓ Clean responses:", nrow(data_clean), "\n\n")
## ✓ Clean responses: 25
# Preview: first three cleaned rows, restricted to the age and advice-source
# columns, as a quick visual check of the load.
cat("Sample data:\n")
## Sample data:
data_clean %>%
  select(AgeGroup, AdviceSource) %>%
  head(3) %>%
  print()
## AgeGroup AdviceSource
## 1 {"ImportId":"QID12"} {"ImportId":"QID3_TEXT"}
## 2 14-25 NerdWallet or CreditKarma
## 3 14-25 The internet
Demographics
Age Distribution
# Tally respondents per age group and express each tally as a share of all
# cleaned responses.
age_counts <- data_clean %>%
  count(AgeGroup) %>%
  mutate(Percentage = n / sum(n) * 100)

# Bar chart, tallest bar first; each bar is labelled with its count and share.
ggplot(
  age_counts,
  aes(x = reorder(AgeGroup, -n), y = n, fill = AgeGroup)
) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, "\n(", round(Percentage, 1), "%)")),
    vjust = -0.5,
    size = 4,
    fontface = "bold"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5)
  ) +
  labs(
    title = "Survey Respondents by Age Group",
    x = "Age Group",
    y = "Number of Responses"
  )

# Same figures as a table
kable(age_counts, col.names = c("Age Group", "Count", "Percentage"))
| Age Group | Count | Percentage |
|:---|---:|---:|
| 14-25 | 17 | 68 |
| 26-35 | 6 | 24 |
| 36-45 | 1 | 4 |
| {“ImportId”:“QID12”} | 1 | 4 |
Trust in Financial Advice Sources
# Categorize trust responses.
# Each free-text answer gets the label of the FIRST pattern it matches
# (case-insensitive substring search), so rule order matters. Non-empty answers
# matching no pattern become "Other/Unclear"; missing answers stay NA.
data_clean <- data_clean %>%
  mutate(TrustCategory = case_when(
    # Accept both the straight (') and the typographic (’) apostrophe, which
    # phones and word processors insert automatically
    grepl("don't trust|don’t trust|do not trust|not at all",
          AIvsProfessionalTrust, ignore.case = TRUE) ~ "Don't Trust AI/Influencers",
    # NOTE(review): "trust.*more" also matches answers such as
    # "trust professionals more" - confirm this rule against the raw answers
    grepl("trust AI|trust.*more|AI.*accurate|trust a lot",
          AIvsProfessionalTrust, ignore.case = TRUE) ~ "Trust AI More",
    grepl("same|equal|depends|50",
          AIvsProfessionalTrust, ignore.case = TRUE) ~ "Equal/Depends",
    grepl("somewhat|kind of|a bit",
          AIvsProfessionalTrust, ignore.case = TRUE) ~ "Somewhat Trust AI",
    !is.na(AIvsProfessionalTrust) & AIvsProfessionalTrust != "" ~ "Other/Unclear",
    TRUE ~ NA_character_
  ))

# Count and share per category, most frequent first (row 1 is reused by the
# summary section at the end of the report).
trust_summary <- data_clean %>%
  filter(!is.na(TrustCategory)) %>%
  count(TrustCategory) %>%
  mutate(Percentage = n / sum(n) * 100) %>%
  arrange(desc(n))

# Horizontal bar chart, labelled with count and share
ggplot(trust_summary, aes(x = reorder(TrustCategory, n), y = n, fill = TrustCategory)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  geom_text(aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
            hjust = -0.1, size = 4) +
  coord_flip() +
  labs(title = "Trust in AI/Influencer vs Professional Financial Advice",
       x = "",
       y = "Number of Responses") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 14))

Primary Sources for Financial Advice
# Categorize advice sources.
# Each answer gets the label of the FIRST pattern it matches (case-insensitive),
# so rule order matters. Non-empty answers matching nothing become "Other";
# missing answers stay NA.
data_clean <- data_clean %>%
  mutate(AdviceCategory = case_when(
    grepl("family|friend|trusted|partner|sister|dad|stepmom|husband|loved",
          AdviceSource, ignore.case = TRUE) ~ "Family/Friends",
    # \\b keeps the two-letter token "AI" from matching inside ordinary words
    # (e.g. "explain", "available") under case-insensitive search
    grepl("\\bAI\\b|chat|GPT|gemini",
          AdviceSource, ignore.case = TRUE) ~ "AI Tools",
    grepl("bank|financial advisor",
          AdviceSource, ignore.case = TRUE) ~ "Bank/Financial Advisor",
    grepl("social|tik tok|youtube|internet|influencer",
          AdviceSource, ignore.case = TRUE) ~ "Social Media/Internet",
    # "me" is anchored to whole words so it no longer matches "someone",
    # "sometimes", "home", ...; both apostrophe forms of "don't" are accepted
    grepl("myself|\\bme\\b|no one|don't|don’t",
          AdviceSource, ignore.case = TRUE) ~ "Self/No One",
    grepl("NerdWallet|CreditKarma",
          AdviceSource, ignore.case = TRUE) ~ "Financial Websites",
    !is.na(AdviceSource) & AdviceSource != "" ~ "Other",
    TRUE ~ NA_character_
  ))

# Count and share per category, most frequent first (row 1 is reused by the
# summary section at the end of the report).
source_summary <- data_clean %>%
  filter(!is.na(AdviceCategory)) %>%
  count(AdviceCategory) %>%
  mutate(Percentage = n / sum(n) * 100) %>%
  arrange(desc(n))

# Horizontal bar chart, labelled with count and share
ggplot(source_summary, aes(x = reorder(AdviceCategory, n), y = n, fill = AdviceCategory)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  geom_text(aes(label = paste0(n, "\n", round(Percentage, 1), "%")),
            hjust = -0.1, size = 4) +
  coord_flip() +
  labs(title = "Where People Go First for Financial Advice",
       x = "",
       y = "Number of Responses") +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 14))

Barriers to Consulting Professional Advisors
# Categorize the barriers respondents name for not seeing a professional.
# Each answer gets the label of the FIRST pattern it matches (case-insensitive),
# so rule order matters. Non-empty answers matching nothing become "Other";
# missing answers stay NA.
data_clean <- data_clean %>%
  mutate(BarrierCategory = case_when(
    # "fee" is anchored to whole words so that "feel"/"feeling" are not
    # counted as a cost concern.
    # NOTE(review): "financial" also matches the phrase "financial advisor",
    # which may pull non-cost answers into this category - confirm
    grepl("money|cost|\\bfees?\\b|free|financial|spending",
          DiscouragingFactors, ignore.case = TRUE) ~ "Cost/Money",
    # Whole-word forms of "time" only, so "sometimes" no longer matches
    grepl("\\btim(e|es|ing)\\b|availability|busy",
          DiscouragingFactors, ignore.case = TRUE) ~ "Time/Availability",
    grepl("trust|judgment|dismissed|sell|scam|young|stranger",
          DiscouragingFactors, ignore.case = TRUE) ~ "Trust/Judgment Issues",
    grepl("don't|don’t|not.*help",
          DiscouragingFactors, ignore.case = TRUE) ~ "Not Helpful",
    !is.na(DiscouragingFactors) & DiscouragingFactors != "" ~ "Other",
    TRUE ~ NA_character_
  ))

# Count per category, most frequent first (row 1 is reused by the summary
# section at the end of the report).
barrier_summary <- data_clean %>%
  filter(!is.na(BarrierCategory)) %>%
  count(BarrierCategory) %>%
  arrange(desc(n))

# Horizontal bar chart, labelled with the count
ggplot(barrier_summary, aes(x = reorder(BarrierCategory, n), y = n, fill = BarrierCategory)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  geom_text(aes(label = n), hjust = -0.2, size = 5, fontface = "bold") +
  coord_flip() +
  labs(title = "Key Barriers to Consulting Professional Financial Advisors",
       x = "",
       y = "Number of Mentions") +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 14))

AI Experience Summary
# Classify each non-empty AI-experience answer into one of three labels.
# The first matching rule wins; answers matching neither pattern fall through
# to "Has Used AI".
ai_exp <- data_clean %>%
  filter(!is.na(AIExperience) & AIExperience != "") %>%
  mutate(HasUsedAI = case_when(
    # Negations are matched as whole words so that "know", "now", "another",
    # "notice" ... are not read as "no"/"not"; both apostrophe forms accepted.
    # NOTE(review): a whole-word "no"/"not" can still occur in a positive
    # answer ("not bad", "no issues") - spot-check against the raw answers
    grepl("\\b(never|no|not|nope|have not)\\b|haven't|haven’t",
          AIExperience, ignore.case = TRUE) ~ "Never Used AI",
    grepl("helpful|accurate|well|good|useful|better",
          AIExperience, ignore.case = TRUE) ~ "Positive Experience",
    TRUE ~ "Has Used AI"
  ))

# Count and share per label
ai_summary <- ai_exp %>%
  count(HasUsedAI) %>%
  mutate(Percentage = n / sum(n) * 100)

# Pie chart: a single stacked bar drawn in polar coordinates, with each slice
# labelled in place instead of using a legend
ggplot(ai_summary, aes(x = "", y = n, fill = HasUsedAI)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  geom_text(aes(label = paste0(HasUsedAI, "\n", n, " (", round(Percentage, 1), "%)")),
            position = position_stack(vjust = 0.5), size = 4) +
  labs(title = "AI Usage for Financial Advice") +
  scale_fill_brewer(palette = "Pastel1") +
  theme_void() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
        legend.position = "none")

Misinformation Awareness
# Risk recognition.
# Non-empty answers are labelled by the first matching rule. Explicit negations
# are tested BEFORE the affirmative keywords: "not aware", "unaware" and
# "don't recognize" all contain "aware"/"recognize" and would otherwise be
# labelled "Yes". Both apostrophe forms (' and ’) are accepted.
risk_data <- data_clean %>%
  filter(!is.na(RecognizeRisks) & RecognizeRisks != "") %>%
  mutate(RiskAwareness = case_when(
    grepl("(not|n't|n’t) recogni|(not|n't|n’t) aware|unaware",
          RecognizeRisks, ignore.case = TRUE) ~ "No",
    grepl("^yes|^i believe so|recognize|aware",
          RecognizeRisks, ignore.case = TRUE) ~ "Yes",
    grepl("^no|don't|don’t|do not",
          RecognizeRisks, ignore.case = TRUE) ~ "No",
    grepl("maybe|sometimes|some might|some",
          RecognizeRisks, ignore.case = TRUE) ~ "Mixed/Uncertain",
    TRUE ~ "Other"
  ))
risk_summary <- risk_data %>%
  count(RiskAwareness) %>%
  mutate(Percentage = n / sum(n) * 100)

# Fact-checking confidence.
# Same approach: "not confident" (optionally with an intensifier) is tested
# before the bare keyword "confident", which would otherwise claim it.
factcheck_data <- data_clean %>%
  filter(!is.na(FactCheckConfidence) & FactCheckConfidence != "") %>%
  mutate(FactCheckAbility = case_when(
    grepl("(not|n't|n’t) (very |that |so |too )?confident",
          FactCheckConfidence, ignore.case = TRUE) ~ "No/Not Confident",
    grepl("^yes|confident|better|^i believe so",
          FactCheckConfidence, ignore.case = TRUE) ~ "Yes/Confident",
    grepl("^no|not often|don't|don’t",
          FactCheckConfidence, ignore.case = TRUE) ~ "No/Not Confident",
    grepl("maybe|some|not sure|probably|depends",
          FactCheckConfidence, ignore.case = TRUE) ~ "Mixed/Uncertain",
    TRUE ~ "Other"
  ))
factcheck_summary <- factcheck_data %>%
  count(FactCheckAbility) %>%
  mutate(Percentage = n / sum(n) * 100)

# Combined bar chart: stack the two summaries into one long table with a
# shared Response column so both questions can be dodged side by side
combined_data <- bind_rows(
  risk_summary %>% mutate(Question = "Recognize Risks?", Response = RiskAwareness),
  factcheck_summary %>% mutate(Question = "Can Fact-Check?", Response = FactCheckAbility)
) %>% select(Question, Response, n, Percentage)
ggplot(combined_data, aes(x = Response, y = n, fill = Question)) +
  geom_bar(stat = "identity", position = "dodge", color = "black", alpha = 0.8) +
  geom_text(aes(label = paste0(n, "\n", round(Percentage, 1), "%")),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 3.5) +
  labs(title = "Misinformation Awareness & Fact-Checking Confidence",
       x = "",
       y = "Number of Responses",
       fill = "") +
  scale_fill_manual(values = c("#E74C3C", "#3498DB")) +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
        legend.position = "top")

Key Recommendations from Respondents
# Extract keywords from suggestions: pull the non-empty free-text answers
suggestions <- data_clean %>%
  filter(!is.na(ConnectionSuggestions) & ConnectionSuggestions != "") %>%
  pull(ConnectionSuggestions)

# Common themes: one case-insensitive regular expression per theme. A single
# suggestion may count towards several themes.
themes <- data.frame(
  Theme = c("Social Media", "Relatable/Simple", "Trust/Professional",
            "Free/Accessible", "Education/Understanding"),
  Keywords = c("social.*media|tik.*tok|online|instagram",
               # "fun" is anchored to a whole word so "fund"/"funds" do not match
               "relatable|simple|easy|digest|understand|\\bfun\\b|appeal",
               "trust|professional|genuine|proof|reliable",
               "free|consult|accessible|available",
               "educat|learn|explain|information|terms"),
  stringsAsFactors = FALSE
)

# Number of suggestions matching each theme. vapply() pins the result to one
# integer per theme regardless of input, unlike sapply().
theme_counts <- vapply(
  themes$Keywords,
  function(pattern) sum(grepl(pattern, suggestions, ignore.case = TRUE)),
  integer(1),
  USE.NAMES = FALSE
)

# Keep only themes that were mentioned, most frequent first
theme_df <- data.frame(
  Theme = themes$Theme,
  Mentions = theme_counts
) %>%
  filter(Mentions > 0) %>%
  arrange(desc(Mentions))

# Horizontal bar chart, labelled with the mention count
ggplot(theme_df, aes(x = reorder(Theme, Mentions), y = Mentions, fill = Theme)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  geom_text(aes(label = Mentions), hjust = -0.2, size = 5, fontface = "bold") +
  coord_flip() +
  labs(title = "Key Themes in Recommendations for Financial Companies",
       x = "",
       y = "Number of Mentions") +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 14))

Sample Recommendations
# First ten respondents who wrote a suggestion, with their age group.
# The answer is trimmed before the emptiness test so whitespace-only answers
# are excluded; the recorded table shows one row with a blank suggestion cell,
# presumably such an answer.
recommendations <- data_clean %>%
  filter(!is.na(ConnectionSuggestions) & trimws(ConnectionSuggestions) != "") %>%
  select(AgeGroup, ConnectionSuggestions) %>%
  head(10)
kable(recommendations,
      col.names = c("Age Group", "Suggestions for Companies"),
      caption = "Sample Respondent Suggestions")
Sample Respondent Suggestions
| Age Group | Suggestions for Companies |
|:---|:---|
| {“ImportId”:“QID12”} | {“ImportId”:“QID11_TEXT”} |
| 14-25 | Be more relatable on social media. |
| 14-25 | To be more appealing and make learning about finance fun, not boring. |
| 26-35 | Social Media Marketing |
| 14-25 | |
| 36-45 | social media like jumping on Tik Tok |
| 14-25 | target audience |
| 14-25 | Make financial issues and terms easier to digest and understand |
| 26-35 | Yes |
| 14-25 | Market more on social media |
Summary Statistics
# Headline figures, emitted as markdown text. Each "top" figure is simply the
# first row of a summary table built earlier in the report.
cat("## Survey Overview\n\n")
## ## Survey Overview
n_valid <- nrow(data_clean)
cat("**Total Valid Responses:**", n_valid, "\n\n")
## **Total Valid Responses:** 25
age_labels <- paste(unique(data_clean$AgeGroup), collapse = ", ")
cat("**Age Groups:**", age_labels, "\n\n")
## **Age Groups:** {"ImportId":"QID12"}, 14-25, 26-35, 36-45
top_source <- source_summary$AdviceCategory[1]
top_source_n <- source_summary$n[1]
cat("**Most Common Advice Source:**", top_source,
    "(", top_source_n, "responses )\n\n")
## **Most Common Advice Source:** Family/Friends ( 9 responses )
top_barrier <- barrier_summary$BarrierCategory[1]
top_barrier_n <- barrier_summary$n[1]
cat("**Top Barrier:**", top_barrier,
    "(", top_barrier_n, "mentions )\n\n")
## **Top Barrier:** Cost/Money ( 11 mentions )
top_trust <- trust_summary$TrustCategory[1]
top_trust_n <- trust_summary$n[1]
cat("**Most Common Trust Sentiment:**", top_trust,
    "(", top_trust_n, "responses )\n\n")
## **Most Common Trust Sentiment:** Other/Unclear ( 7 responses )