Data Loading and Preparation

# ========================================
# CHANGE THIS LINE TO YOUR FILE LOCATION
# ========================================

# Option 1: File in same folder as this .Rmd file (just filename)
file_path <- "Marketing Analytics Survey_October 29, 2025_18.57.csv"

# Option 2: File in a specific folder (uncomment and edit one of these)
# file_path <- "~/Downloads/Marketing Analytics Survey_October 29, 2025_18.57.csv"  # Mac/Linux
# file_path <- "C:/Users/YourName/Downloads/Marketing Analytics Survey_October 29, 2025_18.57.csv"  # Windows

# Option 3: Let R find it (uncomment this to search for any matching CSV)
# csv_files <- list.files(pattern = "Marketing.*Survey.*\\.csv$", full.names = TRUE, recursive = FALSE)
# if(length(csv_files) > 0) { file_path <- csv_files[1] }

# ========================================

# Stop early, with troubleshooting hints, when the survey export is missing.
# The hints name the `file_path` variable instead of a source line number,
# because a hard-coded line number goes stale as soon as the file is edited.
if (!file.exists(file_path)) {
  cat("ERROR: Cannot find file!\n")
  cat("Looking for:", file_path, "\n")
  cat("Current directory:", getwd(), "\n")
  cat("\nCSV files in current directory:\n")
  csv_list <- list.files(pattern = "\\.csv$")
  if (length(csv_list) > 0) {
    # One numbered line per candidate file, e.g. "  1. survey.csv".
    cat(paste0("  ", seq_along(csv_list), ". ", csv_list, "\n"), sep = "")
    cat("\nSet file_path (defined above) to one of these filenames.\n")
  } else {
    cat("  No CSV files found!\n")
    cat("\nPossible solutions:\n")
    cat("  1. Move your CSV to:", getwd(), "\n")
    cat("  2. Or set file_path (defined above) to the full path of the CSV\n")
  }
  stop("File not found - see messages above", call. = FALSE)
}

# Probe read: load a single record with the default header handling and
# report how many columns that yields.
data_test <- read.csv(file_path, stringsAsFactors = FALSE, nrows = 1)
actual_cols <- ncol(data_test)
cat("Actual columns in file:", actual_cols, "\n")

## Actual columns in file: 27

# Full read: skip the first two lines of the export, treat every remaining
# line as data (no header row), and turn empty strings and "NA" into NA.
data_raw <- read.csv(
  file_path,
  header = FALSE,
  skip = 2,
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)
cat("✓ File loaded:", file_path, "\n")

## ✓ File loaded: Marketing Analytics Survey_October 29, 2025_18.57.csv

cat("  Rows:", nrow(data_raw), "\n")

##   Rows: 27

cat("  Columns:", ncol(data_raw), "\n\n")

##   Columns: 29

# Expected column layout of the export, in file order.
col_names <- c("StartDate", "EndDate", "Status", "IPAddress", "Progress",
               "Duration", "Finished", "RecordedDate", "ResponseId",
               "RecipientLastName", "RecipientFirstName", "RecipientEmail",
               "ExternalReference", "LocationLatitude", "LocationLongitude",
               "DistributionChannel", "UserLanguage", "AgeGroup",
               "DiscouragingFactors", "AIvsProfessionalTrust",
               "AdviceSource", "WhyThisSource", "AIExperience",
               "RecognizeRisks", "FactCheckConfidence", "AlgorithmInfluence",
               "ConnectionSuggestions")

# Reconcile the expected names with the number of columns actually read:
# surplus columns are named Extra1, Extra2, ...; a short file keeps only the
# leading names. seq_len() is used instead of 1:n so that a count of zero
# yields an empty index rather than c(1, 0).
n_cols <- ncol(data_raw)
n_named <- length(col_names)
if (n_cols > n_named) {
  extra_cols <- n_cols - n_named
  col_names <- c(col_names, paste0("Extra", seq_len(extra_cols)))
  cat("Note: Found", extra_cols, "extra columns\n")
} else if (n_cols < n_named) {
  col_names <- col_names[seq_len(n_cols)]
  cat("Note: Using first", n_cols, "column names\n")
}

## Note: Found 2 extra columns

colnames(data_raw) <- col_names
cat("✓ Column names assigned\n")

## ✓ Column names assigned

# Clean the data:
#   * drop rows with no usable age response (NA, empty, or the literal "NA");
#   * drop the export's metadata row. The file is read with two header lines
#     skipped, so the remaining header line of {"ImportId": ...} JSON tags
#     arrives as a data row and would otherwise be counted as a respondent
#     (it shows up as an "age group" of {"ImportId":"QID12"}).
data_clean <- data_raw %>%
  filter(
    !is.na(AgeGroup),
    AgeGroup != "",
    AgeGroup != "NA",
    !grepl("ImportId", AgeGroup, fixed = TRUE)
  )

cat("✓ Clean responses:", nrow(data_clean), "\n\n")

## ✓ Clean responses: 25

# Show the first three cleaned rows, restricted to the age group and
# advice-source columns.
cat("Sample data:\n")

## Sample data:

data_clean %>%
  select(AgeGroup, AdviceSource) %>%
  head(3) %>%
  print()

##               AgeGroup              AdviceSource
## 1 {"ImportId":"QID12"}  {"ImportId":"QID3_TEXT"}
## 2                14-25 NerdWallet or CreditKarma
## 3                14-25              The internet

Demographics

Age Distribution

# Tally respondents per age group and express each tally as a percentage of
# all cleaned responses.
age_counts <- mutate(
  count(data_clean, AgeGroup),
  Percentage = n / sum(n) * 100
)

# Bar chart, largest group first; each bar is labelled with its count and
# its percentage rounded to one decimal.
ggplot(age_counts, aes(x = reorder(AgeGroup, -n), y = n, fill = AgeGroup)) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, "\n(", round(Percentage, 1), "%)")),
    vjust = -0.5,
    size = 4,
    fontface = "bold"
  ) +
  labs(
    title = "Survey Respondents by Age Group",
    x = "Age Group",
    y = "Number of Responses"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5)
  )

# The same figures as a table.
kable(age_counts, col.names = c("Age Group", "Count", "Percentage"))

Age Group	Count	Percentage
14-25	17	68
26-35	6	24
36-45	1	4
{“ImportId”:“QID12”}	1	4

Trust in Financial Advice Sources

# Categorize free-text trust responses by keyword. Rules are tried top to
# bottom and the first match wins; non-empty answers that match nothing are
# "Other/Unclear", and missing/empty answers stay NA.
#
# Respondents type apostrophes in several forms. Accept the straight quote,
# the curly quote (U+2019), and the three-character sequence the curly quote
# turns into when a UTF-8 file is decoded as Windows-1252.
apostrophe <- "('|\u2019|\u00e2\u20ac\u2122)"
distrust_rx <- paste0("don", apostrophe, "t trust|do not trust|not at all")

# NOTE(review): "trust.*more" also matches answers such as "I trust
# professionals more" - confirm against the raw responses.
data_clean <- data_clean %>%
  mutate(TrustCategory = case_when(
    grepl(distrust_rx, AIvsProfessionalTrust, ignore.case = TRUE) ~ "Don't Trust AI/Influencers",
    grepl("trust AI|trust.*more|AI.*accurate|trust a lot", AIvsProfessionalTrust, ignore.case = TRUE) ~ "Trust AI More",
    grepl("same|equal|depends|50", AIvsProfessionalTrust, ignore.case = TRUE) ~ "Equal/Depends",
    grepl("somewhat|kind of|a bit", AIvsProfessionalTrust, ignore.case = TRUE) ~ "Somewhat Trust AI",
    !is.na(AIvsProfessionalTrust) & AIvsProfessionalTrust != "" ~ "Other/Unclear",
    TRUE ~ NA_character_
  ))

# Count responses per trust category (uncategorized rows excluded), add each
# category's percentage share, and sort from most to least common.
trust_summary <- filter(data_clean, !is.na(TrustCategory))
trust_summary <- count(trust_summary, TrustCategory)
trust_summary <- mutate(trust_summary, Percentage = n / sum(n) * 100)
trust_summary <- arrange(trust_summary, desc(n))

# Horizontal bar chart; each bar is labelled "count (percent%)".
ggplot(
  trust_summary,
  aes(x = reorder(TrustCategory, n), y = n, fill = TrustCategory)
) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, " (", round(Percentage, 1), "%)")),
    hjust = -0.1,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "Trust in AI/Influencer vs Professional Financial Advice",
    x = "",
    y = "Number of Responses"
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

Primary Sources for Financial Advice

# Categorize free-text advice sources by keyword. Rules are tried top to
# bottom and the first match wins; non-empty answers that match nothing are
# "Other", and missing/empty answers stay NA.
#
# Short keywords are anchored with \\b word boundaries: matching is
# case-insensitive, so a bare "AI" would also hit "email" or "certain", and a
# bare "me" would hit "some", "home" or "investment".
#
# Apostrophes are accepted as a straight quote, a curly quote (U+2019), or
# the sequence a curly quote becomes when UTF-8 is decoded as Windows-1252.
apostrophe <- "('|\u2019|\u00e2\u20ac\u2122)"
family_rx <- paste0(
  "family|friend|trusted|partner|sister|dad|stepmom|husband|loved|",
  "parent|mother|father|brother|\\bmoms?\\b"
)
ai_rx <- "\\bAI\\b|openai|chat|GPT|gemini"
self_rx <- paste0("\\b(myself|me)\\b|no one|don", apostrophe, "t")

data_clean <- data_clean %>%
  mutate(AdviceCategory = case_when(
    grepl(family_rx, AdviceSource, ignore.case = TRUE) ~ "Family/Friends",
    grepl(ai_rx, AdviceSource, ignore.case = TRUE) ~ "AI Tools",
    grepl("bank|financial advisor", AdviceSource, ignore.case = TRUE) ~ "Bank/Financial Advisor",
    grepl("social|tik tok|youtube|internet|influencer", AdviceSource, ignore.case = TRUE) ~ "Social Media/Internet",
    grepl(self_rx, AdviceSource, ignore.case = TRUE) ~ "Self/No One",
    grepl("NerdWallet|CreditKarma", AdviceSource, ignore.case = TRUE) ~ "Financial Websites",
    !is.na(AdviceSource) & AdviceSource != "" ~ "Other",
    TRUE ~ NA_character_
  ))

# Count responses per advice category (uncategorized rows excluded), add each
# category's percentage share, and sort from most to least common.
source_summary <- filter(data_clean, !is.na(AdviceCategory))
source_summary <- count(source_summary, AdviceCategory)
source_summary <- mutate(source_summary, Percentage = n / sum(n) * 100)
source_summary <- arrange(source_summary, desc(n))

# Horizontal bar chart; each bar is labelled with its count and, on a second
# line, its percentage.
ggplot(
  source_summary,
  aes(x = reorder(AdviceCategory, n), y = n, fill = AdviceCategory)
) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, "\n", round(Percentage, 1), "%")),
    hjust = -0.1,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "Where People Go First for Financial Advice",
    x = "",
    y = "Number of Responses"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

Barriers to Consulting Professional Advisors

# Categorize free-text barriers by keyword. Rules are tried top to bottom and
# the first match wins; non-empty answers that match nothing are "Other", and
# missing/empty answers stay NA.
#
# "fee" and "time" are anchored with \\b word boundaries so they no longer
# match inside "feel" or "sometimes". "judg" covers judged / judgment /
# judgement.
#
# Apostrophes are accepted as a straight quote, a curly quote (U+2019), or
# the sequence a curly quote becomes when UTF-8 is decoded as Windows-1252.
apostrophe <- "('|\u2019|\u00e2\u20ac\u2122)"
not_helpful_rx <- paste0("don", apostrophe, "t|not.*help")

data_clean <- data_clean %>%
  mutate(BarrierCategory = case_when(
    grepl("money|cost|\\bfees?\\b|free|financial|spending", DiscouragingFactors, ignore.case = TRUE) ~ "Cost/Money",
    grepl("\\btime|availability|busy", DiscouragingFactors, ignore.case = TRUE) ~ "Time/Availability",
    grepl("trust|judg|dismissed|sell|scam|young|stranger", DiscouragingFactors, ignore.case = TRUE) ~ "Trust/Judgment Issues",
    grepl(not_helpful_rx, DiscouragingFactors, ignore.case = TRUE) ~ "Not Helpful",
    !is.na(DiscouragingFactors) & DiscouragingFactors != "" ~ "Other",
    TRUE ~ NA_character_
  ))

# Count mentions per barrier category (uncategorized rows excluded), sorted
# from most to least common.
barrier_summary <- filter(data_clean, !is.na(BarrierCategory))
barrier_summary <- count(barrier_summary, BarrierCategory)
barrier_summary <- arrange(barrier_summary, desc(n))

# Horizontal bar chart with the raw count printed beside each bar.
ggplot(
  barrier_summary,
  aes(x = reorder(BarrierCategory, n), y = n, fill = BarrierCategory)
) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(aes(label = n), hjust = -0.2, size = 5, fontface = "bold") +
  coord_flip() +
  labs(
    title = "Key Barriers to Consulting Professional Financial Advisors",
    x = "",
    y = "Number of Mentions"
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

AI Experience Summary

# Classify non-empty AI-experience answers. Rules are tried top to bottom and
# the first match wins; anything that matches neither rule is "Has Used AI".
#
# Negation words are matched as whole words (\\b ... \\b). An unanchored "no"
# also matches "know", "now" and "technology", which would file those answers
# under "Never Used AI".
#
# Apostrophes are accepted as a straight quote, a curly quote (U+2019), or
# the sequence a curly quote becomes when UTF-8 is decoded as Windows-1252.
apostrophe <- "('|\u2019|\u00e2\u20ac\u2122)"
never_used_rx <- paste0(
  "\\b(never|no|none|not|nope)\\b|haven", apostrophe, "t|have not"
)

ai_exp <- data_clean %>%
  filter(!is.na(AIExperience) & AIExperience != "") %>%
  mutate(HasUsedAI = case_when(
    grepl(never_used_rx, AIExperience, ignore.case = TRUE) ~ "Never Used AI",
    grepl("helpful|accurate|well|good|useful|better", AIExperience, ignore.case = TRUE) ~ "Positive Experience",
    TRUE ~ "Has Used AI"
  ))

# Count answers per AI-usage class and add each class's percentage share.
ai_summary <- mutate(
  count(ai_exp, HasUsedAI),
  Percentage = n / sum(n) * 100
)

# Pie chart: a single stacked bar wrapped into polar coordinates, with each
# slice labelled by class name, count, and percentage.
ggplot(ai_summary, aes(x = "", y = n, fill = HasUsedAI)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(HasUsedAI, "\n", n, " (", round(Percentage, 1), "%)")),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  labs(title = "AI Usage for Financial Advice") +
  scale_fill_brewer(palette = "Pastel1") +
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
    legend.position = "none"
  )

Misinformation Awareness

# Classify the two misinformation questions by keyword. Within each
# case_when() the rules are tried top to bottom and the first match wins.
#
# Rule order matters: an answer that opens with an explicit "yes" is a Yes;
# otherwise a *negated* keyword ("not confident", "don't recognize",
# "unaware") is tested before the bare keyword ("confident", "recognize",
# "aware"). Testing the bare keyword first would classify "I'm not confident"
# as confident, because the phrase contains the keyword.
#
# Apostrophes are accepted as a straight quote, a curly quote (U+2019), or
# the sequence a curly quote becomes when UTF-8 is decoded as Windows-1252.
apostrophe <- "('|\u2019|\u00e2\u20ac\u2122)"
dont_rx <- paste0("don", apostrophe, "t")
# "not", or any contraction ending in n't (don't, can't, isn't, ...).
negation_rx <- paste0("(not|n", apostrophe, "t)")

# Risk recognition
risk_negated_rx <- paste0(negation_rx, " (recogni|aware)|unaware")
risk_no_rx <- paste0("^no|", dont_rx, "|do not|not recogni")

risk_data <- data_clean %>%
  filter(!is.na(RecognizeRisks) & RecognizeRisks != "") %>%
  mutate(RiskAwareness = case_when(
    grepl("^yes|^i believe so", RecognizeRisks, ignore.case = TRUE) ~ "Yes",
    grepl(risk_negated_rx, RecognizeRisks, ignore.case = TRUE) ~ "No",
    grepl("recognize|aware", RecognizeRisks, ignore.case = TRUE) ~ "Yes",
    grepl(risk_no_rx, RecognizeRisks, ignore.case = TRUE) ~ "No",
    grepl("maybe|sometimes|some might|some", RecognizeRisks, ignore.case = TRUE) ~ "Mixed/Uncertain",
    TRUE ~ "Other"
  ))

risk_summary <- risk_data %>%
  count(RiskAwareness) %>%
  mutate(Percentage = n / sum(n) * 100)

# Fact-checking confidence
factcheck_negated_rx <- paste0(
  negation_rx, " (very |that |too |so |really )?confident"
)
factcheck_no_rx <- paste0("^no|not confident|not often|", dont_rx)

factcheck_data <- data_clean %>%
  filter(!is.na(FactCheckConfidence) & FactCheckConfidence != "") %>%
  mutate(FactCheckAbility = case_when(
    grepl("^yes|^i believe so", FactCheckConfidence, ignore.case = TRUE) ~ "Yes/Confident",
    grepl(factcheck_negated_rx, FactCheckConfidence, ignore.case = TRUE) ~ "No/Not Confident",
    grepl("confident|better", FactCheckConfidence, ignore.case = TRUE) ~ "Yes/Confident",
    grepl(factcheck_no_rx, FactCheckConfidence, ignore.case = TRUE) ~ "No/Not Confident",
    grepl("maybe|some|not sure|probably|depends", FactCheckConfidence, ignore.case = TRUE) ~ "Mixed/Uncertain",
    TRUE ~ "Other"
  ))

factcheck_summary <- factcheck_data %>%
  count(FactCheckAbility) %>%
  mutate(Percentage = n / sum(n) * 100)

# Combined bar chart data: stack both summaries under a shared Response
# column, tagged with the question each row belongs to.
combined_data <- bind_rows(
  risk_summary %>% mutate(Question = "Recognize Risks?", Response = RiskAwareness),
  factcheck_summary %>% mutate(Question = "Can Fact-Check?", Response = FactCheckAbility)
) %>% select(Question, Response, n, Percentage)

# Side-by-side bars per response, one colour per question; each bar is
# labelled with its count and, on a second line, its percentage.
ggplot(combined_data, aes(x = Response, y = n, fill = Question)) +
  geom_col(position = "dodge", color = "black", alpha = 0.8) +
  geom_text(
    aes(label = paste0(n, "\n", round(Percentage, 1), "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3.5
  ) +
  labs(
    title = "Misinformation Awareness & Fact-Checking Confidence",
    x = "",
    y = "Number of Responses",
    fill = ""
  ) +
  scale_fill_manual(values = c("#E74C3C", "#3498DB")) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    legend.position = "top"
  )

Key Recommendations from Respondents

# Pull the non-missing, non-empty suggestion texts as a plain vector.
suggestions <- data_clean %>%
  filter(!is.na(ConnectionSuggestions), ConnectionSuggestions != "") %>%
  pull(ConnectionSuggestions)

# Each theme is a label paired with a case-insensitive keyword pattern.
theme_labels <- c("Social Media", "Relatable/Simple", "Trust/Professional", "Free/Accessible", "Education/Understanding")
theme_patterns <- c("social.*media|tik.*tok|online|instagram",
                    "relatable|simple|easy|digest|understand|fun|appeal",
                    "trust|professional|genuine|proof|reliable",
                    "free|consult|accessible|available",
                    "educat|learn|explain|information|terms")
themes <- data.frame(Theme = theme_labels, Keywords = theme_patterns)

# For every theme, count how many suggestions match its pattern. A single
# suggestion can count towards several themes.
theme_counts <- vapply(
  seq_len(nrow(themes)),
  function(i) sum(grepl(themes$Keywords[i], suggestions, ignore.case = TRUE)),
  integer(1)
)

# Keep only themes that were mentioned at least once, most frequent first.
theme_df <- data.frame(Theme = themes$Theme, Mentions = theme_counts) %>%
  filter(Mentions > 0) %>%
  arrange(desc(Mentions))

# Horizontal bar chart of theme mentions with the count printed beside each
# bar.
ggplot(theme_df, aes(x = reorder(Theme, Mentions), y = Mentions, fill = Theme)) +
  geom_col(color = "black", alpha = 0.8) +
  geom_text(aes(label = Mentions), hjust = -0.2, size = 5, fontface = "bold") +
  coord_flip() +
  labs(
    title = "Key Themes in Recommendations for Financial Companies",
    x = "",
    y = "Number of Mentions"
  ) +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

Sample Recommendations

# First ten respondents who actually wrote a suggestion, with their age
# group. The text is trimmed before the emptiness test so that answers made
# up only of whitespace are dropped too; a plain `!= ""` comparison lets
# them through, and they then appear as blank rows in the table.
recommendations <- data_clean %>%
  filter(
    !is.na(ConnectionSuggestions),
    trimws(ConnectionSuggestions) != ""
  ) %>%
  select(AgeGroup, ConnectionSuggestions) %>%
  head(10)

kable(recommendations,
      col.names = c("Age Group", "Suggestions for Companies"),
      caption = "Sample Respondent Suggestions")

Sample Respondent Suggestions
Age Group	Suggestions for Companies
{“ImportId”:“QID12”}	{“ImportId”:“QID11_TEXT”}
14-25	Be more relatable on social media.
14-25	To be more appealing and make learning about finance fun, not boring.
26-35	Social Media Marketing
14-25
36-45	social media like jumping on Tik Tok
14-25	target audience
14-25	Make financial issues and terms easier to digest and understand
26-35	Yes
14-25	Market more on social media

Summary Statistics

# Headline figures, written with cat() as markdown-style lines. The "top"
# entries are the first rows of the summaries built in earlier chunks.
cat("## Survey Overview\n\n")

## ## Survey Overview

cat("**Total Valid Responses:**", nrow(data_clean), "\n\n")

## **Total Valid Responses:** 25

cat("**Age Groups:**", toString(unique(data_clean[["AgeGroup"]])), "\n\n")

## **Age Groups:** {"ImportId":"QID12"}, 14-25, 26-35, 36-45

cat(
  "**Most Common Advice Source:**",
  source_summary[["AdviceCategory"]][1],
  "(", source_summary[["n"]][1], "responses )\n\n"
)

## **Most Common Advice Source:** Family/Friends ( 9 responses )

cat(
  "**Top Barrier:**",
  barrier_summary[["BarrierCategory"]][1],
  "(", barrier_summary[["n"]][1], "mentions )\n\n"
)

## **Top Barrier:** Cost/Money ( 11 mentions )

cat(
  "**Most Common Trust Sentiment:**",
  trust_summary[["TrustCategory"]][1],
  "(", trust_summary[["n"]][1], "responses )\n\n"
)

## **Most Common Trust Sentiment:** Other/Unclear ( 7 responses )

Marketing Analytics Survey Analysis

Maple Variance Group

October 29, 2025

Data Loading and Preparation

Demographics

Age Distribution

Trust in Financial Advice Sources

Primary Sources for Financial Advice

Barriers to Consulting Professional Advisors

AI Experience Summary

Misinformation Awareness

Key Recommendations from Respondents

Sample Recommendations

Summary Statistics