Issues & Ideology

1 Load Data

Candidate and text data:

# Load candidate/text data.
# The file is read through an explicit path rather than setwd(), so this step
# no longer changes the session's working directory as a side effect.
data_dir <- "/Users/annemariegreen/Library/CloudStorage/Box-Box/Mintt 2024 Direct Mail/Data analysis"
mintt_df <- readRDS(file.path(data_dir, "mintt_df.rds"))

Pilot Results (300 mailers):

pilot_dir <- "/Users/annemariegreen/Library/CloudStorage/Box-Box/Mintt 2024 Direct Mail/MinttCampaign_Python/S2"

### Call 1: folder-level (anon_text, endorsement, candidates) ###
lines_c1 <- readLines(file.path(pilot_dir, "Call 1", "pilot_300_call1_results.jsonl"), warn = FALSE)

# Parse each JSONL line into one row per mailer folder.
# The function-call arguments arrive as a JSON string nested inside the
# response, so they are decoded a second time with fromJSON().
pilot_call1 <- map_dfr(lines_c1, function(line) {
  tryCatch({
    record <- fromJSON(line)
    # custom_id looks like "folder-<id>"; keep only the id
    folder <- sub("^folder-", "", record$custom_id)
    args_str <- record$response$body$choices$message$function_call$arguments
    args <- fromJSON(args_str)
    tibble(
      folder = folder,
      anon_text = args$anon_text,
      endorsement = as.integer(args$endorsement),
      reasoning_c1 = args$reasoning,
      # Candidates are re-serialised to a JSON string so they fit in one cell
      candidates_json = as.character(toJSON(args$candidates, auto_unbox = TRUE))
    )
  }, error = function(e) {
    # Still best-effort (a placeholder row with NA folder is kept), but the
    # failure is reported instead of being swallowed silently.
    warning("Call 1: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})

### Call 2: folder-level, with issues array exploded ###
lines_c2 <- readLines(file.path(pilot_dir, "Call 2", "pilot_300_call2_results.jsonl"), warn = FALSE)

# Parse each JSONL line into one row per (folder, issue mention).
# A folder whose issues array is empty still contributes a single all-NA row.
pilot_call2 <- map_dfr(lines_c2, function(line) {
  tryCatch({
    record <- fromJSON(line)
    # custom_id looks like "folder-<id>"; keep only the id
    folder <- sub("^folder-", "", record$custom_id)
    args_str <- record$response$body$choices$message$function_call$arguments
    # simplifyVector = FALSE keeps `issues` as a list of lists, one per mention
    args <- fromJSON(args_str, simplifyVector = FALSE)
    if (length(args$issues) == 0) {
      return(tibble(folder = folder, reasoning_c2 = args$reasoning,
                    issue = NA_character_, candidate_code = NA_character_,
                    issue_text = NA_character_,
                    policy_input = NA_integer_, outcome = NA_integer_,
                    prospective = NA_integer_, retrospective = NA_integer_))
    }
    map_dfr(args$issues, function(iss) {
      tibble(
        folder = folder,
        reasoning_c2 = args$reasoning,
        issue = iss$issue,
        candidate_code = iss$candidate,
        issue_text = iss$text,
        policy_input = as.integer(iss$policy_input),
        outcome = as.integer(iss$outcome),
        prospective = as.integer(iss$prospective),
        retrospective = as.integer(iss$retrospective)
      )
    })
  }, error = function(e) {
    # Still best-effort (a placeholder row with NA folder is kept), but the
    # failure is reported instead of being swallowed silently.
    warning("Call 2: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})

### Call 3: issue-candidate level (ideology scores) ###
lines_c3 <- readLines(file.path(pilot_dir, "Call 3", "pilot_300_call3_results.jsonl"), warn = FALSE)

# Parse each JSONL line into one row per scored issue mention.
pilot_call3 <- map_dfr(lines_c3, function(line) {
  tryCatch({
    record <- fromJSON(line)
    # custom_id is split on "-"; the 2nd piece is used as the folder id and the
    # 4th as the issue index (presumably "folder-<id>-issue-<idx>" — TODO confirm)
    parts <- strsplit(record$custom_id, "-")[[1]]
    args_str <- record$response$body$choices$message$function_call$arguments
    args <- fromJSON(args_str)
    tibble(
      folder = parts[2],
      idx = as.integer(parts[4]),
      policy_ideology = as.integer(args$policy_ideology),
      policy_reasoning = args$policy_reasoning,
      framing_ideology = as.integer(args$framing_ideology),
      framing_reasoning = args$framing_reasoning,
      policy_confidence = as.integer(args$policy_confidence),
      # NOTE(review): unlike policy_confidence this is not coerced to integer —
      # confirm whether that is intentional.
      framing_confidence = args$framing_confidence
    )
  }, error = function(e) {
    # Still best-effort (a placeholder row with NA folder is kept), but the
    # failure is reported instead of being swallowed silently.
    warning("Call 3: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})

# Merge Call 2 + Call 3.
# Call 2 rows get a per-folder index starting at 0, which is the key used to
# match them to the idx carried by the Call 3 results.
pilot_call2 <- pilot_call2 %>%
  group_by(folder) %>%
  mutate(idx = seq_len(n()) - 1L) %>%
  ungroup()

pilot_issues <- left_join(pilot_call2, pilot_call3, by = c("folder", "idx"))

# Merge Call 1 + issues: folder-level fields are repeated on every issue row
pilot_all <- left_join(pilot_call1, pilot_issues, by = "folder")

# Standardize issue names (consolidate near-duplicates from AI extraction).
# Exact-name replacements live in a lookup table; the two pattern-based rules
# (sexualized crime, renewable energy) are handled separately below.
issue_recode <- c(
  # Economy variants
  "Economy" = "Economy (generic reference)",
  # Environment variants
  "Environment" = "Environment (generic reference)",
  "Environment/Energy" = "Environment (generic reference)",
  # Healthcare variants
  "Healthcare" = "Healthcare (not prescription drugs)",
  "Universal healthcare/healthcare for all" = "Healthcare (not prescription drugs)",
  "Affordable Care Act / Obamacare / Health Care Law / etc." = "Healthcare (not prescription drugs)",
  # Military variants
  "Military (generic reference)" = "Military",
  # Foreign Policy variants
  "Foreign Policy (generic reference)" = "Foreign Policy",
  # Housing variants
  "Housing" = "Housing / Sub-prime Mortgages",
  # Infrastructure variants
  "Infrastructure" = "Transportation / Infrastructure",
  # Seniors variants
  "Seniors" = "Seniors (not Medicare)",
  # Opioids / narcotics variants
  "Opioids / Rx Drug Abuse" = "Fentanyl / Opioids",
  "Narcotics/Illegal Drugs" = "Fentanyl / Opioids",
  # Public Lands
  "Public Lands" = "Public Lands Access",
  # Energy variants
  "Oil and natural gas drilling" = "Energy Policy",
  "Fracking" = "Energy Policy",
  # Overtime Pay
  "Overtime Pay / Employment/Jobs" = "Employment/Jobs",
  "Overtime Pay" = "Employment/Jobs"
)

pilot_all <- pilot_all %>%
  mutate(issue = case_when(
    str_detect(issue, regex("sexualized crime", ignore_case = TRUE)) ~ "Sexualized Crime",
    issue %in% names(issue_recode) ~ unname(issue_recode[issue]),
    str_detect(issue, regex("renewable energy", ignore_case = TRUE)) ~ "Energy Policy",
    # Anything not listed (including NA) is left as-is
    TRUE ~ issue
  ))

# Exclude folder with incorrect AI coding. This now happens BEFORE the merge so
# that pilot_merged and pilot_all describe the same set of mailers in the
# summary table below (previously the merge still contained the excluded folder).
# Note: `folder != ...` also drops rows whose folder is NA.
pilot_all <- pilot_all %>% filter(folder != "114721283581")

# Merge pilot results with mintt_df by folder (keeps only folders present in both)
pilot_merged <- mintt_df %>%
  inner_join(pilot_all, by = "folder")

# Headline counts for each stage of the pipeline
tibble(
  Metric = 
    c("Total mailers (Call 1)", 
  "Total issues (Call 2)", 
  "Total issues w/ Ideology score (Call 3)",
  "Unique issues (cleaned)", 
  "Folder has merged Mintt candidate data",
  "Issue has Mintt candidate data (merged w/ mintt_df)"),
  Value = 
    c(nrow(pilot_call1), 
  nrow(pilot_call2), 
  nrow(pilot_call3),
  n_distinct(pilot_all$issue, na.rm = TRUE),
  paste(n_distinct(pilot_merged$folder), "of", n_distinct(pilot_all$folder)),
  nrow(pilot_merged)
  )
) %>% kable()

Metric	Value
Total mailers (Call 1)	301
Total issues (Call 2)	1446
Total issues w/ Ideology score (Call 3)	1418
Unique issues (cleaned)	72
Folder has merged Mintt candidate data	229 of 300
Issue has Mintt candidate data (merged w/ mintt_df)	1219

1.1 NOTES:

Issue Aggregation: I had the code collapse the following issue categories. The AI still took some liberty with issue names that differ from our list (even though we specified not to), but it’s not egregious. I’ll see if I can correct the prompt to get it to do this less. Here’s what I aggregated:

“Sexualized crime” variants → Sexualized Crime
“Economy” → Economy (generic reference)
“Environment”, “Environment/Energy” → Environment (generic reference)
“Healthcare”, “Universal healthcare/healthcare for all”, “Affordable Care Act / Obamacare / Health Care Law / etc.” → Healthcare (not prescription drugs)
“Military (generic reference)” → Military
“Foreign Policy (generic reference)” → Foreign Policy
“Housing” → Housing / Sub-prime Mortgages
“Infrastructure” → Transportation / Infrastructure
“Seniors” → Seniors (not Medicare)
“Opioids / Rx Drug Abuse”, “Narcotics/Illegal Drugs” → Fentanyl / Opioids
“Public Lands” → Public Lands Access
“Renewable energy” variants, “Oil and natural gas drilling”, “Fracking” → Energy Policy
“Overtime Pay / Employment/Jobs”, “Overtime Pay” → Employment/Jobs

Mintt Missing Candidate Data: Only 229/301 mailers had matches in the candidates data, probably because of Mintt’s 9000 missing candidates data fields.

Note: Folder 114721283581 is excluded — it is a voter guide with many candidates across multiple races, and the AI output does not map correctly to the A/B candidate framework.

1.2 Set partisanship of mailer

Derive supported and opposing party from Call 1 candidate codes. Any Candidate A* = supp_party, any Candidate B* = opp_party, matched against known candidates.

# Overview of candidates.
# Turn one mailer's candidates JSON string into a tibble with one row per
# candidate; an empty array yields a zero-row tibble with the same columns.
parse_candidates <- function(json_str) {
  entries <- fromJSON(json_str, simplifyVector = FALSE)
  if (length(entries) == 0) {
    return(tibble(cand_name = character(), cand_code = character()))
  }
  map_dfr(entries, function(entry) {
    tibble(cand_name = entry$cand_name, cand_code = entry$cand_code)
  })
}

# One row per (folder, candidate mentioned)
pilot_candidates <- pilot_call1 %>%
  filter(!is.na(candidates_json)) %>%
  mutate(candidates = map(candidates_json, parse_candidates)) %>%
  unnest(candidates) %>%
  select(folder, cand_name, cand_code)

# Number of rows per candidate name, most frequent first
candidate_summary <- pilot_candidates %>%
  count(cand_name, sort = TRUE, name = "Mailers Mentioning") %>%
  rename(Candidate = cand_name)

# Known candidate-party mapping. This was built for the imputed data in
# "Archive", which I didn't end up using.
# Names are lower-case substrings to look for; parties are "R" or "D".
known_parties <- tibble(
  cand_name_clean = c(
    "donald trump", "kamala harris", "tim sheehy", "jon tester",
    "swanson", "mccormick", "hovde", "tammy baldwin",
    "bob casey", "j.d. vance", "tim walz"
  ),
  cand_party = c(
    "R", "D", "R", "D",
    "R", "R", "R", "D",
    "D", "R", "D"
  )
)

# Match candidate name to known party.
#
# name: a single candidate name (character scalar).
# Returns the party of the first entry in `known_parties` whose
# `cand_name_clean` occurs as a literal substring of the lower-cased name,
# or NA_character_ when nothing matches.
#
# A missing (NA) or non-scalar name now returns NA_character_; previously an
# NA name made the `if` condition NA and aborted the whole map_chr() call.
match_party <- function(name) {
  if (length(name) != 1L || is.na(name)) {
    return(NA_character_)
  }
  name_lower <- str_to_lower(name)
  # str_detect() is vectorised over the pattern, so all known names are
  # checked in one call; which(...)[1] keeps the "first match wins" rule.
  hits <- str_detect(name_lower, fixed(known_parties$cand_name_clean))
  if (any(hits)) {
    known_parties$cand_party[which(hits)[1]]
  } else {
    NA_character_
  }
}

# Build folder-level lookup: supp_party from any Candidate A*, opp_party from any Candidate B*.
# Only candidates with a known party and an A/B code contribute; if a folder
# has more than one party for the same role, the first one is kept.
folder_parties <- pilot_candidates %>%
  filter(folder != "114721283581") %>%
  mutate(
    cand_party = map_chr(cand_name, match_party),
    role = case_when(
      str_detect(cand_code, "^Candidate A") ~ "supp_party",
      str_detect(cand_code, "^Candidate B") ~ "opp_party"
    )
  ) %>%
  filter(!is.na(cand_party), !is.na(role)) %>%
  distinct(folder, role, cand_party) %>%
  pivot_wider(names_from = role, values_from = cand_party, values_fn = first)

# Ensure folder_parties has both columns even if pivot_wider didn't produce them
for (party_col in c("supp_party", "opp_party")) {
  if (!party_col %in% names(folder_parties)) {
    folder_parties[[party_col]] <- NA_character_
  }
}

# Add party and issue-role columns directly to pilot_all.
# party_flip maps a party to the other major party.
party_flip <- c(D = "R", R = "D")

pilot_all <- pilot_all %>%
  left_join(folder_parties, by = "folder") %>%
  mutate(
    # Folders with no matched candidate get an explicit "Unknown"
    supp_party = coalesce(supp_party, "Unknown"),
    opp_party = coalesce(opp_party, "Unknown"),
    # When only one side is known, the other side is taken to be the opposite
    # party. supp_party is filled first; opp_party then sees the updated value.
    supp_party = if_else(
      supp_party == "Unknown" & opp_party %in% names(party_flip),
      unname(party_flip[opp_party]),
      supp_party
    ),
    opp_party = if_else(
      opp_party == "Unknown" & supp_party %in% names(party_flip),
      unname(party_flip[supp_party]),
      opp_party
    ),
    # Issue is attributed to the supported (A*) or opposed (B*) candidate
    is_supp_issue = str_detect(candidate_code, "^Candidate A"),
    is_opp_issue = str_detect(candidate_code, "^Candidate B")
  )

# Recreate pilot_merged now that pilot_all has party/issue-role columns.
# mintt_party collapses Mintt's partisanship label to "R" / "D"; every other
# value (including missing) becomes "Other".
party_codes <- c("Republican/Conservative" = "R", "Democratic/Progressive" = "D")

pilot_merged <- inner_join(mintt_df, pilot_all, by = "folder") %>%
  mutate(
    mintt_party = coalesce(unname(party_codes[as.character(mintt_partisanship)]), "Other")
  )
mintt_tbl <- table(pilot_merged$mintt_party, useNA = "ifany")

# Row counts before/after the merge plus the party breakdown of merged rows
tibble(
  Metric = c(
    "pilot_all rows",
    "Pilot + mintt_df rows",
    paste0("mintt_party: ", names(mintt_tbl))
  ),
  Value = c(
    as.character(nrow(pilot_all)),
    as.character(nrow(pilot_merged)),
    as.character(mintt_tbl)
  )
) %>%
  kable()

Metric	Value
pilot_all rows	1410
Pilot + mintt_df rows	1219
mintt_party: D	448
mintt_party: Other	49
mintt_party: R	722

1.3 Full Dataset

# One row per distinct (folder, text) pair from mintt_df, with the text renamed
mintt_text <- mintt_df %>% distinct(folder, original_text = text)

# Display copy of pilot_all: long free-text columns dropped, remaining
# character columns truncated to 150 characters
pilot_all_display <- pilot_all %>%
  left_join(mintt_text, by = "folder") %>%
  select(-anon_text, -reasoning_c1, -reasoning_c2, -candidates_json) %>%
  mutate(across(where(is.character), ~str_trunc(., 150)))

# DT column targets are 0-based, but datatable() shows row names by default and
# they occupy index 0. The 1-based R position of a data column therefore already
# equals its JS index; subtracting 1 widened the column to the LEFT of issue_text.
issue_text_idx <- which(colnames(pilot_all_display) == "issue_text")

datatable(pilot_all_display,
          caption = "Full pilot_all dataset",
          options = list(
            pageLength = 5,
            columnDefs = list(
              list(width = '500px', targets = issue_text_idx)
            )
          ))

1.4 AI bad ideology scoring

1.5 Check for discrepancies in Mintt’s Candidate Data

Compare mintt_partisanship (sender-level from Mintt’s mailers table) with supp_party derived from Call 1 A/B candidate codes.

# Mintt's party per folder (one row per distinct folder / mintt_party pair)
mintt_party_lookup <- distinct(pilot_merged, folder, mintt_party)

# Compare with our supp_party, keeping only folders where both sources give a
# definite party (ours is not "Unknown", Mintt's is R or D)
party_comparison <- pilot_all %>%
  distinct(folder, supp_party) %>%
  inner_join(mintt_party_lookup, by = "folder") %>%
  filter(supp_party != "Unknown", mintt_party %in% c("R", "D")) %>%
  mutate(match = supp_party == mintt_party)

n_compared <- nrow(party_comparison)
n_match <- sum(party_comparison$match)
n_mismatch <- sum(!party_comparison$match)
mismatch_pct <- round(mean(!party_comparison$match) * 100, 1)

tibble(
  Metric = c("Folders with both party sources", "Match", "Mismatch", "Mismatch rate"),
  Value = c(
    as.character(n_compared),
    as.character(n_match),
    as.character(n_mismatch),
    paste0(mismatch_pct, "%")
  )
) %>%
  kable()

Metric	Value
Folders with both party sources	125
Match	125
Mismatch	0
Mismatch rate	0%

2 Ideology Analysis w/ Mintt Candidate Data

2.1 A1: Policy Ideology: Ridgeline Plots by Party

Two densities per issue using pilot_merged: how a party talks about its own candidates (A, solid fill) vs. how the opposing party talks about those same candidates (B, dashed outline). Party comes from mintt_partisanship (sender-level). Ideology is the policy/position ideology, not the framing ideology.

Top 10 issues are the 10 issues in which candidates position their own party [Republican/Democrat] the most, the bottom 5 are the top 5 issues in which the other party references [Republicans/Democrats] positions.

What jumps out to me:

Republican plot: Education/Schools (I’m curious what kinds of issues are discussed: funding vs. cultural issues in schools), Social Security
Democratic plot: Crime, Immigration (not discussed by Dems at all), the R blips towards right-leaning ideology (need to look into data to see if it was incorrect scoring).

Clear problems with incorrect scoring in the data:

Republican plot: - Government Spending and Taxes (I checked some of these, and the AI output was definitely mistaken.)

Democratic plot:

# Shared ridge plot helper.
# Adds `ideology_numeric`: the score in column `col`, with the two valence
# codes moved next to the 1-9 scale for plotting (-99 -> -1, 99 -> 0).
# Missing scores stay missing.
prep_ideology <- function(data, col = "policy_ideology") {
  mutate(
    data,
    ideology_numeric = if_else(
      .data[[col]] == -99,
      -1,
      if_else(.data[[col]] == 99, 0, as.numeric(.data[[col]]))
    )
  )
}

# X axis shared by the ridge plots: positions -1 and 0 carry the relocated
# valence codes, 1-9 are the ideology scale itself.
ridge_breaks <- c(-1, 0, 1:9)
ridge_labels <- c("Bad\n(-99)", "Good\n(99)", as.character(1:9))
ridge_x_scale <- scale_x_continuous(breaks = ridge_breaks, labels = ridge_labels)

# Ridgeline plot of ideology scores for one party's candidates.
#
# data:           issue-level data with mintt_party, is_supp_issue,
#                 is_opp_issue, issue and the column named by `ideology_col`
# party_label:    party whose candidates are plotted ("R" or "D"; any other
#                 value is plotted without an opposing-party series)
# color:          fill/outline colour for the party's own mailers
# ideology_col:   name of the score column to plot
# ideology_label: word used in the title and axis label
#
# Returns a ggplot object.
#
# The series labels are fixed up front and the manual scales are keyed on them.
# Previously the scale names were taken positionally from whatever values were
# present in the data, so an empty own/opposing subset shifted the colours,
# fills and linetypes onto the wrong series.
make_ridge_plot_merged <- function(data, party_label, color, ideology_col = "policy_ideology", ideology_label = "Policy") {
  # Scalar lookup of the other major party (NA for anything but R / D)
  opposite <- if (isTRUE(party_label == "R")) {
    "D"
  } else if (isTRUE(party_label == "D")) {
    "R"
  } else {
    NA_character_
  }
  has_opposite <- !is.na(opposite)

  own_label <- paste0(party_label, " about own candidates")
  opp_label <- if (has_opposite) {
    paste0(opposite, " about ", party_label, " candidates")
  } else {
    NULL
  }
  source_levels <- c(own_label, opp_label)

  # Own party's messaging about their candidates (A* issues from own mailers)
  own_data <- data %>%
    filter(mintt_party == party_label, is_supp_issue,
           !is.na(issue), !is.na(.data[[ideology_col]])) %>%
    mutate(source = own_label)

  # Opposing party's messaging about this party's candidates (B* issues from opponent mailers)
  opp_data <- if (has_opposite) {
    data %>%
      filter(mintt_party == opposite, is_opp_issue,
             !is.na(issue), !is.na(.data[[ideology_col]])) %>%
      mutate(source = opp_label)
  } else {
    tibble()
  }

  # Top 10 issues from own messaging + top 5 from opposing (union to avoid duplicates)
  top_own <- own_data %>% count(issue, sort = TRUE) %>% slice_head(n = 10) %>% pull(issue)
  top_opp <- if (nrow(opp_data) > 0) {
    opp_data %>% count(issue, sort = TRUE) %>% slice_head(n = 5) %>% pull(issue)
  } else {
    character()
  }
  top_iss <- unique(c(top_own, top_opp))

  # Reverse the levels so the most frequent issue is drawn at the top
  plot_data <- bind_rows(own_data, opp_data) %>%
    filter(issue %in% top_iss) %>%
    prep_ideology(col = ideology_col) %>%
    mutate(issue = factor(issue, levels = rev(top_iss)))

  # Per-issue, per-source means for vertical reference lines (excluding valence)
  means_df <- plot_data %>%
    filter(ideology_numeric >= 1, ideology_numeric <= 9) %>%
    group_by(issue, source) %>%
    summarise(mean_val = mean(ideology_numeric, na.rm = TRUE), .groups = "drop")

  opp_color <- if (has_opposite) {
    c("D" = "dodgerblue", "R" = "firebrick1")[[opposite]]
  } else {
    "grey50"
  }

  # Aesthetics keyed by series label: own = filled/solid, opposing = outline/dashed
  keep <- seq_along(source_levels)
  fill_values <- setNames(c(color, "transparent")[keep], source_levels)
  color_values <- setNames(c(color, opp_color)[keep], source_levels)
  linetype_values <- setNames(c("solid", "dashed")[keep], source_levels)

  ggplot(plot_data, aes(x = ideology_numeric, y = issue,
                        fill = source, color = source, linetype = source)) +
    geom_density_ridges(alpha = 0.4, bandwidth = 0.6) +
    # Short vertical tick at each series mean, anchored on the issue's baseline
    geom_segment(data = means_df,
      aes(x = mean_val, xend = mean_val,
          y = as.numeric(issue) - 0.1, yend = as.numeric(issue) + 0.5,
          color = source, linetype = source),
      linewidth = 0.4, alpha = 0.7, inherit.aes = FALSE, show.legend = FALSE) +
    scale_fill_manual(values = fill_values) +
    scale_color_manual(values = color_values) +
    scale_linetype_manual(values = linetype_values) +
    ridge_x_scale +
    labs(
      title = paste0(ideology_label, " Ideology — ", party_label, " Candidates (Mintt data)"),
      subtitle = paste0("Solid = ", party_label, "'s own mailers | Dashed = opposing party's mailers\nVertical lines = mean score"),
      x = paste0(ideology_label, " Ideology (-99 = bad, 99 = good; 1 = most liberal, 9 = most conservative)"),
      y = NULL,
      fill = NULL, color = NULL, linetype = NULL
    ) +
    theme_minimal() +
    theme(legend.position = "bottom")
}

2.1.1 Republican Candidates

# Diagnostic: D mailers talking about R candidates (Candidate B) on Gov. Spending & Taxes
gov_tax_rows <- pilot_merged %>%
  filter(
    mintt_party == "D",
    is_opp_issue,
    str_detect(issue, regex("government spending|taxes", ignore_case = TRUE))
  ) %>%
  distinct(folder, issue, candidate_code, mintt_party, mintt_partisanship,
           policy_ideology, issue_text)

datatable(
  gov_tax_rows,
  caption = "Gov. Spending & Taxes — D mailers, Candidate B (R candidates being attacked)",
  options = list(pageLength = 5)
)

# Policy-ideology ridge plot for Republican candidates
make_ridge_plot_merged(data = pilot_merged, party_label = "R", color = "firebrick1")

2.1.2 Democratic Candidates

# Policy-ideology ridge plot for Democratic candidates
make_ridge_plot_merged(data = pilot_merged, party_label = "D", color = "dodgerblue")

2.2 A2: Policy Ideology: Summary Table

# Format a mean and standard deviation for display.
#   m missing/NaN -> "—"
#   s missing     -> the mean alone, rounded to 2 decimals
#   otherwise     -> "mean (sd)", both rounded to 2 decimals
fmt_ms <- function(m, s) {
  mean_txt <- as.character(round(m, 2))
  mean_sd_txt <- paste0(round(m, 2), " (", round(s, 2), ")")
  if_else(
    is.na(m) | is.nan(m),
    "—",
    if_else(is.na(s), mean_txt, mean_sd_txt)
  )
}

# Deduplicate to one row per folder / issue / candidate_code, dropping rows
# with no issue (mintt_party is folder-level, not candidate-level).
# NOTE(review): two separate mentions of the same issue for the same candidate
# in one mailer also collapse to the first row here — confirm that is intended.
merged_dedup <- pilot_merged %>%
  filter(!is.na(issue)) %>%
  distinct(folder, issue, candidate_code, .keep_all = TRUE)

# Denominators for the summary tables
total_rows_merged <- nrow(merged_dedup)
total_pilot_folders <- pilot_merged %>% distinct(folder) %>% nrow()

# Scores on the 1-9 ideology scale among the rows selected by `keep`
# (the valence codes -99 / 99 fall outside that range and are excluded).
scores_on_scale <- function(score, keep) {
  score[keep & score >= 1 & score <= 9]
}

# Per-issue summary of policy ideology.
#   *_self  : a party's own mailers about the candidate they support (A*)
#   *_other : the opposing party's mailers about that party's candidates (B*)
# Means/SDs use 1-9 scores only and are formatted as "mean (sd)".
summary_tbl_merged <- merged_dedup %>%
  group_by(issue) %>%
  summarise(
    Count = n(),
    Pct_Mailers = round(n_distinct(folder) / total_pilot_folders * 100, 1),
    Pct_Total = round(n() / total_rows_merged * 100, 1),
    R_self = fmt_ms(
      mean(scores_on_scale(policy_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
      sd(scores_on_scale(policy_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE)
    ),
    D_self = fmt_ms(
      mean(scores_on_scale(policy_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
      sd(scores_on_scale(policy_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE)
    ),
    # R candidates as described in D mailers
    R_other = fmt_ms(
      mean(scores_on_scale(policy_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
      sd(scores_on_scale(policy_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE)
    ),
    # D candidates as described in R mailers
    D_other = fmt_ms(
      mean(scores_on_scale(policy_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
      sd(scores_on_scale(policy_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE)
    ),
    # Share of scored mentions that landed on the 1-9 scale
    Pct_Ideological = round(sum(policy_ideology >= 1 & policy_ideology <= 9, na.rm = TRUE) /
                              sum(!is.na(policy_ideology)) * 100, 1),
    # Share of each party's scored mentions coded as pure valence (-99 / 99);
    # max(..., 1) avoids dividing by zero when a party has no scored mentions
    R_Pct_Valence = round(sum(mintt_party == "R" & policy_ideology %in% c(-99, 99), na.rm = TRUE) /
                            max(sum(mintt_party == "R" & !is.na(policy_ideology)), 1) * 100, 1),
    D_Pct_Valence = round(sum(mintt_party == "D" & policy_ideology %in% c(-99, 99), na.rm = TRUE) /
                            max(sum(mintt_party == "D" & !is.na(policy_ideology)), 1) * 100, 1),
    .groups = "drop"
  ) %>%
  select(issue, Count, Pct_Mailers, Pct_Total, R_self, D_self, R_other, D_other,
         Pct_Ideological, R_Pct_Valence, D_Pct_Valence) %>%
  arrange(desc(Count))

datatable(
  summary_tbl_merged,
  colnames = c("Issue", "Count", "% Mailers", "% of Total",
               "R Self Avg (SD)", "D Self Avg (SD)",
               "R Other Avg (SD)", "D Other Avg (SD)",
               "% Ideological (1-9)", "R % Valence", "D % Valence"),
  caption = "Policy Ideology — Issue Summary by mintt_partisanship",
  options = list(pageLength = 15)
)

2.3 B1: Framing Ideology: Ridgeline Plots by Party

2.3.1 Republican Candidates

# Framing-ideology ridge plot for Republican candidates
make_ridge_plot_merged(
  data = pilot_merged,
  party_label = "R",
  color = "firebrick1",
  ideology_col = "framing_ideology",
  ideology_label = "Framing"
)

2.3.2 Democratic Candidates

# Framing-ideology ridge plot for Democratic candidates
make_ridge_plot_merged(
  data = pilot_merged,
  party_label = "D",
  color = "dodgerblue",
  ideology_col = "framing_ideology",
  ideology_label = "Framing"
)

2.4 B2: Framing Ideology: Summary Table

# Scores on the 1-9 ideology scale among the rows selected by `keep`
# (the valence codes -99 / 99 fall outside that range and are excluded).
scores_on_scale <- function(score, keep) {
  score[keep & score >= 1 & score <= 9]
}

# Per-issue summary of framing ideology.
#   *_self  : a party's own mailers about the candidate they support (A*)
#   *_other : the opposing party's mailers about that party's candidates (B*)
# Means/SDs use 1-9 scores only and are formatted as "mean (sd)".
summary_tbl_framing <- merged_dedup %>%
  group_by(issue) %>%
  summarise(
    Count = n(),
    Pct_Mailers = round(n_distinct(folder) / total_pilot_folders * 100, 1),
    Pct_Total = round(n() / total_rows_merged * 100, 1),
    R_self = fmt_ms(
      mean(scores_on_scale(framing_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
      sd(scores_on_scale(framing_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE)
    ),
    D_self = fmt_ms(
      mean(scores_on_scale(framing_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
      sd(scores_on_scale(framing_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE)
    ),
    # R candidates as described in D mailers
    R_other = fmt_ms(
      mean(scores_on_scale(framing_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
      sd(scores_on_scale(framing_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE)
    ),
    # D candidates as described in R mailers
    D_other = fmt_ms(
      mean(scores_on_scale(framing_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
      sd(scores_on_scale(framing_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE)
    ),
    # Share of scored mentions that landed on the 1-9 scale
    Pct_Ideological = round(sum(framing_ideology >= 1 & framing_ideology <= 9, na.rm = TRUE) /
                              sum(!is.na(framing_ideology)) * 100, 1),
    # Share of each party's scored mentions coded as pure valence (-99 / 99);
    # max(..., 1) avoids dividing by zero when a party has no scored mentions
    R_Pct_Valence = round(sum(mintt_party == "R" & framing_ideology %in% c(-99, 99), na.rm = TRUE) /
                            max(sum(mintt_party == "R" & !is.na(framing_ideology)), 1) * 100, 1),
    D_Pct_Valence = round(sum(mintt_party == "D" & framing_ideology %in% c(-99, 99), na.rm = TRUE) /
                            max(sum(mintt_party == "D" & !is.na(framing_ideology)), 1) * 100, 1),
    .groups = "drop"
  ) %>%
  select(issue, Count, Pct_Mailers, Pct_Total, R_self, D_self, R_other, D_other,
         Pct_Ideological, R_Pct_Valence, D_Pct_Valence) %>%
  arrange(desc(Count))

datatable(
  summary_tbl_framing,
  colnames = c("Issue", "Count", "% Mailers", "% of Total",
               "R Self Avg (SD)", "D Self Avg (SD)",
               "R Other Avg (SD)", "D Other Avg (SD)",
               "% Ideological (1-9)", "R % Valence", "D % Valence"),
  caption = "Framing Ideology — Issue Summary by mintt_partisanship",
  options = list(pageLength = 15)
)

3 2 by 2

# Number of issue mentions flagged with each classification, by mailer party
# (D and R only). A flag counts when its value is 1.
category_levels <- c("Policy Inputs", "Outcomes", "Retrospective", "Prospective")

class_counts <- merged_dedup %>%
  filter(mintt_party %in% c("D", "R")) %>%
  group_by(mintt_party) %>%
  summarise(
    across(c(policy_input, outcome, retrospective, prospective),
           ~sum(.x == 1, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  rename(
    `Policy Inputs` = policy_input,
    Outcomes = outcome,
    Retrospective = retrospective,
    Prospective = prospective
  ) %>%
  pivot_longer(-mintt_party, names_to = "Category", values_to = "Count") %>%
  mutate(Category = factor(Category, levels = category_levels))

# Fill scale shared by both bar charts
party_fill <- scale_fill_manual(
  values = c("D" = "dodgerblue", "R" = "firebrick1"),
  labels = c("D" = "Democratic", "R" = "Republican")
)

ggplot(class_counts, aes(x = Category, y = Count, fill = mintt_party)) +
  geom_col(position = "dodge", alpha = 0.8) +
  party_fill +
  labs(title = "Issue Mention Classifications by Party",
       x = NULL, y = "Number of Issue Mentions", fill = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Each party's counts rescaled to sum to 1 across the four categories.
# NOTE(review): the denominator is the sum of the four flag counts, not the
# number of issue mentions, so a mention carrying several flags is counted
# more than once — confirm this matches the axis label.
class_props <- class_counts %>%
  group_by(mintt_party) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  ungroup()

ggplot(class_props, aes(x = Category, y = Proportion, fill = mintt_party)) +
  geom_col(position = "dodge", alpha = 0.8) +
  party_fill +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Issue Mention Classifications by Party (Proportion)",
       x = NULL, y = "Proportion of Total Issue Mentions", fill = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")

4 Candidates in Sample

# Headline counts for the candidate overview: distinct names and total mentions
n_unique_candidates <- nrow(candidate_summary)
n_candidate_mentions <- sum(candidate_summary$`Mailers Mentioning`)

tibble(
  Metric = c("Total unique candidates", "Total candidate mentions"),
  Value = c(n_unique_candidates, n_candidate_mentions)
) %>%
  kable()

Metric	Value
Total unique candidates	239
Total candidate mentions	562

# Browsable list of every candidate name with its mention count
datatable(
  candidate_summary,
  caption = "All Candidates Mentioned in Pilot Sample (Call 1)",
  options = list(pageLength = 5)
)

5 Archive

5.1 NEED TO FIX - Analysis 1a: Ideology Distribution by Party (Top Issues)

5.2 Analysis w/ Imputed Candidate Party

5.2.1 Analysis 1b: Ridgeline Plots by Party

Two densities per issue: how a party talks about its own candidates (A, solid fill) vs. how the opposing party talks about those same candidates (B, dashed outline). Scores of -99 and 99 are bucketed separately.

5.2.1.1 Republican Candidates

5.2.1.2 Democratic Candidates

5.2.1.3 Unknown

5.2.2 Analysis 2: Summary Table

6 Wrong-Direction Ideology Scoring

Issues where the opposing party scores candidates toward their own ideology rather than away from it — i.e., Ds score R candidates further LEFT than Rs score themselves, or Rs score D candidates further RIGHT than Ds score themselves. These likely indicate AI scoring errors.

The table shows all individual issue mentions from issues where the mean other-party score goes in the wrong direction relative to the self-party mean. self_mean = how that party scores itself on the issue; other_mean = how the opposing party scores them.

# Per-issue ideology means by perspective (1-9 scale only).
#   *_self_mean  : a party's own mailers about its supported candidate (A*)
#   *_other_mean : the opposing party's mailers about that party's candidates (B*)
issue_ideology_means <- merged_dedup %>%
  filter(between(policy_ideology, 1, 9)) %>%
  group_by(issue) %>%
  summarise(
    R_self_mean = mean(policy_ideology[mintt_party == "R" & is_supp_issue], na.rm = TRUE),
    R_other_mean = mean(policy_ideology[mintt_party == "D" & is_opp_issue], na.rm = TRUE),
    D_self_mean = mean(policy_ideology[mintt_party == "D" & is_supp_issue], na.rm = TRUE),
    D_other_mean = mean(policy_ideology[mintt_party == "R" & is_opp_issue], na.rm = TRUE),
    .groups = "drop"
  )

# Anomaly 1: Ds score R candidates further LEFT (lower) than Rs score themselves.
# Issues lacking either mean (NaN) are skipped.
r_anomaly_issues <- issue_ideology_means %>%
  filter(!is.nan(R_self_mean), !is.nan(R_other_mean)) %>%
  filter(R_other_mean < R_self_mean)

# Individual D-mailer mentions of R candidates on those issues
r_anomaly_rows <- merged_dedup %>%
  filter(mintt_party == "D", is_opp_issue, between(policy_ideology, 1, 9)) %>%
  inner_join(select(r_anomaly_issues, issue, R_self_mean, R_other_mean), by = "issue") %>%
  mutate(
    anomaly = "D scores R further LEFT than R self",
    self_mean = round(R_self_mean, 2),
    other_mean = round(R_other_mean, 2)
  )

# Anomaly 2: Rs score D candidates further RIGHT (higher) than Ds score themselves
d_anomaly_issues <- issue_ideology_means %>%
  filter(!is.nan(D_self_mean), !is.nan(D_other_mean)) %>%
  filter(D_other_mean > D_self_mean)

# Individual R-mailer mentions of D candidates on those issues
d_anomaly_rows <- merged_dedup %>%
  filter(mintt_party == "R", is_opp_issue, between(policy_ideology, 1, 9)) %>%
  inner_join(select(d_anomaly_issues, issue, D_self_mean, D_other_mean), by = "issue") %>%
  mutate(
    anomaly = "R scores D further RIGHT than D self",
    self_mean = round(D_self_mean, 2),
    other_mean = round(D_other_mean, 2)
  )

# Stack both anomaly types and keep only the columns used downstream
wrong_dir <- bind_rows(r_anomaly_rows, d_anomaly_rows) %>%
  select(anomaly, folder, idx, issue, candidate_code, mintt_party,
         policy_ideology, self_mean, other_mean, policy_reasoning, issue_text) %>%
  mutate(issue_text = as.character(issue_text))

# Load all rerun CSVs (one file per Call 3 rerun) from the "Call 3 Rerun" folder
rerun_dir <- file.path(pilot_dir, "Call 3 Rerun")
read_rerun <- function(file_name) {
  read_csv(file.path(rerun_dir, file_name), show_col_types = FALSE)
}

rerun_csv1 <- read_rerun("call_3_rerun_results.csv")
rerun_csv2 <- read_rerun("call_3_rerun_results_test2.csv")
rerun_csv3 <- read_rerun("call_3_rerun_results_test3.csv")
rerun_csv4 <- read_rerun("call_3_rerun_results_test4.csv")
rerun_csv5 <- read_rerun("call_3_rerun_results_test5.csv")

# Helper: join rerun CSV onto wrong_dir scaffold and build a run dataframe.
#
# rerun_df: one rerun's results, with folder, issue, candidate,
#           policy_ideology and policy_reasoning columns
# run_num:  value stored in the Run column
#
# Returns the wrong_dir rows that have a match in the rerun (on folder, issue
# and candidate), carrying the rerun's score and reasoning instead of the
# original ones.
join_rerun <- function(rerun_df, run_num) {
  scaffold <- select(
    wrong_dir,
    anomaly, folder, idx, issue, candidate_code, mintt_party,
    self_mean, other_mean, issue_text
  )

  # folder is cast to character so it is comparable with wrong_dir$folder
  rerun_scores <- rerun_df %>%
    mutate(folder = as.character(folder)) %>%
    select(folder, issue, candidate, policy_ideology, policy_reasoning)

  scaffold %>%
    inner_join(rerun_scores,
               by = c("folder", "issue", "candidate_code" = "candidate")) %>%
    mutate(policy_ideology = as.integer(policy_ideology), Run = run_num)
}

# Helper: count wrong-direction infractions in a dataframe (1-9 scale only).
# A row counts when its anomaly label contains "RIGHT" and the score is >= 6,
# or contains "LEFT" and the score is <= 4.
count_wrong <- function(df) {
  scored <- filter(df, between(policy_ideology, 1, 9))
  too_right <- str_detect(scored$anomaly, "RIGHT") & scored$policy_ideology >= 6
  too_left <- str_detect(scored$anomaly, "LEFT") & scored$policy_ideology <= 4
  sum(too_right | too_left, na.rm = TRUE)
}

# Count for original run (Run 0): same rule as the reruns, applied to the
# original scores held in wrong_dir
count_wrong_orig <- count_wrong(wrong_dir)

# Build all rerun dataframes (the wrong_dir scaffold re-scored by each rerun)
run1 <- join_rerun(rerun_csv1, 1L)
run2 <- join_rerun(rerun_csv2, 2L)
run3 <- join_rerun(rerun_csv3, 3L)
run4 <- join_rerun(rerun_csv4, 4L)
run5 <- join_rerun(rerun_csv5, 5L)

# Infraction count per rerun, in run order
rerun_wrong_counts <- vapply(
  list(run1, run2, run3, run4, run5),
  count_wrong,
  integer(1)
)

tibble(
  Run = c("Run 0 (original)", paste("Run", 1:5)),
  `Wrong-Direction Infractions` = c(count_wrong_orig, rerun_wrong_counts)
) %>%
  kable(caption = "Wrong-Direction Infraction Count by Run")

Table 6.1: Wrong-Direction Infraction Count by Run
Run	Wrong-Direction Infractions
Run 0 (original)	43
Run 1	34
Run 2	9
Run 3	9
Run 4	11
Run 5	9

# Combine runs 3, 4, 5 with wrong-direction flag.
# A row is wrong only when its score is on the 1-9 scale AND points the wrong
# way for its anomaly type; missing or off-scale scores are never flagged.
wrong_dir_345 <- bind_rows(run3, run4, run5) %>%
  mutate(
    is_wrong = coalesce(
      between(policy_ideology, 1, 9) &
        ((str_detect(anomaly, "RIGHT") & policy_ideology >= 6) |
           (str_detect(anomaly, "LEFT") & policy_ideology <= 4)),
      FALSE
    )
  )

# Keep all 3 runs for any folder+issue+candidate where ANY run has a wrong score
wrong_combos <- wrong_dir_345 %>%
  filter(is_wrong) %>%
  distinct(folder, issue, candidate_code)

wrong_dir_combined <- wrong_dir_345 %>%
  semi_join(wrong_combos, by = c("folder", "issue", "candidate_code")) %>%
  select(Run, anomaly, folder, issue, candidate_code, mintt_party,
         policy_ideology, self_mean, other_mean, policy_reasoning, issue_text) %>%
  arrange(anomaly, issue, folder, Run)

# Row names are hidden, so the 0-based targets 9 and 10 are the last two
# columns (policy_reasoning and issue_text)
datatable(
  wrong_dir_combined,
  rownames = FALSE,
  colnames = c("Run", "Anomaly", "Folder", "Issue", "Candidate", "Mailer Party",
               "Policy Ideology", "Self Mean", "Other Mean", "AI Reasoning", "Issue Text"),
  caption = "Wrong-Direction Ideology (Runs 3-5): Opposing party scores candidates toward their own ideology",
  options = list(
    pageLength = 15,
    columnDefs = list(
      list(width = '400px', targets = c(9, 10))
    )
  )
)