Issues & Ideology
1 Load Data
Candidate and text data:
# Load candidate/text data
# NOTE(review): setwd() pins the session to a user-specific absolute path, and
# the relative readRDS() call below only resolves because of it.
setwd("/Users/annemariegreen/Library/CloudStorage/Box-Box/Mintt 2024 Direct Mail/Data analysis")
mintt_df <- readRDS("mintt_df.rds")Pilot Results (300 mailers):
pilot_dir <- "/Users/annemariegreen/Library/CloudStorage/Box-Box/Mintt 2024 Direct Mail/MinttCampaign_Python/S2"
### Call 1: folder-level (anon_text, endorsement, candidates) ###
# Each line of the results file is one JSON record. The function-call
# arguments are themselves a JSON string, so every record is parsed twice.
lines_c1 <- readLines(file.path(pilot_dir, "Call 1", "pilot_300_call1_results.jsonl"), warn = FALSE)
pilot_call1 <- map_dfr(lines_c1, function(line) {
  tryCatch({
    record <- fromJSON(line)
    # custom_id looks like "folder-<id>"; keep only the id
    folder <- sub("^folder-", "", record$custom_id)
    args_str <- record$response$body$choices$message$function_call$arguments
    args <- fromJSON(args_str)
    tibble(
      folder = folder,
      anon_text = args$anon_text,
      endorsement = as.integer(args$endorsement),
      reasoning_c1 = args$reasoning,
      # Candidates are re-serialised to JSON so they fit in a single cell
      candidates_json = as.character(toJSON(args$candidates, auto_unbox = TRUE))
    )
  }, error = function(e) {
    # Keep the NA placeholder row (row counts stay unchanged), but report the
    # failure instead of swallowing it silently.
    warning("Call 1: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})
### Call 2: folder-level, with issues array exploded ###
# One output row per extracted issue. A folder whose issues array is empty
# keeps a single row with NA issue fields so the folder is not lost.
lines_c2 <- readLines(file.path(pilot_dir, "Call 2", "pilot_300_call2_results.jsonl"), warn = FALSE)
pilot_call2 <- map_dfr(lines_c2, function(line) {
  tryCatch({
    record <- fromJSON(line)
    folder <- sub("^folder-", "", record$custom_id)
    args_str <- record$response$body$choices$message$function_call$arguments
    # simplifyVector = FALSE keeps `issues` as a list of per-issue lists
    args <- fromJSON(args_str, simplifyVector = FALSE)
    if (length(args$issues) == 0) {
      return(tibble(folder = folder, reasoning_c2 = args$reasoning,
                    issue = NA_character_, candidate_code = NA_character_,
                    issue_text = NA_character_,
                    policy_input = NA_integer_, outcome = NA_integer_,
                    prospective = NA_integer_, retrospective = NA_integer_))
    }
    map_dfr(args$issues, function(iss) {
      tibble(
        folder = folder,
        reasoning_c2 = args$reasoning,
        issue = iss$issue,
        candidate_code = iss$candidate,
        issue_text = iss$text,
        policy_input = as.integer(iss$policy_input),
        outcome = as.integer(iss$outcome),
        prospective = as.integer(iss$prospective),
        retrospective = as.integer(iss$retrospective)
      )
    })
  }, error = function(e) {
    # Keep the NA placeholder row (row counts stay unchanged), but report the
    # failure instead of swallowing it silently.
    warning("Call 2: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})
### Call 3: issue-candidate level (ideology scores) ###
# One record per (folder, issue index). Both keys are read from custom_id,
# split on "-": element 2 is the folder and element 4 is the issue index.
lines_c3 <- readLines(file.path(pilot_dir, "Call 3", "pilot_300_call3_results.jsonl"), warn = FALSE)
pilot_call3 <- map_dfr(lines_c3, function(line) {
  tryCatch({
    record <- fromJSON(line)
    parts <- strsplit(record$custom_id, "-")[[1]]
    args_str <- record$response$body$choices$message$function_call$arguments
    args <- fromJSON(args_str)
    tibble(
      folder = parts[2],
      idx = as.integer(parts[4]),
      policy_ideology = as.integer(args$policy_ideology),
      policy_reasoning = args$policy_reasoning,
      framing_ideology = as.integer(args$framing_ideology),
      framing_reasoning = args$framing_reasoning,
      policy_confidence = as.integer(args$policy_confidence),
      # NOTE(review): policy_confidence is cast to integer, but
      # framing_confidence is stored as returned. Confirm this is intended.
      framing_confidence = args$framing_confidence
    )
  }, error = function(e) {
    # Keep the NA placeholder row (row counts stay unchanged), but report the
    # failure instead of swallowing it silently.
    warning("Call 3: could not parse a result line: ", conditionMessage(e),
            call. = FALSE)
    tibble(folder = NA_character_)
  })
})
# Give every Call 2 row its 0-based position within its folder; Call 3 rows
# are keyed by folder plus that position.
call2_by_folder <- group_by(pilot_call2, folder)
pilot_call2 <- ungroup(mutate(call2_by_folder, idx = row_number() - 1L))
# Attach the ideology scores to each issue row
pilot_issues <- left_join(pilot_call2, pilot_call3, by = c("folder", "idx"))
# Attach the issue rows to the mailer-level Call 1 output
pilot_all <- left_join(pilot_call1, pilot_issues, by = "folder")
# Standardize issue names (consolidate near-duplicates from AI extraction).
# Exact-match aliases: name = label as extracted, value = canonical label.
issue_aliases <- c(
  # Economy variants
  "Economy" = "Economy (generic reference)",
  # Environment variants
  "Environment" = "Environment (generic reference)",
  "Environment/Energy" = "Environment (generic reference)",
  # Healthcare variants
  "Healthcare" = "Healthcare (not prescription drugs)",
  "Universal healthcare/healthcare for all" = "Healthcare (not prescription drugs)",
  "Affordable Care Act / Obamacare / Health Care Law / etc." = "Healthcare (not prescription drugs)",
  # Military variants
  "Military (generic reference)" = "Military",
  # Foreign Policy variants
  "Foreign Policy (generic reference)" = "Foreign Policy",
  # Housing variants
  "Housing" = "Housing / Sub-prime Mortgages",
  # Infrastructure variants
  "Infrastructure" = "Transportation / Infrastructure",
  # Seniors variants
  "Seniors" = "Seniors (not Medicare)",
  # Opioids / narcotics variants
  "Opioids / Rx Drug Abuse" = "Fentanyl / Opioids",
  "Narcotics/Illegal Drugs" = "Fentanyl / Opioids",
  # Public Lands
  "Public Lands" = "Public Lands Access",
  # Energy variants
  "Oil and natural gas drilling" = "Energy Policy",
  "Fracking" = "Energy Policy",
  # Overtime Pay
  "Overtime Pay / Employment/Jobs" = "Employment/Jobs",
  "Overtime Pay" = "Employment/Jobs"
)
pilot_all <- pilot_all %>%
  mutate(issue = case_when(
    # Pattern-based variants are checked first; no exact alias contains either phrase
    str_detect(issue, regex("sexualized crime", ignore_case = TRUE)) ~ "Sexualized Crime",
    str_detect(issue, regex("renewable energy", ignore_case = TRUE)) ~ "Energy Policy",
    issue %in% names(issue_aliases) ~ unname(issue_aliases[issue]),
    # Anything else (including NA) is left untouched
    TRUE ~ issue
  ))
# Exclude folder with incorrect AI coding. This runs BEFORE the merge so that
# pilot_merged and pilot_all describe the same set of folders. The `!=`
# comparison also drops rows whose folder is NA, because NA != x is NA.
pilot_all <- pilot_all %>% filter(folder != "114721283581")
# Merge pilot results with mintt_df by folder (keeps only folders in both)
pilot_merged <- mintt_df %>%
  inner_join(pilot_all, by = "folder")
# Pilot summary table. The first three rows are row counts of the per-call
# tables (pilot_call1/2/3); the remaining rows are computed from pilot_all and
# pilot_merged. Value mixes numbers and a pasted "x of y" string, so the
# column is coerced to character.
tibble(
Metric =
c("Total mailers (Call 1)",
"Total issues (Call 2)",
"Total issues w/ Ideology score (Call 3)",
"Unique issues (cleaned)",
"Folder has merged Mintt candidate data",
"Issue has Mintt candidate data (merged w/ mintt_df)"),
Value =
c(nrow(pilot_call1),
nrow(pilot_call2),
nrow(pilot_call3),
n_distinct(pilot_all$issue, na.rm = TRUE),
paste(n_distinct(pilot_merged$folder), "of", n_distinct(pilot_all$folder)),
nrow(pilot_merged)
)
) %>% kable()| Metric | Value |
|---|---|
| Total mailers (Call 1) | 301 |
| Total issues (Call 2) | 1446 |
| Total issues w/ Ideology score (Call 3) | 1418 |
| Unique issues (cleaned) | 72 |
| Folder has merged Mintt candidate data | 229 of 300 |
| Issue has Mintt candidate data (merged w/ mintt_df) | 1219 |
1.1 NOTES:
Issue Aggregation: I had the code collapse the following issue categories. The AI still took some liberty with issue names that differ from our list (even though we specified not to), but it’s not egregious. I’ll see if I can correct the prompt to get it to do this less. Here’s what I aggregated:
- “Sexualized crime” variants → Sexualized Crime
- “Economy” → Economy (generic reference)
- “Environment”, “Environment/Energy” → Environment (generic reference)
- “Healthcare”, “Universal healthcare/healthcare for all”, “Affordable Care Act / Obamacare / Health Care Law / etc.” → Healthcare (not prescription drugs)
- “Military (generic reference)” → Military
- “Foreign Policy (generic reference)” → Foreign Policy
- “Housing” → Housing / Sub-prime Mortgages
- “Infrastructure” → Transportation / Infrastructure
- “Seniors” → Seniors (not Medicare)
- “Opioids / Rx Drug Abuse”, “Narcotics/Illegal Drugs” → Fentanyl / Opioids
- “Public Lands” → Public Lands Access
- “Renewable energy” variants, “Oil and natural gas drilling”, “Fracking” → Energy Policy
- “Overtime Pay / Employment/Jobs”, “Overtime Pay” → Employment/Jobs
Mintt Missing Candidate Data: Only 229/301 mailers had matches in the candidates data, probably because of Mintt’s 9000 missing candidates data fields.
Note: Folder 114721283581 is excluded — it is a voter guide with many candidates across multiple races, and the AI output does not map correctly to the A/B candidate framework.
1.2 Set partisanship of mailer
Derive supported and opposing party from Call 1 candidate codes. Any Candidate A* = supp_party, any Candidate B* = opp_party, matched against known candidates.
# Overview of candidates
# Turn one candidates_json string into a two-column tibble (possibly 0 rows).
parse_candidates_json <- function(json_txt) {
  entries <- fromJSON(json_txt, simplifyVector = FALSE)
  if (length(entries) == 0) {
    return(tibble(cand_name = character(), cand_code = character()))
  }
  map_dfr(entries, function(entry) {
    tibble(cand_name = entry$cand_name, cand_code = entry$cand_code)
  })
}
# One row per (folder, candidate) listed in the Call 1 output
pilot_candidates <- pilot_call1 %>%
  filter(!is.na(candidates_json)) %>%
  mutate(candidates = map(candidates_json, parse_candidates_json)) %>%
  unnest(candidates) %>%
  select(folder, cand_name, cand_code)
# How often each candidate name occurs, most frequent first
candidate_summary <- pilot_candidates %>%
  count(cand_name, sort = TRUE, name = "Mailers Mentioning") %>%
  rename(Candidate = cand_name)
# Known candidate-party mapping. It was built for the imputed-party analysis in
# "Archive", but match_party() below still reads it, so it also feeds
# folder_parties and the supp_party / opp_party columns.
# cand_name_clean holds lower-case fragments that match_party() looks for
# inside the lower-cased candidate name; rows are tried top to bottom and the
# first hit wins.
known_parties <- tribble(
~cand_name_clean, ~cand_party,
"donald trump", "R",
"kamala harris", "D",
"tim sheehy", "R",
"jon tester", "D",
"swanson", "R",
"mccormick", "R",
"hovde", "R",
"tammy baldwin", "D",
"bob casey", "D",
"j.d. vance", "R",
"tim walz", "D"
)
# Match candidate name to known party
#
# name: a single candidate name (length-1 character).
# Returns the cand_party of the first known_parties row whose cand_name_clean
# occurs as a literal substring of the lower-cased name, or NA_character_ when
# nothing matches or the name is missing.
match_party <- function(name) {
  # A missing name would otherwise make `if (NA)` fail further down
  if (is.na(name)) {
    return(NA_character_)
  }
  # str_detect() is vectorised over the pattern: one TRUE/FALSE per known name
  hits <- str_detect(str_to_lower(name), fixed(known_parties$cand_name_clean))
  if (any(hits)) {
    # which()[1] keeps the "first row wins" behaviour
    known_parties$cand_party[which(hits)[1]]
  } else {
    NA_character_
  }
}
# Folder-level party lookup. Candidates coded "Candidate A*" give the supported
# party and those coded "Candidate B*" the opposed party; only candidates that
# match_party() can place are used.
folder_parties <- pilot_candidates %>%
  filter(folder != "114721283581") %>%
  mutate(cand_party = map_chr(cand_name, match_party)) %>%
  filter(!is.na(cand_party)) %>%
  mutate(role = case_when(
    str_starts(cand_code, "Candidate A") ~ "supp_party",
    str_starts(cand_code, "Candidate B") ~ "opp_party"
  )) %>%
  filter(!is.na(role)) %>%
  distinct(folder, role, cand_party) %>%
  # If a folder has more than one party for a role, the first one is kept
  pivot_wider(names_from = role, values_from = cand_party, values_fn = first)
# pivot_wider() only creates columns for roles that occur; add any missing one
for (party_col in c("supp_party", "opp_party")) {
  if (!party_col %in% names(folder_parties)) {
    folder_parties[[party_col]] <- NA_character_
  }
}
# Add party and issue-role columns directly to pilot_all
# The expressions inside mutate() run in order, so each one sees the columns
# as updated by the ones above it. That ordering is what makes the two
# inference steps below consistent.
pilot_all <- pilot_all %>%
left_join(folder_parties, by = "folder") %>%
mutate(
# Folders with no matched candidate for a role get the sentinel "Unknown"
supp_party = replace_na(supp_party, "Unknown"),
opp_party = replace_na(opp_party, "Unknown"),
# Infer supp_party from opp_party (and vice versa) when one is Unknown
supp_party = case_when(
supp_party == "Unknown" & opp_party == "D" ~ "R",
supp_party == "Unknown" & opp_party == "R" ~ "D",
TRUE ~ supp_party
),
# Uses the supp_party just computed above; if both were Unknown, both stay Unknown
opp_party = case_when(
opp_party == "Unknown" & supp_party == "D" ~ "R",
opp_party == "Unknown" & supp_party == "R" ~ "D",
TRUE ~ opp_party
),
# Issue rows are tagged by the candidate code they refer to; rows with a
# missing candidate_code get NA for both flags
is_supp_issue = str_detect(candidate_code, "^Candidate A"),
is_opp_issue = str_detect(candidate_code, "^Candidate B")
)
# Recreate pilot_merged now that pilot_all has party/issue-role columns
# mintt_party recodes mintt_partisanship to "R" / "D"; every other value,
# including NA, becomes "Other".
pilot_merged <- mintt_df %>%
inner_join(pilot_all, by = "folder") %>%
mutate(mintt_party = case_when(
mintt_partisanship == "Republican/Conservative" ~ "R",
mintt_partisanship == "Democratic/Progressive" ~ "D",
TRUE ~ "Other"
))
# Row counts per mintt_party; these are rows of pilot_merged, not folders
mintt_tbl <- table(pilot_merged$mintt_party, useNA = "ifany")
# Small sanity table: size of pilot_all, size of the merge, and one row per mintt_party level
tibble(
Metric = c("pilot_all rows",
"Pilot + mintt_df rows",
paste0("mintt_party: ", names(mintt_tbl))),
Value = c(as.character(nrow(pilot_all)),
as.character(nrow(pilot_merged)),
as.character(mintt_tbl))
) %>% kable()| Metric | Value |
|---|---|
| pilot_all rows | 1410 |
| Pilot + mintt_df rows | 1219 |
| mintt_party: D | 448 |
| mintt_party: Other | 49 |
| mintt_party: R | 722 |
1.3 Full Dataset
# Original mailer text per folder, taken from mintt_df
mintt_text <- mintt_df %>% distinct(folder, original_text = text)
# Display copy of pilot_all: long working columns are dropped and every
# character column is truncated to 150 characters so the table stays readable.
pilot_all_display <- pilot_all %>%
  left_join(mintt_text, by = "folder") %>%
  select(-anon_text, -reasoning_c1, -reasoning_c2, -candidates_json) %>%
  mutate(across(where(is.character), ~str_trunc(., 150)))
# DT column targets are 0-based, and with row names shown (the datatable()
# default) the row-name column occupies index 0. The 1-based position of
# issue_text is therefore already its DT index; subtracting 1 would target the
# column to its left.
issue_text_idx <- which(colnames(pilot_all_display) == "issue_text")
# Interactive view of pilot_all_display, 5 rows per page. The column whose DT
# index is issue_text_idx is given a fixed 500px width.
datatable(pilot_all_display,
caption = "Full pilot_all dataset",
options = list(
pageLength = 5,
columnDefs = list(
list(width = '500px', targets = issue_text_idx)
)
))1.4 AI bad ideology scoring
1.5 Check for discrepancies in Mintt’s Candidate Data
Compare mintt_partisanship (sender-level from Mintt’s mailers table) with supp_party derived from Call 1 A/B candidate codes.
# Get mintt_partisanship party per folder (one row per folder, no primary candidate filter needed)
# NOTE(review): distinct() yields one row per folder only if mintt_party is
# constant within a folder. Confirm against mintt_df.
mintt_party_lookup <- pilot_merged %>%
distinct(folder, mintt_party)
# Compare with our supp_party (one row per folder)
# Only folders where both sources give a definite R/D answer are compared:
# supp_party must not be "Unknown" and mintt_party must be "R" or "D".
party_comparison <- pilot_all %>%
distinct(folder, supp_party) %>%
inner_join(mintt_party_lookup, by = "folder") %>%
filter(supp_party != "Unknown", mintt_party %in% c("R", "D")) %>%
mutate(match = supp_party == mintt_party)
# Agreement summary between the two party sources
tibble(
Metric = c("Folders with both party sources", "Match", "Mismatch", "Mismatch rate"),
Value = c(as.character(nrow(party_comparison)), as.character(sum(party_comparison$match)),
as.character(sum(!party_comparison$match)),
paste0(round(mean(!party_comparison$match) * 100, 1), "%"))
) %>% kable()| Metric | Value |
|---|---|
| Folders with both party sources | 125 |
| Match | 125 |
| Mismatch | 0 |
| Mismatch rate | 0% |
2 Ideology Analysis w/ Mintt Candidate Data
2.1 A1: Policy Ideology: Ridgeline Plots by Party
Two densities per issue using pilot_merged: how a party talks about its own candidates (A, solid fill) vs. how the opposing party talks about those same candidates (B, dashed outline). Party comes from mintt_partisanship (sender-level). Ideology is the policy/position ideology, not the framing ideology.
The top 10 issues are the 10 issues on which a party’s own mailers most often position its candidates [Republican/Democrat]; the bottom 5 are the 5 issues on which the other party’s mailers most often reference those candidates’ positions.
What jumps out to me:
- Republican plot: Education/Schools (I’m curious what kinds of issues are discussed: funding vs. cultural issues in schools), Social Security
- Democratic plot: Crime, Immigration (not discussed by Dems at all), the R blips towards right-leaning ideology (need to look into data to see if it was incorrect scoring).
Clear problems with incorrect scoring in the data:
Republican plot: - Government Spending and Taxes (I checked some of these, and the AI output was definitely mistaken.)
Democratic plot:
# Shared ridge plot helper.
# Adds `ideology_numeric`, a plotting copy of the score column named by `col`:
# the codes -99 and 99 are moved to x = -1 and x = 0, and every other value is
# kept as a number.
prep_ideology <- function(data, col = "policy_ideology") {
  mutate(data, ideology_numeric = {
    score <- .data[[col]]
    case_when(
      score == -99 ~ -1,
      score == 99 ~ 0,
      TRUE ~ as.numeric(score)
    )
  })
}
# Shared x axis for the ridge plots: positions -1 and 0 are labelled as the
# -99 / 99 codes (matching the remapping in prep_ideology), followed by the
# 1-9 ideology scale.
ridge_x_scale <- scale_x_continuous(
breaks = c(-1, 0, 1:9),
labels = c("Bad\n(-99)", "Good\n(99)", as.character(1:9))
)
# Ridgeline plot of ideology scores for one party's candidates.
#
# data:           merged issue-level data with mintt_party, is_supp_issue,
#                 is_opp_issue, issue and the column named by ideology_col.
# party_label:    "R" or "D"; any other value produces an own-party-only plot.
# color:          fill/outline colour for the party's own mailers.
# ideology_col:   name of the score column to plot.
# ideology_label: word used in the title and axis label.
#
# Returns a ggplot object: the party's own mailers (A* issues) as filled
# ridges, the opposing party's mailers about these candidates (B* issues) as
# dashed outlines, and a vertical segment at each per-issue mean of the 1-9
# scores.
make_ridge_plot_merged <- function(data, party_label, color, ideology_col = "policy_ideology", ideology_label = "Policy") {
  # Scalar dispatch; isTRUE() also covers an NA party_label
  opposite <- if (isTRUE(party_label == "R")) {
    "D"
  } else if (isTRUE(party_label == "D")) {
    "R"
  } else {
    NA_character_
  }
  has_opposite <- !is.na(opposite)
  # The legend labels are fixed up front so the manual scales below never
  # depend on which subsets happen to contain rows.
  own_source <- paste0(party_label, " about own candidates")
  opp_source <- if (has_opposite) {
    paste0(opposite, " about ", party_label, " candidates")
  } else {
    NULL
  }
  # Own party's messaging about their candidates (A* issues from own mailers)
  own_data <- data %>%
    filter(mintt_party == party_label, is_supp_issue,
           !is.na(issue), !is.na(.data[[ideology_col]])) %>%
    mutate(source = own_source)
  # Opposing party's messaging about this party's candidates (B* issues from opponent mailers)
  opp_data <- if (has_opposite) {
    data %>%
      filter(mintt_party == opposite, is_opp_issue,
             !is.na(issue), !is.na(.data[[ideology_col]])) %>%
      mutate(source = opp_source)
  } else {
    tibble()
  }
  # Top 10 issues from own messaging + top 5 from opposing (union to avoid duplicates)
  top_own <- own_data %>% count(issue, sort = TRUE) %>% slice_head(n = 10) %>% pull(issue)
  top_opp <- if (nrow(opp_data) > 0) {
    opp_data %>% count(issue, sort = TRUE) %>% slice_head(n = 5) %>% pull(issue)
  } else {
    character()
  }
  top_iss <- unique(c(top_own, top_opp))
  plot_data <- bind_rows(own_data, opp_data) %>%
    filter(issue %in% top_iss) %>%
    prep_ideology(col = ideology_col) %>%
    # Reversed so the most frequent issue is drawn at the top
    mutate(issue = factor(issue, levels = rev(top_iss)))
  # Per-issue, per-source means for vertical reference lines (excluding valence)
  means_df <- plot_data %>%
    filter(ideology_numeric >= 1, ideology_numeric <= 9) %>%
    group_by(issue, source) %>%
    summarise(mean_val = mean(ideology_numeric, na.rm = TRUE), .groups = "drop")
  opp_color <- if (has_opposite) {
    c("D" = "dodgerblue", "R" = "firebrick1")[[opposite]]
  } else {
    "grey50"
  }
  # One aesthetic value per label that can occur: own first, then opposing if any
  source_labels <- c(own_source, opp_source)
  keep <- seq_along(source_labels)
  fill_values <- setNames(c(color, "transparent")[keep], source_labels)
  color_values <- setNames(c(color, opp_color)[keep], source_labels)
  linetype_values <- setNames(c("solid", "dashed")[keep], source_labels)
  ggplot(plot_data, aes(x = ideology_numeric, y = issue,
                        fill = source, color = source, linetype = source)) +
    geom_density_ridges(alpha = 0.4, bandwidth = 0.6) +
    geom_segment(data = means_df,
                 aes(x = mean_val, xend = mean_val,
                     y = as.numeric(issue) - 0.1, yend = as.numeric(issue) + 0.5,
                     color = source, linetype = source),
                 linewidth = 0.4, alpha = 0.7, inherit.aes = FALSE, show.legend = FALSE) +
    scale_fill_manual(values = fill_values) +
    scale_color_manual(values = color_values) +
    scale_linetype_manual(values = linetype_values) +
    ridge_x_scale +
    labs(
      title = paste0(ideology_label, " Ideology — ", party_label, " Candidates (Mintt data)"),
      subtitle = paste0("Solid = ", party_label, "'s own mailers | Dashed = opposing party's mailers\nVertical lines = mean score"),
      x = paste0(ideology_label, " Ideology (-99 = bad, 99 = good; 1 = most liberal, 9 = most conservative)"),
      y = NULL,
      fill = NULL, color = NULL, linetype = NULL
    ) +
    theme_minimal() +
    theme(legend.position = "bottom")
}
2.1.1 Republican Candidates
# Diagnostic: D mailers talking about R candidates (Candidate B) on Gov. Spending & Taxes
# The issue filter is a case-insensitive match on "government spending" or
# "taxes" anywhere in the issue label. distinct() collapses repeated rows over
# the listed columns before display.
pilot_merged %>%
filter(mintt_party == "D", is_opp_issue,
str_detect(issue, regex("government spending|taxes", ignore_case = TRUE))) %>%
distinct(folder, issue, candidate_code, mintt_party, mintt_partisanship, policy_ideology, issue_text) %>%
datatable(caption = "Gov. Spending & Taxes — D mailers, Candidate B (R candidates being attacked)",
options = list(pageLength = 5))2.2 A2: Policy Ideology: Summary Table
# Format a mean and standard deviation as "mean (sd)" for display.
# m, s: numeric vectors of means and SDs.
# Returns a character vector: "—" where the mean is missing (NA or NaN), the
# rounded mean alone where only the SD is missing, otherwise "mean (sd)", all
# rounded to 2 decimals.
fmt_ms <- function(m, s) {
  mean_only <- as.character(round(m, 2))
  mean_and_sd <- paste0(round(m, 2), " (", round(s, 2), ")")
  case_when(
    is.na(m) ~ "—",  # is.na() is TRUE for NaN as well
    is.na(s) ~ mean_only,
    TRUE ~ mean_and_sd
  )
}
# One row per folder / issue / candidate code. pilot_merged repeats each issue
# row for every mintt_df row of the folder, and mintt_party is a folder-level
# value, so the first occurrence is kept.
merged_dedup <- filter(
  distinct(pilot_merged, folder, issue, candidate_code, .keep_all = TRUE),
  !is.na(issue)
)
total_rows_merged <- nrow(merged_dedup)
total_pilot_folders <- n_distinct(pilot_merged$folder)
# Scores of the rows selected by `keep` that lie on the 1-9 ideology scale
scores_on_scale <- function(score, keep) {
  score[keep & score >= 1 & score <= 9]
}
# Per-issue policy-ideology summary.
# *_self  = a party's own mailers about its own candidates (A* issues);
# R_other = D mailers about their opponents, D_other = R mailers about theirs.
summary_tbl_merged <- merged_dedup %>%
  group_by(issue) %>%
  summarise(
    Count = n(),
    Pct_Mailers = round(n_distinct(folder) / total_pilot_folders * 100, 1),
    Pct_Total = round(n() / total_rows_merged * 100, 1),
    R_self_m = mean(scores_on_scale(policy_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
    R_self_s = sd(scores_on_scale(policy_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
    D_self_m = mean(scores_on_scale(policy_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
    D_self_s = sd(scores_on_scale(policy_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
    R_other_m = mean(scores_on_scale(policy_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
    R_other_s = sd(scores_on_scale(policy_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
    D_other_m = mean(scores_on_scale(policy_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
    D_other_s = sd(scores_on_scale(policy_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
    # Share of scored mentions that received a 1-9 score rather than -99 / 99
    Pct_Ideological = round(
      sum(policy_ideology >= 1 & policy_ideology <= 9, na.rm = TRUE) /
        sum(!is.na(policy_ideology)) * 100,
      1
    ),
    # max(..., 1) avoids dividing by zero when a party has no scored mentions
    R_Pct_Valence = round(
      sum(mintt_party == "R" & policy_ideology %in% c(-99, 99), na.rm = TRUE) /
        max(sum(mintt_party == "R" & !is.na(policy_ideology)), 1) * 100,
      1
    ),
    D_Pct_Valence = round(
      sum(mintt_party == "D" & policy_ideology %in% c(-99, 99), na.rm = TRUE) /
        max(sum(mintt_party == "D" & !is.na(policy_ideology)), 1) * 100,
      1
    ),
    .groups = "drop"
  ) %>%
  transmute(
    issue, Count, Pct_Mailers, Pct_Total,
    R_self = fmt_ms(R_self_m, R_self_s),
    D_self = fmt_ms(D_self_m, D_self_s),
    R_other = fmt_ms(R_other_m, R_other_s),
    D_other = fmt_ms(D_other_m, D_other_s),
    Pct_Ideological, R_Pct_Valence, D_Pct_Valence
  ) %>%
  arrange(desc(Count))
# Interactive policy-ideology summary, 15 issues per page. colnames supplies
# display headers in the same order as the columns of summary_tbl_merged.
datatable(summary_tbl_merged,
colnames = c("Issue", "Count", "% Mailers", "% of Total",
"R Self Avg (SD)", "D Self Avg (SD)",
"R Other Avg (SD)", "D Other Avg (SD)",
"% Ideological (1-9)", "R % Valence", "D % Valence"),
caption = "Policy Ideology — Issue Summary by mintt_partisanship",
options = list(pageLength = 15))2.3 B1: Framing Ideology: Ridgeline Plots by Party
2.4 B2: Framing Ideology: Summary Table
# Scores of the rows selected by `keep` that lie on the 1-9 ideology scale
scores_on_scale <- function(score, keep) {
  score[keep & score >= 1 & score <= 9]
}
# Per-issue framing-ideology summary; same layout as the policy table.
# *_self  = a party's own mailers about its own candidates (A* issues);
# R_other = D mailers about their opponents, D_other = R mailers about theirs.
summary_tbl_framing <- merged_dedup %>%
  group_by(issue) %>%
  summarise(
    Count = n(),
    Pct_Mailers = round(n_distinct(folder) / total_pilot_folders * 100, 1),
    Pct_Total = round(n() / total_rows_merged * 100, 1),
    R_self_m = mean(scores_on_scale(framing_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
    R_self_s = sd(scores_on_scale(framing_ideology, mintt_party == "R" & is_supp_issue), na.rm = TRUE),
    D_self_m = mean(scores_on_scale(framing_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
    D_self_s = sd(scores_on_scale(framing_ideology, mintt_party == "D" & is_supp_issue), na.rm = TRUE),
    R_other_m = mean(scores_on_scale(framing_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
    R_other_s = sd(scores_on_scale(framing_ideology, mintt_party == "D" & is_opp_issue), na.rm = TRUE),
    D_other_m = mean(scores_on_scale(framing_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
    D_other_s = sd(scores_on_scale(framing_ideology, mintt_party == "R" & is_opp_issue), na.rm = TRUE),
    # Share of scored mentions that received a 1-9 score rather than -99 / 99
    Pct_Ideological = round(
      sum(framing_ideology >= 1 & framing_ideology <= 9, na.rm = TRUE) /
        sum(!is.na(framing_ideology)) * 100,
      1
    ),
    # max(..., 1) avoids dividing by zero when a party has no scored mentions
    R_Pct_Valence = round(
      sum(mintt_party == "R" & framing_ideology %in% c(-99, 99), na.rm = TRUE) /
        max(sum(mintt_party == "R" & !is.na(framing_ideology)), 1) * 100,
      1
    ),
    D_Pct_Valence = round(
      sum(mintt_party == "D" & framing_ideology %in% c(-99, 99), na.rm = TRUE) /
        max(sum(mintt_party == "D" & !is.na(framing_ideology)), 1) * 100,
      1
    ),
    .groups = "drop"
  ) %>%
  transmute(
    issue, Count, Pct_Mailers, Pct_Total,
    R_self = fmt_ms(R_self_m, R_self_s),
    D_self = fmt_ms(D_self_m, D_self_s),
    R_other = fmt_ms(R_other_m, R_other_s),
    D_other = fmt_ms(D_other_m, D_other_s),
    Pct_Ideological, R_Pct_Valence, D_Pct_Valence
  ) %>%
  arrange(desc(Count))
# Interactive framing-ideology summary, 15 issues per page. colnames supplies
# display headers in the same order as the columns of summary_tbl_framing.
datatable(summary_tbl_framing,
colnames = c("Issue", "Count", "% Mailers", "% of Total",
"R Self Avg (SD)", "D Self Avg (SD)",
"R Other Avg (SD)", "D Other Avg (SD)",
"% Ideological (1-9)", "R % Valence", "D % Valence"),
caption = "Framing Ideology — Issue Summary by mintt_partisanship",
options = list(pageLength = 15))3 2 by 2
# Per mailer party (D/R only), count the issue mentions that carry each
# classification flag, then reshape to one row per party x category.
class_counts <- merged_dedup %>%
  filter(mintt_party %in% c("D", "R")) %>%
  group_by(mintt_party) %>%
  summarise(
    `Policy Inputs` = sum(policy_input == 1, na.rm = TRUE),
    Outcomes = sum(outcome == 1, na.rm = TRUE),
    Retrospective = sum(retrospective == 1, na.rm = TRUE),
    Prospective = sum(prospective == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(-mintt_party, names_to = "Category", values_to = "Count") %>%
  # Fixed factor order controls the left-to-right order of the bars
  mutate(Category = factor(Category, levels = c("Policy Inputs", "Outcomes",
                                                "Retrospective", "Prospective")))
# Raw counts, side-by-side bars per party
ggplot(class_counts, aes(x = Category, y = Count, fill = mintt_party)) +
  geom_col(position = "dodge", alpha = 0.8) +
  scale_fill_manual(values = c("D" = "dodgerblue", "R" = "firebrick1"),
                    labels = c("D" = "Democratic", "R" = "Republican")) +
  labs(title = "Issue Mention Classifications by Party",
       x = NULL, y = "Number of Issue Mentions", fill = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")
# Same counts as within-party shares.
# NOTE(review): the denominator is the sum of the four flag counts, not the
# number of issue mentions. If one mention can carry more than one flag, these
# shares are not "proportion of issue mentions". Confirm the intended
# denominator against the axis label.
class_props <- class_counts %>%
  group_by(mintt_party) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  ungroup()
ggplot(class_props, aes(x = Category, y = Proportion, fill = mintt_party)) +
  geom_col(position = "dodge", alpha = 0.8) +
  scale_fill_manual(values = c("D" = "dodgerblue", "R" = "firebrick1"),
                    labels = c("D" = "Democratic", "R" = "Republican")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Issue Mention Classifications by Party (Proportion)",
       x = NULL, y = "Proportion of Total Issue Mentions", fill = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")
4 Candidates in Sample
# Candidate totals from candidate_summary: number of distinct candidate names
# (its row count) and the sum of the per-name counts.
tibble(
Metric = c("Total unique candidates", "Total candidate mentions"),
Value = c(nrow(candidate_summary), sum(candidate_summary$`Mailers Mentioning`))
) %>% kable()| Metric | Value |
|---|---|
| Total unique candidates | 239 |
| Total candidate mentions | 562 |
5 Archive
5.1 NEED TO FIX - Analysis 1a: Ideology Distribution by Party (Top Issues)
5.2 Analysis w/ Imputed Candidate Party
5.2.1 Analysis 1b: Ridgeline Plots by Party
Two densities per issue: how a party talks about its own candidates (A, solid fill) vs. how the opposing party talks about those same candidates (B, dashed outline). Scores of -99 and 99 are bucketed separately.
5.2.1.1 Republican Candidates
5.2.1.2 Democratic Candidates
5.2.1.3 Unknown
5.2.2 Analysis 2: Summary Table
6 Wrong-Direction Ideology Scoring
Issues where the opposing party scores candidates toward their own ideology rather than away from it — i.e., Ds score R candidates further LEFT than Rs score themselves, or Rs score D candidates further RIGHT than Ds score themselves. These likely indicate AI scoring errors.
The table shows all individual issue mentions from issues where the mean other-party score goes in the wrong direction relative to the self-party mean. self_mean = how that party scores itself on the issue; other_mean = how the opposing party scores them.
# Mean 1-9 policy score per issue from each of the four perspectives:
# a party's own mailers about its candidates (self) and the other party's
# mailers about those candidates (other).
issue_ideology_means <- merged_dedup %>%
  filter(between(policy_ideology, 1, 9)) %>%
  group_by(issue) %>%
  summarise(
    R_self_mean = mean(policy_ideology[mintt_party == "R" & is_supp_issue], na.rm = TRUE),
    R_other_mean = mean(policy_ideology[mintt_party == "D" & is_opp_issue], na.rm = TRUE),
    D_self_mean = mean(policy_ideology[mintt_party == "D" & is_supp_issue], na.rm = TRUE),
    D_other_mean = mean(policy_ideology[mintt_party == "R" & is_opp_issue], na.rm = TRUE),
    .groups = "drop"
  )
# Anomaly 1: issues where D mailers place R candidates to the LEFT (lower) of
# where R mailers place them. Both means must exist (NaN = no data).
r_anomaly_issues <- issue_ideology_means %>%
  filter(!(is.nan(R_self_mean) | is.nan(R_other_mean)),
         R_other_mean < R_self_mean)
r_issue_means <- select(r_anomaly_issues, issue, R_self_mean, R_other_mean)
r_anomaly_rows <- merged_dedup %>%
  filter(mintt_party == "D", is_opp_issue,
         between(policy_ideology, 1, 9),
         issue %in% r_anomaly_issues$issue) %>%
  left_join(r_issue_means, by = "issue") %>%
  mutate(anomaly = "D scores R further LEFT than R self",
         self_mean = round(R_self_mean, 2),
         other_mean = round(R_other_mean, 2))
# Anomaly 2: issues where R mailers place D candidates to the RIGHT (higher)
# of where D mailers place them.
d_anomaly_issues <- issue_ideology_means %>%
  filter(!(is.nan(D_self_mean) | is.nan(D_other_mean)),
         D_other_mean > D_self_mean)
d_issue_means <- select(d_anomaly_issues, issue, D_self_mean, D_other_mean)
d_anomaly_rows <- merged_dedup %>%
  filter(mintt_party == "R", is_opp_issue,
         between(policy_ideology, 1, 9),
         issue %in% d_anomaly_issues$issue) %>%
  left_join(d_issue_means, by = "issue") %>%
  mutate(anomaly = "R scores D further RIGHT than D self",
         self_mean = round(D_self_mean, 2),
         other_mean = round(D_other_mean, 2))
# Individual mentions behind both anomaly types, trimmed to display columns
wrong_dir <- bind_rows(r_anomaly_rows, d_anomaly_rows) %>%
  select(anomaly, folder, idx, issue, candidate_code, mintt_party,
         policy_ideology, self_mean, other_mean, policy_reasoning, issue_text) %>%
  mutate(issue_text = as.character(issue_text))
# Load all rerun CSVs (one file per Call 3 rerun) from the "Call 3 Rerun" folder
rerun_files <- c(
  "call_3_rerun_results.csv",
  "call_3_rerun_results_test2.csv",
  "call_3_rerun_results_test3.csv",
  "call_3_rerun_results_test4.csv",
  "call_3_rerun_results_test5.csv"
)
rerun_tables <- lapply(rerun_files, function(csv_name) {
  read_csv(file.path(pilot_dir, "Call 3 Rerun", csv_name), show_col_types = FALSE)
})
# Keep the individually named objects used by the rest of the analysis
rerun_csv1 <- rerun_tables[[1]]
rerun_csv2 <- rerun_tables[[2]]
rerun_csv3 <- rerun_tables[[3]]
rerun_csv4 <- rerun_tables[[4]]
rerun_csv5 <- rerun_tables[[5]]
# Helper: attach one rerun's scores to the wrong_dir scaffold.
# rerun_df: rerun results with folder, issue, candidate, policy_ideology and
#           policy_reasoning columns.
# run_num:  value stored in the Run column of the result.
# Returns the wrong_dir rows that have a rerun score, matched on folder, issue
# and candidate code, with the original score and reasoning replaced by the
# rerun's.
join_rerun <- function(rerun_df, run_num) {
  scaffold <- select(wrong_dir,
                     anomaly, folder, idx, issue, candidate_code, mintt_party,
                     self_mean, other_mean, issue_text)
  rerun_scores <- rerun_df %>%
    # folder is character in the scaffold; align the type before joining
    mutate(folder = as.character(folder)) %>%
    select(folder, issue, candidate, policy_ideology, policy_reasoning)
  scaffold %>%
    inner_join(rerun_scores,
               by = c("folder", "issue", "candidate_code" = "candidate")) %>%
    mutate(policy_ideology = as.integer(policy_ideology),
           Run = run_num)
}
# Helper: number of wrong-direction scores in df, looking only at 1-9 scores.
# A row counts when its anomaly label contains "RIGHT" and the score is >= 6,
# or contains "LEFT" and the score is <= 4.
count_wrong <- function(df) {
  scored <- filter(df, between(policy_ideology, 1, 9))
  too_far_right <- str_detect(scored$anomaly, "RIGHT") & scored$policy_ideology >= 6
  too_far_left <- str_detect(scored$anomaly, "LEFT") & scored$policy_ideology <= 4
  sum(too_far_right | too_far_left, na.rm = TRUE)
}
# Count for original run (Run 0)
# Uses the same helper as the reruns so the infraction rule is defined in
# exactly one place.
count_wrong_orig <- count_wrong(wrong_dir)
# Build all rerun dataframes
# Each runN is the wrong_dir scaffold with that rerun's scores attached and a
# Run column set to N.
run1 <- join_rerun(rerun_csv1, 1L)
run2 <- join_rerun(rerun_csv2, 2L)
run3 <- join_rerun(rerun_csv3, 3L)
run4 <- join_rerun(rerun_csv4, 4L)
run5 <- join_rerun(rerun_csv5, 5L)
# Infraction count per run. Run 0 is computed on wrong_dir itself, the reruns
# on the joined tables, which only contain rows present in the rerun file.
tibble(
Run = c("Run 0 (original)", "Run 1", "Run 2", "Run 3", "Run 4", "Run 5"),
`Wrong-Direction Infractions` = c(
count_wrong_orig,
count_wrong(run1),
count_wrong(run2),
count_wrong(run3),
count_wrong(run4),
count_wrong(run5)
)
) %>% kable(caption = "Wrong-Direction Infraction Count by Run")| Run | Wrong-Direction Infractions |
|---|---|
| Run 0 (original) | 43 |
| Run 1 | 34 |
| Run 2 | 9 |
| Run 3 | 9 |
| Run 4 | 11 |
| Run 5 | 9 |
# Stack runs 3-5 and flag wrong-direction scores: a 1-9 score of >= 6 on a
# "RIGHT" anomaly or <= 4 on a "LEFT" anomaly. Anything that cannot be
# evaluated (missing score or label) is treated as not wrong.
wrong_dir_345 <- list(run3, run4, run5) %>%
  bind_rows() %>%
  mutate(is_wrong = coalesce(
    between(policy_ideology, 1, 9) &
      ((str_detect(anomaly, "RIGHT") & policy_ideology >= 6) |
         (str_detect(anomaly, "LEFT") & policy_ideology <= 4)),
    FALSE
  ))
# folder + issue + candidate combinations that were wrong in at least one run
wrong_combos <- distinct(
  filter(wrong_dir_345, is_wrong),
  folder, issue, candidate_code
)
# For those combinations keep the rows from all three runs, so the runs can be
# compared side by side
wrong_dir_combined <- wrong_dir_345 %>%
  semi_join(wrong_combos, by = c("folder", "issue", "candidate_code")) %>%
  select(Run, anomaly, folder, issue, candidate_code, mintt_party,
         policy_ideology, self_mean, other_mean, policy_reasoning, issue_text) %>%
  arrange(anomaly, issue, folder, Run)
# Interactive table of the flagged combinations across runs 3-5.
# Row names are switched off, so the 0-based targets 9 and 10 are the 10th and
# 11th columns of wrong_dir_combined (shown as "AI Reasoning" and
# "Issue Text"), which get a fixed 400px width.
datatable(wrong_dir_combined,
rownames = FALSE,
colnames = c("Run", "Anomaly", "Folder", "Issue", "Candidate", "Mailer Party",
"Policy Ideology", "Self Mean", "Other Mean", "AI Reasoning", "Issue Text"),
caption = "Wrong-Direction Ideology (Runs 3-5): Opposing party scores candidates toward their own ideology",
options = list(
pageLength = 15,
columnDefs = list(
list(width = '400px', targets = c(9, 10))
)
))