David Barker Figures

Preparation

Show the code
# libraries
library(tidyverse)
library(haven)
library(survey)

# load data
setwd("~/Downloads")
data <- read_dta('barkerCES.dta')

# uniform aesthestics
david_func <- function() {
  list(
    theme_bw(),
    theme(panel.grid = element_blank(),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12)) 

  )
}

Chapter #2

Figure 2.1

Show the code
# Creating variables based on text description
group <- rep(c('Full Sample', 'Democrats', 'Republicans'), 2)
type <- c('Core anti-intellectualism', 'Core anti-intellectualism', 'Core anti-intellectualism', 'Indignant anti-intellectualism', 'Indignant anti-intellectualism', 'Indignant anti-intellectualism')
cors <- c(-.57, -.64, -.38, -.34, -.44, 0.01)

# combining into single dataframe
f2.1 <- data.frame(group, type, cors)
f2.1$group <- factor(f2.1$group, levels = c('Full Sample', 'Democrats', 'Republicans'))

# Plotting
f2.1 %>% 
  ggplot(aes(x = type, y = cors, fill = group)) +
  geom_col(color = 'black', position = 'dodge') +
  geom_text(aes(label = paste(cors), 
                vjust = ifelse(cors >= 0, -0.3, 1.3)), 
            position = position_dodge(width = 0.9)) +
  scale_fill_grey(start = 0.3, end = 0.8) +
  labs(fill = 'Sample',
       x = '', y = 'Pearson\'s Correlation') +
  david_func()

Chapter #3

Figure 3.1

To provide a broader and more precise picture of the partisan Diploma Divide, we performed a series of statistical regression analyses using the 2022 survey data that we described in Chapter 2. We examined differences between (a) non-college graduates, (b) bachelor’s degree holders, and (c) advanced degree holders in their relative propensities of identifying as either a Democrat, a Republican, or a “pure” Independent (who has no partisan leaning), while simultaneously accounting for the influence of other demographic characteristics (race, gender, age, household income, and church attendance).

As Figure 3-1 shows, the relative probability of identifying as a Democrat/Democratic leaner or a Republican/Republican leaner nil among those whose formal educational attainment capped out with an associate’s degree or less

Show the code
# Creating outcomes
data <- data %>% 
  mutate(pidfac_d = case_when(pid3lean01 == 0 ~ 1,
                                           T ~ 0),
          pidfac_i = case_when(pid3lean01 == 0.5 ~ 1,
                                           T ~ 0),
          pidfac_r = case_when(pid3lean01 == 1 ~ 1,
                                           T ~ 0))

# survey design to include weights and cluster SEs by state
mydesign <- svydesign(ids = ~inputstate, weights = ~weight, data = data)

# Models
R <- svyglm(pidfac_r ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

I <- svyglm(pidfac_i ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

D <- svyglm(pidfac_d ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

# Function to extract predicted probabilities
pred_func <- function(reg, bach, grad, pid, educ) {

    predict_data <- data.frame(
    
    # desired levels of education
    justbachelorsdegree = bach,
    graddegree = grad,
    
    # covariates fixed at their menas
    black = weighted.mean(data$black, na.rm = TRUE, w = data$weight),
    hispanic = weighted.mean(data$hispanic, na.rm = TRUE, w = data$weight),
    male = weighted.mean(data$male, na.rm = TRUE), w = data$weight,
    age01 = weighted.mean(data$age01, na.rm = TRUE, w = data$weight),
    faminc_missmedsub01 = weighted.mean(data$faminc_missmedsub01, na.rm = TRUE, w = data$weight),
    churchatt601 = weighted.mean(data$churchatt601, na.rm = TRUE, w = data$weight)
  )
  
  # tidying
  preds <- predict(reg, newdata = predict_data, type = "response")
  preds <- as.data.frame(preds)
  preds$pid <- pid
  preds$educ <- educ
  
  return(preds)
  
}

# Creating dataframe of predictions
d1 <- pred_func(D, 0, 0, 'Democrat', 'Associates degree or less') 
d2 <- pred_func(D, 1, 0, 'Democrat', 'Bachelor\'s degree') 
d3 <- pred_func(D, 1, 1, 'Democrat', 'Graduate degree') 

i1 <- pred_func(I, 0, 0, 'Independent', 'Associates degree or less') 
i2 <- pred_func(I, 1, 0, 'Independent', 'Bachelor\'s degree') 
i3 <- pred_func(I, 1, 1, 'Independent', 'Graduate degree') 

r1 <- pred_func(R, 0, 0, 'Republican', 'Associates degree or less') 
r2 <- pred_func(R, 1, 0, 'Republican', 'Bachelor\'s degree') 
r3 <- pred_func(R, 1, 1, 'Republican', 'Graduate degree') 

f3.1 <- bind_rows(d1, d2, d3, i1, i2, i3, r1, r2, r3)

f3.1 <- f3.1 %>% 
  mutate(ymax = response + SE,
         ymin = response - SE)

# Plot
f3.1 %>% 
  ggplot(., aes(x = pid, y = response, ymin = ymin, ymax = ymax)) +
  geom_col(color = 'black', position = 'dodge', fill = 'grey60') +
  geom_errorbar(position = position_dodge(width = 0.9), width  = 0.2) +
  david_func() +
  facet_wrap(~educ) +
  labs(x = 'Party identification', y = 'Probability of PID') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent)

Figure 3.2

Given those numbers, we asked our survey respondents to indicate the name and location of the last college or university they had attended (if they had), and then scored those responses on a five-point scale ranging from “zero” (online for-profit university with universal admissions) to “four” (top 100 university or liberal arts college, based on the 2022 US News and World Report rankings). We then added that measure of “college elitism” to our regression model.

Show the code
# Models
R2 <- svyglm(pidfac_r ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

I2 <- svyglm(pidfac_i ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

D2 <- svyglm(pidfac_d ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

# Function to extract predicted probabilities
pred_func <- function(reg, bach, grad, elite, pid, educ, institution) {

    predict_data <- data.frame(
    
    # desired levels of education
    justbachelorsdegree = bach,
    graddegree = grad,
    collegeelitism = elite,
    
    # covariates fixed at their menas
    black = weighted.mean(data$black, na.rm = TRUE, w = data$weight),
    hispanic = weighted.mean(data$hispanic, na.rm = TRUE, w = data$weight),
    male = weighted.mean(data$male, na.rm = TRUE, w = data$weight),
    age01 = weighted.mean(data$age01, na.rm = TRUE), w = data$weight,
    faminc_missmedsub01 = weighted.mean(data$faminc_missmedsub01, na.rm = TRUE, w = data$weight),
    churchatt601 = weighted.mean(data$churchatt601, na.rm = TRUE, w = data$weight)
  )
  
  # Add labels to dataframes
  preds <- predict(reg, newdata = predict_data, type = "response")
  preds <- as.data.frame(preds)
  preds$pid <- pid
  preds$educ <- educ
  preds$institution <- institution
  
  return(preds)
  
}

# Creating dataframe of predictions
dd1 <- pred_func(D2, 1, 0, 0,  'Democrat', 'Bachelor\'s degree', 'Non-elite institution') 
dd2 <- pred_func(D2, 1, 0, 1,  'Democrat', 'Bachelor\'s degree', 'Elite institution') 
dd3 <- pred_func(D2, 1, 1, 0,  'Democrat', 'Graduate degree', 'Non-elite institution') 
dd4 <- pred_func(D2, 1, 1, 1,  'Democrat', 'Graduate degree', 'Elite institution') 


ii1 <- pred_func(I2, 1, 0, 0,  'Independent', 'Bachelor\'s degree', 'Non-elite institution') 
ii2 <- pred_func(I2, 1, 0, 1,  'Independent', 'Bachelor\'s degree', 'Elite institution') 

ii3 <- pred_func(I2, 1, 1, 0,  'Independent', 'Graduate degree', 'Non-elite institution') 
ii4 <- pred_func(I2, 1, 1, 1,  'Independent', 'Graduate degree', 'Elite institution') 


rr1 <- pred_func(R2, 1, 0, 0,  'Republican', 'Bachelor\'s degree', 'Non-elite institution') 
rr2 <- pred_func(R2, 1, 0, 1,  'Republican', 'Bachelor\'s degree', 'Elite institution') 

rr3 <- pred_func(R2, 1, 1, 0,  'Republican', 'Graduate degree', 'Non-elite institution') 
rr4 <- pred_func(R2, 1, 1, 1,  'Republican', 'Graduate degree', 'Elite institution') 


f3.2 <- bind_rows(dd1, dd2, dd3, dd4, ii1, ii2, ii3, ii4, rr1, rr2, rr3, rr4)

f3.2 <- f3.2 %>% 
  mutate(ymax = response + SE,
         ymin = response - SE)


# Plot
f3.2 %>% 
  ggplot(., aes(x = pid, y = response, ymin = ymin, ymax = ymax, fill = institution)) +
  geom_col(color = 'black', position = 'dodge') +
  geom_errorbar(position = position_dodge(width = 0.9), width  = 0.2) +
  david_func() +
  facet_wrap(~educ) +
  labs(x = 'Party identification', y = 'Probability of PID',
       fill = 'Institution type') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_grey(start = 0.5, end = 0.8)

Figure 3.3

So, to get a better sense of the degree to which authentic intellectual identity, rather than the rudimentary indicators of educational attainment and/or educational elitism, correspond to Democratic partisanship, we added the nine-item intellectual identity index that we described in Chapter 2 to the regression model predicting partisan differences.

Show the code
# Models
R3 <- svyglm(pidfac_r ~ justbachelorsdegree + graddegree + collegeelitism + intelfull1001 + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

I3 <- svyglm(pidfac_i ~ justbachelorsdegree + graddegree + collegeelitism + intelfull1001 + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

D3 <- svyglm(pidfac_d ~ justbachelorsdegree + graddegree + collegeelitism + intelfull1001 + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))

# Function to extract predicted probabilities
pred_func <- function(reg, intelid, pid, intelidlab) {

    predict_data <- data.frame(
    
    # intellectual ID
    intelfull1001 = intelid,
    
    # covariates fixed at their menas
    justbachelorsdegree = weighted.mean(data$justbachelorsdegree, na.rm = T,
                                        w = data$weight),
    graddegree = weighted.mean(data$graddegree, na.rm = T,
                                        w = data$weight),
    collegeelitism = weighted.mean(data$collegeelitism, na.rm = T,
                                        w = data$weight),
    black = mean(data$black, na.rm = TRUE,
                                        w = data$weight),
    hispanic = mean(data$hispanic, na.rm = TRUE,
                                        w = data$weight),
    male = mean(data$male, na.rm = TRUE,
                                        w = data$weight),
    age01 = mean(data$age01, na.rm = TRUE,
                                        w = data$weight),
    faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE,
                                        w = data$weight),
    churchatt601 = mean(data$churchatt601, na.rm = TRUE,
                                        w = data$weight)
  )
  
  # Add labels to dataframes
  preds <- predict(reg, newdata = predict_data, type = "response")
  preds <- as.data.frame(preds)
  preds$pid <- pid
  preds$intelidlab <- intelidlab

  return(preds)
  
}

# create dataframe of predictions
intd1 <- pred_func(D3, 0, 'Democrat', 'Least Intellectual')
intd2 <- pred_func(D3, 1, 'Democrat', 'Most Intellectual')

inti1 <- pred_func(I3, 0, 'Independent', 'Least Intellectual')
inti2 <- pred_func(I3, 1, 'Independent', 'Most Intellectual')

intr1 <- pred_func(R3, 0, 'Republican', 'Least Intellectual')
intr2 <- pred_func(R3, 1, 'Republican', 'Most Intellectual')

f3.3 <- bind_rows(intd1, intd2, inti1, inti2, intr1, intr2)

f3.3 <- f3.3 %>% 
  mutate(ymax = response + SE,
         ymin = response - SE)

# Plot
f3.3 %>% 
  ggplot(., aes(x = pid, y = response, ymin = ymin, ymax = ymax, fill = intelidlab)) +
  geom_col(color = 'black',  position = 'dodge') +
  geom_errorbar(width = 0.2, position = position_dodge(width = 0.9)) +
  david_func() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = '', y = 'Probability of PID', fill = 'Intellectual identity index') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
   scale_fill_grey(start = 0.5, end = 0.8)

Old (ignore; saving just in case)

Show the code
# 
# data <- data %>% mutate(pidfac_dr = case_when(pid3lean01 == 0 ~ 'Democrat',
#                                            pid3lean01 == 1 ~ 'Republican'),
#                         pidfac_di = case_when(pid3lean01 == 0 ~ 'Democrat',
#                                            pid3lean01 == 0.5 ~ 'Independent'),
#                         pidfac_ir = case_when(pid3lean01 == 1 ~ 'Republican',
#                                            pid3lean01 == 0.5 ~ 'Independent'),
#                         pidfac = case_when(pid3lean01 == 0 ~ 'Democrat',
#                                            pid3lean01 == 0.5 ~ 'Independent',
#                                            pid3lean01 == 1 ~ 'Republican'))
# 
# 
# data$pidfac_dr <- factor(data$pidfac_dr, levels = c('Republican', 'Democrat'))
# data$pidfac_di <- factor(data$pidfac_di, levels = c('Independent', 'Democrat'))
# data$pidfac_ir <- factor(data$pidfac_ir, levels = c('Republican', 'Independent'))
# 
# data$pid3lean01 <- factor(data$pid3lean01)
# 
# mydesign <- svydesign(ids = ~inputstate, weights = ~weight, data = data)
# 
# RtoD <- svyglm(pidfac_dr ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# ItoD <- svyglm(pidfac_di ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# RtoI <- svyglm(pidfac_ir ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# names <- c('t', 't', 't', 't', 't', 't', 't', 't')
# plot_summs(RtoD, ItoD, RtoI,
#            model.names = c('Republican to Democrat',
#                            'Independent to Democrat',
#                            'Republican to Independent'),
#            coefs = c('Bachelor\'s degree' = 'justbachelorsdegree',
#                      'Graduate Degree' = 'graddegree',
#                      'Black'= 'black',
#                      'Hispanic' = 'hispanic',
#                      'Male' = 'male',
#                      'Age' = 'age01',
#                      'Family Income' = 'faminc_missmedsub01',
#                      'Service Attendance' = 'churchatt601'),
#            colors = rep(c('black', 'gray30', 'gray60'),8 ),
#            legend.title = 'Outcome') +
#   david_func() +
#   labs(x = '', y = 'Coefficients')
Show the code
# # Models
# R2 <- svyglm(pidfac_r ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# I2 <- svyglm(pidfac_i ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# D2 <- svyglm(pidfac_d ~ justbachelorsdegree + graddegree + collegeelitism + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, design = mydesign, family = quasibinomial(link = "probit"))
# 
# # Function to extract predicted probabilities
# pred_func <- function(reg, bach, grad, elite, pid, educ) {
# 
#     predict_data <- data.frame(
#     
#     # desired levels of education
#     justbachelorsdegree = bach,
#     graddegree = grad,
#     collegeelitism = elite,
#     
#     # covariates fixed at their menas
#     black = mean(data$black, na.rm = TRUE),
#     hispanic = mean(data$hispanic, na.rm = TRUE),
#     male = mean(data$male, na.rm = TRUE),
#     age01 = mean(data$age01, na.rm = TRUE),
#     faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE),
#     churchatt601 = mean(data$churchatt601, na.rm = TRUE)
#   )
#   
#   # Add labels to dataframes
#   preds <- predict(reg, newdata = predict_data, type = "response")
#   preds <- as.data.frame(preds)
#   preds$pid <- pid
#   preds$educ <- educ
#   return(preds)
#   
# }
# 
# # Creating dataframe of predictions
# dd1 <- pred_func(D2, 1, 0, 0,  'Democrat', 'Bachelor\'s degree (non-elite)') 
# dd2 <- pred_func(D2, 1, 0, 1,  'Democrat', 'Bachelor\'s degree (elite)') 
# dd3 <- pred_func(D2, 1, 1, 0,  'Democrat', 'Graduate degree (non-elite)') 
# dd4 <- pred_func(D2, 1, 1, 1,  'Democrat', 'Graduate degree (elite)') 
# 
# 
# ii1 <- pred_func(I2, 1, 0, 0,  'Independent', 'Bachelor\'s degree (non-elite)') 
# ii2 <- pred_func(I2, 1, 0, 1,  'Independent', 'Bachelor\'s degree (elite)') 
# 
# ii3 <- pred_func(I2, 1, 1, 0,  'Independent', 'Graduate degree (non-elite)') 
# ii4 <- pred_func(I2, 1, 1, 1,  'Independent', 'Graduate degree (elite)') 
# 
# 
# rr1 <- pred_func(R2, 1, 0, 0,  'Republican', 'Bachelor\'s degree (non-elite)') 
# rr2 <- pred_func(R2, 1, 0, 1,  'Republican', 'Bachelor\'s degree (elite)') 
# 
# rr3 <- pred_func(R2, 1, 1, 0,  'Republican', 'Graduate degree (non-elite)') 
# rr4 <- pred_func(R2, 1, 1, 1,  'Republican', 'Graduate degree (elite)') 
# 
# 
# f3.2 <- bind_rows(dd1, dd2, dd3, dd4, ii1, ii2, ii3, ii4, rr1, rr2, rr3, rr4)
# 
# f3.2 <- f3.2 %>% 
#   mutate(ymax = response + SE,
#          ymin = response - SE)
# 
# # Plot
# f3.2 %>% 
#   filter(pid != 'Independent') %>%
#   ggplot(., aes(x = educ, y = response, ymin = ymin, ymax = ymax)) +
#   geom_col(color = 'black', position = 'dodge', fill = 'grey60') +
#   geom_errorbar(color = 'black', position = position_dodge(width = 0.9),
#                 width = 0.2) +
#   facet_wrap(~pid) +
#   david_func() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   scale_y_continuous(labels = scales::percent)
# 
Show the code
# predict_data <- data.frame(
#   justbachelorsdegree = 0,
#   graddegree = 0,
#   black = mean(data$black, na.rm = TRUE),
#   hispanic = mean(data$hispanic, na.rm = TRUE),
#   male = mean(data$male, na.rm = TRUE),
#   age01 = mean(data$age01, na.rm = TRUE),
#   faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE),
#   churchatt601 = mean(data$churchatt601, na.rm = TRUE)
# )
# 
# 
# less_than_bachelors <- predict(D, newdata = predict_data, type = "response")
# 
# predict_data <- data.frame(
#   justbachelorsdegree = 1,
#   graddegree = 0,
#   black = mean(data$black, na.rm = TRUE),
#   hispanic = mean(data$hispanic, na.rm = TRUE),
#   male = mean(data$male, na.rm = TRUE),
#   age01 = mean(data$age01, na.rm = TRUE),
#   faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE),
#   churchatt601 = mean(data$churchatt601, na.rm = TRUE)
# )
# 
# 
# bachelors <- predict(D, newdata = predict_data, type = "response")
# 
# predict_data <- data.frame(
#   justbachelorsdegree = 1,
#   graddegree = 1,
#   black = mean(data$black, na.rm = TRUE),
#   hispanic = mean(data$hispanic, na.rm = TRUE),
#   male = mean(data$male, na.rm = TRUE),
#   age01 = mean(data$age01, na.rm = TRUE),
#   faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE),
#   churchatt601 = mean(data$churchatt601, na.rm = TRUE)
# )
# 
# 
# graduate <- predict(D, newdata = predict_data, type = "response")
# 
# less_than_bachelors
# bachelors
# graduate
Show the code
# library(nnet)
# multi_mod <- multinom(pidfac ~ justbachelorsdegree + graddegree + black + hispanic + male + age01 + faminc_missmedsub01 + churchatt601, data = data, weights = weight)
# 
# predict_data <- data.frame(
#   justbachelorsdegree = 0,
#   graddegree = 0,
#   black = mean(data$black, na.rm = TRUE),
#   hispanic = mean(data$hispanic, na.rm = TRUE),
#   male = mean(data$male, na.rm = TRUE),
#   age01 = mean(data$age01, na.rm = TRUE),
#   faminc_missmedsub01 = mean(data$faminc_missmedsub01, na.rm = TRUE),
#   churchatt601 = mean(data$churchatt601, na.rm = TRUE)
# )
# 
# 
# predicted_probs <- predict(multi_mod, newdata = predict_data, type = "probs")
# predicted_probs