Gouge CES Mapping Exercise

We are no strangers to the Cooperative Election Study. It is a super rich survey with lots of cool items. Let’s see what we can cook up in this lab!

Let’s load the data.

library(tidyverse)
library(magrittr)
library(rio)
library(broom)
# Let's load the data -----------------------------------------------------


# Thanks to Thomas J Wood for uploading this file to GitHub -- gorgeous work,
# and it means we don't have to upload files independently to our machines.
#
# Read the CES 2022 extract straight from GitHub and return it as a tibble.
# `trust = TRUE` is passed explicitly because rio warns that a missing `trust`
# will be set to FALSE by default for RDS files in 2.0.0. Reading an .rds file
# deserialises R objects, so only do this for a source you actually trust.
t1 <- rio::import(
  "https://github.com/thomasjwood/constraint/raw/main/ces/ces_22_c.rds",
  setclass = "tbl_df",
  trust = TRUE
)

## Warning: Missing `trust` will be set to FALSE by default for RDS in 2.0.0.

Now that we have the data, let’s make a codebook and poke around at a few variables.

# Build a codebook: one row per variable in t1 that carries a "label"
# attribute, with the variable name in `nms` and the label text in `labs`.
# The labels are extracted once; unlist() drops columns whose label is NULL,
# and the names left on the resulting vector are the variable names.
cb <- t1 %>%
  map(\(column) attr(column, "label")) %>%
  unlist() %>%
  {
    tibble(nms = names(.), labs = .)
  }


# does the codebook work?
cb

## # A tibble: 697 × 2
##    nms              labs                     
##    <chr>            <chr>                    
##  1 caseid           Case ID                  
##  2 tookpost         Took post-election survey
##  3 commonweight     Weight - Common          
##  4 commonpostweight Weight - Common Post     
##  5 CCEStake         Consent to participate   
##  6 add_confirm      Confirm address          
##  7 inputzip         Zip code                 
##  8 birthyr          Birth Year               
##  9 gender4          Gender                   
## 10 gender4_t        Gender - Other           
## # ℹ 687 more rows

# we can map over one or more search strings and filter the codebook for each
# then we look at the specific items we’re interested in

# For each search string, keep the codebook rows whose label contains it.
# The result is a list with one filtered tibble per search string.
map(
  "Executive Orders Pre",
  \(pattern) filter(cb, str_detect(labs, pattern))
)

## [[1]]
## # A tibble: 5 × 2
##   nms       labs                                                                
##   <chr>     <chr>                                                               
## 1 CC22_355a Executive Orders Pre -- The United States re-joins the Paris Climat…
## 2 CC22_355b Executive Orders Pre -- The United States re-joins the World Health…
## 3 CC22_355c Executive Orders Pre -- Order all federal agencies to buy clean ene…
## 4 CC22_355d Executive Orders Pre -- Increase the minimum wage paid to federal c…
## 5 CC22_355e Executive Orders Pre -- Require that all employees at large compani…

#that is so nifty and cool, we could even look at the first few responses 


# For each search string, find the variables whose label contains it and pull
# those columns out of the survey data.
map(
  "Executive Orders Pre ",
  \(pattern) {
    matched_vars <- cb %>%
      filter(str_detect(labs, pattern)) %>%
      pull(nms)

    # any_of() is a selection helper that takes a character vector of column
    # names; unlike all_of(), it does not error when a name is absent.
    select(t1, any_of(matched_vars))
  }
)

## [[1]]
## # A tibble: 60,000 × 5
##    CC22_355a CC22_355b CC22_355c CC22_355d CC22_355e
##    <fct>     <fct>     <fct>     <fct>     <fct>    
##  1 Support   Support   Support   Support   Support  
##  2 Oppose    Oppose    Oppose    Support   Oppose   
##  3 Support   Support   Oppose    Support   Oppose   
##  4 Support   Support   Support   Support   Support  
##  5 Support   Support   Support   Support   Oppose   
##  6 Oppose    Oppose    Oppose    Oppose    Oppose   
##  7 Oppose    Oppose    Oppose    Oppose    Oppose   
##  8 Support   Support   Support   Support   Oppose   
##  9 Support   Support   Oppose    Support   Support  
## 10 Support   Support   Support   Support   Support  
## # ℹ 59,990 more rows

Executive orders get lots of attention from the media. In this lab, I want us to look at support for Executive Orders.

# Keep only the columns this exercise works with: the case id, the common
# weight, the five executive-order items, and five respondent characteristics.
# na.omit() then drops every row that has a missing value in any kept column.
t2 <- na.omit(
  select(
    t1,
    caseid, commonweight,
    CC22_355a:CC22_355e,
    educ, pid3, ideo5, newsint, race
  )
)

t2

## # A tibble: 59,180 × 12
##      caseid commonweight CC22_355a CC22_355b CC22_355c CC22_355d CC22_355e educ 
##       <dbl>        <dbl> <fct>     <fct>     <fct>     <fct>     <fct>     <fct>
##  1   1.98e9        3.65  Support   Support   Support   Support   Support   Post…
##  2   1.98e9        0.780 Oppose    Oppose    Oppose    Support   Oppose    Some…
##  3   1.98e9        0.892 Support   Support   Oppose    Support   Oppose    4-ye…
##  4   1.98e9        1.10  Support   Support   Support   Support   Support   Post…
##  5   1.98e9        0.543 Support   Support   Support   Support   Oppose    Post…
##  6   1.98e9        0.114 Oppose    Oppose    Oppose    Oppose    Oppose    4-ye…
##  7   1.98e9        0.899 Oppose    Oppose    Oppose    Oppose    Oppose    High…
##  8   1.98e9        0.633 Support   Support   Support   Support   Oppose    Post…
##  9   1.98e9        0.900 Support   Support   Oppose    Support   Support   4-ye…
## 10   1.98e9        0.725 Support   Support   Support   Support   Support   Post…
## # ℹ 59,170 more rows
## # ℹ 4 more variables: pid3 <fct>, ideo5 <fct>, newsint <fct>, race <fct>

If we just want to test relationships between pairs of variables, we can use map2 in the following way:

# Cross-tabulate each executive-order item against one paired predictor.
# map2() walks the two vectors in parallel (first item with first predictor,
# second with second, ...), so this yields five tables, not every pairing.
# With commonweight on the left of the formula, xtabs() sums the weights in
# each cell; prop.table(2) turns each column into proportions, which are then
# rounded to two decimals.
map2(
  .x = c("CC22_355a", "CC22_355b", "CC22_355c", "CC22_355d", "CC22_355e"),
  .y = c("pid3", "ideo5", "educ", "race", "newsint"),
  .f = \(item, predictor) {
    weighted_tab <- str_c("commonweight ~", item, " + ", predictor) %>%
      xtabs(data = t2)

    round(prop.table(weighted_tab, 2), 2)
  }
)

## [[1]]
##            pid3
## CC22_355a   Democrat Republican Independent Other Not sure skipped not asked
##   Support       0.89       0.30        0.56  0.51     0.60                  
##   Oppose        0.11       0.70        0.44  0.49     0.40                  
##   skipped       0.00       0.00        0.00  0.00     0.00                  
##   not asked     0.00       0.00        0.00  0.00     0.00                  
## 
## [[2]]
##            ideo5
## CC22_355b   Very liberal Liberal Moderate Conservative Very conservative
##   Support           0.96    0.94     0.75         0.37              0.23
##   Oppose            0.04    0.06     0.25         0.63              0.77
##   skipped           0.00    0.00     0.00         0.00              0.00
##   not asked         0.00    0.00     0.00         0.00              0.00
##            ideo5
## CC22_355b   Not sure skipped not asked
##   Support       0.72                  
##   Oppose        0.28                  
##   skipped       0.00                  
##   not asked     0.00                  
## 
## [[3]]
##            educ
## CC22_355c   No HS High school graduate Some college 2-year 4-year Post-grad
##   Support    0.58                 0.55         0.59   0.56   0.62      0.65
##   Oppose     0.42                 0.45         0.41   0.44   0.38      0.35
##   skipped    0.00                 0.00         0.00   0.00   0.00      0.00
##   not asked  0.00                 0.00         0.00   0.00   0.00      0.00
##            educ
## CC22_355c   skipped not asked
##   Support                    
##   Oppose                     
##   skipped                    
##   not asked                  
## 
## [[4]]
##            race
## CC22_355d   White Black Hispanic Asian Native American Two or more races Other
##   Support    0.65  0.86     0.80  0.78            0.67              0.75  0.59
##   Oppose     0.35  0.14     0.20  0.22            0.33              0.25  0.41
##   skipped    0.00  0.00     0.00  0.00            0.00              0.00  0.00
##   not asked  0.00  0.00     0.00  0.00            0.00              0.00  0.00
##            race
## CC22_355d   Middle Eastern skipped not asked
##   Support             0.80                  
##   Oppose              0.20                  
##   skipped             0.00                  
##   not asked           0.00                  
## 
## [[5]]
##            newsint
## CC22_355e   Most of the time Some of the time Only now and then Hardly at all
##   Support               0.42             0.45              0.43          0.36
##   Oppose                0.58             0.55              0.57          0.64
##   skipped               0.00             0.00              0.00          0.00
##   not asked             0.00             0.00              0.00          0.00
##            newsint
## CC22_355e   Don't know skipped not asked
##   Support         0.43                  
##   Oppose          0.57                  
##   skipped         0.00                  
##   not asked       0.00

The above is useful if we are exploring relationships in the data, but what if we want to be a little more sophisticated? Imagine we want to take the ‘Executive Orders Pre’ survey items CC22_355a:CC22_355e, along with ideology, partisanship, education, race, and news interest, and for every combination of item and predictor, report the categorical correspondence.

Well no worries because we can certainly do that!

# Fit one linear model per executive-order item and summarise each fit.
# Every model has commonweight on the left-hand side and the same five
# respondent covariates (pid3, ideo5, educ, race, newsint) on the right; only
# the executive-order item changes between models, so a single map() over the
# items is all the iteration that is needed. The result is a list of five
# one-row glance() tibbles, in item order.
# NOTE(review): commonweight is labelled "Weight - Common" in the codebook and
# is modelled as the response here -- confirm that is intended rather than
# supplying it through lm()'s `weights` argument.
c("CC22_355a",
  "CC22_355b",
  "CC22_355c",
  "CC22_355d",
  "CC22_355e") %>%
  map(
    \(item) {
      model_formula <- as.formula(
        str_c(
          "commonweight ~ ",
          item,
          " + pid3 + ideo5 + educ + race + newsint"
        )
      )

      lm(model_formula, data = t2) %>%
        glance()
    }
  )

## [[1]]
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic p.value    df  logLik     AIC     BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1    0.0710        0.0706  1.09      174.       0    26 -89001. 178058. 178309.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
## 
## [[2]]
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic p.value    df  logLik     AIC     BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1    0.0709        0.0705  1.09      174.       0    26 -89004. 178063. 178315.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
## 
## [[3]]
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic p.value    df  logLik     AIC     BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1    0.0709        0.0705  1.09      174.       0    26 -89005. 178067. 178319.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
## 
## [[4]]
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic p.value    df  logLik     AIC     BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1    0.0709        0.0704  1.09      173.       0    26 -89006. 178067. 178319.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
## 
## [[5]]
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic p.value    df  logLik     AIC     BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1    0.0709        0.0705  1.09      174.       0    26 -89004. 178065. 178316.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Gouge CES Mapping Exercise

Katie Gouge

2025-02-19