# Load necessary libraries
library(tidyverse)
library(skimr)

Bias in Self-reported Turnout

Surveys are frequently used to measure political behavior such as voter turnout, but some researchers are concerned about the accuracy of self-reports. In particular, they worry about possible social desirability bias where in post-election surveys, respondents who did not vote in an election lie about not having voted because they may feel that they should have voted. Is such a bias present in the American National Election Studies (ANES)? The ANES is a nation-wide survey that has been conducted for every election since 1948. The ANES conducts face-to-face interviews with a nationally representative sample of adults. The table below displays the names and descriptions of variables in the turnout.csv data file.

Name Description
year Election year
VEP Voting Eligible Population (in thousands)
VAP Voting Age Population (in thousands)
total Total ballots cast for highest office (in thousands)
ANES ANES estimate of turnout rate
felons Total ineligible felons (in thousands)
noncit Total non-citizens (in thousands)
overseas Total eligible overseas voters (in thousands)
osvoters Total ballots counted by overseas voters (in thousands)

Question 1

Load the data and check the dimensions of the data. Also, obtain a summary of the data. How many observations are there? What is the range of years covered in this data set?

# Read the turnout CSV into the `Turnout` tibble.
# NOTE(review): the path below is absolute and machine-specific, so the script
# only runs where this Google Drive folder is mounted.
library(readr)
Turnout <- read_csv(
  file = "~/Library/CloudStorage/GoogleDrive-levantien83@gmail.com/My Drive/🎓 Study/🎓 PhD Business/1️⃣ Sem 1/BU8014 🔢 Quant/Course Material/Week 2/Week 2 Turnout.csv"
)
## Rows: 14 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): year, VEP, VAP, total, ANES, felons, noncit, overseas, osvoters
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the data
Turnout |> head()
## # A tibble: 6 × 9
##    year    VEP    VAP total  ANES felons noncit overseas osvoters
##   <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>    <dbl>    <dbl>
## 1  1980 159635 164445 86515    71    802   5756     1803       NA
## 2  1982 160467 166028 67616    60    960   6641     1982       NA
## 3  1984 167702 173995 92653    74   1165   7482     2361       NA
## 4  1986 170396 177922 64991    53   1367   8362     2216       NA
## 5  1988 173579 181955 91595    70   1594   9280     2257       NA
## 6  1990 176629 186159 67859    47   1901  10239     2659       NA
# Report the number of rows and columns
Turnout |> dim()
## [1] 14  9
# Per-column summary statistics (min, quartiles, mean, max, NA count)
Turnout |> summary()
##       year           VEP              VAP             total       
##  Min.   :1980   Min.   :159635   Min.   :164445   Min.   : 64991  
##  1st Qu.:1986   1st Qu.:171192   1st Qu.:178930   1st Qu.: 73179  
##  Median :1993   Median :181140   Median :193018   Median : 89055  
##  Mean   :1993   Mean   :182640   Mean   :194226   Mean   : 89778  
##  3rd Qu.:2000   3rd Qu.:193353   3rd Qu.:209296   3rd Qu.:102370  
##  Max.   :2008   Max.   :213314   Max.   :230872   Max.   :131304  
##                                                                   
##       ANES           felons         noncit         overseas       osvoters  
##  Min.   :47.00   Min.   : 802   Min.   : 5756   Min.   :1803   Min.   :263  
##  1st Qu.:57.00   1st Qu.:1424   1st Qu.: 8592   1st Qu.:2236   1st Qu.:263  
##  Median :70.50   Median :2312   Median :11972   Median :2458   Median :263  
##  Mean   :65.79   Mean   :2177   Mean   :12229   Mean   :2746   Mean   :263  
##  3rd Qu.:73.75   3rd Qu.:3042   3rd Qu.:15910   3rd Qu.:2937   3rd Qu.:263  
##  Max.   :78.00   Max.   :3168   Max.   :19392   Max.   :4972   Max.   :263  
##                                                                NA's   :13
# Richer overview from skimr (missingness, spread, inline histograms)
Turnout |> skim()
Data summary
Name Turnout
Number of rows 14
Number of columns 9
_______________________
Column type frequency:
numeric 9
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 1993.14 8.62 1980 1986.50 1993.0 1999.50 2008 ▇▇▇▇▅
VEP 0 1.00 182640.29 16065.14 159635 171191.75 181139.5 193353.25 213314 ▆▇▆▃▃
VAP 0 1.00 194225.86 20543.94 164445 178930.25 193018.0 209295.50 230872 ▆▇▃▆▃
total 0 1.00 89778.29 20627.89 64991 73179.25 89055.0 102369.50 131304 ▇▃▆▂▃
ANES 0 1.00 65.79 10.47 47 57.00 70.5 73.75 78 ▃▁▂▂▇
felons 0 1.00 2176.64 875.28 802 1423.75 2312.0 3042.25 3168 ▅▃▃▃▇
noncit 0 1.00 12229.14 4470.78 5756 8591.50 11972.0 15910.50 19392 ▇▃▆▃▆
overseas 0 1.00 2745.71 840.05 1803 2236.00 2458.5 2937.00 4972 ▇▅▁▁▁
osvoters 13 0.07 263.00 NA 263 263.00 263.0 263.00 263 ▁▁▇▁▁

Answer

- There are 14 observations in the data set.
- The data cover election years from 1980 to 2008.


Question 2

Calculate the turnout rate based on the voting age population or VAP. Note that for this data set, we must add the total number of eligible overseas voters since the VAP variable does not include these individuals in the count. Next, calculate the turnout rate using the voting eligible population or VEP. What difference do you observe?

# Voting-age population plus eligible overseas voters (the question text notes
# that VAP does not count them)
Turnout[["adjusted_VAP"]] <- with(Turnout, VAP + overseas)

# Turnout as a percentage of the overseas-adjusted VAP
Turnout[["turnout_rate_VAP"]] <- with(Turnout, total / adjusted_VAP * 100)

# Turnout as a percentage of the voting-eligible population
Turnout[["turnout_rate_VEP"]] <- with(Turnout, total / VEP * 100)

# Gap between the two rates (VEP-based minus VAP-based), in percentage points
Turnout[["difference"]] <- with(Turnout, turnout_rate_VEP - turnout_rate_VAP)

# View the results using kable with 2 decimal places
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Tabulate both turnout rates and their gap per election year, rounded to
# two decimal places
kable(
  select(Turnout, year, turnout_rate_VAP, turnout_rate_VEP, difference),
  digits = 2
)
year turnout_rate_VAP turnout_rate_VEP difference
1980 52.04 54.20 2.16
1982 40.25 42.14 1.89
1984 52.54 55.25 2.71
1986 36.08 38.14 2.06
1988 49.72 52.77 3.05
1990 35.94 38.42 2.48
1992 54.04 58.11 4.07
1994 38.03 41.13 3.10
1996 47.53 51.66 4.12
1998 34.83 38.09 3.26
2000 49.34 54.22 4.88
2002 35.83 39.51 3.68
2004 54.55 60.10 5.55
2008 55.67 61.55 5.88

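Answer

The VEP-based turnout rate is higher than the VAP-based rate in every election year. The gap ranges from 1.89 percentage points (1982) to 5.88 percentage points (2008) and widens over time, because the VAP denominator includes a growing number of people who cannot vote (non-citizens and ineligible felons).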


Question 3

Compute the difference between VAP and ANES estimates of turnout rate. How big is the difference on average? What is the range of the difference? Conduct the same comparison for the VEP and ANES estimates of voter turnout. Briefly comment on the results.

# VAP-based turnout rate minus the ANES estimate, per election year
Turnout[["difference_VAP_ANES"]] <- with(Turnout, turnout_rate_VAP - ANES)

# VEP-based turnout rate minus the ANES estimate, per election year
Turnout[["difference_VEP_ANES"]] <- with(Turnout, turnout_rate_VEP - ANES)

# Show both differences side by side, rounded to two decimal places.
# A negative value means the ANES estimate is above the computed rate.
kable(
  select(Turnout, year, difference_VAP_ANES, difference_VEP_ANES),
  digits = 2
)
year difference_VAP_ANES difference_VEP_ANES
1980 -18.96 -16.80
1982 -19.75 -17.86
1984 -21.46 -18.75
1986 -16.92 -14.86
1988 -20.28 -17.23
1990 -11.06 -8.58
1992 -20.96 -16.89
1994 -17.97 -14.87
1996 -25.47 -21.34
1998 -17.17 -13.91
2000 -23.66 -18.78
2002 -26.17 -22.49
2004 -22.45 -16.90
2008 -22.33 -16.45

Answer

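The ANES overestimates turnout in every year. Relative to the VAP-based rate, the ANES estimate is about 20.3 percentage points too high on average, with the difference ranging from -26.17 (2002) to -11.06 (1990). Relative to the VEP-based rate, the overestimate is smaller but still large: about 16.8 percentage points on average, ranging from -22.49 (2002) to -8.58 (1990).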


Question 4

Compare the VEP turnout rate with the ANES turnout rate separately for presidential elections and midterm elections. Note that the data set excludes the year 2006. Does the bias of the ANES vary across election types?

# Label each election: years not divisible by 4 are midterms, the rest are
# presidential elections
Turnout[["election_type"]] <- if_else(
  Turnout[["year"]] %% 4 != 0,
  "Midterm",
  "Presidential"
)

# Show the data with the new column attached
print(Turnout)
## # A tibble: 14 × 16
##     year    VEP    VAP  total  ANES felons noncit overseas osvoters adjusted_VAP
##    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>    <dbl>    <dbl>        <dbl>
##  1  1980 159635 164445  86515    71    802   5756     1803       NA       166248
##  2  1982 160467 166028  67616    60    960   6641     1982       NA       168010
##  3  1984 167702 173995  92653    74   1165   7482     2361       NA       176356
##  4  1986 170396 177922  64991    53   1367   8362     2216       NA       180138
##  5  1988 173579 181955  91595    70   1594   9280     2257       NA       184212
##  6  1990 176629 186159  67859    47   1901  10239     2659       NA       188818
##  7  1992 179656 190778 104405    75   2183  11447     2418       NA       193196
##  8  1994 182623 195258  75106    56   2441  12497     2229       NA       197487
##  9  1996 186347 200016  96263    73   2586  13601     2499       NA       202515
## 10  1998 190420 205313  72537    52   2920  14988     2937       NA       208250
## 11  2000 194331 210623 105375    73   3083  16218     2937       NA       213560
## 12  2002 198382 215462  78382    62   3168  17237     3308       NA       218770
## 13  2004 203483 220336 122295    77   3158  18068     3862       NA       224198
## 14  2008 213314 230872 131304    78   3145  19392     4972      263       235844
## # ℹ 6 more variables: turnout_rate_VAP <dbl>, turnout_rate_VEP <dbl>,
## #   difference <dbl>, difference_VAP_ANES <dbl>, difference_VEP_ANES <dbl>,
## #   election_type <chr>
# Time series of the VEP-minus-ANES difference, one line per election type
library(ggplot2)
Turnout |>
  ggplot(
    mapping = aes(x = year, y = difference_VEP_ANES, colour = election_type)
  ) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year",
    y = "Difference",
    title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Type"
  ) +
  theme_minimal()

# Same comparison as a boxplot: distribution of the difference per election type
Turnout |>
  ggplot(
    mapping = aes(x = election_type, y = difference_VEP_ANES, fill = election_type)
  ) +
  geom_boxplot() +
  labs(
    x = "Election Type",
    y = "Difference",
    title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Type"
  ) +
  theme_minimal()


Question 5

Divide the data into half by election years such that you subset the data into two periods. Calculate the difference between the VEP turnout rate and the ANES turnout rate separately for each year within each period. Has the bias of the ANES increased over time?

# Plot the VEP-minus-ANES difference per election year, coloured by period.
#
# The elections are split into two equal-sized periods by chronological
# position, as the question asks. With the 14 elections in this data set that
# is the first 7 (1980-1992) and the last 7 (1994-2008). A fixed `<= 1994`
# cut-off would give an uneven 8/6 split.
#
# The period is mapped as a labelled category and the colours are assigned in
# scale_color_manual(). Putting the literal strings "red"/"blue" inside aes()
# makes ggplot2 treat them as category labels and apply its default palette,
# so the drawn colours do not match the names and the legend title becomes the
# raw expression.
Turnout |>
  arrange(year) |>
  mutate(
    period = if_else(row_number() <= n() / 2, "Earlier half", "Later half")
  ) |>
  ggplot(aes(x = year, y = difference_VEP_ANES, color = period)) +
  geom_point() +
  geom_line() +
  scale_color_manual(
    values = c("Earlier half" = "red", "Later half" = "blue")
  ) +
  labs(title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Year",
       x = "Year",
       y = "Difference",
       color = "Period") +
  theme_minimal()

Question 6

The ANES does not interview overseas voters and prisoners. Calculate an adjustment to the 2008 VAP turnout rate. Begin by subtracting the total number of ineligible felons and non-citizens from the VAP to calculate an adjusted VAP. Next, calculate an adjusted VAP turnout rate, taking care to subtract the number of overseas ballots counted from the total ballots in 2008. Compare the adjusted VAP turnout with the unadjusted VAP, VEP, and the ANES turnout rate. Briefly discuss the results.

# 2008 VAP with ineligible felons and non-citizens removed. The result is a
# single number, so it is repeated in every row of the new column.
Turnout$adjusted_VAP_2008 <- with(
  filter(Turnout, year == 2008),
  VAP - felons - noncit
)

# 2008 ballots cast, excluding ballots counted from overseas voters
Turnout$adjusted_total_2008 <- with(
  filter(Turnout, year == 2008),
  total - osvoters
)

# Adjusted 2008 turnout rate, as a percentage
Turnout$adjusted_turnout_rate_VAP_2008 <- with(
  Turnout,
  adjusted_total_2008 / adjusted_VAP_2008 * 100
)

# Show the adjusted 2008 figures, rounded to two decimal places
kable(
  select(
    filter(Turnout, year == 2008),
    year,
    adjusted_VAP_2008,
    adjusted_total_2008,
    adjusted_turnout_rate_VAP_2008
  ),
  digits = 2
)
year adjusted_VAP_2008 adjusted_total_2008 adjusted_turnout_rate_VAP_2008
2008 208335 131041 62.9
# Compare the 2008 turnout measures side by side: unadjusted VAP rate, VEP
# rate, ANES estimate and the adjusted VAP rate
kable(
  select(
    filter(Turnout, year == 2008),
    year,
    turnout_rate_VAP,
    turnout_rate_VEP,
    ANES,
    adjusted_turnout_rate_VAP_2008
  ),
  digits = 2
)
year turnout_rate_VAP turnout_rate_VEP ANES adjusted_turnout_rate_VAP_2008
2008 55.67 61.55 78 62.9