# Load necessary libraries
library(tidyverse)
library(skimr)
Bias in Self-reported Turnout
Surveys are frequently used to measure political behavior such as voter turnout, but some researchers are concerned about the accuracy of self-reports. In particular, they worry about possible social desirability bias: in post-election surveys, respondents who did not vote may falsely report having voted because they feel that they should have voted. Is such a bias present in the American National Election Studies (ANES)? The ANES is a nationwide survey that has been conducted for every election since 1948. The ANES conducts face-to-face interviews with a nationally representative sample of adults. The table below displays the names and descriptions of variables in the turnout.csv data file.
Name | Description |
---|---|
year | Election year |
VEP | Voting Eligible Population (in thousands) |
VAP | Voting Age Population (in thousands) |
total | Total ballots cast for highest office (in thousands) |
ANES | ANES estimate of turnout rate |
felons | Total ineligible felons (in thousands) |
noncit | Total non-citizens (in thousands) |
overseas | Total eligible overseas voters (in thousands) |
osvoters | Total ballots counted by overseas voters (in thousands) |
Load the data and check the dimensions of the data. Also, obtain a summary of the data. How many observations are there? What is the range of years covered in this data set?
# Read the turnout CSV into the tibble `Turnout`.
# NOTE(review): the path points into a user-specific Google Drive folder, so
# it will presumably only resolve on the author's machine.
library(readr)
Turnout <- read_csv(
  file = "~/Library/CloudStorage/GoogleDrive-levantien83@gmail.com/My Drive/🎓 Study/🎓 PhD Business/1️⃣ Sem 1/BU8014 🔢 Quant/Course Material/Week 2/Week 2 Turnout.csv"
)
## Rows: 14 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): year, VEP, VAP, total, ANES, felons, noncit, overseas, osvoters
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the data
Turnout |> head()
## # A tibble: 6 × 9
## year VEP VAP total ANES felons noncit overseas osvoters
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1980 159635 164445 86515 71 802 5756 1803 NA
## 2 1982 160467 166028 67616 60 960 6641 1982 NA
## 3 1984 167702 173995 92653 74 1165 7482 2361 NA
## 4 1986 170396 177922 64991 53 1367 8362 2216 NA
## 5 1988 173579 181955 91595 70 1594 9280 2257 NA
## 6 1990 176629 186159 67859 47 1901 10239 2659 NA
# Number of rows (observations) followed by number of columns (variables)
c(nrow(Turnout), ncol(Turnout))
## [1] 14 9
# Per-column summary (min, quartiles, mean, max and NA counts)
Turnout |> summary()
## year VEP VAP total
## Min. :1980 Min. :159635 Min. :164445 Min. : 64991
## 1st Qu.:1986 1st Qu.:171192 1st Qu.:178930 1st Qu.: 73179
## Median :1993 Median :181140 Median :193018 Median : 89055
## Mean :1993 Mean :182640 Mean :194226 Mean : 89778
## 3rd Qu.:2000 3rd Qu.:193353 3rd Qu.:209296 3rd Qu.:102370
## Max. :2008 Max. :213314 Max. :230872 Max. :131304
##
## ANES felons noncit overseas osvoters
## Min. :47.00 Min. : 802 Min. : 5756 Min. :1803 Min. :263
## 1st Qu.:57.00 1st Qu.:1424 1st Qu.: 8592 1st Qu.:2236 1st Qu.:263
## Median :70.50 Median :2312 Median :11972 Median :2458 Median :263
## Mean :65.79 Mean :2177 Mean :12229 Mean :2746 Mean :263
## 3rd Qu.:73.75 3rd Qu.:3042 3rd Qu.:15910 3rd Qu.:2937 3rd Qu.:263
## Max. :78.00 Max. :3168 Max. :19392 Max. :4972 Max. :263
## NA's :13
# Richer per-column overview from skimr
Turnout |> skim()
Name | Turnout |
Number of rows | 14 |
Number of columns | 9 |
_______________________ | |
Column type frequency: | |
numeric | 9 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 1993.14 | 8.62 | 1980 | 1986.50 | 1993.0 | 1999.50 | 2008 | ▇▇▇▇▅ |
VEP | 0 | 1.00 | 182640.29 | 16065.14 | 159635 | 171191.75 | 181139.5 | 193353.25 | 213314 | ▆▇▆▃▃ |
VAP | 0 | 1.00 | 194225.86 | 20543.94 | 164445 | 178930.25 | 193018.0 | 209295.50 | 230872 | ▆▇▃▆▃ |
total | 0 | 1.00 | 89778.29 | 20627.89 | 64991 | 73179.25 | 89055.0 | 102369.50 | 131304 | ▇▃▆▂▃ |
ANES | 0 | 1.00 | 65.79 | 10.47 | 47 | 57.00 | 70.5 | 73.75 | 78 | ▃▁▂▂▇ |
felons | 0 | 1.00 | 2176.64 | 875.28 | 802 | 1423.75 | 2312.0 | 3042.25 | 3168 | ▅▃▃▃▇ |
noncit | 0 | 1.00 | 12229.14 | 4470.78 | 5756 | 8591.50 | 11972.0 | 15910.50 | 19392 | ▇▃▆▃▆ |
overseas | 0 | 1.00 | 2745.71 | 840.05 | 1803 | 2236.00 | 2458.5 | 2937.00 | 4972 | ▇▅▁▁▁ |
osvoters | 13 | 0.07 | 263.00 | NA | 263 | 263.00 | 263.0 | 263.00 | 263 | ▁▁▇▁▁ |
Answer
- There are 14 observations in the data set.
- The data set covers election years from 1980 to 2008.
Calculate the turnout rate based on the voting age population or VAP. Note that for this data set, we must add the total number of eligible overseas voters since the VAP variable does not include these individuals in the count. Next, calculate the turnout rate using the voting eligible population or VEP. What difference do you observe?
# Derive the VAP- and VEP-based turnout rates (in percent) and their gap.
# Eligible overseas voters are added to VAP before it is used as a
# denominator.
Turnout <- Turnout |>
  mutate(
    # VAP plus eligible overseas voters
    adjusted_VAP = VAP + overseas,
    # Ballots cast as a percentage of the adjusted VAP
    turnout_rate_VAP = 100 * (total / adjusted_VAP),
    # Ballots cast as a percentage of the VEP
    turnout_rate_VEP = 100 * (total / VEP),
    # Percentage-point gap: VEP rate minus VAP rate
    difference = turnout_rate_VEP - turnout_rate_VAP
  )
# View the results using kable with 2 decimal places
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Show both turnout rates and their gap per election year, rounded to 2 digits
Turnout[, c("year", "turnout_rate_VAP", "turnout_rate_VEP", "difference")] |>
  kable(digits = 2)
year | turnout_rate_VAP | turnout_rate_VEP | difference |
---|---|---|---|
1980 | 52.04 | 54.20 | 2.16 |
1982 | 40.25 | 42.14 | 1.89 |
1984 | 52.54 | 55.25 | 2.71 |
1986 | 36.08 | 38.14 | 2.06 |
1988 | 49.72 | 52.77 | 3.05 |
1990 | 35.94 | 38.42 | 2.48 |
1992 | 54.04 | 58.11 | 4.07 |
1994 | 38.03 | 41.13 | 3.10 |
1996 | 47.53 | 51.66 | 4.12 |
1998 | 34.83 | 38.09 | 3.26 |
2000 | 49.34 | 54.22 | 4.88 |
2002 | 35.83 | 39.51 | 3.68 |
2004 | 54.55 | 60.10 | 5.55 |
2008 | 55.67 | 61.55 | 5.88 |
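Answer
The VEP turnout rate is higher than the VAP turnout rate in every election year, because the VEP denominator leaves out adults who are not eligible to vote. The gap widens over time, from about 2 percentage points in the early 1980s to almost 6 percentage points in 2008.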
Compute the difference between VAP and ANES estimates of turnout rate. How big is the difference on average? What is the range of the difference? Conduct the same comparison for the VEP and ANES estimates of voter turnout. Briefly comment on the results.
# Gap between each official turnout rate and the ANES estimate, in percentage
# points. A negative value means the ANES estimate is the higher of the two.
Turnout <- Turnout |>
  mutate(
    # VAP-based turnout rate minus ANES estimate
    difference_VAP_ANES = turnout_rate_VAP - ANES,
    # VEP-based turnout rate minus ANES estimate
    difference_VEP_ANES = turnout_rate_VEP - ANES
  )

# Per-year differences, rounded to 2 decimal places
Turnout |>
  select(year, difference_VAP_ANES, difference_VEP_ANES) |>
  kable(digits = 2)

# Average and range (min, max) of each difference, as the exercise asks.
# Output columns are named <difference>_<statistic>.
Turnout |>
  summarise(
    across(
      c(difference_VAP_ANES, difference_VEP_ANES),
      list(mean = mean, min = min, max = max)
    )
  ) |>
  kable(digits = 2)
year | difference_VAP_ANES | difference_VEP_ANES |
---|---|---|
1980 | -18.96 | -16.80 |
1982 | -19.75 | -17.86 |
1984 | -21.46 | -18.75 |
1986 | -16.92 | -14.86 |
1988 | -20.28 | -17.23 |
1990 | -11.06 | -8.58 |
1992 | -20.96 | -16.89 |
1994 | -17.97 | -14.87 |
1996 | -25.47 | -21.34 |
1998 | -17.17 | -13.91 |
2000 | -23.66 | -18.78 |
2002 | -26.17 | -22.49 |
2004 | -22.45 | -16.90 |
2008 | -22.33 | -16.45 |
Answer
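The ANES overestimates turnout in every election year. Compared with the VAP turnout rate, the ANES estimate is about 20 percentage points too high on average (the differences range from -26.17 to -11.06). Compared with the VEP turnout rate, it is about 17 percentage points too high on average (range -22.49 to -8.58). The bias is smaller when measured against the VEP rate, but it remains large.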
Compare the VEP turnout rate with the ANES turnout rate separately for presidential elections and midterm elections. Note that the data set excludes the year 2006. Does the bias of the ANES vary across election types?
# Label each election: years divisible by 4 are presidential elections,
# all other years in the data are midterms.
Turnout <- Turnout |>
  mutate(
    election_type = if_else(year %% 4 == 0, "Presidential", "Midterm")
  )
# Print the tibble including the new column
print(Turnout)
## # A tibble: 14 × 16
## year VEP VAP total ANES felons noncit overseas osvoters adjusted_VAP
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1980 159635 164445 86515 71 802 5756 1803 NA 166248
## 2 1982 160467 166028 67616 60 960 6641 1982 NA 168010
## 3 1984 167702 173995 92653 74 1165 7482 2361 NA 176356
## 4 1986 170396 177922 64991 53 1367 8362 2216 NA 180138
## 5 1988 173579 181955 91595 70 1594 9280 2257 NA 184212
## 6 1990 176629 186159 67859 47 1901 10239 2659 NA 188818
## 7 1992 179656 190778 104405 75 2183 11447 2418 NA 193196
## 8 1994 182623 195258 75106 56 2441 12497 2229 NA 197487
## 9 1996 186347 200016 96263 73 2586 13601 2499 NA 202515
## 10 1998 190420 205313 72537 52 2920 14988 2937 NA 208250
## 11 2000 194331 210623 105375 73 3083 16218 2937 NA 213560
## 12 2002 198382 215462 78382 62 3168 17237 3308 NA 218770
## 13 2004 203483 220336 122295 77 3158 18068 3862 NA 224198
## 14 2008 213314 230872 131304 78 3145 19392 4972 263 235844
## # ℹ 6 more variables: turnout_rate_VAP <dbl>, turnout_rate_VEP <dbl>,
## # difference <dbl>, difference_VAP_ANES <dbl>, difference_VEP_ANES <dbl>,
## # election_type <chr>
# Line chart of the VEP-minus-ANES gap over time, one coloured series per
# election type
library(ggplot2)
Turnout |>
  ggplot(
    mapping = aes(
      x = year,
      y = difference_VEP_ANES,
      colour = election_type
    )
  ) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year",
    y = "Difference",
    title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Type"
  ) +
  theme_minimal()
# Same comparison as a boxplot: distribution of the VEP-minus-ANES gap within
# each election type
Turnout |>
  ggplot(
    mapping = aes(
      x = election_type,
      y = difference_VEP_ANES,
      fill = election_type
    )
  ) +
  geom_boxplot() +
  labs(
    x = "Election Type",
    y = "Difference",
    title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Type"
  ) +
  theme_minimal()
Divide the data into half by election years such that you subset the data into two periods. Calculate the difference between the VEP turnout rate and the ANES turnout rate separately for each year within each period. Has the bias of the ANES increased over time?
# Plot the VEP-minus-ANES gap over time, split into two periods:
# elections up to and including 1994 (red) and elections after 1994 (blue).
#
# Strings produced inside aes() are treated as category labels, not as
# literal colours, so the colours are set explicitly with
# scale_color_manual(). The period is a named variable so that the legend
# shows a readable title and labels.
Turnout |>
  mutate(period = if_else(year <= 1994, "1994 or earlier", "After 1994")) |>
  ggplot(aes(x = year, y = difference_VEP_ANES, color = period)) +
  geom_point() +
  geom_line() +
  scale_color_manual(
    values = c("1994 or earlier" = "red", "After 1994" = "blue")
  ) +
  labs(title = "Difference between VEP and ANES Estimates of Turnout Rate by Election Year",
       x = "Year",
       y = "Difference",
       color = "Period") +
  theme_minimal()
The ANES does not interview overseas voters and prisoners. Calculate an adjustment to the 2008 VAP turnout rate. Begin by subtracting the total number of ineligible felons and non-citizens from the VAP to calculate an adjusted VAP. Next, calculate an adjusted VAP turnout rate, taking care to subtract the number of overseas ballots counted from the total ballots in 2008. Compare the adjusted VAP turnout with the unadjusted VAP, VEP, and the ANES turnout rate. Briefly discuss the results.
# Adjusted VAP turnout rate for 2008.
#
# The three *_2008 columns are filled only on the 2008 row and left NA for
# every other year, so that 2008 figures are not recycled onto rows that
# belong to other elections.
Turnout <- Turnout |>
  mutate(
    # VAP without ineligible felons and non-citizens
    adjusted_VAP_2008 = if_else(
      year == 2008, VAP - felons - noncit, NA_real_
    ),
    # Total ballots without the ballots counted from overseas voters
    adjusted_total_2008 = if_else(
      year == 2008, total - osvoters, NA_real_
    ),
    # Adjusted ballots as a percentage of the adjusted VAP
    adjusted_turnout_rate_VAP_2008 =
      (adjusted_total_2008 / adjusted_VAP_2008) * 100
  )

# View the 2008 results using kable with 2 decimal places
Turnout |>
  filter(year == 2008) |>
  select(
    year, adjusted_VAP_2008, adjusted_total_2008,
    adjusted_turnout_rate_VAP_2008
  ) |>
  kable(digits = 2)
year | adjusted_VAP_2008 | adjusted_total_2008 | adjusted_turnout_rate_VAP_2008 |
---|---|---|---|
2008 | 208335 | 131041 | 62.9 |
# Side-by-side 2008 comparison: unadjusted VAP rate, VEP rate, ANES estimate
# and the adjusted VAP rate, rounded to 2 decimal places
Turnout[
  Turnout$year == 2008,
  c(
    "year", "turnout_rate_VAP", "turnout_rate_VEP", "ANES",
    "adjusted_turnout_rate_VAP_2008"
  )
] |>
  kable(digits = 2)
year | turnout_rate_VAP | turnout_rate_VEP | ANES | adjusted_turnout_rate_VAP_2008 |
---|---|---|---|---|
2008 | 55.67 | 61.55 | 78 | 62.9 |