Problem Set 1: Bias in Self-reported Turnout

Question 1: Code

Answer

## The dimensions of the dataset are 14 by 9: there are 14 observations and 9 variables. The data cover the years 1980 to 2008, and the maximum number of felons is 3168.

Code

# I could have gotten the answers for this question from the environment pane, the dimensions,
# or summary(), but figured I would just mess around with basic functions. Also, I usually set
# echo = FALSE in a chunk, but figured it would be easiest to include the code and just cat my
# answers and output.

# Rows and columns of the data set, as c(rows, columns).
dimensions <- dim(data)

# Number of observations. nrow() replaces the count() + as.numeric() round trip;
# as.numeric() keeps `obs` a double, as it was before.
obs <- as.numeric(nrow(data))

# Number of variables (columns).
variables <- ncol(data)

# Earliest and latest election year, ignoring missing values.
time <- range(data$year, na.rm = TRUE)

# Largest felon count; na.rm = TRUE matches the missing-value handling used for `time`.
max.felons <- max(data$felons, na.rm = TRUE)

Question 2

Answer

Overall, the VAP turnout rate trends lower than the VEP turnout rate. I included two visual presentations comparing turnout among those eligible to vote (VEP) with turnout among those of voting age at the time of the election (VAP); in every election year, the VEP has the higher turnout rate.

Turnout Rate by Election Year [VAP]
Year	Rate (%)
1980	52.0397
1982	40.2452
1984	52.5375
1986	36.0785
1988	49.7226
1990	35.9388
1992	54.0410
1994	38.0309
1996	47.5338
1998	34.8317
2000	49.3421
2002	35.8285
2004	54.5478
2008	55.6741

Turnout Rate by Election Year [VEP]
Year	Rate (%)
1980	54.1955
1982	42.1370
1984	55.2486
1986	38.1412
1988	52.7685
1990	38.4189
1992	58.1138
1994	41.1263
1996	51.6579
1998	38.0932
2000	54.2245
2002	39.5106
2004	60.1008
2008	61.5543

Plots

Turnout Rate by Election Year [VAP]
Year	Rate (%)
1980	52.0397
1982	40.2452
1984	52.5375
1986	36.0785
1988	49.7226
1990	35.9388
1992	54.0410
1994	38.0309
1996	47.5338
1998	34.8317
2000	49.3421
2002	35.8285
2004	54.5478
2008	55.6741

Turnout Rate by Election Year [VEP]
Year	Rate (%)
1980	54.1955
1982	42.1370
1984	55.2486
1986	38.1412
1988	52.7685
1990	38.4189
1992	58.1138
1994	41.1263
1996	51.6579
1998	38.0932
2000	54.2245
2002	39.5106
2004	60.1008
2008	61.5543

Code

# Add overseas voters to the VAP so the turnout denominator includes them.
# NOTE: this overwrites data$VAP in place, so re-running this chunk on the same
# `data` object would add the overseas count a second time.
data$VAP <- data$VAP + data$overseas

# VAP turnout: total votes as a percentage of the overseas-inclusive VAP,
# rounded to 4 decimals. The new column is placed directly before ANES.
data$`VAP Turnout` <- round((data$total / data$VAP) * 100, 4)
data <- data %>% relocate(`VAP Turnout`, .before = ANES)

# VAP turnout table by year
VAP <- data %>% select(year, `VAP Turnout`)
colnames(VAP) <- c("Election Year", "Turnout Rate")
VAP <- as.data.frame(VAP)

# Shared look for the flextable output. font.size is a point size, so it is
# passed as a number rather than the string "10".
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10)

# Years are printed without decimals or a thousands separator (1980, not 1,980).
t.1 <- flextable(VAP) %>%
  colformat_num(j = "Election Year", digits = 0, big.mark = "")
# Caption fixed: the opening bracket was missing ("VAP]").
t.1 <- set_caption(t.1, "Turnout Rate by Election Year [VAP]", style = "Table Caption")
t.1 <- set_header_labels(t.1, `Election Year` = "Year", `Turnout Rate` = "Rate (%)")
# autofit() added so this table is sized the same way as t.2.
t.1 <- autofit(t.1)

# VEP turnout: total votes as a percentage of the VEP, rounded to 4 decimals.
# The new column is placed directly before ANES (i.e. right after `VAP Turnout`).
data$`VEP Turnout` <- round((data$total / data$VEP) * 100, 4)
data <- data %>% relocate(`VEP Turnout`, .before = ANES)

# VEP turnout table by year
VEP <- data %>% select(year, `VEP Turnout`)
colnames(VEP) <- c("Election Year", "Turnout Rate")
VEP <- as.data.frame(VEP)

# Give every column a presentation name. The rename is positional, so first
# check that the frame has the expected 11 columns and that the two turnout
# columns sit directly before ANES; otherwise the labels would silently land
# on the wrong columns.
stopifnot(
  ncol(data) == 11,
  identical(names(data)[5:7], c("VAP Turnout", "VEP Turnout", "ANES"))
)
colnames(data) <- c("Year", "VEP", "VAP", "Total Votes", "VAP Turnout", "VEP Turnout",
                    "ANES", "Felons", "NonCitizens", "OS Voters", "OS Ballots")

# Shared look for the flextable output. font.size is a point size, so it is
# passed as a number rather than the string "10".
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10)

# Years are printed without decimals or a thousands separator (1980, not 1,980).
t.2 <- flextable(VEP) %>%
  colformat_num(j = "Election Year", digits = 0, big.mark = "")
t.2 <- set_caption(t.2, "Turnout Rate by Election Year [VEP]", style = "Table Caption")
t.2 <- set_header_labels(t.2, `Election Year` = "Year", `Turnout Rate` = "Rate (%)")
t.2 <- autofit(t.2)

# Curiosity on what the visualisation of the data looks like

# Long format: one row per (Year, turnout measure), which is what ggplot needs
# to map the measure to colour / fill. pivot_longer() replaces the superseded gather().
df <- data %>%
  select(Year, `VEP Turnout`, `VAP Turnout`) %>%
  pivot_longer(cols = -Year, names_to = "Turnout Type", values_to = "Rate")

df$`Turnout Type` <- as.factor(df$`Turnout Type`)

# p1: line chart of both turnout rates over time.
# Title fixed: "WAP" was a typo for "VAP".
p1 <- ggplot(df, aes(x = Year, y = Rate)) +
  geom_line(aes(color = `Turnout Type`), size = .5) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  labs(title = "VEP vs VAP Turnout",
       caption = "Based on Data From Problem Set Adapted From Imai, Kosuke.
       Quantitative Social Science: An Introduction. Princeton University Press, 2018.") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

# One tick per two years on x; y ticks every 5 points, labelled as percentages.
p1 <- p1 +
  scale_x_continuous(name = "Year", breaks = seq(1980, 2008, 2)) +
  scale_y_continuous(name = "Rate", breaks = seq(0, 65, 5),
                     labels = scales::percent_format(scale = 1))

# For the back-to-back bar chart, flip the VEP rates to negative so they are
# drawn to the left of zero; the axis labels below show absolute values again.
# p1 was built from the unflipped df above and is not affected by this change.
df <- df %>%
  mutate(Rate = ifelse(`Turnout Type` == "VEP Turnout", -Rate, Rate))

# p2: mirrored horizontal bars (VEP left, VAP right).
# Fix: the axis limits were written as `c(limits = -70,70)`, an unnamed argument
# that scale_y_continuous() did not treat as `limits`, so the limits were never set.
# They are now passed as limits = c(-70, 70). Title typo "WAP" fixed as well.
p2 <- ggplot(df, aes(x = Year, y = Rate, fill = `Turnout Type`)) +
  geom_bar(stat = "identity", width = .6) +
  coord_flip() +
  scale_x_continuous(name = "Year", breaks = seq(1980, 2008, 2)) +
  scale_y_continuous(name = "Rate [By Percent]", limits = c(-70, 70),
                     breaks = seq(-70, 70, 5), labels = abs) +
  labs(title = "VEP vs VAP Turnout",
       caption = "Based on Data From Problem Set Adapted
                              From Imai, Kosuke. Quantitative Social Science: 
                              An Introduction. 
                              Princeton University Press, 2018.") +
  theme(plot.title = element_text(hjust = .5),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Paired")

Question 3

Answer

## The mean difference between the ANES estimate and the VAP turnout rate is 20.32913, and the differences range from 11.0612 to 26.1715. The mean difference between the ANES estimate and the VEP turnout rate is 16.83635, and the differences range from 8.5811 to 22.4894.

There is a lower mean difference between the ANES estimate and VEP turnout than there is with the VAP turnout rate. The range is also vast, implying that there is probably a higher standard deviation.

Code

# Per-election gap between the ANES figure and the VAP turnout rate, then its
# summary statistics. descr() is not defined in this section; the code only
# relies on its result exposing `mean`, `min` and `max` elements.
data$VAPDif <- with(data, ANES - `VAP Turnout`)
vapdif <- descr(data$VAPDif)
vapd.mean <- vapdif$mean
vapd.min <- vapdif$min
vapd.max <- vapdif$max

# Same calculation against the VEP turnout rate.
data$VEPDif <- with(data, ANES - `VEP Turnout`)
vepdif <- descr(data$VEPDif)
vepd.mean <- vepdif$mean
vepd.min <- vepdif$min
vepd.max <- vepdif$max

Question 4

Answer

## Yes, the bias of the ANES varies across election types and is larger in presidential elections: the mean difference between the ANES and VEP turnout is 17.89201 for presidential elections, compared with 15.4288 for midterm elections.

Code

## Create a column for the election type
# Years in the data that were presidential elections; every other year is a midterm.
presidential.years <- c(1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008)

# %in% against the vector above replaces the eight chained `==` comparisons and
# the temporary copy of Year. A missing Year stays NA, as it did before.
data <- data %>%
  mutate(`Election Type` = case_when(
    is.na(Year) ~ NA_character_,
    Year %in% presidential.years ~ "Presidential",
    TRUE ~ "Midterm"
  ))

## Midterm elections
data.midterm <- data %>% filter(`Election Type` == "Midterm")

## Presidential elections
data.presidential <- data %>% filter(`Election Type` == "Presidential")

# NOTE(review): bias() is not defined in this section; it is presumably the mean
# of (first argument - second argument), e.g. Metrics::bias() - confirm.

## Mean difference, midterm elections (ANES vs. VEP turnout)
m.midterm <- bias(data.midterm$ANES, data.midterm$`VEP Turnout`)

## Mean difference, presidential elections (ANES vs. VEP turnout)
m.pres <- bias(data.presidential$ANES, data.presidential$`VEP Turnout`)

Question 5

Answer

Comparison of Turnout Rates for 2008
Measure	Rate(%)
Unadjusted VAP	55.6741
Adjusted VAP	61.5563
VEP	61.5543
ANES Turnout	78.0000

## After adjusting the VAP for 2008, the VAP turnout rate increased by 5.8822 percentage points. Overall, this led to a smaller difference between the ANES and VAP rates, and to an adjusted VAP rate slightly higher than the VEP rate.

Code

## Adjust the year 2008
# Work on a 2008-only copy so the adjustments below do not touch `data`.
# (The earlier `adj.data = data` was overwritten immediately and has been dropped.)
adj.data <- data %>% filter(Year == 2008)

# Remove felons and non-citizens from the VAP count in 2008
adj.data$VAP <- adj.data$VAP - adj.data$Felons - adj.data$NonCitizens

# NOTE(review): the original next subtracted the `OS Voters` column from the
# `VAP Turnout` percentage. That result was overwritten by the recomputation
# below and never used, so the statement has been removed. If an overseas
# adjustment is intended, it has to be applied to `Total Votes` or `VAP` before
# the turnout is recomputed - confirm against the problem statement.

# Adjusted VAP turnout: total votes as a percentage of the adjusted VAP
adj.data$`VAP Turnout` <- round((adj.data$`Total Votes` / adj.data$VAP) * 100, 4)

# The four 2008 rates to compare. adj.data already holds only the 2008 row, so
# it is read directly instead of being filtered again; pull() extracts the
# column as a vector instead of coercing a one-cell data frame.
unadj.vap <- data %>% filter(Year == 2008) %>% pull(`VAP Turnout`) %>% as.numeric()
adj.vap <- as.numeric(adj.data$`VAP Turnout`)
VEP <- as.numeric(adj.data$`VEP Turnout`)
ANES <- as.numeric(adj.data$ANES)

# Comparison table: one row per measure. check.names = FALSE keeps the space in
# the `Turning Rate` column name, which is kept unchanged.
q5 <- data.frame(
  Measure = c("Unadjusted VAP", "Adjusted VAP", "VEP", "ANES Turnout"),
  `Turning Rate` = c(unadj.vap, adj.vap, VEP, ANES),
  check.names = FALSE
)

## Create Table
# font.size is a point size, so it is passed as a number rather than the string "10".
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10)

t.3 <- flextable(q5)
t.3 <- set_caption(t.3, "Comparison of Turnout Rates for 2008", style = "Table Caption")
# Header fixed: the first column lists measures, not years.
t.3 <- set_header_labels(t.3, `Measure` = "Measure", `Turning Rate` = " Rate(%)")
t.3 <- autofit(t.3)

# Change in VAP turnout caused by the adjustment, and the remaining gap to ANES.
vap.dif <- adj.vap - unadj.vap
vap.anes <- ANES - adj.vap

Problem Set 1: Bias in Self-reported Turnout

Rachel Burgess

May 2022

Question 1: Code

Answer

Code

Question 2

Answer

Plots

Code

Question 3

Answer

Code

Question 4

Answer

Code

Question 5

Answer

Code