## The dimensions of the dataset are 14 x 9: there are 14 observations and 9 variables. The data cover the years 1980 to 2008, and the maximum number of felons is 3168.
# I could have gotten the answers for this question from the environment pane, dim(), or summary(), but figured I would just mess around with basic functions. Also, I usually set echo = FALSE in a chunk, but figured it would be easiest to include the code here and just cat my answers and output.
# Basic shape of the dataset: c(rows, columns).
dimensions <- dim(data)
# Number of observations. nrow() returns the row count directly, replacing the
# earlier count() + as.numeric() round trip.
obs <- nrow(data)
# Number of variables (columns).
variables <- ncol(data)
# First and last year present in the data, ignoring missing years.
time <- range(data$year, na.rm = TRUE)
# Largest value in the felons column; na.rm = TRUE matches the handling used
# for the year range above so a single missing value does not yield NA.
max.felons <- max(data$felons, na.rm = TRUE)
Overall, the VAP turnout rate trends lower than the VEP turnout rate. I included two visual presentations showing that, when turnout is measured against those eligible to vote (VEP) rather than against everyone of voting age at the time of the election (VAP), the turnout rate is higher.
Year | Rate (%) |
1980 | 52.0397 |
1982 | 40.2452 |
1984 | 52.5375 |
1986 | 36.0785 |
1988 | 49.7226 |
1990 | 35.9388 |
1992 | 54.0410 |
1994 | 38.0309 |
1996 | 47.5338 |
1998 | 34.8317 |
2000 | 49.3421 |
2002 | 35.8285 |
2004 | 54.5478 |
2008 | 55.6741 |
Year | Rate (%) |
1980 | 54.1955 |
1982 | 42.1370 |
1984 | 55.2486 |
1986 | 38.1412 |
1988 | 52.7685 |
1990 | 38.4189 |
1992 | 58.1138 |
1994 | 41.1263 |
1996 | 51.6579 |
1998 | 38.0932 |
2000 | 54.2245 |
2002 | 39.5106 |
2004 | 60.1008 |
2008 | 61.5543 |
Year | Rate (%) |
1980 | 52.0397 |
1982 | 40.2452 |
1984 | 52.5375 |
1986 | 36.0785 |
1988 | 49.7226 |
1990 | 35.9388 |
1992 | 54.0410 |
1994 | 38.0309 |
1996 | 47.5338 |
1998 | 34.8317 |
2000 | 49.3421 |
2002 | 35.8285 |
2004 | 54.5478 |
2008 | 55.6741 |
Year | Rate (%) |
1980 | 54.1955 |
1982 | 42.1370 |
1984 | 55.2486 |
1986 | 38.1412 |
1988 | 52.7685 |
1990 | 38.4189 |
1992 | 58.1138 |
1994 | 41.1263 |
1996 | 51.6579 |
1998 | 38.0932 |
2000 | 54.2245 |
2002 | 39.5106 |
2004 | 60.1008 |
2008 | 61.5543 |
# Fold the overseas count into VAP so the denominator includes overseas voters.
# NOTE(review): VAP is overwritten in place, so re-running this statement adds
# the overseas count a second time.
data$VAP <- data$VAP + data$overseas

# VAP turnout rate: total votes as a percentage of VAP, rounded to 4 decimals.
data$`VAP Turnout` <- round(data$total / data$VAP * 100, 4)
data <- data %>% relocate(`VAP Turnout`, .before = ANES)

# VAP turnout table by year (two columns, renamed for display).
VAP <- data %>%
  select(`Election Year` = year, `Turnout Rate` = `VAP Turnout`) %>%
  as.data.frame()

# Shared look for the tables; font.size is numeric (points), not a string.
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10
)

# Year is shown without decimals or thousands separators; the table is autofit
# like the other tables in this document.
t.1 <- flextable(VAP) %>%
  colformat_num(j = "Election Year", digits = 0, big.mark = "") %>%
  set_caption("Turnout Rate by Election Year [VAP]", style = "Table Caption") %>%
  set_header_labels(`Election Year` = "Year", `Turnout Rate` = "Rate (%)") %>%
  autofit()
# VEP turnout rate: total votes as a percentage of VEP, rounded to 4 decimals.
data$`VEP Turnout` <- round(data$total / data$VEP * 100, 4)
data <- data %>% relocate(`VEP Turnout`, .before = ANES)

# VEP turnout table by year (two columns, renamed for display).
VEP <- data %>%
  select(`Election Year` = year, `Turnout Rate` = `VEP Turnout`) %>%
  as.data.frame()

# Positional rename of every column in data. This must run after the select()
# above, which still uses the old lower-case `year`.
# NOTE(review): assumes data has exactly these 11 columns in this order at this
# point -- confirm if columns are added or reordered upstream.
colnames(data) <- c(
  "Year", "VEP", "VAP", "Total Votes", "VAP Turnout", "VEP Turnout",
  "ANES", "Felons", "NonCitizens", "OS Voters", "OS Ballots"
)

# Shared look for the tables; font.size is numeric (points), not a string.
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10
)

# Year is shown without decimals or thousands separators.
t.2 <- flextable(VEP) %>%
  colformat_num(j = "Election Year", digits = 0, big.mark = "") %>%
  set_caption("Turnout Rate by Election Year [VEP]", style = "Table Caption") %>%
  set_header_labels(`Election Year` = "Year", `Turnout Rate` = "Rate (%)") %>%
  autofit()
# Curiosity on what the visualisation of the data looks like

# Long format: one row per (Year, Turnout Type) with the rate in `Rate`.
# pivot_longer() replaces the superseded gather().
df <- data %>%
  select(Year, `VEP Turnout`, `VAP Turnout`) %>%
  pivot_longer(cols = -Year, names_to = "Turnout Type", values_to = "Rate")
df$`Turnout Type` <- as.factor(df$`Turnout Type`)

# p1: line chart of both turnout series over time.
p1 <- ggplot(df, aes(x = Year, y = Rate)) +
  geom_line(aes(color = `Turnout Type`), size = .5) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  labs(
    title = "VEP vs VAP Turnout",
    caption = paste0(
      "Based on Data From Problem Set Adapted From Imai, Kosuke.\n",
      "Quantitative Social Science: An Introduction. ",
      "Princeton University Press, 2018."
    )
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(name = "Year", breaks = seq(1980, 2008, 2)) +
  scale_y_continuous(
    name = "Rate",
    breaks = seq(0, 65, 5),
    labels = scales::percent_format(scale = 1)
  )

# p2: back-to-back horizontal bars. VEP rates are negated so they extend to the
# left of zero; the axis labels use abs() so both sides read as positive rates.
df <- df %>%
  mutate(Rate = ifelse(`Turnout Type` == "VEP Turnout", -Rate, Rate))

p2 <- ggplot(df, aes(x = Year, y = Rate, fill = `Turnout Type`)) +
  geom_bar(stat = "identity", width = .6) +
  coord_flip() +
  scale_x_continuous(name = "Year", breaks = seq(1980, 2008, 2)) +
  scale_y_continuous(
    name = "Rate [By Percent]",
    # Previously written as the positional argument c(limits = -70, 70), which
    # never reached `limits`; pass the symmetric range by name instead.
    limits = c(-70, 70),
    breaks = seq(-70, 70, 5),
    labels = abs
  ) +
  labs(
    title = "VEP vs VAP Turnout",
    caption = paste0(
      "Based on Data From Problem Set Adapted\n",
      "From Imai, Kosuke. Quantitative Social Science:\n",
      "An Introduction.\n",
      "Princeton University Press, 2018."
    )
  ) +
  theme(
    plot.title = element_text(hjust = .5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_fill_brewer(palette = "Paired")
## The mean difference between the ANES and the VAP turnout estimate is 20.32913. The range of the differences is 11.0612 to 26.1715. The mean difference between the ANES and the VEP turnout estimate is 16.83635. The range of the differences is 8.5811 to 22.4894.
The mean difference between the ANES estimate and the VEP turnout rate is lower than the mean difference between the ANES estimate and the VAP turnout rate. Both ranges are also wide, which suggests that the differences have a fairly large standard deviation.
# Per-election gap between the ANES figure and the computed VAP turnout rate.
data[["VAPDif"]] <- data[["ANES"]] - data[["VAP Turnout"]]
# Summary of the VAP gaps. descr() is defined outside this view; the code below
# reads the mean, min and max elements of its result.
vapdif <- descr(data[["VAPDif"]])
vapd.mean <- vapdif$mean
vapd.min <- vapdif$min
vapd.max <- vapdif$max

# Same gap and summary, this time against the computed VEP turnout rate.
data[["VEPDif"]] <- data[["ANES"]] - data[["VEP Turnout"]]
vepdif <- descr(data[["VEPDif"]])
vepd.mean <- vepdif$mean
vepd.min <- vepdif$min
vepd.max <- vepdif$max
## Yes, the bias of the ANES varies across election types and is larger in presidential elections: the mean difference between the ANES and VEP turnout rates is 17.89201 for presidential elections, compared with 15.4288 for midterm elections.
## Create a column for the election type.
## Presidential elections fall in years divisible by 4. This reproduces the
## previous hard-coded list (1980, 1984, ..., 2008) and also labels years
## outside that list correctly. Every other year is labelled "Midterm".
data <- data %>%
  mutate(`Election Type` = ifelse(Year %% 4 == 0, "Presidential", "Midterm"))

## Midterm elections
data.midterm <- data %>% filter(`Election Type` == "Midterm")
## Presidential elections
data.presidential <- data %>% filter(`Election Type` == "Presidential")

## ANES vs VEP turnout per election type. bias() is defined outside this view;
## NOTE(review): the write-up reports its result as a mean difference -- confirm
## the argument order (ANES first, VEP turnout second) matches that helper.
## Mean diff midterm
m.midterm <- bias(data.midterm$ANES, data.midterm$`VEP Turnout`)
## Mean diff presidential
m.pres <- bias(data.presidential$ANES, data.presidential$`VEP Turnout`)
Year | Rate(%) |
Unadjusted VAP | 55.6741 |
Adjusted VAP | 61.5563 |
VEP | 61.5543 |
ANES Turnout | 78.0000 |
## After adjusting the VAP for 2008, the VAP turnout rate increased by 5.8822 percentage points. This narrows the gap between the ANES and VAP rates and leaves the adjusted VAP rate slightly higher than the VEP rate.
## Adjust the year 2008: work on a copy holding only the 2008 row.
adj.data <- data %>% filter(Year == 2008)

# Unadjusted 2008 VAP turnout, captured before VAP is modified below.
unadj.vap <- as.numeric(adj.data$`VAP Turnout`)

# Remove felons and non citizens from VAP count in 2008
adj.data$VAP <- adj.data$VAP - adj.data$Felons - adj.data$NonCitizens

# NOTE(review): an earlier version subtracted `OS Voters` (a count) from
# `VAP Turnout` (a percentage) at this point. That value was overwritten by the
# recomputation below before it was ever read, so it had no effect and has been
# dropped. If an overseas adjustment is intended, apply it to the VAP or
# `Total Votes` counts before the rate is computed -- confirm against the
# assignment.

# Adjusted VAP turnout: total votes as a percentage of the adjusted VAP.
adj.data$`VAP Turnout` <- round(adj.data$`Total Votes` / adj.data$VAP * 100, 4)

# Scalars for the comparison table (all taken from the 2008 row).
adj.vap <- as.numeric(adj.data$`VAP Turnout`)
VEP <- as.numeric(adj.data$`VEP Turnout`)
ANES <- as.numeric(adj.data$ANES)

q5 <- data.frame(
  Measure = c("Unadjusted VAP", "Adjusted VAP", "VEP", "ANES Turnout"),
  `Turning Rate` = c(unadj.vap, adj.vap, VEP, ANES),
  check.names = FALSE
)

## Create Table
# font.size is numeric (points), not a string.
set_flextable_defaults(
  font.color = "#0a064a",
  font.family = "Arial",
  font.size = 10
)

# The first column lists measures, not years, so its header is "Measure".
t.3 <- flextable(q5) %>%
  set_caption("Comparison of Turnout Rates for 2008", style = "Table Caption") %>%
  set_header_labels(`Measure` = "Measure", `Turning Rate` = " Rate(%)") %>%
  autofit()

# Change in the VAP turnout rate caused by the adjustment.
vap.dif <- adj.vap - unadj.vap
# Remaining gap between the ANES figure and the adjusted VAP rate.
vap.anes <- ANES - adj.vap