Hershberger Clarity Final

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)

Warning: package 'ggplot2' was built under R version 4.5.2

library(scales)
# Load the results and demographics tables from CSV files; the paths are
# relative, so both files are looked up in the working directory.
maineresults <- read.csv(file = "maineresults.csv")
mainedemographics <- read.csv(file = "mainedemographics.csv")

The code below totals the statewide yes and no votes for each of the three ballot measures.

# Statewide vote totals: each call sums one yes/no column over every row of
# maineresults. The column prefixes are rcv, mw and bg; the printed totals
# below feed the support shares computed next.
sum(maineresults$rcv_yes)

[1] 378831

sum(maineresults$rcv_no)

[1] 353856

sum(maineresults$mw_yes)

[1] 412799

sum(maineresults$mw_no)

[1] 332946

sum(maineresults$bg_yes)

[1] 361873

sum(maineresults$bg_no)

[1] 386869

# Statewide yes share for each measure: yes / (yes + no).
# The shares are computed directly from maineresults rather than from totals
# copied out of the console output, so they cannot drift out of sync with the
# input file. With the current data the values are unchanged.
# NOTE(review): `mj_support` holds the share for the `mw_` columns; the name
# is kept because it is printed right after this chunk.
rcv_support <- sum(maineresults$rcv_yes) /
  (sum(maineresults$rcv_yes) + sum(maineresults$rcv_no))
mj_support <- sum(maineresults$mw_yes) /
  (sum(maineresults$mw_yes) + sum(maineresults$mw_no))
bg_support <- sum(maineresults$bg_yes) /
  (sum(maineresults$bg_yes) + sum(maineresults$bg_no))

# Print the three statewide yes shares (yes votes divided by yes plus no).
# `mj_support` is the share for the `mw_` columns.
rcv_support

[1] 0.5170434

mj_support

[1] 0.5535391

bg_support

[1] 0.483308

In the code above, I calculated the statewide share of yes votes (yes votes divided by yes plus no votes) for each of the three ballot initiatives.

# Collapse maineresults into a single statewide row: first total every
# yes/no column (ignoring missing values), then add the yes share
# yes / (yes + no) for each measure.
election_results <- maineresults %>%
  summarise(
    across(
      c(rcv_yes, rcv_no, mw_yes, mw_no, bg_yes, bg_no),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  mutate(
    rcv_support = rcv_yes / (rcv_yes + rcv_no),
    mw_support = mw_yes / (mw_yes + mw_no),
    bg_support = bg_yes / (bg_yes + bg_no)
  )

# Show the one-row summary.
election_results

  rcv_yes rcv_no mw_yes  mw_no bg_yes  bg_no rcv_support mw_support bg_support
1  378831 353856 412799 332946 361873 386869   0.5170434  0.5535391   0.483308

# Reshape the three statewide support shares into long form (one row per
# measure) and swap the column names for display labels used on the chart.
# NOTE(review): the `mw_` columns are labelled "Marijuana Legalization" here;
# confirm against the data codebook that `mw` refers to that measure.
ballot_results <- election_results %>%
  select(rcv_support, mw_support, bg_support) %>%
  pivot_longer(
    cols = everything(),
    names_to = "measure",
    values_to = "support"
  ) %>%
  mutate(
    measure = recode(
      measure,
      rcv_support = "Ranked Choice Voting",
      mw_support = "Marijuana Legalization",
      bg_support = "Background Checks"
    )
  )

# Bar chart of the statewide yes share per measure.
ggplot(data = ballot_results, mapping = aes(measure, support)) +
  geom_col(fill = "steelblue") +
  # Whole-percent label just above each bar.
  geom_text(
    mapping = aes(label = percent(support, accuracy = 1)),
    vjust = -0.4
  ) +
  # Dashed reference line at the 50% majority threshold.
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(from = 0, to = 1, by = 0.2),
    labels = percent_format()
  ) +
  labs(
    x = "Ballot Measure",
    y = "Yes Vote Share",
    title = "Statewide Support for Maine Ballot Measures"
  ) +
  theme_minimal()

As shown above, I created a bar graph to visualize these results. I also added a dashed line at the fifty percent mark to show how far above or below a majority each initiative finished.

# Load the projections table, then total the projected yes votes for each
# measure over all rows (columns proj_rcv_yes, proj_mw_yes, proj_bg_yes).
maineprojections <- read.csv("maineprojections.csv")
sum(maineprojections$proj_rcv_yes)

[1] 421842

sum(maineprojections$proj_mw_yes)

[1] 450264

sum(maineprojections$proj_bg_yes)

[1] 390712

# Collapse the projections into one statewide row: total the projected vote
# count and the projected yes votes per measure, then divide each yes total
# by the projected vote total.
# NOTE(review): these shares use proj_votes as the denominator, whereas the
# actual shares use yes + no; confirm the two are comparable.
proj_results <- maineprojections %>%
  summarise(
    across(
      c(proj_votes, proj_mw_yes, proj_bg_yes, proj_rcv_yes),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  mutate(
    rcv_support = proj_rcv_yes / proj_votes,
    mw_support = proj_mw_yes / proj_votes,
    bg_support = proj_bg_yes / proj_votes
  )

# Long-form actual shares: one row per measure, tagged "Actual".
actual_results <- election_results %>%
  select(rcv_support, mw_support, bg_support) %>%
  pivot_longer(
    cols = everything(),
    names_to = "measure",
    values_to = "support"
  ) %>%
  mutate(type = "Actual")

# Long-form projected shares in the same layout, tagged "Projected".
proj_maine <- proj_results %>%
  select(rcv_support, mw_support, bg_support) %>%
  pivot_longer(
    cols = everything(),
    names_to = "measure",
    values_to = "support"
  ) %>%
  mutate(type = "Projected")

# Stack the two tables row-wise, then replace the column-name codes in
# `measure` with display labels.
comparison <- bind_rows(actual_results, proj_maine)
comparison$measure <- recode(
  comparison$measure,
  rcv_support = "Ranked Choice Voting",
  mw_support = "Marijuana Legalization",
  bg_support = "Background Checks"
)

# Side-by-side bars comparing projected and actual yes shares per measure.
ggplot(data = comparison, mapping = aes(measure, support, fill = type)) +
  geom_col(position = "dodge") +
  # Dodge the labels by the same width as the bars so each label sits over
  # its own bar.
  geom_text(
    mapping = aes(label = percent(support, accuracy = 1)),
    position = position_dodge(width = 0.9),
    vjust = -0.4
  ) +
  # Dashed reference line at the 50% majority threshold.
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(from = 0, to = 1, by = 0.2),
    labels = percent_format()
  ) +
  labs(
    x = "Ballot Measure",
    y = "Yes Vote Share",
    fill = "Series",
    title = "Projected vs Actual Support for Maine Ballot Measures"
  ) +
  theme_minimal()

As shown in the code and graph above, I summarized the maineprojections dataset and stacked its projected support shares with the actual shares from the maineresults dataset. This allowed me to compare the projected and actual support for each measure on one graph.

# Assign a county to every row of mainedemographics.
#
# The county is derived from the geoid itself instead of from hand-entered
# min/max geoid bounds per county: bounds copied from the observed data
# silently yield NA for any geoid that falls outside them.
# Every geoid range previously listed here shares the layout
# 23 (state) + 3-digit county code + 5 trailing digits, with county codes
# 001, 003, ..., 031 in the order below.
# NOTE(review): assumes geoid is numeric and always follows that 10-digit
# layout — confirm against the data dictionary.
maine_county_codes <- seq(1, 31, by = 2)
maine_county_names <- c(
  "ANDROSCOGGIN", "AROOSTOOK", "CUMBERLAND", "FRANKLIN", "HANCOCK",
  "KENNEBEC", "KNOX", "LINCOLN", "OXFORD", "PENOBSCOT", "PISCATAQUIS",
  "SAGADAHOC", "SOMERSET", "WALDO", "WASHINGTON", "YORK"
)

mainedemographics <- mainedemographics %>%
  mutate(
    county = case_when(
      # Leading two digits must be 23; digits 3-5 are the county code.
      # Unknown county codes fall through match() as NA.
      geoid %/% 1e8 == 23 ~
        maine_county_names[match((geoid %/% 1e5) %% 1000, maine_county_codes)],
      TRUE ~ NA_character_
    )
  )

# Preview the first rows, including the new county column.
head(mainedemographics)

       geoid n_registered share_dem share_rep share_white share_afam
1 2302100100          450     0.207     0.413       0.953      0.002
2 2303100275         1905     0.270     0.316       0.927      0.006
3 2302900380          844     0.262     0.379       0.947      0.004
4 2301100590         1394     0.244     0.370       0.952      0.001
5 2302900660          374     0.257     0.329       0.955      0.011
6 2303100730         2287     0.282     0.272       0.932      0.001
  share_female avg_hhincome avg_popdens avg_partyscore avg_collegescore
1        0.504       41.030      81.967         40.137           33.673
2        0.493       72.358     406.412         42.668           43.955
3        0.519       62.874     135.724         37.743           40.843
4        0.479       70.391     153.811         39.703           36.418
5        0.476       44.395     133.396         43.884           41.506
6        0.507       60.793     262.589         48.724           43.263
  avg_gunownscore avg_gvpscore avg_churchscore avg_marijuanascore
1          68.418       23.165          23.710             54.763
2          60.220       29.071          23.037             63.063
3          67.649       25.181          28.313             58.456
4          68.423       24.403          24.498             60.077
5          70.698       23.934          24.434             62.467
6          55.856       35.669          22.985             62.200
  avg_fiscalprogscore avg_choicescore avg_enviroscore      county
1              24.368          42.687          20.035 PISCATAQUIS
2              29.105          55.195          25.557        YORK
3              27.567          48.887          23.190  WASHINGTON
4              26.337          50.523          21.824    KENNEBEC
5              28.598          52.162          23.616  WASHINGTON
6              30.863          56.128          28.127        YORK

# Assign a county to every row of maineresults.
#
# The county is derived from the geoid itself instead of from hand-entered
# min/max geoid bounds per county: bounds copied from the observed data
# silently yield NA for any geoid that falls outside them, and those rows
# would then be grouped under an NA county in the county-level summaries.
# Every geoid range previously listed here shares the layout
# 23 (state) + 3-digit county code + 5 trailing digits, with county codes
# 001, 003, ..., 031 in the order below.
# NOTE(review): assumes geoid is numeric and always follows that 10-digit
# layout — confirm against the data dictionary.
maine_county_codes <- seq(1, 31, by = 2)
maine_county_names <- c(
  "ANDROSCOGGIN", "AROOSTOOK", "CUMBERLAND", "FRANKLIN", "HANCOCK",
  "KENNEBEC", "KNOX", "LINCOLN", "OXFORD", "PENOBSCOT", "PISCATAQUIS",
  "SAGADAHOC", "SOMERSET", "WALDO", "WASHINGTON", "YORK"
)

maineresults <- maineresults %>%
  mutate(
    county = case_when(
      # Leading two digits must be 23; digits 3-5 are the county code.
      # Unknown county codes fall through match() as NA.
      geoid %/% 1e8 == 23 ~
        maine_county_names[match((geoid %/% 1e5) %% 1000, maine_county_codes)],
      TRUE ~ NA_character_
    )
  )

As shown above, I created a county variable for both the mainedemographics dataset and the maineresults dataset. I did this because I wanted to compare the results of all three ballot initiatives with the demographics of each county.

# Mean of avg_hhincome across the rows of each county.
# NOTE(review): this is an unweighted mean of the row-level averages; rows
# are not weighted by size (e.g. n_registered) — confirm that is intended.
income_county <- mainedemographics %>%
  group_by(county) %>%
  summarise(mean_income = mean(avg_hhincome, na.rm = TRUE))

# County totals of mw yes/no votes and the resulting yes share.
marijuana_county <- maineresults %>%
  group_by(county) %>%
  summarise(
    mw_yes = sum(mw_yes, na.rm = TRUE),
    mw_no = sum(mw_no, na.rm = TRUE),
    mw_support = mw_yes / (mw_yes + mw_no)
  )

# One row per county from income_county, with the vote columns attached.
county_results <- left_join(income_county, marijuana_county, by = "county")

In the code above, I calculated the average household income for each county and then calculated the support for the marijuana initiative in each county. Afterwards, I joined the two county-level tables by county.

# Scatterplot of county mean income against the mw yes share, with a
# straight-line (lm) fit and no confidence band.
ggplot(data = county_results, mapping = aes(mean_income, mw_support)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "Average Household Income",
    y = "Marijuana Yes Vote Share",
    title = "Average County Income vs Marijuana Support in Maine"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

# Same scatterplot as the previous one, with each point labelled by county.
ggplot(data = county_results, mapping = aes(mean_income, mw_support)) +
  geom_point(size = 3) +
  # County name placed just above its point.
  geom_text(mapping = aes(label = county), vjust = -0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "Average Household Income",
    y = "Marijuana Yes Vote Share",
    title = "Average County Income vs Marijuana Support in Maine"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

In both scatterplots above, I added a linear regression line to show the relationship between average county income and support for the marijuana initiative.

# Horizontal bars of the mw yes share per county, ordered by that share.
ggplot(
  data = county_results,
  mapping = aes(x = reorder(county, mw_support), y = mw_support)
) +
  geom_col(fill = "steelblue") +
  # Flip so the county names read horizontally along the vertical axis.
  coord_flip() +
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "County",
    y = "Yes Vote Share",
    title = "Marijuana Support by County"
  ) +
  theme_minimal()

# Horizontal bars of mean income per county, ordered by income, with the
# value printed at the end of each bar.
# NOTE(review): the avg_hhincome values previewed by head() are in the tens
# (e.g. 41.030), so dollar() renders labels such as "$41.03"; confirm the
# units (possibly thousands of dollars) and adjust the label if needed.
ggplot(county_results,
       aes(x = reorder(county, mean_income), y = mean_income)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = dollar(mean_income)), hjust = -0.1) +
  # The labels start just past the end of each bar, so give the upper end of
  # the value axis extra room; with only the default padding the label on the
  # longest bar can run off the panel.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() +
  labs(
    title = "Average Household Income by County",
    x = "County",
    y = "Average Household Income"
  ) +
  theme_minimal()

# Mean of avg_gunownscore across the rows of each county.
gun_county <- mainedemographics %>%
  group_by(county) %>%
  summarise(mean_gun = mean(avg_gunownscore, na.rm = TRUE))

# County totals of bg yes/no votes and the resulting yes share.
bg_county <- maineresults %>%
  group_by(county) %>%
  summarise(
    bg_yes = sum(bg_yes, na.rm = TRUE),
    bg_no = sum(bg_no, na.rm = TRUE),
    bg_support = bg_yes / (bg_yes + bg_no)
  )

# One row per county from gun_county, with the vote columns attached.
bg_analysis <- left_join(gun_county, bg_county, by = "county")

# Scatterplot of the county mean gun score (divided by 100 and shown as a
# percentage) against the bg yes share, with county labels and a
# straight-line (lm) fit.
# NOTE(review): the x-axis title calls mean_gun / 100 a "gun ownership rate";
# confirm that avg_gunownscore can be read as a percentage.
ggplot(bg_analysis,
       aes(x = mean_gun / 100, y = bg_support)) +
  geom_point(size = 3, color = "black") +
  geom_text(
    aes(label = county),
    size = 3,
    nudge_y = 0.01
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = percent_format()) +
  # Zoom with coord_cartesian() rather than scale limits: scale limits discard
  # any county outside the window before the regression line is fitted, while
  # coord_cartesian() only changes the visible region and keeps every county
  # in the fit.
  coord_cartesian(
    xlim = c(0.45, 0.67),
    ylim = c(0.25, 0.70)
  ) +
  labs(
    title = "Gun Ownership vs Support for Background Checks in Maine Counties",
    x = "Estimated Gun Ownership Rate by County",
    y = "Background Check Yes Vote Share"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

# Mean of avg_collegescore across the rows of each county.
college_county <- mainedemographics %>%
  group_by(county) %>%
  summarise(mean_college = mean(avg_collegescore, na.rm = TRUE))

# County totals of bg yes/no votes and the resulting yes share.
bg_county <- maineresults %>%
  group_by(county) %>%
  summarise(
    bg_yes = sum(bg_yes, na.rm = TRUE),
    bg_no = sum(bg_no, na.rm = TRUE),
    bg_support = bg_yes / (bg_yes + bg_no)
  )

# One row per county from college_county, with the vote columns attached.
# This reassigns bg_analysis, replacing whatever it held before.
bg_analysis <- left_join(college_county, bg_county, by = "county")

# Scatterplot of the county mean college score (divided by 100 and shown as
# a percentage) against the bg yes share, with county labels and a
# straight-line (lm) fit.
ggplot(
  data = bg_analysis,
  mapping = aes(x = mean_college / 100, y = bg_support)
) +
  geom_point(size = 3) +
  # County name placed just above its point.
  geom_text(mapping = aes(label = county), vjust = -0.6, size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  labs(
    x = "Estimated College Attainment Rate (County)",
    y = "Background Check Support",
    title = "College Attainment vs Support for Background Checks (by County)"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'