library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidycensus)
# Raw CSV of presidential election results from FiveThirtyEight's
# election-results repository on GitHub.
url <- "https://raw.githubusercontent.com/fivethirtyeight/election-results/main/election_results_presidential.csv"

# Download and parse the file; readr guesses the column types.
presidential_elections <- read_csv(file = url)

## Rows: 7423 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): state_abbrev, state, office_name, stage, party, candidate_name, bal...
## dbl (8): id, race_id, office_id, cycle, politician_id, candidate_id, votes, ...
## lgl (4): office_seat_name, special, unopposed, winner
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Q1

# Keep only the 2020 general-election rows that carry a state name.
# Conditions separated by commas inside one filter() are combined with AND.
pres_2020 <- presidential_elections |>
  filter(
    cycle == 2020,
    stage == "general",
    !is.na(state)
  )

# Attach to every row the number of rows recorded for its state, then sort so
# the states with the most rows come first. The result stays grouped by state.
counts_mutated <- pres_2020 |>
  group_by(state) |>
  mutate(`number of candidates` = n()) |>
  arrange(desc(`number of candidates`))

# Display the rows whose ballot_party is not "W". Rows with a missing
# ballot_party are dropped too, because the comparison evaluates to NA.
filter(pres_2020, ballot_party != "W")

# Display only the congressional-district rows for Nebraska and Maine.
pres_2020 |>
  filter(
    # %in% is TRUE wherever `state` matches an entry of the right-hand vector
    state %in% c(
      "Nebraska CD-1", "Nebraska CD-2",
      "Maine CD-1", "Maine CD-2"
    )
  )

# Display every row except the congressional-district entries: str_detect()
# flags state names containing "CD-" followed by a digit, and `!` inverts the
# match so only the non-district rows are kept.
pres_2020 |>
  filter(!str_detect(state, "CD-[0-9]"))

# Vote totals per state for Joe Biden, Donald Trump and all other candidates
# combined, together with the state-wide total across the three groups.
# The result is in long format (one row per state/vote_type pair) and remains
# grouped by state.
party_totals <- pres_2020 |>
  # remove the split electoral-vote rows (state names containing "CD-<digit>")
  filter(!str_detect(state, "CD-[0-9]")) |>
  # three-category label: the two named candidates keep their own name and
  # every other row (including a missing candidate name) becomes "other"
  mutate(
    vote_type = case_when(
      candidate_name == "Joe Biden" ~ "Joe Biden",
      candidate_name == "Donald Trump" ~ "Donald Trump",
      .default = "other"
    )
  ) |>
  # total votes for each state/category pair
  group_by(state, vote_type) |>
  summarise(votes = sum(votes)) |>
  # total number of votes cast in the state, repeated on each of its rows
  group_by(state) |>
  mutate(total = sum(votes))

## `summarise()` has grouped output by 'state'. You can override using the
## `.groups` argument.

# Preview the wide layout: one vote column per vote_type category.
pivot_wider(party_totals, names_from = vote_type, values_from = votes)

# Reshape to one vote column per category, then keep only the Biden and Trump
# columns by dropping "total" and "other".
final_results <- party_totals |>
  pivot_wider(names_from = vote_type, values_from = votes) |>
  select(!c(total, other))

print(final_results)

## # A tibble: 51 × 3
## # Groups:   state [51]
##    state                `Donald Trump` `Joe Biden`
##    <chr>                         <dbl>       <dbl>
##  1 Alabama                     1441170      849624
##  2 Alaska                       189951      153778
##  3 Arizona                     1661686     1672143
##  4 Arkansas                     760647      423932
##  5 California                  6006429    11110250
##  6 Colorado                    1364607     1804352
##  7 Connecticut                  714717     1080831
##  8 Delaware                     200603      296268
##  9 District of Columbia          18586      317323
## 10 Florida                     5668731     5297045
## # ℹ 41 more rows

Q2

# Biden's share of the two-party vote: his votes divided by the combined
# Biden and Trump votes in each state.
biden_share <- mutate(
  final_results,
  biden_vote_share = `Joe Biden` / (`Joe Biden` + `Donald Trump`)
)

print(biden_share)

## # A tibble: 51 × 4
## # Groups:   state [51]
##    state                `Donald Trump` `Joe Biden` biden_vote_share
##    <chr>                         <dbl>       <dbl>            <dbl>
##  1 Alabama                     1441170      849624            0.371
##  2 Alaska                       189951      153778            0.447
##  3 Arizona                     1661686     1672143            0.502
##  4 Arkansas                     760647      423932            0.358
##  5 California                  6006429    11110250            0.649
##  6 Colorado                    1364607     1804352            0.569
##  7 Connecticut                  714717     1080831            0.602
##  8 Delaware                     200603      296268            0.596
##  9 District of Columbia          18586      317323            0.945
## 10 Florida                     5668731     5297045            0.483
## # ℹ 41 more rows

Q3

# State-level ACS estimates for variable B19013_001, labelled "median_income"
# in the returned `variable` column, requested for 2020.
median_income <- get_acs(
  geography = "state",
  variables = c(median_income = "B19013_001"),
  year = 2020
)

## Getting data from the 2016-2020 5-year ACS

# Attach each state's ACS median-income row to the vote shares by matching the
# election data's `state` column to the ACS `NAME` column. A left join keeps
# every row of biden_share; nothing is filtered out here.
income_and_votes <- biden_share |>
  left_join(
    median_income,
    by = join_by(state == NAME),
    # each state should match exactly one ACS row; fail loudly if a future
    # change to median_income (e.g. more variables) would duplicate states
    relationship = "one-to-one"
  )

income_and_votes

## # A tibble: 51 × 8
## # Groups:   state [51]
##    state     `Donald Trump` `Joe Biden` biden_vote_share GEOID variable estimate
##    <chr>              <dbl>       <dbl>            <dbl> <chr> <chr>       <dbl>
##  1 Alabama          1441170      849624            0.371 01    median_…    52035
##  2 Alaska            189951      153778            0.447 02    median_…    77790
##  3 Arizona          1661686     1672143            0.502 04    median_…    61529
##  4 Arkansas          760647      423932            0.358 05    median_…    49475
##  5 Californ…        6006429    11110250            0.649 06    median_…    78672
##  6 Colorado         1364607     1804352            0.569 08    median_…    75231
##  7 Connecti…         714717     1080831            0.602 09    median_…    79855
##  8 Delaware          200603      296268            0.596 10    median_…    69110
##  9 District…          18586      317323            0.945 11    median_…    90842
## 10 Florida          5668731     5297045            0.483 12    median_…    57703
## # ℹ 41 more rows
## # ℹ 1 more variable: moe <dbl>

Q4

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Scatter plot of Biden's two-party vote share against the ACS income
# estimate. `label = state` is mapped as well -- presumably so the state name
# appears in the interactive plotly tooltip.
biden_plot <- ggplot(
  income_and_votes,
  aes(x = estimate, y = biden_vote_share, label = state)
) +
  geom_point()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(biden_plot)

# Simple linear regression of Biden's two-party vote share on the ACS income
# estimate, one observation per row of income_and_votes.
biden_model <- lm(
  formula = biden_vote_share ~ estimate,
  data = income_and_votes
)
summary(biden_model)

## 
## Call:
## lm(formula = biden_vote_share ~ estimate, data = income_and_votes)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.223857 -0.034904 -0.002384  0.044009  0.241841 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.203e-02  7.327e-02  -0.301    0.765    
## estimate     7.979e-06  1.111e-06   7.183 3.45e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08681 on 49 degrees of freedom
## Multiple R-squared:  0.5129, Adjusted R-squared:  0.503 
## F-statistic:  51.6 on 1 and 49 DF,  p-value: 3.453e-09

Homework 1 GVPT728

Quentin Hoglund

2025-01-05

Q1

Q2

Q3

Q4