wim_join <-inner_join(white_infant_mort, votes, by ="countyid")wim_join <- wim_join %>%mutate(county_id =str_pad(county_id, width =5, pad ="0")) |>rename(county_fips = county_id)
ACS Variables
library(tidycensus)library(sf)
Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
# load_variables(year = 2019, dataset = "acs5/subject") |># separate(label, into = paste0("label", 1:9), sep = "!!", fill = "right", remove = FALSE)vars <-load_variables(year =2019,dataset ="acs5",cache =TRUE)my_states <-c("AR", "DE", "Fl", "GA", "KY", "LA", "MD", "MS", "OK", "SC", "TN", "TX", "VA", "WV")my_vars <-c(total_pop ="B01003_001", # Total populationmedian_income ="B19013_001", # Median incomemedian_age ="B01002_001", # Median agenativity ="B05012_003", # Foreign-born populationbach_degree ="B15012_001", # 25+ with Bachelor's degree or >insurance ="B27010_033", # Population 20-64 w/o health insuranceblack_pop ="B02001_003", # Black populationlatino_pop ="B03003_003", # Latino population white_pop ="B02001_002"# White population)#Create a loop since we need multiple counties from multiple steps. learned this from here: https://mattherman.info/blog/tidycensus-mult/multi_state <-map_dfr( my_states,~get_acs(geography ="county",variables = my_vars,state = .,year =2019,survey ="acs5",geometry =FALSE ) ) |>print()
Getting data from the 2015-2019 5-year ACS
Warning: • You have not set a Census API key. Users without a key are limited to 500
queries per day and may experience performance limitations.
ℹ For best results, get a Census API key at
http://api.census.gov/data/key_signup.html and then supply the key to the
`census_api_key()` function to use it throughout your tidycensus session.
This warning is displayed once per session.
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
# A tibble: 11,286 × 5
GEOID NAME variable estimate moe
<chr> <chr> <chr> <dbl> <dbl>
1 05001 Arkansas County, Arkansas median_age 40.7 0.6
2 05001 Arkansas County, Arkansas total_pop 17914 NA
3 05001 Arkansas County, Arkansas white_pop 12921 103
4 05001 Arkansas County, Arkansas black_pop 4636 94
5 05001 Arkansas County, Arkansas latino_pop 572 NA
6 05001 Arkansas County, Arkansas nativity 145 94
7 05001 Arkansas County, Arkansas bach_degree 2045 265
8 05001 Arkansas County, Arkansas median_income 46696 2210
9 05001 Arkansas County, Arkansas insurance 475 137
10 05003 Ashley County, Arkansas median_age 41.7 1.2
# ℹ 11,276 more rows
#Convert to tidy formatmulti_state2 <- multi_state |>pivot_wider(names_from = variable, values_from =c(estimate, moe) ) multi_state2 <-multi_state2 |>rename(county_fips = GEOID) #this is the same that I will use to join to the White and Latino datasets
Join covariates to dataframe
#join death data to covariate dtawim_full_data <-inner_join(wim_join, multi_state2, by ="county_fips")#Now I am creating a binary variable to identify the county as either a trump or biden county. wim_full_data1 <- wim_full_data |>mutate(winner =case_when( T ==1~1, # If T is 1, Trump won, so assign 1 B ==1~0# If B is 1, Biden won, so assign 0 ), pct_foreign_born = (estimate_nativity / estimate_total_pop) *100,pct_bach_degree = (estimate_bach_degree / estimate_total_pop) *100,pct_white_pop = (estimate_white_pop / estimate_total_pop) *100, # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)# pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100, # For White population (switch for White analysis)pct_no_insurance = (estimate_insurance / estimate_total_pop) *100,# Log transformation of total population size to handle outlierslog_total_pop =log(estimate_total_pop) )#save to excel for safety and then did some cleaning in excellibrary(writexl)write_xlsx(wim_full_data1, "whiteinfantdatajoin.xlsx")#now bringing back in the dataframe after minor manipulations in excellibrary(readxl)whiteinfantdatajoin_cleaned <-read_excel("whiteinfantdatajoin_cleaned1.xlsx")
New names:
• `other...12` -> `other`
View(whiteinfantdatajoin_cleaned)
GLM for Categories of Support
#Creating the categories of support first...same issues..#thre were hidden spaces so I had to remove them. Note that I converted to values in excel. wim_cat <- whiteinfantdatajoin_cleaned|>mutate(trump =as.numeric(trimws(gsub("[^0-9.]", "", trump))), # remove non-numeric characters and spacesbiden =as.numeric(trimws(gsub("[^0-9.]", "", biden))) # remove non-numeric characters and spaces )sum(is.na(wim_cat$trump)) # Count NAs in trump column
[1] 0
sum(is.na(wim_cat$biden))
[1] 0
wim_cat <- wim_cat|>mutate(support_category =case_when( trump >=70~"Strong Trump", # Trump received 70% or more trump >=55& trump <70~"Trump", # Trump received between 55% and 69.9% biden >=70~"Strong Biden", # Biden received 70% or more biden >=55& biden <70~"Biden", # Biden received between 55% and 69.9% (trump >=45& trump <55) | (biden >=45& biden <55) ~"Neutral")) # Trump or Biden received between 45% and 54.9%head(wim_cat|>select(trump, biden, support_category))
#join death data to covariate dtahim_full <-inner_join(hisp_join, multi_state2, by ="county_fips")#Now I am creating a binary variable to identify the county as either a trump or biden county. him_full <- him_full |>mutate(winner =case_when( T ==1~1, # If T is 1, Trump won, so assign 1 B ==1~0# If B is 1, Biden won, so assign 0 ), pct_foreign_born = (estimate_nativity / estimate_total_pop) *100,pct_bach_degree = (estimate_bach_degree / estimate_total_pop) *100,pct_latino_pop = (estimate_latino_pop / estimate_total_pop) *100, # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)# pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100, # For White population (switch for White analysis)pct_no_insurance = (estimate_insurance / estimate_total_pop) *100,# Log transformation of total population size to handle outlierslog_total_pop =log(estimate_total_pop) )#save to excel for safety and then did some cleaning in excellibrary(writexl)write_xlsx(him_full, "hispanic_infant_join.xlsx")#now bringing back in the dataframe after minor manipulations in excelhisp_infant1 <-read_excel("hispanic_infant_join_cleaned.xlsx")head(hisp_infant1)
hisp_infant1 <- hisp_infant1|>mutate(trump =as.numeric(trimws(gsub("[^0-9.]", "", trump))), # remove non-numeric characters and spacesbiden =as.numeric(trimws(gsub("[^0-9.]", "", biden))) # remove non-numeric characters and spaces )sum(is.na(hisp_infant1$trump)) # Count NAs in trump column
[1] 0
sum(is.na(hisp_infant1$biden))
[1] 0
hisp_infant1 <- hisp_infant1|>mutate(support_category =case_when( trump >=70~"Strong Trump", # Trump received 70% or more trump >=55& trump <70~"Trump", # Trump received between 55% and 69.9% biden >=70~"Strong Biden", # Biden received 70% or more biden >=55& biden <70~"Biden", # Biden received between 55% and 69.9% (trump >=45& trump <55) | (biden >=45& biden <55) ~"Neutral")) # Trump or Biden received between 45% and 54.9%head(hisp_infant1|>select(trump, biden, support_category))
bim_join <-inner_join(black_infant, votes, by ="countyid")bim_join <- bim_join %>%mutate(county_code =str_pad(county_code, width =5, pad ="0")) |>rename(county_fips = county_code)
Join to Covariates
#join death data to covariate dtabim_join1 <-inner_join(bim_join, multi_state2, by ="county_fips")#Now I am creating a binary variable to identify the county as either a trump or biden county. bim_join1 <- bim_join1 |>mutate(winner =case_when( T ==1~1, # If T is 1, Trump won, so assign 1 B ==1~0# If B is 1, Biden won, so assign 0 ), pct_foreign_born = (estimate_nativity / estimate_total_pop) *100,pct_bach_degree = (estimate_bach_degree / estimate_total_pop) *100,pct_black_pop = (estimate_black_pop / estimate_total_pop) *100, # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)# pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100, # For White population (switch for White analysis)pct_no_insurance = (estimate_insurance / estimate_total_pop) *100,# Log transformation of total population size to handle outlierslog_total_pop =log(estimate_total_pop) )#save to excel for safety and then did some cleaning in excellibrary(writexl)write_xlsx(bim_join1, "black_infant_join.xlsx")#now bringing back in the dataframe after minor manipulations in excelbim_join2 <-read_excel("black_infant_join_clean.xlsx")head(bim_join2)
bim_join2 <- bim_join2|>mutate(trump =as.numeric(trimws(gsub("[^0-9.]", "", trump))), # remove non-numeric characters and spacesbiden =as.numeric(trimws(gsub("[^0-9.]", "", biden))) # remove non-numeric characters and spaces )sum(is.na(bim_join2$trump)) # Count NAs in trump column
[1] 0
sum(is.na(bim_join2$biden))
[1] 0
bim_join2 <- bim_join2|>mutate(support_category =case_when( trump >=70~"Strong Trump", # Trump received 70% or more trump >=55& trump <70~"Trump", # Trump received between 55% and 69.9% biden >=70~"Strong Biden", # Biden received 70% or more biden >=55& biden <70~"Biden", # Biden received between 55% and 69.9% (trump >=45& trump <55) | (biden >=45& biden <55) ~"Neutral")) # Trump or Biden received between 45% and 54.9%head(bim_join2|>select(trump, biden, support_category))