Infant Mortality

Author

CRG

1. White Infant Mortality Analysis

Data from CDC Wonder for years 2020-2022.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

library(psych)


Attaching package: 'psych'

The following objects are masked from 'package:ggplot2':

    %+%, alpha

library(readxl)
library(readr)


white_infant_mort <- read_excel("white infant mortality_unrel.xlsx")
head(white_infant_mort)

# A tibble: 6 × 9
  `State Code` county_id Deaths Births `Death Rate` county        state countyid
         <dbl>     <dbl>  <dbl>  <dbl>        <dbl> <chr>         <chr> <chr>   
1            5      5119     41   6097         6.72 Pulaski Coun… AR    Pulaski…
2            5      5999    430  61410         7    Unidentified… AR    Unident…
3           10     10003     29   7827         3.71 New Castle C… DE    New Cas…
4           10     10999     29   7199         4.03 Unidentified… DE    Unident…
5           12     12009     53   9760         5.43 Brevard Coun… FL    Brevard…
6           12     12011     45  15247         2.95 Broward Coun… FL    Broward…
# ℹ 1 more variable: unreliable <chr>

library(janitor)
clean_names(white_infant_mort)

# A tibble: 89 × 9
   state_code county_id deaths births death_rate county           state countyid
        <dbl>     <dbl>  <dbl>  <dbl>      <dbl> <chr>            <chr> <chr>   
 1          5      5119     41   6097       6.72 Pulaski County   AR    Pulaski…
 2          5      5999    430  61410       7    Unidentified Co… AR    Unident…
 3         10     10003     29   7827       3.71 New Castle Coun… DE    New Cas…
 4         10     10999     29   7199       4.03 Unidentified Co… DE    Unident…
 5         12     12009     53   9760       5.43 Brevard County   FL    Brevard…
 6         12     12011     45  15247       2.95 Broward County   FL    Broward…
 7         12     12031     79  16337       4.84 Duval County     FL    Duval C…
 8         12     12033     38   6108       6.22 Escambia County  FL    Escambi…
 9         12     12057     86  19192       4.48 Hillsborough Co… FL    Hillsbo…
10         12     12069     27   5991       4.51 Lake County      FL    Lake Co…
# ℹ 79 more rows
# ℹ 1 more variable: unreliable <chr>

library(psych)
describe(white_infant_mort)

            vars  n     mean       sd   median  trimmed      mad  min       max
State Code     1 89    30.75    16.69    24.00    30.79    23.72    5     54.00
county_id      2 89 31014.22 16730.66 24999.00 31038.45 22280.51 5119  54999.00
Deaths         3 89    97.87   157.95    37.00    58.67    25.20    0    741.00
Births         4 89 19537.00 27510.72  9760.00 12983.67  6397.42  133 142001.00
Death Rate     5 89     4.36     1.29     4.28     4.35     1.57    0      7.08
county*        6 89    42.37    22.74    44.00    43.36    31.13    1     74.00
state*         7 89     7.65     4.01     7.00     7.64     5.93    1     14.00
countyid*      8 89    45.00    25.84    45.00    45.00    32.62    1     89.00
unreliable*    9 13     1.00     0.00     1.00     1.00     0.00    1      1.00
                range  skew kurtosis      se
State Code      49.00 -0.05    -1.80    1.77
county_id    49880.00 -0.05    -1.80 1773.45
Deaths         741.00  2.54     5.60   16.74
Births      141868.00  2.65     6.70 2916.13
Death Rate       7.08 -0.13     0.08    0.14
county*         73.00 -0.22    -1.31    2.41
state*          13.00 -0.05    -1.62    0.43
countyid*       88.00  0.00    -1.24    2.74
unreliable*      0.00   NaN      NaN    0.00

Note* there are rates that are unreliable

Join to voter data

votes <- read_excel("combined election results_countyid_GOOD.xlsx")

New names:
• `other` -> `other...9`
• `other` -> `other...12`

head(votes)

# A tibble: 6 × 15
  county   `total vote`     T     B margin `perc margin` trump   biden other...9
  <chr>           <dbl> <dbl> <dbl>  <dbl>         <dbl> <chr>   <chr> <chr>    
1 Arkansas         6292     1     2   2486         0.395  68.40%  28.…  2.70%   
2 Ashley           7926     1     2   3423         0.432  70.00%  26.…  3.19%   
3 Baxter          21007     1     2  11201         0.533  75.38%  22.…  2.55%   
4 Benton         119912     1     2  31716         0.264  61.68%  35.…  3.08%   
5 Boone           17114     1     2  10588         0.619  79.77%  17.…  2.33%   
6 Bradley          3654     1     2   1121         0.307  63.90%  33.…  2.87%   
# ℹ 6 more variables: `trump n` <dbl>, `biden n` <dbl>, other...12 <chr>,
#   state <chr>, `county name` <chr>, countyid <chr>

wim_join <- inner_join(white_infant_mort, votes, by = "countyid")

wim_join <- wim_join %>%
  mutate(county_id = str_pad(county_id, width = 5, pad = "0")) |>
  rename(county_fips = county_id)

ACS Variables

library(tidycensus)
library(sf)

Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE

# load_variables(year = 2019, dataset = "acs5/subject") |>
# separate(label, into = paste0("label", 1:9), sep = "!!", fill = "right", remove = FALSE)

vars <- load_variables(year = 2019,
                      dataset = "acs5",
                      cache = TRUE)

my_states <- c("AR", "DE", "Fl", "GA", "KY", "LA", "MD", "MS", "OK", "SC", "TN", "TX", "VA", "WV")



my_vars <- c(
  total_pop = "B01003_001",                     # Total population
  median_income = "B19013_001",                 # Median income
  median_age = "B01002_001",                    # Median age
  nativity = "B05012_003",                  # Foreign-born population
  bach_degree = "B15012_001",                   # 25+ with Bachelor's degree or >
  insurance = "B27010_033",            # Population 20-64 w/o health insurance
  black_pop = "B02001_003",                     # Black population
  latino_pop = "B03003_003",                    # Latino population 
  white_pop = "B02001_002"                      # White population
)


#Create a loop since we need multiple counties from multiple steps. learned this from here: https://mattherman.info/blog/tidycensus-mult/

multi_state <- map_dfr(
  my_states,
    ~ get_acs(
        geography = "county",
        variables = my_vars,
        state = .,
        year = 2019,
        survey = "acs5",
        geometry = FALSE
        )
    ) |>
  print()

Getting data from the 2015-2019 5-year ACS

Warning: • You have not set a Census API key. Users without a key are limited to 500
queries per day and may experience performance limitations.
ℹ For best results, get a Census API key at
http://api.census.gov/data/key_signup.html and then supply the key to the
`census_api_key()` function to use it throughout your tidycensus session.
This warning is displayed once per session.

Getting data from the 2015-2019 5-year ACS

Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2015-2019 5-year ACS

# A tibble: 11,286 × 5
   GEOID NAME                      variable      estimate    moe
   <chr> <chr>                     <chr>            <dbl>  <dbl>
 1 05001 Arkansas County, Arkansas median_age        40.7    0.6
 2 05001 Arkansas County, Arkansas total_pop      17914     NA  
 3 05001 Arkansas County, Arkansas white_pop      12921    103  
 4 05001 Arkansas County, Arkansas black_pop       4636     94  
 5 05001 Arkansas County, Arkansas latino_pop       572     NA  
 6 05001 Arkansas County, Arkansas nativity         145     94  
 7 05001 Arkansas County, Arkansas bach_degree     2045    265  
 8 05001 Arkansas County, Arkansas median_income  46696   2210  
 9 05001 Arkansas County, Arkansas insurance        475    137  
10 05003 Ashley County, Arkansas   median_age        41.7    1.2
# ℹ 11,276 more rows

#Convert to tidy format
multi_state2 <- multi_state |>
  pivot_wider(
    names_from = variable,  
    values_from = c(estimate, moe)  
  )


 multi_state2 <-multi_state2 |> rename(county_fips = GEOID) #this is the same that I will use to join to the White and Latino datasets

Join covariates to dataframe

#join death data to covariate dta
wim_full_data <- inner_join(wim_join, multi_state2, by = "county_fips")


#Now I am creating a binary variable to identify the county as either a trump or biden county. 

wim_full_data1 <- wim_full_data |>
  mutate(
      winner = case_when(
      T == 1 ~ 1,  # If T is 1, Trump won, so assign 1
      B == 1 ~ 0   # If B is 1, Biden won, so assign 0
      ), 
    pct_foreign_born = (estimate_nativity / estimate_total_pop) * 100,
    pct_bach_degree = (estimate_bach_degree / estimate_total_pop) * 100,
    pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100,   
    # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)
    # pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100,   # For White population (switch for White analysis)
    pct_no_insurance = (estimate_insurance / estimate_total_pop) * 100,
    # Log transformation of total population size to handle outliers
    log_total_pop = log(estimate_total_pop)
  )
      

#save to excel for safety and then did some cleaning in excel
library(writexl)
write_xlsx(wim_full_data1, "whiteinfantdatajoin.xlsx")


#now bringing back in the dataframe after minor manipulations in excel
library(readxl)
whiteinfantdatajoin_cleaned <- read_excel("whiteinfantdatajoin_cleaned1.xlsx")

New names:
• `other...12` -> `other`

View(whiteinfantdatajoin_cleaned)

GLM for Categories of Support

#Creating the categories of support first...same issues..

#thre were hidden spaces so I had to remove them. Note that I converted to values in excel.  
wim_cat <- whiteinfantdatajoin_cleaned|>
  mutate(
    trump = as.numeric(trimws(gsub("[^0-9.]", "", trump))),  # remove non-numeric characters and spaces
    biden = as.numeric(trimws(gsub("[^0-9.]", "", biden)))   # remove non-numeric characters and spaces
  )

sum(is.na(wim_cat$trump))  # Count NAs in trump column

[1] 0

sum(is.na(wim_cat$biden))

[1] 0

wim_cat <- wim_cat|>
mutate(
  support_category = case_when(
    trump >= 70 ~ "Strong Trump",        # Trump received 70% or more
    trump >= 55 & trump < 70 ~ "Trump",  # Trump received between 55% and 69.9%
    biden >= 70 ~ "Strong Biden",        # Biden received 70% or more
    biden >= 55 & biden < 70 ~ "Biden",  # Biden received between 55% and 69.9%
    (trump >= 45 & trump < 55) | (biden >= 45 & biden < 55) ~ "Neutral")) # Trump or Biden received between 45% and 54.9%

head(wim_cat|>select(trump, biden, support_category))

# A tibble: 6 × 3
  trump biden support_category
  <dbl> <dbl> <chr>           
1  37.5  60.0 Biden           
2  67.8  30.7 Trump           
3  57.5  41.1 Trump           
4  34.7  64.5 Biden           
5  47.3  51.1 Neutral         
6  56.6  41.5 Trump

GLM Categories

model_wim_glm <- glm(crude_rate ~ support_category + estimate_median_age + pct_foreign_born + 
                 pct_bach_degree + pct_white_pop + log_total_pop + pct_no_insurance, 
                 family = Gamma(link = "log"), data = wim_cat)

summary(model_wim_glm)


Call:
glm(formula = crude_rate ~ support_category + estimate_median_age + 
    pct_foreign_born + pct_bach_degree + pct_white_pop + log_total_pop + 
    pct_no_insurance, family = Gamma(link = "log"), data = wim_cat)

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   1.938699   0.699001   2.774 0.007286 ** 
support_categoryNeutral       0.025146   0.067750   0.371 0.711759    
support_categoryStrong Biden -0.165919   0.152353  -1.089 0.280283    
support_categoryStrong Trump  0.103137   0.103015   1.001 0.320569    
support_categoryTrump        -0.020461   0.067844  -0.302 0.763962    
estimate_median_age          -0.001978   0.005827  -0.339 0.735440    
pct_foreign_born             -0.015353   0.003750  -4.094 0.000123 ***
pct_bach_degree              -0.013208   0.004903  -2.694 0.009038 ** 
pct_white_pop                 0.002528   0.002108   1.199 0.234873    
log_total_pop                -0.010426   0.053538  -0.195 0.846226    
pct_no_insurance              0.015110   0.025407   0.595 0.554164    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.03763019)

    Null deviance: 5.1775  on 73  degrees of freedom
Residual deviance: 2.4531  on 63  degrees of freedom
AIC: 188.75

Number of Fisher Scoring iterations: 5

GLM for Binary

glm_winner_wim <- glm(
  formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_white_pop + log_total_pop + pct_no_insurance, 
  family = Gamma(link = "log"), 
  data = wim_cat
)

# Display summary of the GLM model
summary(glm_winner_wim)


Call:
glm(formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_white_pop + log_total_pop + pct_no_insurance, 
    family = Gamma(link = "log"), data = wim_cat)

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)          1.804027   0.709770   2.542 0.013388 *  
winner               0.039160   0.053853   0.727 0.469701    
estimate_median_age -0.002488   0.005820  -0.428 0.670397    
pct_foreign_born    -0.014550   0.003696  -3.936 0.000202 ***
pct_bach_degree     -0.013598   0.004625  -2.940 0.004519 ** 
pct_white_pop        0.001764   0.001981   0.891 0.376367    
log_total_pop        0.004987   0.055040   0.091 0.928078    
pct_no_insurance     0.009898   0.025054   0.395 0.694063    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.03816685)

    Null deviance: 5.1775  on 73  degrees of freedom
Residual deviance: 2.5610  on 66  degrees of freedom
AIC: 185.96

Number of Fisher Scoring iterations: 5

2. Hispanic Infant Mortality Analysis

hisp_infant <- read_excel("hisp_infant.xlsx")
View(hisp_infant)

library(janitor)
clean_names(hisp_infant)

# A tibble: 79 × 11
   state    state_code county    county_code deaths births crude_rate unreliable
   <chr>         <dbl> <chr>           <dbl>  <dbl>  <dbl>      <dbl> <chr>     
 1 Arkansas          5 Unidenti…        5999     54  10871       4.97 <NA>      
 2 Delaware         10 New Cast…       10003     15   3039       4.94 Y         
 3 Florida          12 Brevard …       12009     15   2330       6.44 Y         
 4 Florida          12 Broward …       12011     74  20524       3.61 <NA>      
 5 Florida          12 Collier …       12021     20   4865       4.11 <NA>      
 6 Florida          12 Duval Co…       12031     31   5725       5.41 <NA>      
 7 Florida          12 Hillsbor…       12057     92  17816       5.16 <NA>      
 8 Florida          12 Lake Cou…       12069     13   2511       5.18 Y         
 9 Florida          12 Lee Coun…       12071     31   8267       3.75 <NA>      
10 Florida          12 Manatee …       12081     24   3385       7.09 <NA>      
# ℹ 69 more rows
# ℹ 3 more variables: county_2 <chr>, state_2 <chr>, countyid <chr>

library(psych)
describe(hisp_infant)

            vars  n     mean       sd   median  trimmed      mad     min
State*         1 79     7.72     4.05     7.00     7.72     5.93    1.00
State Code     2 79    30.75    16.77    24.00    30.77    20.76    5.00
County*        3 79    40.00    22.95    40.00    40.00    29.65    1.00
county_code    4 79 31014.63 16818.35 24999.00 31028.57 22233.07 5999.00
Deaths         5 79    63.08    97.95    31.00    40.82    20.76   10.00
Births         6 79 12692.13 19604.47  6242.00  8256.49  4644.99 1405.00
crude_rate     7 79     5.24     1.03     5.18     5.17     0.96    3.43
unreliable*    8 20     1.00     0.00     1.00     1.00     0.00    1.00
county*        9 79    37.59    20.14    39.00    38.43    28.17    1.00
state*        10 79     7.72     4.05     7.00     7.72     5.93    1.00
countyid*     11 79    40.00    22.95    40.00    40.00    29.65    1.00
                  max     range  skew kurtosis      se
State*          13.00     12.00 -0.05    -1.73    0.46
State Code      51.00     46.00 -0.03    -1.85    1.89
County*         79.00     78.00  0.00    -1.25    2.58
county_code  51999.00  46000.00 -0.04    -1.85 1892.21
Deaths         631.00    621.00  3.76    16.03   11.02
Births      121043.00 119638.00  3.62    14.64 2205.68
crude_rate       9.25      5.82  0.94     1.92    0.12
unreliable*      1.00      0.00   NaN      NaN    0.00
county*         66.00     65.00 -0.21    -1.31    2.27
state*          13.00     12.00 -0.05    -1.73    0.46
countyid*       79.00     78.00  0.00    -1.25    2.58

Join to Voter

# votes <- read_excel("combined election results_countyid_GOOD.xlsx")
# head(votes)

hisp_join <- inner_join(hisp_infant, votes, by = "countyid")

hisp_join <- hisp_join %>%
  mutate(county_code = str_pad(county_code, width = 5, pad = "0")) |>
  rename(county_fips = county_code)

Join to Covariates

#join death data to covariate dta
him_full <- inner_join(hisp_join, multi_state2, by = "county_fips")


#Now I am creating a binary variable to identify the county as either a trump or biden county. 

him_full <- him_full |>
  mutate(
      winner = case_when(
      T == 1 ~ 1,  # If T is 1, Trump won, so assign 1
      B == 1 ~ 0   # If B is 1, Biden won, so assign 0
      ), 
    pct_foreign_born = (estimate_nativity / estimate_total_pop) * 100,
    pct_bach_degree = (estimate_bach_degree / estimate_total_pop) * 100,
    pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100,   
    # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)
    # pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100,   # For White population (switch for White analysis)
    pct_no_insurance = (estimate_insurance / estimate_total_pop) * 100,
    # Log transformation of total population size to handle outliers
    log_total_pop = log(estimate_total_pop)
  )
      

#save to excel for safety and then did some cleaning in excel
library(writexl)
write_xlsx(him_full, "hispanic_infant_join.xlsx")


#now bringing back in the dataframe after minor manipulations in excel
hisp_infant1 <- read_excel("hispanic_infant_join_cleaned.xlsx")
head(hisp_infant1)

# A tibble: 6 × 31
  `State Code` county_fips Deaths Births crude_rate unreliable county.x  state.x
         <dbl> <chr>        <dbl>  <dbl>      <dbl> <chr>      <chr>     <chr>  
1           10 10003           15   3039       4.94 Y          New Cast… DE     
2           12 12009           15   2330       6.44 Y          Brevard … FL     
3           12 12011           74  20524       3.61 <NA>       Broward … FL     
4           12 12021           20   4865       4.11 <NA>       Collier … FL     
5           12 12031           31   5725       5.41 <NA>       Duval Co… FL     
6           12 12057           92  17816       5.16 <NA>       Hillsbor… FL     
# ℹ 23 more variables: countyid <chr>, `total vote` <dbl>, T <dbl>, B <dbl>,
#   trump <chr>, biden <chr>, `trump n` <dbl>, `biden n` <dbl>,
#   estimate_median_age <dbl>, estimate_total_pop <dbl>,
#   estimate_white_pop <dbl>, estimate_black_pop <dbl>,
#   estimate_latino_pop <dbl>, estimate_nativity <dbl>,
#   estimate_bach_degree <dbl>, estimate_median_income <dbl>,
#   estimate_insurance <dbl>, winner <dbl>, pct_foreign_born <dbl>, …

GLM for Categories

hisp_infant1 <- hisp_infant1|>
  mutate(
    trump = as.numeric(trimws(gsub("[^0-9.]", "", trump))),  # remove non-numeric characters and spaces
    biden = as.numeric(trimws(gsub("[^0-9.]", "", biden)))   # remove non-numeric characters and spaces
  )

sum(is.na(hisp_infant1$trump))  # Count NAs in trump column

[1] 0

sum(is.na(hisp_infant1$biden))

[1] 0

hisp_infant1 <- hisp_infant1|>
mutate(
  support_category = case_when(
    trump >= 70 ~ "Strong Trump",        # Trump received 70% or more
    trump >= 55 & trump < 70 ~ "Trump",  # Trump received between 55% and 69.9%
    biden >= 70 ~ "Strong Biden",        # Biden received 70% or more
    biden >= 55 & biden < 70 ~ "Biden",  # Biden received between 55% and 69.9%
    (trump >= 45 & trump < 55) | (biden >= 45 & biden < 55) ~ "Neutral")) # Trump or Biden received between 45% and 54.9%

head(hisp_infant1|>select(trump, biden, support_category))

# A tibble: 6 × 3
  trump biden support_category
  <dbl> <dbl> <chr>           
1  67.8  30.7 Trump           
2  57.5  41.1 Trump           
3  34.7  64.5 Biden           
4  61.9  37.3 Trump           
5  47.3  51.1 Neutral         
6  45.8  52.7 Neutral

model_him_glm_cat <- glm(crude_rate ~ support_category + estimate_median_age + pct_foreign_born + 
                 pct_bach_degree + pct_latino_pop + log_total_pop + pct_no_insurance, 
                 family = Gamma(link = "log"), data = hisp_infant1)

summary(model_him_glm_cat)


Call:
glm(formula = crude_rate ~ support_category + estimate_median_age + 
    pct_foreign_born + pct_bach_degree + pct_latino_pop + log_total_pop + 
    pct_no_insurance, family = Gamma(link = "log"), data = hisp_infant1)

Coefficients:
                               Estimate Std. Error t value Pr(>|t|)   
(Intercept)                   1.9982231  0.6563929   3.044  0.00355 **
support_categoryNeutral      -0.0032638  0.0662835  -0.049  0.96090   
support_categoryStrong Biden -0.3033696  0.1919228  -1.581  0.11958   
support_categoryStrong Trump -0.0525393  0.0893455  -0.588  0.55886   
support_categoryTrump         0.0503763  0.0713259   0.706  0.48294   
estimate_median_age           0.0002229  0.0055524   0.040  0.96812   
pct_foreign_born             -0.0108963  0.0041584  -2.620  0.01129 * 
pct_bach_degree               0.0051740  0.0048235   1.073  0.28802   
pct_latino_pop               -0.0010457  0.0021749  -0.481  0.63254   
log_total_pop                -0.0342591  0.0443978  -0.772  0.44357   
pct_no_insurance              0.0342871  0.0215755   1.589  0.11765   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.03091349)

    Null deviance: 2.5971  on 66  degrees of freedom
Residual deviance: 1.7259  on 56  degrees of freedom
AIC: 188.39

Number of Fisher Scoring iterations: 5

GLM for Binary

glm_winner_him <- glm(
  formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_latino_pop + log_total_pop + pct_no_insurance, 
  family = Gamma(link = "log"), 
  data = hisp_infant1
)

# Display summary of the GLM model
summary(glm_winner_him)


Call:
glm(formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_latino_pop + log_total_pop + pct_no_insurance, 
    family = Gamma(link = "log"), data = hisp_infant1)

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)   
(Intercept)          2.171286   0.684338   3.173   0.0024 **
winner               0.009261   0.057437   0.161   0.8725   
estimate_median_age  0.001902   0.005362   0.355   0.7240   
pct_foreign_born    -0.010178   0.004042  -2.518   0.0145 * 
pct_bach_degree      0.003130   0.004593   0.681   0.4983   
pct_latino_pop      -0.001100   0.002140  -0.514   0.6092   
log_total_pop       -0.046685   0.046808  -0.997   0.3226   
pct_no_insurance     0.028473   0.021373   1.332   0.1879   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.03162258)

    Null deviance: 2.5971  on 66  degrees of freedom
Residual deviance: 1.8571  on 59  degrees of freedom
AIC: 187.32

Number of Fisher Scoring iterations: 5

3. Black Infant Mortality Analysis

black_infant <- read_excel("black_infant.xlsx")
View(black_infant)

library(janitor)
clean_names(black_infant)

# A tibble: 88 × 9
   state_code county_code deaths births crude_rate county         state countyid
        <dbl>       <dbl>  <dbl>  <dbl>      <dbl> <chr>          <chr> <chr>   
 1          5        5119     84   6567      12.8  Pulaski County AR    Pulaski…
 2          5        5999    169  13229      12.8  Unidentified … AR    Unident…
 3         10       10003     53   5475       9.68 New Castle Co… DE    New Cas…
 4         10       10999     34   2811      12.1  Unidentified … DE    Unident…
 5         12       12009     25   2373      10.5  Brevard County FL    Brevard…
 6         12       12011    185  22703       8.15 Broward County FL    Broward…
 7         12       12021     11   1382       7.96 Collier County FL    Collier…
 8         12       12031    150  13002      11.5  Duval County   FL    Duval C…
 9         12       12033     36   3375      10.7  Escambia Coun… FL    Escambi…
10         12       12057    148  10815      13.7  Hillsborough … FL    Hillsbo…
# ℹ 78 more rows
# ℹ 1 more variable: unreliable <chr>

library(psych)
describe(black_infant)

            vars  n     mean       sd   median  trimmed      mad     min   max
State Code     1 88    29.53    16.61    24.00    29.35    17.79    5.00    54
county_code    2 88 29789.35 16658.68 24029.00 29571.17 17819.37 5119.00 54999
Deaths         3 88    93.59   116.12    52.50    68.47    46.70   11.00   636
Births         4 88  8831.01 10890.09  4419.00  6503.99  3934.08 1271.00 65208
crude_rate     5 88    18.07    70.44    10.21    10.44     1.99    6.02   671
county*        6 88    42.01    22.61    43.50    42.99    31.13    1.00    73
state*         7 88     7.38     3.98     7.00     7.32     5.93    1.00    14
countyid*      8 88    44.50    25.55    44.50    44.50    32.62    1.00    88
unreliable*    9 14     1.00     0.00     1.00     1.00     0.00    1.00     1
               range  skew kurtosis      se
State Code     49.00  0.09    -1.79    1.77
county_code 49880.00  0.09    -1.78 1775.82
Deaths        625.00  2.56     7.08   12.38
Births      63937.00  2.68     8.45 1160.89
crude_rate    664.98  9.05    80.90    7.51
county*        72.00 -0.21    -1.31    2.41
state*         13.00  0.07    -1.61    0.42
countyid*      87.00  0.00    -1.24    2.72
unreliable*     0.00   NaN      NaN    0.00

Join to Voter

bim_join <- inner_join(black_infant, votes, by = "countyid")

bim_join <- bim_join %>%
  mutate(county_code = str_pad(county_code, width = 5, pad = "0")) |>
  rename(county_fips = county_code)

Join to Covariates

#join death data to covariate dta
bim_join1 <- inner_join(bim_join, multi_state2, by = "county_fips")


#Now I am creating a binary variable to identify the county as either a trump or biden county. 

bim_join1 <- bim_join1 |>
  mutate(
      winner = case_when(
      T == 1 ~ 1,  # If T is 1, Trump won, so assign 1
      B == 1 ~ 0   # If B is 1, Biden won, so assign 0
      ), 
    pct_foreign_born = (estimate_nativity / estimate_total_pop) * 100,
    pct_bach_degree = (estimate_bach_degree / estimate_total_pop) * 100,
    pct_black_pop = (estimate_black_pop / estimate_total_pop) * 100,   
    # pct_latino_pop = (estimate_latino_pop / estimate_total_pop) * 100, # For Latino population (switch for Latino analysis)
    # pct_white_pop = (estimate_white_pop / estimate_total_pop) * 100,   # For White population (switch for White analysis)
    pct_no_insurance = (estimate_insurance / estimate_total_pop) * 100,
    # Log transformation of total population size to handle outliers
    log_total_pop = log(estimate_total_pop)
  )
      

#save to excel for safety and then did some cleaning in excel
library(writexl)
write_xlsx(bim_join1, "black_infant_join.xlsx")


#now bringing back in the dataframe after minor manipulations in excel
bim_join2 <- read_excel("black_infant_join_clean.xlsx")
head(bim_join2)

# A tibble: 6 × 31
  `State Code` county_fips Deaths Births crude_rate county.x    state.x countyid
         <dbl> <chr>        <dbl>  <dbl>      <dbl> <chr>       <chr>   <chr>   
1            5 05119           84   6567      12.8  Pulaski Co… AR      Pulaski…
2           10 10003           53   5475       9.68 New Castle… DE      New Cas…
3           12 12009           25   2373      10.5  Brevard Co… FL      Brevard…
4           12 12011          185  22703       8.15 Broward Co… FL      Broward…
5           12 12021           11   1382       7.96 Collier Co… FL      Collier…
6           12 12031          150  13002      11.5  Duval Coun… FL      Duval C…
# ℹ 23 more variables: unreliable <chr>, `total vote` <dbl>, T <dbl>, B <dbl>,
#   trump <chr>, biden <chr>, `trump n` <dbl>, `biden n` <dbl>,
#   estimate_median_age <dbl>, estimate_total_pop <dbl>,
#   estimate_white_pop <dbl>, estimate_black_pop <dbl>,
#   estimate_latino_pop <dbl>, estimate_nativity <dbl>,
#   estimate_bach_degree <dbl>, estimate_median_income <dbl>,
#   estimate_insurance <dbl>, winner <dbl>, pct_foreign_born <dbl>, …

GLM for Categories

bim_join2 <- bim_join2|>
  mutate(
    trump = as.numeric(trimws(gsub("[^0-9.]", "", trump))),  # remove non-numeric characters and spaces
    biden = as.numeric(trimws(gsub("[^0-9.]", "", biden)))   # remove non-numeric characters and spaces
  )

sum(is.na(bim_join2$trump))  # Count NAs in trump column

[1] 0

sum(is.na(bim_join2$biden))

[1] 0

bim_join2 <- bim_join2|>
mutate(
  support_category = case_when(
    trump >= 70 ~ "Strong Trump",        # Trump received 70% or more
    trump >= 55 & trump < 70 ~ "Trump",  # Trump received between 55% and 69.9%
    biden >= 70 ~ "Strong Biden",        # Biden received 70% or more
    biden >= 55 & biden < 70 ~ "Biden",  # Biden received between 55% and 69.9%
    (trump >= 45 & trump < 55) | (biden >= 45 & biden < 55) ~ "Neutral")) # Trump or Biden received between 45% and 54.9%

head(bim_join2|>select(trump, biden, support_category))

# A tibble: 6 × 3
  trump biden support_category
  <dbl> <dbl> <chr>           
1  37.5  60.0 Biden           
2  67.8  30.7 Trump           
3  57.5  41.1 Trump           
4  34.7  64.5 Biden           
5  61.9  37.3 Trump           
6  47.3  51.1 Neutral

model_bim_glm_cat <- glm(crude_rate ~ support_category + estimate_median_age + pct_foreign_born + 
                 pct_bach_degree + pct_black_pop + log_total_pop + pct_no_insurance, 
                 family = Gamma(link = "log"), data = bim_join2)

summary(model_bim_glm_cat)


Call:
glm(formula = crude_rate ~ support_category + estimate_median_age + 
    pct_foreign_born + pct_bach_degree + pct_black_pop + log_total_pop + 
    pct_no_insurance, family = Gamma(link = "log"), data = bim_join2)

Coefficients:
                               Estimate Std. Error t value Pr(>|t|)
(Intercept)                   8.6763659  6.5924223   1.316    0.193
support_categoryNeutral      -0.0550294  0.6226286  -0.088    0.930
support_categoryStrong Biden  0.3475691  1.3940803   0.249    0.804
support_categoryStrong Trump  0.2606108  0.8717752   0.299    0.766
support_categoryTrump         0.6835350  0.6518898   1.049    0.298
estimate_median_age          -0.0553697  0.0562289  -0.985    0.329
pct_foreign_born              0.0006676  0.0339881   0.020    0.984
pct_bach_degree              -0.0724592  0.0444048  -1.632    0.108
pct_black_pop                -0.0046484  0.0176955  -0.263    0.794
log_total_pop                -0.0960559  0.4633191  -0.207    0.836
pct_no_insurance             -0.2212503  0.2213936  -0.999    0.321

(Dispersion parameter for Gamma family taken to be 3.154198)

    Null deviance: 86.675  on 73  degrees of freedom
Residual deviance: 46.081  on 63  degrees of freedom
AIC: 558.32

Number of Fisher Scoring iterations: 21

GLM for Binary

glm_winner_bim <- glm(
  formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_black_pop + log_total_pop + pct_no_insurance, 
  family = Gamma(link = "log"), 
  data = bim_join2
)

# Display summary of the GLM model
summary(glm_winner_bim)


Call:
glm(formula = crude_rate ~ winner + estimate_median_age + pct_foreign_born + 
    pct_bach_degree + pct_black_pop + log_total_pop + pct_no_insurance, 
    family = Gamma(link = "log"), data = bim_join2)

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)  
(Intercept)          9.8163040  7.4334933   1.321   0.1912  
winner               0.2999422  0.5428723   0.553   0.5825  
estimate_median_age -0.0484905  0.0609550  -0.796   0.4292  
pct_foreign_born    -0.0007244  0.0369592  -0.020   0.9844  
pct_bach_degree     -0.0779615  0.0458677  -1.700   0.0939 .
pct_black_pop       -0.0078559  0.0167166  -0.470   0.6399  
log_total_pop       -0.1670909  0.5314260  -0.314   0.7542  
pct_no_insurance    -0.2442281  0.2420855  -1.009   0.3167  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 3.94454)

    Null deviance: 86.675  on 73  degrees of freedom
Residual deviance: 50.669  on 66  degrees of freedom
AIC: 560.08

Number of Fisher Scoring iterations: 21