options(digits = 2)
library(kirkegaard)
load_packages(
  rms,
  ggeffects
)
#2022 article
systemic_racism_data = read_rds("data/systemic racism data_out.rds") %>%
  mutate(FIPS = FIPS %>% as.numeric())
systemic_racism_data_table = df_var_table(systemic_racism_data)
#2016 article
counties2016 = read_csv("data/2016 study/data_merged.csv") %>%
  df_legalize_names()
## New names:
## Rows: 3146 Columns: 93
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, State, temp_bins, lat_bins, lon_bins, precip_bins, elevati...
## dbl (86): ...1, FIPS, Less.Than.High.School, At.Least.High.School.Diploma, A...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
counties2016_table = df_var_table(counties2016)
#county health rankings data
#https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation
chr_data2022 = haven::read_sas("data/analytic_data2022.sas7bdat")
chr_data2022_table = df_var_table(chr_data2022)
#inquisitive bird
ib_data = read_csv("data/Bird FullData.csv") %>%
  df_legalize_names() %>%
  mutate(fips = fips %>% as.numeric())
## Rows: 3221 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): fips, County
## dbl (12): Population, Homicide Rate, Violent Crime Rate, Non-Hispanic White ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ib_data_table = df_var_table(ib_data)
#join the data we need
d = full_join(
  ib_data %>% filter(!str_detect(County, ", Puerto Rico")),
  systemic_racism_data,
  by = c("fips" = "FIPS")
) %>% full_join(
  counties2016 %>% select(FIPS, White:SIRE_homogeneity, S, temp, precip, lon, lat) %>% filter(!is.na(FIPS)),
  by = c("fips" = "FIPS")
)
#no dups!
assert_that(!anyDuplicated(d$fips))
## [1] TRUE
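Since the three sources are combined via full joins on numeric FIPS codes, it is worth checking how well they overlap. The following is an illustrative sketch added here (the summary column names are hypothetical, not from the original analysis):

#merge coverage check: how many counties have data from each source
d %>% summarise(
  n_counties = n(),
  has_homicide_rate = sum(!is.na(Homicide_Rate)),
  has_S = sum(!is.na(S))
)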
First, we analyze data from US counties. The data are from Kirkegaard (2016).
#other race variable
d %<>% mutate(
  Other_share = 1 - (Non_Hispanic_White_Share + Black_Share + Native_Share + Latino_Share + Asian_Share),
  race_homogeneity = rowSums(cbind(Non_Hispanic_White_Share, Black_Share, Native_Share, Latino_Share, Asian_Share, Other_share)^2),
  sqrt_pop = sqrt(popsize2019),
  g = cs_mn_all
)
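The race_homogeneity variable is a Herfindahl-type index: the sum of squared group shares, which equals the probability that two randomly drawn residents self-report the same race/ethnicity. A minimal standalone illustration with made-up shares:

#Herfindahl index: 1 = a single group; equals 1/k for k equally sized groups
shares = c(white = .60, black = .15, latino = .15, asian = .05, other = .05)
sum(shares^2) #0.41 for this toy county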
#correlations
d %>%
  select(S, g, Homicide_Rate, Non_Hispanic_White_Share, Black_Share, Native_Share, Latino_Share, Asian_Share, Other_share, race_homogeneity) %>%
  wtd.cors()
## S g Homicide_Rate Non_Hispanic_White_Share
## S 1.000 0.760 -0.64004 0.465
## g 0.760 1.000 -0.58310 0.571
## Homicide_Rate -0.640 -0.583 1.00000 -0.534
## Non_Hispanic_White_Share 0.465 0.571 -0.53419 1.000
## Black_Share -0.554 -0.487 0.66687 -0.585
## Native_Share -0.184 -0.303 0.18949 -0.298
## Latino_Share -0.084 -0.201 -0.00085 -0.606
## Asian_Share 0.268 0.154 -0.04549 -0.308
## Other_share 0.176 0.081 -0.04154 -0.055
## race_homogeneity 0.276 0.373 -0.42069 0.815
## Black_Share Native_Share Latino_Share Asian_Share
## S -0.554 -0.184 -0.08405 0.268
## g -0.487 -0.303 -0.20098 0.154
## Homicide_Rate 0.667 0.189 -0.00085 -0.045
## Non_Hispanic_White_Share -0.585 -0.298 -0.60565 -0.308
## Black_Share 1.000 -0.102 -0.10521 0.033
## Native_Share -0.102 1.000 -0.05655 -0.032
## Latino_Share -0.105 -0.057 1.00000 0.166
## Asian_Share 0.033 -0.032 0.16566 1.000
## Other_share -0.143 0.203 -0.11843 0.359
## race_homogeneity -0.538 -0.128 -0.43056 -0.407
## Other_share race_homogeneity
## S 0.176 0.28
## g 0.081 0.37
## Homicide_Rate -0.042 -0.42
## Non_Hispanic_White_Share -0.055 0.81
## Black_Share -0.143 -0.54
## Native_Share 0.203 -0.13
## Latino_Share -0.118 -0.43
## Asian_Share 0.359 -0.41
## Other_share 1.000 -0.25
## race_homogeneity -0.254 1.00
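Note that wtd.cors() is from the weights package; since no weight vector is supplied, it should reduce to ordinary pairwise Pearson correlations. As a sanity check (a sketch, not part of the original analysis), base R's cor() should return near-identical values:

#unweighted wtd.cors should match pairwise-complete Pearson correlations
d %>%
  select(S, g, Homicide_Rate, race_homogeneity) %>%
  cor(use = "pairwise.complete.obs") %>%
  round(2)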
d %>%
  select(S, g, Homicide_Rate, Non_Hispanic_White_Share, Black_Share, Native_Share, Latino_Share, Asian_Share, Other_share, race_homogeneity) %>%
  GG_heatmap()
GG_save("figures/cors_heatmap.png")
#plot
GG_scatter(d, "race_homogeneity", "S", case_names = F) +
  scale_x_continuous("Racial homogeneity (based on self-reported race/ethnicity)", breaks = seq(0, 1, .1)) +
  scale_y_continuous("General social status factor (S-factor)") +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
GG_save("figures/USA_homo_S.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#plot
GG_scatter(d, "race_homogeneity", "Homicide_Rate", case_names = F) +
  scale_x_continuous("Racial homogeneity (based on self-reported race/ethnicity)", breaks = seq(0, 1, .1)) +
  scale_y_continuous("Homicide rate (2010-2020)") +
  geom_smooth()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
GG_save("figures/USA_homo_homicide_rate.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#models
list(
  ols(S ~ race_homogeneity, data = d),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share, data = d),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g + lon + lat + temp + precip, data = d)
) %>%
  summarize_models()
## # A tibble: 14 × 5
## `Predictor/Model` `1` `2` `3` `4`
## <chr> <chr> <chr> <chr> <chr>
## 1 Intercept -1.30 (0.062***) 1.10 (0.129***) 0.39 (0.107***) -0.15 (…
## 2 race_homogeneity 1.46 (0.091***) -1.13 (0.135***) -0.67 (0.111***) -0.94 (…
## 3 Black_Share <NA> -4.99 (0.140***) -2.42 (0.133***) -1.62 (…
## 4 Native_Share <NA> -3.61 (0.176***) -0.90 (0.163***) -1.82 (…
## 5 Latino_Share <NA> -2.22 (0.128***) -0.63 (0.113***) -0.65 (…
## 6 Asian_Share <NA> 8.94 (0.491***) 5.18 (0.414***) 4.60 (0…
## 7 Other_share <NA> -2.96 (0.765***) -0.18 (0.632) -0.10 (…
## 8 g <NA> <NA> 2.15 (0.056***) 2.02 (0…
## 9 lon <NA> <NA> <NA> 0.00 (0…
## 10 lat <NA> <NA> <NA> 0.02 (0…
## 11 temp <NA> <NA> <NA> -0.03 (…
## 12 precip <NA> <NA> <NA> 0.00 (0…
## 13 R2 adj. 0.076 0.497 0.660 0.739
## 14 N 3123 3123 3095 1520
list(
  ols(Homicide_Rate ~ race_homogeneity, data = d),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share, data = d),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g + lon + lat + temp + precip, data = d)
) %>%
  summarize_models()
## # A tibble: 14 × 5
## `Predictor/Model` `1` `2` `3` `4`
## <chr> <chr> <chr> <chr> <chr>
## 1 Intercept 12.28 (0.294***) -0.37 (0.629) 0.76 (0.597) 3.87 …
## 2 race_homogeneity -11.26 (0.434***) 2.43 (0.659***) 1.84 (0.620**) 3.03 …
## 3 Black_Share <NA> 26.11 (0.686***) 21.24 (0.739***) 19.99…
## 4 Native_Share <NA> 17.17 (0.858***) 11.89 (0.901***) 14.31…
## 5 Latino_Share <NA> 5.47 (0.623***) 2.39 (0.626***) 0.56 …
## 6 Asian_Share <NA> -13.46 (2.400***) -5.43 (2.301) 2.86 …
## 7 Other_share <NA> 18.91 (3.705***) 14.16 (3.491***) 17.28…
## 8 g <NA> <NA> -4.26 (0.309***) -3.72…
## 9 lon <NA> <NA> <NA> -0.03…
## 10 lat <NA> <NA> <NA> -0.16…
## 11 temp <NA> <NA> <NA> -0.02…
## 12 precip <NA> <NA> <NA> 0.00 …
## 13 R2 adj. 0.177 0.528 0.572 0.593
## 14 N 3136 3136 3100 1524
#standardized effects
d_std = d %>%
  select(g, S, Non_Hispanic_White_Share, Black_Share, Native_Share, Latino_Share, Asian_Share, Other_share, race_homogeneity, Homicide_Rate, lon, lat, temp, precip) %>%
  df_standardize(exclude_range_01 = F)
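df_standardize() is a kirkegaard helper; with exclude_range_01 = F, variables bounded in [0, 1] (the racial shares) get z-scored along with everything else. Under the assumption that it simply z-scores numeric columns, a minimal dplyr equivalent would be (a sketch; the package function may differ in details):

#assumed equivalent of df_standardize(exclude_range_01 = F) for numeric columns
d_std_check = d %>%
  select(g, S, race_homogeneity, Homicide_Rate) %>%
  mutate(across(everything(), ~ as.vector(scale(.))))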
#models
list(
  ols(S ~ race_homogeneity, data = d_std),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share, data = d_std),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d_std),
  ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g + lon + lat + temp + precip, data = d_std)
) %>%
  summarize_models()
## # A tibble: 14 × 5
## `Predictor/Model` `1` `2` `3` `4`
## <chr> <chr> <chr> <chr> <chr>
## 1 Intercept 0.00 (0.017) 0.00 (0.013) 0.00 (0.010) -0.01 (0…
## 2 race_homogeneity 0.28 (0.017***) -0.21 (0.025***) -0.13 (0.021***) -0.18 (0…
## 3 Black_Share <NA> -0.75 (0.021***) -0.36 (0.020***) -0.24 (0…
## 4 Native_Share <NA> -0.30 (0.015***) -0.07 (0.013***) -0.15 (0…
## 5 Latino_Share <NA> -0.32 (0.019***) -0.09 (0.016***) -0.09 (0…
## 6 Asian_Share <NA> 0.27 (0.015***) 0.16 (0.013***) 0.14 (0.…
## 7 Other_share <NA> -0.07 (0.017***) 0.00 (0.014) 0.00 (0.…
## 8 g <NA> <NA> 0.57 (0.015***) 0.53 (0.…
## 9 lon <NA> <NA> <NA> -0.05 (0…
## 10 lat <NA> <NA> <NA> 0.14 (0.…
## 11 temp <NA> <NA> <NA> -0.14 (0…
## 12 precip <NA> <NA> <NA> -0.10 (0…
## 13 R2 adj. 0.076 0.497 0.660 0.739
## 14 N 3123 3123 3095 1520
list(
  ols(Homicide_Rate ~ race_homogeneity, data = d_std),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share, data = d_std),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d_std),
  ols(Homicide_Rate ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g + lon + lat + temp + precip, data = d_std)
) %>%
  summarize_models()
## # A tibble: 14 × 5
## `Predictor/Model` `1` `2` `3` `4`
## <chr> <chr> <chr> <chr> <chr>
## 1 Intercept 0.00 (0.016) 0.00 (0.012) 0.00 (0.011) 0.01 (0…
## 2 race_homogeneity -0.42 (0.016***) 0.09 (0.025***) 0.07 (0.023**) 0.11 (0…
## 3 Black_Share <NA> 0.77 (0.020***) 0.63 (0.022***) 0.59 (0…
## 4 Native_Share <NA> 0.28 (0.014***) 0.19 (0.015***) 0.23 (0…
## 5 Latino_Share <NA> 0.16 (0.018***) 0.07 (0.018***) 0.02 (0…
## 6 Asian_Share <NA> -0.08 (0.015***) -0.03 (0.014) 0.02 (0…
## 7 Other_share <NA> 0.08 (0.016***) 0.06 (0.016***) 0.08 (0…
## 8 g <NA> <NA> -0.22 (0.016***) -0.19 (…
## 9 lon <NA> <NA> <NA> -0.08 (…
## 10 lat <NA> <NA> <NA> -0.18 (…
## 11 temp <NA> <NA> <NA> -0.02 (…
## 12 precip <NA> <NA> <NA> 0.00 (0…
## 13 R2 adj. 0.177 0.528 0.572 0.593
## 14 N 3136 3136 3100 1524
We see the expected pattern for a setting with a large majority group and within-group variation in cognitive ability. The bivariate analysis shows a clear relationship between racial homogeneity and better outcomes. But when we include just the main effects of the non-largest groups, the beta shrinks substantially (by 83%), though it still retains some validity. When we furthermore add cognitive ability, it is no longer significant (p > alpha), despite the large sample size. And finally, looking for non-linear effects of homogeneity also yields nothing but a 0.2% increase in model variance explained.
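The non-linearity check referred to above is not shown here; one way to run it with the already-loaded rms package is via restricted cubic splines. A sketch (the 4-knot spline is an assumption, not necessarily the specification actually used):

#allow a non-linear homogeneity effect via restricted cubic splines
fit_linear = ols(S ~ race_homogeneity + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d_std)
fit_spline = ols(S ~ rcs(race_homogeneity, 4) + Black_Share + Native_Share + Latino_Share + Asian_Share + Other_share + g, data = d_std)
#compare variance explained; the text reports only a ~0.2% gain from non-linearity
fit_linear$stats["R2"]
fit_spline$stats["R2"]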