Data
Read data files, merge, rename, recode.
#Kirkegaard 2016 datafile
#social, demographics, SES etc.
maindata = read_csv("data/data_merged.csv") %>% filter(!duplicated(FIPS))
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## County = col_character(),
## State = col_character(),
## temp_bins = col_character(),
## lat_bins = col_character(),
## lon_bins = col_character(),
## precip_bins = col_character(),
## elevation_bins = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
#remove units without names
maindata %<>% filter(!is.na(County))
#fix data error
maindata$lon[which(maindata$County == "Aleutians West Census Area, Alaska")] = -178
#rename old elevation
maindata %<>% rename(
elevation_old = elevation
)
#remove unit types
rm_unit_type = function(x) {
x %>%
str_replace(" City and Borough$", "") %>%
#these cause duplicates unfortunately
# str_replace(" City$", "") %>%
# str_replace(" city$", "") %>%
str_replace(" Borough$", "") %>%
str_replace(" County$", "") %>%
str_replace(" Parish$", "") %>%
str_replace(" Census( Area)?$", "") %>%
str_replace(" Municipality$", "")
}
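A quick sanity check of the cleaner on hypothetical inputs (not part of the original run):

#rm_unit_type(c("Cook County", "Orleans Parish", "Aleutians West Census Area", "Juneau City and Borough"))
#gives: "Cook" "Orleans" "Aleutians West" "Juneau"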
#mutate for joining with datasets without FIPS
maindata %<>% mutate(
county_name = County %>% str_match("(.+), ") %>% .[, 2],
county_name2 = County %>% str_match("(.+), ") %>% .[, 2] %>%
rm_unit_type(),
state_name = County %>% str_match(", (.+)") %>% .[, 2],
state_abbrev = mapvalues(state_name, from = state.name, to = state.abb),
county_state = str_glue("{county_name2} - {state_abbrev}") %>% str_to_title(),
county_state2 = str_glue("{county_name2} - {state_name}") %>% str_to_title()
)
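For illustration, a hypothetical input parses like this:

#"Cook County, Illinois" ->
# county_name = "Cook County", county_name2 = "Cook",
# state_name = "Illinois", state_abbrev = "IL",
# county_state = "Cook - Il" (str_to_title() lowercases the rest of the abbreviation)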
#dups in new id?
any(duplicated(maindata$county_state))
## [1] FALSE
#SEDA pooled data
#https://edopportunity.org/get-the-data/seda-archive-downloads/
seda_CS = haven::read_dta("data/SEDA/3.0/seda_county_pool_cs_v30.dta") %>%
rename(
FIPS = countyid,
) %>%
mutate(
FIPS = FIPS %>% as.character() %>% as.numeric()
)
#spread mean and SE by SIRE
seda_CS_wide = cbind(
{
seda_CS %>%
select(FIPS, subgroup, mn_avg_ol) %>%
spread(key = subgroup, value = mn_avg_ol) %>%
select(FIPS, all, asn, blk, hsp, wht, wbg, whg, wag)
},
{
. = seda_CS %>%
select(FIPS, subgroup, mn_avg_ol_se) %>%
spread(key = subgroup, value = mn_avg_ol_se) %>%
select(FIPS, all, asn, blk, hsp, wht, wbg, whg, wag)
#add _se to names
colnames(.)[-1] = colnames(.)[-1] + "_se"
.[-1]
}
)
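The cbind/spread combination above predates tidyr's pivot_wider() (tidyr >= 1.0); a one-pass sketch of the same reshape, with the hypothetical name seda_CS_wide2. Note it yields columns like mn_avg_ol_se_asn rather than asn_se, so renaming would still be needed (the string + in the block above comes from the kirkegaard package):

seda_CS_wide2 = seda_CS %>%
select(FIPS, subgroup, mn_avg_ol, mn_avg_ol_se) %>%
pivot_wider(id_cols = FIPS, names_from = subgroup,
values_from = c(mn_avg_ol, mn_avg_ol_se))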
#SEDA covariates
seda_covars = read_dta("data/SEDA/3.0/SEDA_cov_county_pool_v30.dta") %>%
rename(
FIPS = countyid,
#SES composites
SES_all = sesavgall,
SES_white = sesavgwht,
SES_black = sesavgblk,
SES_hispanic = sesavghsp,
#SES gaps
SES_d_bw = sesavgwhtblk,
SES_d_hw = sesavgwhthsp,
#demographics
frc_amerindian = perind,
frc_asian = perasn,
frc_hispanic = perhsp,
frc_black = perblk,
frc_white = perwht,
) %>%
mutate(
FIPS = FIPS %>% as.character() %>% as.numeric()
)
#climate
#https://wonder.cdc.gov/nasa-nldas.html
climate = readxl::read_excel("data/North America Land Data Assimilation System (NLDAS) Daily Air Temperatures and Heat Index (1979-2011).xlsx") %>%
rename(FIPS = Fips)
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Expecting logical in A3113 / R3113C1: got 'Total'
#UV
#https://gis.cancer.gov/tools/uv-exposure/
uv = readxl::read_excel("data/uv-county.xlsx")
uv$FIPS = uv$COUNTY_FIPS %>% as.numeric()
#altitude data AHRF
#https://www.usgs.gov/core-science-systems/ngp/board-on-geographic-names/download-gnis-data
AHRF = read_sas("data/AHRF_2018-2019_SAS/ahrf2019.sas7bdat")
AHRF_dict = AHRF %>% df_var_table()
AHRF$FIPS = AHRF$f00002 %>% as.numeric()
AHRF$elevation = AHRF$f0081176 * 0.3048 #feet to meters
AHRF$elevation_z = AHRF$elevation %>% standardize()
#disease data
disease_sheets = readxl::excel_sheets("data/IHME_USA_COUNTY_INFECT_DIS_MORT_1980_2014_NATIONAL_Y2018M03D27.XLSX")
# disease = read_excel("data/IHME_USA_COUNTY_INFECT_DIS_MORT_1980_2014_NATIONAL_Y2018M03D27.XLSX", skip = 1)
disease = map_df(disease_sheets, function(s) {
# browser()
#read sheet
d_s = read_excel("data/IHME_USA_COUNTY_INFECT_DIS_MORT_1980_2014_NATIONAL_Y2018M03D27.XLSX", skip = 1,
sheet = s)
#restructure to long form
d_s %>%
filter(!is.na(FIPS)) %>%
select(-11) %>%
mutate(
disease = s
) %>%
gather(key = year, value = rate_orig, starts_with("Mortality"))
}) %>%
mutate(
rate = str_match(rate_orig, "(\\d+?\\.?\\d+?) \\((\\d+?\\.?\\d+?), (\\d+?\\.?\\d+?)\\)")[, 2] %>% as.numeric(),
CI_lower = str_match(rate_orig, "(\\d+?\\.?\\d+?) \\((\\d+?\\.?\\d+?), (\\d+?\\.?\\d+?)\\)")[, 3] %>% as.numeric(),
CI_upper = str_match(rate_orig, "(\\d+?\\.?\\d+?) \\((\\d+?\\.?\\d+?), (\\d+?\\.?\\d+?)\\)")[, 4] %>% as.numeric(),
year = str_match(year, "\\d+")[, 1] %>% as.numeric()
)
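The three str_match() calls above run the same regex three times; an equivalent single-pass sketch:

#rate_re = "(\\d+?\\.?\\d+?) \\((\\d+?\\.?\\d+?), (\\d+?\\.?\\d+?)\\)"
#m = str_match(disease$rate_orig, rate_re)
#disease$rate = as.numeric(m[, 2])
#disease$CI_lower = as.numeric(m[, 3])
#disease$CI_upper = as.numeric(m[, 4])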
#disease wide for 2014 data
disease_wide = disease %>%
filter(year == 2014) %>%
select(FIPS, disease, rate) %>%
spread(key = disease, value = rate) %>%
df_legalize_names()
#fluoride data (dataset prepared by manual treatment)
fluoride_county = read_csv("data/fluoride_county.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## county_state = col_character(),
## state = col_character(),
## waterworks = col_double(),
## population_served = col_double(),
## fluoridated = col_double(),
## fluoride_conc = col_double(),
## exact = col_logical(),
## FIPS = col_double()
## )
#lead
#https://mytapscore.com/pages/the-lead-map
#screenshot in figures folder if site goes down
lead_county = read_csv("data/lead_county.csv") %>%
mutate(
lead_index = ageidx + gw_corr_idx
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## name_2 = col_character(),
## name = col_character(),
## ca_average_age = col_double(),
## ageidx = col_double(),
## gw_corr_idx = col_double(),
## state_abbr = col_character(),
## county_state = col_character(),
## exact = col_logical(),
## FIPS = col_double()
## )
#lead poisoning counts
#https://www.reuters.com/investigates/special-report/usa-lead-testing/
#kinda problematic data source
#voting data in 2016
voting = read_csv("data/votingdata_KirkPesta2019.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name_16 = col_character(),
## votes16_jacobp = col_logical(),
## votes16_whitej = col_logical(),
## votes16_mooreheadm = col_logical(),
## votes16_none_of_these_candidates = col_logical(),
## votes16_duncanr = col_logical(),
## votes16_skewesp = col_logical(),
## votes16_giordanir = col_logical(),
## name_prev = col_character(),
## ST = col_character(),
## statecode_prev = col_character(),
## County = col_character(),
## State = col_character(),
## temp_bins = col_character(),
## lat_bins = col_character(),
## lon_bins = col_character(),
## precip_bins = col_character(),
## elevation_bins = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
#no dups
assert_that(!any(duplicated(maindata$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(seda_CS_wide$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(climate$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(uv$COUNTY_FIPS)))
## [1] TRUE
assert_that(!any(duplicated(AHRF$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(voting$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(seda_covars$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(lead_county$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(fluoride_county$FIPS)))
## [1] TRUE
assert_that(!any(duplicated(disease_wide$FIPS)))
## [1] TRUE
#merge
d = full_join(maindata, climate, by = "FIPS") %>%
full_join(uv, by = "FIPS") %>%
full_join(AHRF %>% select(FIPS, elevation, elevation_z), by = "FIPS") %>%
full_join(seda_CS_wide, by = "FIPS") %>%
full_join(voting %>% select(FIPS, rep16_frac:dem16_frac2), by = "FIPS") %>%
full_join(seda_covars %>% select(FIPS, frc_amerindian:frc_white, SES_all:SES_d_hw), by = "FIPS") %>%
full_join(lead_county %>% select(FIPS, lead_index), by = "FIPS") %>%
full_join(fluoride_county %>% select(FIPS, waterworks:fluoride_conc), by = "FIPS") %>%
left_join(disease_wide, by = "FIPS")
#remove units without CA data
#do we need this?
# d %<>% filter(!is.na(County.x), !is.na(CA))
#rename
d$UV = d$`UV_ Wh/m²` %>% standardize()
d$UV_orig = d$`UV_ Wh/m²`
d$avg_max_temp = d$`Avg Daily Max Heat Index (C)` %>% standardize()
d$avg_max_temp_orig = d$`Avg Daily Max Heat Index (C)`
d$avg_min_air_temp = d$`Avg Daily Min Air Temperature (C)` %>% standardize()
d$avg_min_air_temp_orig = d$`Avg Daily Min Air Temperature (C)`
d$avg_temp = d$`Avg Temperature (1979 to 2011)` %>% standardize()
d$avg_temp_orig = d$`Avg Temperature (1979 to 2011)`
#z score geoposition
d$lat_z = d$lat %>% standardize()
d$lon_z = d$lon %>% standardize()
#cognitive ability standardization
#keep originals as well
d$old_CA = d$CA
d$CA_orig = d$all
d$CA_all_orig = d$CA_orig #synonym
d$CA_asian_orig = d$asn
d$CA_black_orig = d$blk
d$CA_hispanic_orig = d$hsp
d$CA_white_orig = d$wht
d$CA = d$all %>% standardize()
d$CA_all = d$CA #synonym
d$CA_asian = d$asn %>% standardize()
d$CA_black = d$blk %>% standardize()
d$CA_hispanic = d$hsp %>% standardize()
d$CA_white = d$wht %>% standardize()
d$CA_d_bw = d$wbg
d$CA_d_hw = d$whg
d$CA_d_aw = d$wag
#SES variables standardization
d$SES_all_orig = d$SES_all
d$SES_black_orig = d$SES_black
d$SES_hispanic_orig = d$SES_hispanic
d$SES_white_orig = d$SES_white
d$SES_all = d$SES_all %>% standardize()
d$SES_black = d$SES_black %>% standardize()
d$SES_hispanic = d$SES_hispanic %>% standardize()
d$SES_white = d$SES_white %>% standardize()
#more population sizes
d$pop_amerindian = d$Amerindian * d$Total.Population
d$pop_asian = d$Asian * d$Total.Population
d$pop_black = d$Black * d$Total.Population
d$pop_hispanic = d$Hispanic * d$Total.Population
d$pop_other = d$Other * d$Total.Population
d$pop_white = d$White * d$Total.Population
d$pop_all = d$Total.Population
d$pop_weight = d$Total.Population %>% sqrt()
#other renamings / z scorings
d$lead_index_z = d$lead_index %>% standardize()
d$fluoride_conc_z = d$fluoride_conc %>% standardize()
d$Diarrheal_diseases_z = d$Diarrheal_diseases %>% standardize()
d$Hepatitis_z = d$Hepatitis %>% standardize()
d$Tuberculosis_z = d$Tuberculosis %>% standardize()
d$HIV_AIDS_z = d$HIV_AIDS %>% standardize()
d$Lower_respiratory_infections_z = d$Lower_respiratory_infections %>% standardize()
d$Meningitis_z = d$Meningitis %>% standardize()
#demographics divide by 100
#these are the old variables from ACS and Kirk 2016
race_vars = c("White", "Black", "Asian", "Hispanic", "Amerindian", "Other")
for (v in race_vars) d[[v]] = d[[v]] / 100
d[race_vars] %>% rowSums() %>% psych::describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 3143 1 0.00049 1 1 0.00074 1 1 0.003 0.036 -0.12
## se
## X1 8.7e-06
#new ones from SEDA
race_vars2 = c("frc_white", "frc_black", "frc_asian", "frc_hispanic", "frc_amerindian")
d[race_vars2] %>% rowSums() %>% psych::describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 3124 1 2.1e-09 1 1 1.4e-09 1 1 2.5e-08 0.16 4
## se
## X1 3.7e-11
#var lists
climate_vars = c("UV", "avg_temp", "lat", "lon", "elevation", "lat_z", "lon_z", "elevation_z", "fluoride_conc", "fluoride_conc_z", "lead_index", "lead_index_z")
climate_vars_model = c("UV", "avg_temp", "lat_z", "lon_z", "elevation_z")
water_vars = c("lead_index_z", "fluoride_conc_z")
disease_vars = c("Diarrheal_diseases_z", "Hepatitis_z", "HIV_AIDS_z", "Lower_respiratory_infections_z", "Meningitis_z", "Tuberculosis_z")
CA_vars = c("CA", "CA_all", "CA_asian", "CA_black", "CA_hispanic", "CA_white")
SES_vars = c("SES_all", "SES_black", "SES_hispanic", "SES_white")
main_vars = c(CA_vars, SES_vars, race_vars2, climate_vars)
main_vars2 = c(CA_vars, SES_vars, climate_vars)
main_vars3 = c(main_vars2, water_vars, disease_vars)
#make spatial lag variables
#slow because it recalculates distances every time
#we can't just precompute because the function cannot handle missing data properly
for (v in main_vars) {
#no NA
tmp = d[!is.na(d[[v]]) & !is.na(d$lat) & !is.na(d$lon), ]
tmp_var = str_glue("{v}_lag") %>% as.character()
#estimate
tmp[[tmp_var]] = SAC_knsnr(tmp, v, k = 3)
#join
d[[tmp_var]] = NULL
d = left_join(d, tmp %>% select(FIPS, !!tmp_var), by = "FIPS")
}
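SAC_knsnr() comes from the kirkegaard package. For intuition only, a minimal hand-rolled k nearest spatial neighbors lag might look like this, assuming the function averages each unit's k nearest neighbors (the package version may differ in details such as the distance metric):

#hypothetical illustration, not used below
knn_lag = function(df, var, k = 3) {
#euclidean distance on raw degrees; crude but serviceable for intuition
dists = as.matrix(dist(df[c("lat", "lon")]))
diag(dists) = Inf #exclude self
map_dbl(seq_len(nrow(df)), function(i) {
mean(df[[var]][order(dists[i, ])[1:k]])
})
}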
Spatial data
#load shapefile
#https://catalog.data.gov/dataset/tiger-line-shapefile-2017-nation-u-s-current-county-and-equivalent-national-shapefile
county_sf = st_read("data/spatial/tl_2017_us_county.shp")
## Reading layer `tl_2017_us_county' from data source `/media/luks8tb2/Projects/UV USA counties/data/spatial/tl_2017_us_county.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -180 ymin: -15 xmax: 180 ymax: 71
## CRS: 4269
#make FIPS
county_sf$FIPS = (as.character(county_sf$STATEFP) + as.character(county_sf$COUNTYFP)) %>% as.numeric()
county_sf = county_sf %>% filter(FIPS %in% d$FIPS)
#join in data
county_sf_data = left_join(county_sf, d, by = "FIPS")
#calculate area
county_sf_data$area = st_area(county_sf_data) %>% as.numeric()
#same as the existing variable? yes: area = ALAND + AWATER almost exactly (R2 = 1.00 below)
ols(ALAND ~ area, data = county_sf_data)
## Linear Regression Model
##
## ols(formula = ALAND ~ area, data = county_sf_data)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3224 LR chi2 13211.32 R2 0.983
## sigma1.2e+09 d.f. 1 R2 adj 0.983
## d.f. 3222 Pr(> chi2) 0.0000 g 3.2e+09
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.162e+10 5.051e+07 9.160e+07 1.402e+08 2.012e+10
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -1.6e+07 2.2e+07 -0.74 0.4566
## area 9.3e-01 2.1e-03 436.77 <0.0001
##
ols(area ~ ALAND + AWATER, data = county_sf_data)
## Linear Regression Model
##
## ols(formula = area ~ ALAND + AWATER, data = county_sf_data)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3224 LR chi2 111735.33 R2 1.000
## sigma289.1779 d.f. 2 R2 adj 1.000
## d.f. 3221 Pr(> chi2) 0.0000 g 3.4e+09
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3752.401 -110.288 -77.642 1.031 3868.906
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 110.0715 5.3347 2.1e+01 <0.0001
## ALAND 1.0000 0.0000 1.7e+09 <0.0001
## AWATER 1.0000 0.0000 2.5e+08 <0.0001
##
#water proportion
county_sf_data$water_area_prop = county_sf_data$AWATER / county_sf_data$area
#nicer names
county_sf_data$land_area = county_sf_data$ALAND
county_sf_data$water_area = county_sf_data$AWATER
#new latitude and longitude (points guaranteed to fall inside each polygon, unlike centroids)
#https://stackoverflow.com/questions/52522872/r-sf-package-centroid-within-polygon
county_sf_data$lat2 = map_dbl(county_sf_data$geometry, ~st_point_on_surface(.x)[[2]])
county_sf_data$lon2 = map_dbl(county_sf_data$geometry, ~st_point_on_surface(.x)[[1]])
#z score versions
county_sf_data %<>% mutate(
land_area_z = land_area %>% standardize(),
water_area_z = water_area %>% standardize(),
water_area_prop_z = water_area_prop %>% standardize(),
#pop density
pop_density = Total.Population / land_area,
pop_density_log10 = pop_density %>% log10(),
pop_density_log10_z = pop_density_log10 %>% standardize()
)
#back to normal
d = county_sf_data %>% st_drop_geometry()
State level data
Aggregate to state level and compute spatial lags.
#aggregate
d_state = d %>%
filter(!State.x %in% c("Hawaii", "Alaska")) %>%
plyr::ddply("State.x", .fun = function(dd) {
# browser()
# if (dd$State.x[1] == "District of Columbia") browser()
#sum populations
y = tibble(
#population counts
population = wtd_sum(dd$Total.Population),
pop_all = population,
pop_amerindian = wtd_sum(dd$Total.Population * dd$frc_amerindian),
pop_asian = wtd_sum(dd$Total.Population * dd$frc_asian),
pop_black = wtd_sum(dd$Total.Population * dd$frc_black),
pop_hispanic = wtd_sum(dd$Total.Population * dd$frc_hispanic),
# pop_other = wtd_sum(dd$Total.Population * dd$Other),
pop_white = wtd_sum(dd$Total.Population * dd$frc_white),
#fractions
frc_amerindian = pop_amerindian / population,
frc_asian = pop_asian / population,
frc_black = pop_black / population,
frc_hispanic = pop_hispanic / population,
# Other = pop_other / population,
frc_white = pop_white / population
)
#loop vars
#not population composition
for (v in c(main_vars3)) {
#compute
y[[v]] = wtd_mean(dd[[v]], w = dd$Total.Population)
}
y
})
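The same aggregation can be sketched with dplyr's group_by()/summarise() in place of plyr::ddply(); shown here for a few columns only, under the hypothetical name d_state_alt:

d_state_alt = d %>%
filter(!State.x %in% c("Hawaii", "Alaska")) %>%
group_by(State.x) %>%
summarise(
population = sum(Total.Population, na.rm = T),
pop_black = sum(Total.Population * frc_black, na.rm = T),
frc_black = pop_black / population,
CA = weighted.mean(CA, w = Total.Population, na.rm = T)
)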
#pop weight
d_state$pop_weight = d_state$population %>% sqrt()
#standardize again
#not population composition
for (v in c(main_vars2)) d_state[[v]] = d_state[[v]] %>% standardize()
#make spatial lag variables
for (v in c(main_vars)) {
#no NA
tmp = d_state[!is.na(d_state[[v]]) & !is.na(d_state$lat) & !is.na(d_state$lon), ]
tmp_var = str_glue("{v}_lag") %>% as.character()
#estimate
tmp[[tmp_var]] = SAC_knsnr(tmp, v, k = 3)
#join
d_state[[tmp_var]] = NULL
d_state = left_join(d_state, tmp %>% select(State.x, !!tmp_var), by = "State.x")
}
National data
Needed for subanalysis.
#UV
#http://apps.who.int/gho/data/view.main.35300
UV_UN = read_csv("data/UV countries.csv", col_names = c("Country", "UV"), skip = 1) %>%
#skip world
.[-1, ] %>%
#ISO
mutate(
ISO = pu_translate(Country)
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Country = col_character(),
## UV = col_double()
## )
#rainfall
#https://data.worldbank.org/indicator/AG.LND.PRCP.MM?end=2014&start=2014&view=map
rainfall = read_excel("data/API_AG.LND.PRCP.MM_DS2_en_excel_v2_202879.xls", skip = 3) %>%
df_legalize_names() %>%
rename(
ISO = Country_Code,
rainfall2012 = x2012
)
#merge with collection to get latitude etc.
mega = read_csv("data/Megadataset_v2.0p.csv") %>%
rename(
ISO = X1
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## X1 = col_character(),
## LVnotes = col_character(),
## Names = col_character(),
## Capital = col_character(),
## se_Land = col_character(),
## se_abbrev = col_character(),
## de_Country = col_character(),
## de_abbrev = col_character(),
## de_Country_EN = col_character(),
## de_Western_neighbor = col_logical(),
## de_NW_European = col_logical(),
## dk_Origin = col_character(),
## Country_EN = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
#merge
assert_that(!any(duplicated(mega$ISO)))
## [1] TRUE
assert_that(!any(duplicated(UV_UN$ISO)))
## [1] TRUE
assert_that(!any(duplicated(rainfall$ISO)))
## [1] TRUE
natl = full_join(UV_UN, mega, by = "ISO") %>%
full_join(rainfall, by = "ISO") %>%
mutate(
abs_lat = abs(lat)
)
Standardized variants
#do it
#exclude the population sizes
d_std = d %>%
df_standardize(exclude = d %>% names() %>% str_subset("pop_"),
messages = F)
#states too
d_state_std = d_state %>%
df_standardize(exclude = d_state %>% names() %>% str_subset("pop_"),
messages = F)
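df_standardize() is also from the kirkegaard package; assuming it simply z-scores every numeric column apart from the excluded ones, a rough dplyr equivalent would be:

#rough equivalent (sketch, hypothetical d_std_alt)
d_std_alt = d %>%
mutate(across(
where(is.numeric) & !matches("pop_"),
~ as.numeric(scale(.x))
))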
Analyses
Descriptives
#descriptives for all main variables
#use original scale variables here
d %>% select(CA_all_orig:CA_white_orig,
SES_all_orig:SES_white_orig,
frc_white:frc_amerindian,
UV_orig, avg_temp_orig, lat, lon, elevation, fluoride_conc, lead_index) %>% describe()
#descriptives for old SIRE variables
d %>% select(!!race_vars) %>% describe()
#correlations, unweighted
wtd.cors(d %>% select(White:Other, frc_amerindian:frc_white))
## White Black Hispanic Asian Amerindian Other frc_amerindian
## White 1.00 -0.6225 -0.585 -0.2640 -0.2878 -0.169 -0.258
## Black -0.62 1.0000 -0.107 0.0129 -0.1004 -0.087 -0.119
## Hispanic -0.59 -0.1068 1.000 0.1387 -0.0383 -0.025 -0.051
## Asian -0.26 0.0129 0.139 1.0000 -0.0092 0.424 0.029
## Amerindian -0.29 -0.1004 -0.038 -0.0092 1.0000 0.275 0.948
## Other -0.17 -0.0872 -0.025 0.4240 0.2748 1.000 0.538
## frc_amerindian -0.26 -0.1189 -0.051 0.0293 0.9475 0.538 1.000
## frc_asian -0.19 -0.0028 0.106 0.9140 -0.0234 0.356 -0.015
## frc_hispanic -0.56 -0.1239 0.973 0.1638 -0.0598 -0.019 -0.075
## frc_black -0.61 0.9839 -0.123 0.0250 -0.1013 -0.094 -0.122
## frc_white 0.97 -0.6302 -0.566 -0.2414 -0.2494 -0.163 -0.255
## frc_asian frc_hispanic frc_black frc_white
## White -0.1913 -0.556 -0.6066 0.97
## Black -0.0028 -0.124 0.9839 -0.63
## Hispanic 0.1061 0.973 -0.1230 -0.57
## Asian 0.9140 0.164 0.0250 -0.24
## Amerindian -0.0234 -0.060 -0.1013 -0.25
## Other 0.3564 -0.019 -0.0944 -0.16
## frc_amerindian -0.0148 -0.075 -0.1220 -0.26
## frc_asian 1.0000 0.132 0.0091 -0.20
## frc_hispanic 0.1318 1.000 -0.1361 -0.57
## frc_black 0.0091 -0.136 1.0000 -0.63
## frc_white -0.1983 -0.565 -0.6314 1.00
#with sample sizes
pairwiseCount(d %>% select(White:Other, frc_amerindian:frc_white))
## White Black Hispanic Asian Amerindian Other frc_amerindian
## White 3140 3140 3140 3140 3140 3140 3122
## Black 3140 3140 3140 3140 3140 3140 3122
## Hispanic 3140 3140 3140 3140 3140 3140 3122
## Asian 3140 3140 3140 3140 3140 3140 3122
## Amerindian 3140 3140 3140 3140 3140 3140 3122
## Other 3140 3140 3140 3140 3140 3140 3122
## frc_amerindian 3122 3122 3122 3122 3122 3122 3124
## frc_asian 3122 3122 3122 3122 3122 3122 3124
## frc_hispanic 3122 3122 3122 3122 3122 3122 3124
## frc_black 3122 3122 3122 3122 3122 3122 3124
## frc_white 3122 3122 3122 3122 3122 3122 3124
## frc_asian frc_hispanic frc_black frc_white
## White 3122 3122 3122 3122
## Black 3122 3122 3122 3122
## Hispanic 3122 3122 3122 3122
## Asian 3122 3122 3122 3122
## Amerindian 3122 3122 3122 3122
## Other 3122 3122 3122 3122
## frc_amerindian 3124 3124 3124 3124
## frc_asian 3124 3124 3124 3124
## frc_hispanic 3124 3124 3124 3124
## frc_black 3124 3124 3124 3124
## frc_white 3124 3124 3124 3124
Old and new data
#cors
combine_upperlower(
#no weights
wtd.cors(d %>% select(S, SES_all, old_CA, CA)),
#weights
wtd.cors(d %>% select(S, SES_all, old_CA, CA), weight = d$pop_weight)
)
## S SES_all old_CA CA
## S NA 0.92 0.71 0.76
## SES_all 0.93 NA 0.69 0.73
## old_CA 0.71 0.69 NA 0.85
## CA 0.77 0.75 0.89 NA
#scatterplots of new and old CA
GG_scatter(d, "CA", "old_CA") +
xlab("New cognitive data (SEDA 3.0)") +
ylab("Old cognitive data (Kirkegaard 2016)")
## `geom_smooth()` using formula 'y ~ x'

GG_scatter(d, "CA", "old_CA", weights = "pop_weight") +
xlab("New cognitive data (SEDA 3.0)") +
ylab("Old cognitive data (Kirkegaard 2016)")
## `geom_smooth()` using formula 'y ~ x'

#SES/S
GG_scatter(d, "SES_all", "S") +
xlab("New SES data (SEDA 3.0)") +
ylab("Old SES data (Kirkegaard 2016)")
## `geom_smooth()` using formula 'y ~ x'

GG_scatter(d, "SES_all", "S", weights = "pop_weight") +
xlab("New SES data (SEDA 3.0)") +
ylab("Old SES data (Kirkegaard 2016)")
## `geom_smooth()` using formula 'y ~ x'

Correlations
Get an impression of the data by looking at the correlations.
#single output
combine_upperlower(
#no weights
d %>% select(!!main_vars2) %>% select(-contains("_z")) %>% wtd.cors(),
#weights
d %>% select(!!main_vars2) %>% select(-contains("_z")) %>% wtd.cors(weight = d$pop_weight)
)
## CA CA_all CA_asian CA_black CA_hispanic CA_white SES_all
## CA NA 1.000 0.447 0.672 0.598 0.757 0.735
## CA_all 1.000 NA 0.447 0.672 0.598 0.757 0.735
## CA_asian 0.478 0.478 NA 0.306 0.389 0.529 0.249
## CA_black 0.647 0.647 0.343 NA 0.540 0.470 0.455
## CA_hispanic 0.607 0.607 0.386 0.588 NA 0.421 0.243
## CA_white 0.687 0.687 0.587 0.405 0.350 NA 0.581
## SES_all 0.748 0.748 0.331 0.432 0.249 0.605 NA
## SES_black 0.419 0.419 0.308 0.554 0.242 0.413 0.646
## SES_hispanic 0.337 0.337 0.288 0.269 0.352 0.326 0.559
## SES_white 0.401 0.401 0.436 0.209 0.083 0.756 0.728
## UV -0.367 -0.367 -0.054 -0.171 -0.161 -0.105 -0.195
## avg_temp -0.392 -0.392 0.069 -0.194 -0.032 -0.132 -0.340
## lat 0.312 0.312 -0.126 0.132 -0.028 0.099 0.326
## lon 0.199 0.199 0.312 0.115 0.305 0.111 -0.055
## elevation 0.053 0.053 -0.180 0.110 -0.065 -0.024 0.115
## fluoride_conc 0.020 0.020 0.018 -0.028 -0.042 0.110 0.098
## lead_index -0.045 -0.045 0.104 -0.073 -0.036 0.021 -0.130
## SES_black SES_hispanic SES_white UV avg_temp lat lon
## CA 0.4349 0.284 0.369 -0.327 -0.4221 0.32822 0.0824
## CA_all 0.4349 0.284 0.369 -0.327 -0.4221 0.32822 0.0824
## CA_asian 0.2467 0.236 0.327 -0.065 0.0508 -0.12758 0.3277
## CA_black 0.4958 0.222 0.198 -0.252 -0.3226 0.27202 0.0609
## CA_hispanic 0.2365 0.278 0.099 -0.167 -0.0729 0.04058 0.2467
## CA_white 0.3953 0.285 0.646 -0.101 -0.2017 0.19139 -0.0075
## SES_all 0.6459 0.502 0.726 -0.190 -0.4050 0.39275 -0.1503
## SES_black NA 0.496 0.495 -0.022 -0.1249 0.14866 -0.0947
## SES_hispanic 0.5525 NA 0.461 0.048 0.0085 0.00074 -0.0503
## SES_white 0.5582 0.498 NA 0.076 -0.1219 0.19703 -0.2119
## UV 0.1162 0.101 0.058 NA 0.7304 -0.73481 -0.3360
## avg_temp 0.0187 0.078 -0.051 0.774 NA -0.92471 0.0563
## lat 0.0093 -0.068 0.078 -0.752 -0.9347 NA -0.2873
## lon -0.1454 -0.089 -0.079 -0.416 -0.0986 -0.09121 NA
## elevation 0.0840 0.016 0.023 0.247 -0.2664 0.18372 -0.4131
## fluoride_conc 0.0601 0.071 0.149 0.043 -0.0426 0.03357 -0.0351
## lead_index -0.1934 -0.222 -0.038 -0.376 -0.1830 0.09036 0.5146
## elevation fluoride_conc lead_index
## CA 0.124 0.030 -0.1271
## CA_all 0.124 0.030 -0.1271
## CA_asian -0.140 0.031 0.0655
## CA_black 0.143 0.017 -0.1470
## CA_hispanic -0.094 -0.046 -0.0529
## CA_white 0.106 0.108 -0.1126
## SES_all 0.239 0.101 -0.1854
## SES_black 0.140 0.101 -0.2256
## SES_hispanic 0.049 0.041 -0.1901
## SES_white 0.195 0.149 -0.1272
## UV 0.296 0.135 -0.2076
## avg_temp -0.336 0.025 0.0267
## lat 0.227 -0.033 -0.0894
## lon -0.555 -0.025 0.4611
## elevation NA 0.170 -0.3779
## fluoride_conc 0.156 NA 0.0027
## lead_index -0.382 -0.033 NA
#sample sizes
d %>% select(!!main_vars2) %>% select(-contains("_z")) %>% pairwiseCount()
## CA CA_all CA_asian CA_black CA_hispanic CA_white SES_all
## CA 3134 3134 1483 2135 2647 3112 3124
## CA_all 3134 3134 1483 2135 2647 3112 3124
## CA_asian 1483 1483 1483 1394 1475 1483 1473
## CA_black 2135 2135 1394 2135 2015 2120 2125
## CA_hispanic 2647 2647 1475 2015 2647 2643 2637
## CA_white 3112 3112 1483 2120 2643 3112 3102
## SES_all 3124 3124 1473 2125 2637 3102 3124
## SES_black 2108 2108 1373 2108 1989 2093 2108
## SES_hispanic 2624 2624 1465 1998 2624 2620 2624
## SES_white 3099 3099 1473 2109 2631 3099 3099
## UV 3099 3099 1465 2125 2629 3080 3093
## avg_temp 3098 3098 1464 2124 2628 3079 3092
## lat 3132 3132 1483 2135 2647 3112 3122
## lon 3132 3132 1483 2135 2647 3112 3122
## elevation 3070 3070 1444 2096 2601 3050 3064
## fluoride_conc 2209 2209 930 1493 1838 2190 2207
## lead_index 3130 3130 1482 2133 2645 3110 3120
## SES_black SES_hispanic SES_white UV avg_temp lat lon
## CA 2108 2624 3099 3099 3098 3132 3132
## CA_all 2108 2624 3099 3099 3098 3132 3132
## CA_asian 1373 1465 1473 1465 1464 1483 1483
## CA_black 2108 1998 2109 2125 2124 2135 2135
## CA_hispanic 1989 2624 2631 2629 2628 2647 2647
## CA_white 2093 2620 3099 3080 3079 3112 3112
## SES_all 2108 2624 3099 3093 3092 3122 3122
## SES_black 2108 1982 2092 2102 2101 2108 2108
## SES_hispanic 1982 2624 2618 2610 2609 2624 2624
## SES_white 2092 2618 3099 3071 3070 3099 3099
## UV 2102 2610 3071 3106 3105 3106 3106
## avg_temp 2101 2609 3070 3105 3105 3105 3105
## lat 2108 2624 3099 3106 3105 3140 3140
## lon 2108 2624 3099 3106 3105 3140 3140
## elevation 2073 2582 3041 3073 3072 3074 3074
## fluoride_conc 1485 1826 2186 2189 2188 2216 2216
## lead_index 2106 2622 3097 3104 3103 3138 3138
## elevation fluoride_conc lead_index
## CA 3070 2209 3130
## CA_all 3070 2209 3130
## CA_asian 1444 930 1482
## CA_black 2096 1493 2133
## CA_hispanic 2601 1838 2645
## CA_white 3050 2190 3110
## SES_all 3064 2207 3120
## SES_black 2073 1485 2106
## SES_hispanic 2582 1826 2622
## SES_white 3041 2186 3097
## UV 3073 2189 3104
## avg_temp 3072 2188 3103
## lat 3074 2216 3138
## lon 3074 2216 3138
## elevation 3075 2160 3073
## fluoride_conc 2160 2216 2214
## lead_index 3073 2214 3138
#national correlations
natl %>%
select(UV, abs_lat) %>%
wtd.cors()
## UV abs_lat
## UV 1.00 -0.89
## abs_lat -0.89 1.00
#plot of UV and latitude
GG_scatter(d, "lat", "UV", case_names = "County.x", case_names_color = "green") +
xlab("Latitude") +
ylab("UV index (z score)")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/lat_UV_counties.png")
## `geom_smooth()` using formula 'y ~ x'
Basic spatial stats
#how much spatial autocorrelation is there?
#we measure this by seeing how much we can predict each county by its 3 closest neighbors' values
#counties
wtd.cors(d %>% select(!!!main_vars, !!!(main_vars + "_lag")))[main_vars, (main_vars + "_lag")] %>%
{
y = diag(.)
names(y) = rownames(.)
y
}
## CA CA_all CA_asian CA_black CA_hispanic
## 0.60 0.60 0.39 0.46 0.42
## CA_white SES_all SES_black SES_hispanic SES_white
## 0.60 0.75 0.56 0.43 0.80
## frc_white frc_black frc_asian frc_hispanic frc_amerindian
## 0.80 0.84 0.64 0.87 0.70
## UV avg_temp lat lon elevation
## 1.00 0.99 1.00 1.00 0.96
## lat_z lon_z elevation_z fluoride_conc fluoride_conc_z
## 1.00 1.00 0.96 0.58 0.58
## lead_index lead_index_z
## 0.81 0.81
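The diag() trick above just pairs each variable with its own lag; the same numbers could be had with a per-variable loop (sketch; wtd.cors() on two vectors returns a 1x1 matrix, hence the [1]; the string + is from the kirkegaard package):

#map_dbl(main_vars, function(v) wtd.cors(d[[v]], d[[v + "_lag"]])[1]) %>%
# setNames(main_vars)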
#states
wtd.cors(d_state %>% select(!!!main_vars, !!!(main_vars + "_lag")))[main_vars, (main_vars + "_lag")] %>%
{
y = diag(.)
names(y) = rownames(.)
y
}
## CA CA_all CA_asian CA_black CA_hispanic
## 0.58 0.58 0.57 0.36 0.42
## CA_white SES_all SES_black SES_hispanic SES_white
## 0.20 0.68 0.38 0.57 0.31
## frc_white frc_black frc_asian frc_hispanic frc_amerindian
## 0.68 0.72 0.13 0.63 0.53
## UV avg_temp lat lon elevation
## 0.94 0.94 0.98 0.98 0.68
## lat_z lon_z elevation_z fluoride_conc fluoride_conc_z
## 0.98 0.98 0.68 0.56 0.56
## lead_index lead_index_z
## 0.82 0.82
Main regressions
Carry out the usual regressions.
#formulas for models
form_1 = CA ~ UV
form_2 = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian
form_3 = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV
form_3b = CA ~ UV + frc_white
form_3c = CA ~ CA_lag + UV + frc_black + frc_asian + frc_hispanic + frc_amerindian + avg_temp
form_4 = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV + avg_temp + lat_z + lon_z + elevation_z
form_5 = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV + avg_temp + lat_z + lon_z + elevation_z + CA_lag
form_5b = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV + avg_temp + lat_z + lon_z + elevation_z + fluoride_conc_z + CA_lag
form_5c = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV + avg_temp + lat_z + lon_z + elevation_z + lead_index_z + fluoride_conc_z + CA_lag
form_5d = CA ~ frc_black + frc_asian + frc_hispanic + frc_amerindian + UV + avg_temp + lat_z + lon_z + elevation_z + Diarrheal_diseases_z + Hepatitis_z + HIV_AIDS_z + Lower_respiratory_infections_z + Meningitis_z + Tuberculosis_z + CA_lag
form_6 = CA ~ CA_lag + frc_black + frc_asian + frc_hispanic + frc_amerindian + rcs(UV) + avg_temp + lat_z + lon_z + elevation_z
form_7 = CA ~ CA_lag + UV + frc_black*UV + frc_asian*UV + frc_hispanic*UV + frc_amerindian*UV + avg_temp + lat_z + lon_z + elevation_z
#UV only
(model_1 = ols(form_1, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA UV (weights)
## 90 118 84
##
## Linear Regression Model
##
## ols(formula = form_1, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3099 LR chi2 449.25 R2 0.135
## sigma13.7075 d.f. 1 R2 adj 0.135
## d.f. 3097 Pr(> chi2) 0.0000 g 0.376
##
## Residuals
##
## Min 1Q Median 3Q Max
## -4.76357 -0.63754 -0.03289 0.51446 3.34724
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0942 0.0164 5.75 <0.0001
## UV -0.3347 0.0152 -21.98 <0.0001
##
#demographics only
(model_2 = ols(form_2, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## (weights)
## 84
##
## Linear Regression Model
##
## ols(formula = form_2, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3122 LR chi2 1734.97 R2 0.426
## sigma11.1986 d.f. 4 R2 adj 0.426
## d.f. 3117 Pr(> chi2) 0.0000 g 0.689
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.7222 -0.5960 -0.1093 0.3481 2.4635
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.7274 0.0224 32.52 <0.0001
## frc_black -2.8440 0.0729 -39.01 <0.0001
## frc_asian 5.1768 0.2949 17.56 <0.0001
## frc_hispanic -1.9295 0.0749 -25.76 <0.0001
## frc_amerindian -3.7858 0.1891 -20.02 <0.0001
##
#demo + UV
(model_3 = ols(form_3, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## UV (weights)
## 118 84
##
## Linear Regression Model
##
## ols(formula = form_3, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3093 LR chi2 1880.51 R2 0.456
## sigma10.8898 d.f. 5 R2 adj 0.455
## d.f. 3087 Pr(> chi2) 0.0000 g 0.705
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.99222 -0.55837 -0.07525 0.36663 2.40816
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.6224 0.0247 25.22 <0.0001
## frc_black -2.7262 0.0741 -36.81 <0.0001
## frc_asian 6.7410 0.3306 20.39 <0.0001
## frc_hispanic -1.6454 0.0927 -17.74 <0.0001
## frc_amerindian -3.4211 0.2052 -16.67 <0.0001
## UV -0.1074 0.0155 -6.92 <0.0001
##
#climate
(model_4 = ols(form_4, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## UV avg_temp lat_z lon_z elevation_z
## 118 119 84 84 149
## (weights)
## 84
##
## Linear Regression Model
##
## ols(formula = form_4, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3062 LR chi2 2283.97 R2 0.526
## sigma10.1785 d.f. 9 R2 adj 0.524
## d.f. 3052 Pr(> chi2) 0.0000 g 0.739
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.99195 -0.53719 -0.06725 0.38842 2.38029
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5717 0.0238 24.05 <0.0001
## frc_black -3.0225 0.0784 -38.53 <0.0001
## frc_asian 9.0644 0.3395 26.70 <0.0001
## frc_hispanic -1.4146 0.0898 -15.75 <0.0001
## frc_amerindian -3.0865 0.1970 -15.67 <0.0001
## UV -0.1638 0.0315 -5.21 <0.0001
## avg_temp 0.3868 0.0512 7.56 <0.0001
## lat_z 0.2668 0.0501 5.32 <0.0001
## lon_z 0.3505 0.0195 17.95 <0.0001
## elevation_z 0.2984 0.0247 12.07 <0.0001
##
#spatial autocorrelation in residual?
d$model_4_resid = resid(model_4) %>% standardize()
SAC_measures(d %>% filter(!is.na(model_4_resid)), "model_4_resid", measures = "KNSNR")
#spatial lag
(model_5 = ols(form_5, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## UV avg_temp lat_z lon_z elevation_z
## 118 119 84 84 149
## CA_lag (weights)
## 92 84
##
## Linear Regression Model
##
## ols(formula = form_5, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3062 LR chi2 2620.25 R2 0.575
## sigma9.6362 d.f. 10 R2 adj 0.574
## d.f. 3051 Pr(> chi2) 0.0000 g 0.793
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.85759 -0.48683 -0.07155 0.35909 2.47774
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5440 0.0226 24.12 <0.0001
## frc_black -2.6827 0.0764 -35.10 <0.0001
## frc_asian 7.7561 0.3289 23.59 <0.0001
## frc_hispanic -1.5484 0.0853 -18.15 <0.0001
## frc_amerindian -2.7955 0.1872 -14.94 <0.0001
## UV -0.1036 0.0299 -3.46 0.0005
## avg_temp 0.3508 0.0485 7.24 <0.0001
## lat_z 0.1712 0.0477 3.59 0.0003
## lon_z 0.2434 0.0193 12.59 <0.0001
## elevation_z 0.2236 0.0237 9.42 <0.0001
## CA_lag 0.3495 0.0186 18.82 <0.0001
##
#No SAC in residuals now
d$model_5_resid = resid(model_5) %>% standardize()
SAC_measures(d %>% filter(!is.na(model_5_resid)), "model_5_resid", measures = "KNSNR")
#nonlinear UV (restricted cubic spline)
(model_6 = ols(form_6, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA CA_lag frc_black frc_asian frc_hispanic
## 90 92 100 100 100
## frc_amerindian UV avg_temp lat_z lon_z
## 100 118 119 84 84
## elevation_z (weights)
## 149 84
##
## Linear Regression Model
##
## ols(formula = form_6, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3062 LR chi2 2630.20 R2 0.576
## sigma9.6253 d.f. 13 R2 adj 0.575
## d.f. 3048 Pr(> chi2) 0.0000 g 0.791
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.80324 -0.48663 -0.07103 0.35421 2.55881
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4676 0.0914 5.12 <0.0001
## CA_lag 0.3458 0.0187 18.50 <0.0001
## frc_black -2.7309 0.0780 -35.01 <0.0001
## frc_asian 7.6536 0.3312 23.11 <0.0001
## frc_hispanic -1.4838 0.0938 -15.82 <0.0001
## frc_amerindian -2.8328 0.1877 -15.09 <0.0001
## UV -0.1156 0.0708 -1.63 0.1028
## UV' 0.1902 0.6160 0.31 0.7575
## UV'' 0.3642 1.8813 0.19 0.8465
## UV''' -2.2352 2.2602 -0.99 0.3228
## avg_temp 0.3393 0.0490 6.93 <0.0001
## lat_z 0.2192 0.0501 4.38 <0.0001
## lon_z 0.2663 0.0215 12.40 <0.0001
## elevation_z 0.2144 0.0241 8.91 <0.0001
##
#UV x race
(model_7 = ols(form_7, data = d, weights = d$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA CA_lag UV frc_black frc_asian
## 90 92 118 100 100
## frc_hispanic frc_amerindian avg_temp lat_z lon_z
## 100 100 119 84 84
## elevation_z (weights)
## 149 84
##
## Linear Regression Model
##
## ols(formula = form_7, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3062 LR chi2 2763.71 R2 0.594
## sigma9.4193 d.f. 14 R2 adj 0.593
## d.f. 3047 Pr(> chi2) 0.0000 g 0.777
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.78002 -0.49616 -0.07814 0.35145 2.54808
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5525 0.0227 24.31 <0.0001
## CA_lag 0.3346 0.0187 17.90 <0.0001
## UV 0.1051 0.0391 2.69 0.0073
## frc_black -2.9387 0.0797 -36.87 <0.0001
## frc_asian 8.3827 0.3342 25.08 <0.0001
## frc_hispanic -1.4080 0.1051 -13.40 <0.0001
## frc_amerindian -2.8321 0.1885 -15.02 <0.0001
## avg_temp 0.1898 0.0494 3.84 0.0001
## lat_z 0.1342 0.0477 2.81 0.0049
## lon_z 0.2331 0.0198 11.75 <0.0001
## elevation_z 0.1453 0.0243 5.97 <0.0001
## UV * frc_black 0.5083 0.1099 4.63 <0.0001
## UV * frc_asian -2.8707 0.2659 -10.80 <0.0001
## UV * frc_hispanic -0.1943 0.0603 -3.23 0.0013
## UV * frc_amerindian -0.3310 0.1440 -2.30 0.0216
##
#effect sizes comparisons
#proportion R2s
map_df(mget("model_" + 1:7), model_partialR2s)
#numerical
(model_5_partialR2s = plot(anova(model_5), what='proportion R2', pl = F))
## frc_black frc_asian CA_lag frc_hispanic frc_amerindian
## 0.2984 0.1347 0.0858 0.0798 0.0540
## lon_z elevation_z avg_temp lat_z UV
## 0.0384 0.0215 0.0127 0.0031 0.0029
sum(model_5_partialR2s[c(race_vars2[-1])])
## [1] 0.57
sum(model_5_partialR2s[climate_vars_model])
## [1] 0.079
(model_7_partialR2s = plot(anova(model_7), what='proportion R2', pl = F))
## frc_black frc_asian frc_hispanic CA_lag
## 0.3093 0.1484 0.0760 0.0717
## frc_amerindian UV lon_z UV * frc_asian
## 0.0575 0.0355 0.0309 0.0261
## elevation_z UV * frc_black avg_temp UV * frc_hispanic
## 0.0080 0.0048 0.0033 0.0023
## lat_z UV * frc_amerindian
## 0.0018 0.0012
sum(model_7_partialR2s[c(race_vars2[-1])])
## [1] 0.59
sum(model_7_partialR2s[climate_vars_model])
## [1] 0.08
#all UV * SIRE interactions
sum(model_7_partialR2s[str_subset(names(model_7_partialR2s), "\\*")])
## [1] 0.034
#summarize model results
mget("model_" + 1:7) %>%
summarize_models()
#supplementary models
list(
ols(form_3b, data = d, weights = d$pop_weight),
ols(form_3c, data = d, weights = d$pop_weight),
ols(form_5b, data = d, weights = d$pop_weight),
ols(form_5c, data = d, weights = d$pop_weight),
ols(form_5d, data = d, weights = d$pop_weight)
) %>%
walk(function(x) {
print(x)
cat("Relative importance: partial R2s\n")
print(model_partialR2s(x))
})
## Frequencies of Missing Values Due to Each Variable
## CA UV frc_white (weights)
## 90 118 100 84
##
## Linear Regression Model
##
## ols(formula = form_3b, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3093 LR chi2 999.59 R2 0.276
## sigma12.5504 d.f. 2 R2 adj 0.276
## d.f. 3090 Pr(> chi2) 0.0000 g 0.550
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.8842 -0.7157 -0.1845 0.3050 2.8013
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -1.0168 0.0481 -21.15 <0.0001
## UV -0.1241 0.0165 -7.53 <0.0001
## frc_white 1.6926 0.0693 24.41 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 2
## frc_white UV
## <dbl> <dbl>
## 1 0.505 0.0481
## Frequencies of Missing Values Due to Each Variable
## CA CA_lag UV frc_black frc_asian
## 90 92 118 100 100
## frc_hispanic frc_amerindian avg_temp (weights)
## 100 100 119 84
##
## Linear Regression Model
##
## ols(formula = form_3c, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3092 LR chi2 2435.78 R2 0.545
## sigma 9.9585 d.f. 7 R2 adj 0.544
## d.f. 3084 Pr(> chi2) 0.0000 g 0.804
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.82297 -0.49647 -0.08481 0.34240 2.90949
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5788 0.0228 25.40 <0.0001
## CA_lag 0.4348 0.0181 24.01 <0.0001
## UV -0.0381 0.0205 -1.86 0.0634
## frc_black -2.4592 0.0753 -32.68 <0.0001
## frc_asian 5.9749 0.3068 19.47 <0.0001
## frc_hispanic -1.7787 0.0857 -20.75 <0.0001
## frc_amerindian -2.9232 0.1902 -15.37 <0.0001
## avg_temp 0.1073 0.0213 5.03 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 7
## frc_black CA_lag frc_hispanic frc_asian frc_amerindian avg_temp UV
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.289 0.156 0.116 0.103 0.0639 0.00683 0.000933
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## UV avg_temp lat_z lon_z elevation_z
## 118 119 84 84 149
## fluoride_conc_z CA_lag (weights)
## 1008 92 84
##
## Linear Regression Model
##
## ols(formula = form_5b, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2153 LR chi2 2015.71 R2 0.608
## sigma8.7183 d.f. 11 R2 adj 0.606
## d.f. 2141 Pr(> chi2) 0.0000 g 0.771
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.13986 -0.47069 -0.05363 0.37739 2.07743
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4229 0.0251 16.84 <0.0001
## frc_black -2.7802 0.0819 -33.94 <0.0001
## frc_asian 13.8716 0.5711 24.29 <0.0001
## frc_hispanic -1.4864 0.0991 -15.00 <0.0001
## frc_amerindian -2.8079 0.4083 -6.88 <0.0001
## UV 0.3135 0.0499 6.29 <0.0001
## avg_temp -0.1883 0.0839 -2.24 0.0249
## lat_z -0.0368 0.0693 -0.53 0.5953
## lon_z 0.1362 0.0348 3.92 <0.0001
## elevation_z -0.0669 0.0410 -1.63 0.1029
## fluoride_conc_z 0.0155 0.0179 0.86 0.3883
## CA_lag 0.2859 0.0215 13.28 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 11
## frc_black frc_asian frc_hispanic CA_lag frc_amerindian UV lon_z avg_temp
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.347 0.178 0.0678 0.0531 0.0143 0.0119 0.00462 0.00152
## # … with 3 more variables: elevation_z <dbl>, fluoride_conc_z <dbl>,
## # lat_z <dbl>
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 90 100 100 100 100
## UV avg_temp lat_z lon_z elevation_z
## 118 119 84 84 149
## lead_index_z fluoride_conc_z CA_lag (weights)
## 86 1008 92 84
##
## Linear Regression Model
##
## ols(formula = form_5c, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2152 LR chi2 2119.16 R2 0.626
## sigma8.5111 d.f. 12 R2 adj 0.624
## d.f. 2139 Pr(> chi2) 0.0000 g 0.792
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.89522 -0.43880 -0.02404 0.38089 2.11818
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3616 0.0252 14.34 <0.0001
## frc_black -2.5580 0.0828 -30.89 <0.0001
## frc_asian 12.6287 0.5706 22.13 <0.0001
## frc_hispanic -1.4105 0.0970 -14.54 <0.0001
## frc_amerindian -2.9846 0.3989 -7.48 <0.0001
## UV 0.4304 0.0500 8.61 <0.0001
## avg_temp -0.2507 0.0821 -3.05 0.0023
## lat_z 0.0376 0.0681 0.55 0.5812
## lon_z 0.3346 0.0392 8.55 <0.0001
## elevation_z -0.1151 0.0403 -2.86 0.0043
## lead_index_z -0.1719 0.0167 -10.29 <0.0001
## fluoride_conc_z 0.0153 0.0175 0.88 0.3807
## CA_lag 0.2758 0.0210 13.11 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 12
## frc_black frc_asian frc_hispanic CA_lag lead_index_z UV lon_z
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.266 0.137 0.0589 0.0479 0.0295 0.0207 0.0204
## # … with 5 more variables: frc_amerindian <dbl>, avg_temp <dbl>,
## # elevation_z <dbl>, fluoride_conc_z <dbl>, lat_z <dbl>
## Frequencies of Missing Values Due to Each Variable
## CA frc_black
## 90 100
## frc_asian frc_hispanic
## 100 100
## frc_amerindian UV
## 100 118
## avg_temp lat_z
## 119 84
## lon_z elevation_z
## 84 149
## Diarrheal_diseases_z Hepatitis_z
## 82 82
## HIV_AIDS_z Lower_respiratory_infections_z
## 82 82
## Meningitis_z Tuberculosis_z
## 82 82
## CA_lag (weights)
## 92 84
##
## Linear Regression Model
##
## ols(formula = form_5d, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3062 LR chi2 3215.73 R2 0.650
## sigma8.7520 d.f. 16 R2 adj 0.648
## d.f. 3045 Pr(> chi2) 0.0000 g 0.860
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.23696 -0.46236 -0.06136 0.31593 3.05156
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3922 0.0272 14.42 <0.0001
## frc_black -1.5491 0.1249 -12.40 <0.0001
## frc_asian 6.8560 0.3134 21.88 <0.0001
## frc_hispanic -1.5441 0.0865 -17.85 <0.0001
## frc_amerindian -1.5919 0.1983 -8.03 <0.0001
## UV -0.1753 0.0276 -6.34 <0.0001
## avg_temp 0.4788 0.0444 10.78 <0.0001
## lat_z 0.1021 0.0440 2.32 0.0205
## lon_z 0.2308 0.0199 11.58 <0.0001
## elevation_z 0.3044 0.0224 13.58 <0.0001
## Diarrheal_diseases_z -0.0205 0.0121 -1.70 0.0898
## Hepatitis_z -0.0957 0.0164 -5.83 <0.0001
## HIV_AIDS_z 0.0470 0.0168 2.79 0.0052
## Lower_respiratory_infections_z -0.1108 0.0171 -6.47 <0.0001
## Meningitis_z -0.3350 0.0269 -12.45 <0.0001
## Tuberculosis_z -0.0170 0.0275 -0.62 0.5369
## CA_lag 0.2871 0.0176 16.30 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 16
## frc_asian frc_hispanic CA_lag elevation_z Meningitis_z frc_black lon_z
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0846 0.0563 0.0469 0.0326 0.0274 0.0272 0.0237
## # … with 9 more variables: avg_temp <dbl>, frc_amerindian <dbl>,
## # Lower_respiratory_infections_z <dbl>, UV <dbl>, Hepatitis_z <dbl>,
## # HIV_AIDS_z <dbl>, lat_z <dbl>, Diarrheal_diseases_z <dbl>,
## # Tuberculosis_z <dbl>
Standardized betas
An alternative way to compare the importance of the numerical predictors, ignoring the issue that a variable's variance depends on its prevalence.
list(
ols(form_1, data = d_std, weights = d_std$pop_weight),
ols(form_2, data = d_std, weights = d_std$pop_weight),
ols(form_3, data = d_std, weights = d_std$pop_weight),
ols(form_4, data = d_std, weights = d_std$pop_weight),
ols(form_5, data = d_std, weights = d_std$pop_weight),
ols(form_6, data = d_std, weights = d_std$pop_weight),
ols(form_7, data = d_std, weights = d_std$pop_weight)
) %>%
summarize_models()
Robust regressions
Lasker requested these; the results are essentially the same as above, just with less informative output.
#UV
(model_1r = MASS::rlm(form_1, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_1, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -75.9602 -8.2043 -0.0881 6.6366 76.9205
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.066 0.014 4.736
## UV -0.324 0.013 -24.895
##
## Residual standard error: 11 on 3097 degrees of freedom
## (125 observations deleted due to missingness)
#demos
(model_2r = MASS::rlm(form_2, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_2, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -223.642 -6.325 -0.564 5.157 55.888
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.585 0.018 31.730
## frc_black -2.939 0.060 -48.903
## frc_asian 11.294 0.243 46.461
## frc_hispanic -1.884 0.062 -30.512
## frc_amerindian -3.468 0.156 -22.251
##
## Residual standard error: 8.5 on 3117 degrees of freedom
## (102 observations deleted due to missingness)
#UV and demos
(model_3r = MASS::rlm(form_3, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_3, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -157.762 -6.142 -0.477 5.155 55.395
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.536 0.021 25.922
## frc_black -2.869 0.062 -46.208
## frc_asian 11.965 0.277 43.168
## frc_hispanic -1.724 0.078 -22.169
## frc_amerindian -3.193 0.172 -18.566
## UV -0.054 0.013 -4.148
##
## Residual standard error: 8.3 on 3087 degrees of freedom
## (131 observations deleted due to missingness)
#all climate
(model_4r = MASS::rlm(form_4, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_4, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -123.591 -6.087 -0.484 4.937 46.267
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.520 0.021 24.918
## frc_black -3.019 0.069 -43.832
## frc_asian 12.715 0.298 42.651
## frc_hispanic -1.591 0.079 -20.166
## frc_amerindian -3.015 0.173 -17.429
## UV -0.113 0.028 -4.084
## avg_temp 0.395 0.045 8.801
## lat_z 0.329 0.044 7.486
## lon_z 0.321 0.017 18.728
## elevation_z 0.269 0.022 12.398
##
## Residual standard error: 8.2 on 3052 degrees of freedom
## (162 observations deleted due to missingness)
#all main variables + spatial lag
(model_5r = MASS::rlm(form_5, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_5, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -110.881 -5.488 -0.385 4.717 51.188
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.461 0.020 23.622
## frc_black -2.476 0.066 -37.418
## frc_asian 10.439 0.285 36.661
## frc_hispanic -1.651 0.074 -22.348
## frc_amerindian -2.657 0.162 -16.394
## UV -0.060 0.026 -2.325
## avg_temp 0.338 0.042 8.064
## lat_z 0.208 0.041 5.025
## lon_z 0.201 0.017 12.029
## elevation_z 0.201 0.021 9.803
## CA_lag 0.384 0.016 23.902
##
## Residual standard error: 7.6 on 3051 degrees of freedom
## (162 observations deleted due to missingness)
#rcs UV
(model_6r = MASS::rlm(form_6, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_6, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -117.00 -5.45 -0.47 4.71 50.87
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.506 0.079 6.405
## CA_lag 0.372 0.016 23.037
## frc_black -2.546 0.067 -37.760
## frc_asian 10.590 0.286 37.001
## frc_hispanic -1.567 0.081 -19.339
## frc_amerindian -2.692 0.162 -16.599
## rcs(UV)UV 0.028 0.061 0.463
## rcs(UV)UV' -0.718 0.532 -1.349
## rcs(UV)UV'' 3.172 1.626 1.951
## rcs(UV)UV''' -5.599 1.953 -2.867
## avg_temp 0.307 0.042 7.259
## lat_z 0.239 0.043 5.524
## lon_z 0.215 0.019 11.603
## elevation_z 0.192 0.021 9.236
##
## Residual standard error: 7.5 on 3048 degrees of freedom
## (162 observations deleted due to missingness)
#demo interactions
(model_7r = MASS::rlm(form_7, data = d, weights = d$pop_weight) %>% summary())
##
## Call: rlm(formula = form_7, data = d, weights = d$pop_weight)
## Residuals:
## Min 1Q Median 3Q Max
## -86.265 -5.501 -0.448 4.669 50.355
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 0.470 0.020 23.534
## CA_lag 0.371 0.016 22.603
## UV 0.124 0.034 3.607
## frc_black -2.752 0.070 -39.315
## frc_asian 10.399 0.294 35.424
## frc_hispanic -1.461 0.092 -15.835
## frc_amerindian -2.648 0.166 -15.992
## avg_temp 0.193 0.043 4.439
## lat_z 0.166 0.042 3.976
## lon_z 0.210 0.017 12.070
## elevation_z 0.140 0.021 6.554
## UV:frc_black 0.467 0.096 4.840
## UV:frc_asian -3.696 0.234 -15.826
## UV:frc_hispanic -0.172 0.053 -3.250
## UV:frc_amerindian -0.276 0.127 -2.182
##
## Residual standard error: 7.6 on 3047 degrees of freedom
## (162 observations deleted due to missingness)
Correlations vs. OLS?
Can weighted regression betas differ from weighted correlations? Apparently, yes: the standardized slope equals the correlation only when the variables are standardized with the same weights used in the regression, and here they were z-scored without weights.
d %>%
filter(!is.na(CA), !is.na(UV), !is.na(pop_weight)) %>% {
#restandardized in complete cases subset
.$CA2 = .$CA %>% standardize()
.$UV2 = .$UV %>% standardize()
#compute
list(
CA = describe(.$CA),
UV = describe(.$UV),
cor = wtd.cors(select(., CA, UV)),
cor_wt = wtd.cors(select(., CA, UV), w = .$pop_weight),
ols = ols(CA ~ UV, data = .) %>% coef(),
ols_wt = ols(CA ~ UV, data = ., weights = .$pop_weight) %>% coef(),
ols_restd = ols(CA2 ~ UV2, data = .) %>% coef(),
ols_wt_restd = ols(CA2 ~ UV2, data = ., weights = .$pop_weight) %>% coef()
)
}
## $CA
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 3099 0.0087 0.98 0.073 0.051 0.92 -4.5 2.9 7.4 -0.49 0.86
## se
## X1 0.018
##
## $UV
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 3099 -0.00013 1 -0.064 -0.058 1.1 -3.1 3.4 6.4 0.48 0.16 0.018
##
## $cor
## CA UV
## CA 1.00 -0.33
## UV -0.33 1.00
##
## $cor_wt
## CA UV
## CA 1.00 -0.37
## UV -0.37 1.00
##
## $ols
## Intercept UV
## 0.0087 -0.3219
##
## $ols_wt
## Intercept UV
## 0.094 -0.335
##
## $ols_restd
## Intercept UV2
## -1.1e-17 -3.3e-01
##
## $ols_wt_restd
## Intercept UV2
## 0.087 -0.340
State level
For comparison with Leon.
#UV only
(model_1s = ols(form_1, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA UV (weights)
## 1 1 1
##
## Linear Regression Model
##
## ols(formula = form_1, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 19.57 R2 0.329
## sigma35.8247 d.f. 1 R2 adj 0.315
## d.f. 47 Pr(> chi2) 0.0000 g 0.546
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.1375 -0.4679 0.1638 0.4223 1.6269
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0125 0.1091 0.11 0.9095
## UV -0.5058 0.1053 -4.80 <0.0001
##
#demographics only
(model_2s = ols(form_2, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 1 1 1 1 1
## (weights)
## 1
##
## Linear Regression Model
##
## ols(formula = form_2, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 27.40 R2 0.428
## sigma34.1821 d.f. 4 R2 adj 0.376
## d.f. 44 Pr(> chi2) 0.0000 g 0.753
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.98570 -0.47002 0.05129 0.50455 1.52791
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.0991 0.2857 3.85 0.0004
## frc_black -4.1687 0.8797 -4.74 <0.0001
## frc_asian 5.6319 4.3970 1.28 0.2070
## frc_hispanic -3.1272 0.8245 -3.79 0.0005
## frc_amerindian -6.5548 3.8188 -1.72 0.0931
##
#demo + UV
(model_3s = ols(form_3, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 1 1 1 1 1
## UV (weights)
## 1 1
##
## Linear Regression Model
##
## ols(formula = form_3, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 33.69 R2 0.497
## sigma32.4295 d.f. 5 R2 adj 0.439
## d.f. 43 Pr(> chi2) 0.0000 g 0.788
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.86540 -0.39426 0.04279 0.31334 1.40189
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.6179 0.3359 1.84 0.0727
## frc_black -3.1410 0.9359 -3.36 0.0017
## frc_asian 1.0041 4.5871 0.22 0.8278
## frc_hispanic -0.4505 1.3526 -0.33 0.7407
## frc_amerindian -3.8699 3.7883 -1.02 0.3127
## UV -0.4202 0.1732 -2.43 0.0195
##
#climate
(model_4s = ols(form_4, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 1 1 1 1 1
## UV avg_temp lat_z lon_z elevation_z
## 1 1 1 1 1
## (weights)
## 1
##
## Linear Regression Model
##
## ols(formula = form_4, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 62.56 R2 0.721
## sigma25.3630 d.f. 9 R2 adj 0.657
## d.f. 39 Pr(> chi2) 0.0000 g 0.909
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.06879 -0.18211 0.05978 0.27205 0.90978
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4332 0.2774 1.56 0.1265
## frc_black -4.4850 0.9401 -4.77 <0.0001
## frc_asian 16.4494 4.9652 3.31 0.0020
## frc_hispanic -1.3160 1.1108 -1.18 0.2433
## frc_amerindian -3.6634 3.1824 -1.15 0.2567
## UV -0.7981 0.2461 -3.24 0.0024
## avg_temp 1.7740 0.5268 3.37 0.0017
## lat_z 1.1112 0.4665 2.38 0.0222
## lon_z 0.8534 0.1716 4.97 <0.0001
## elevation_z 0.9042 0.2147 4.21 0.0001
##
#spatial autocorrelation in residual?
d_state$model_4s_resid = resid(model_4s) %>% standardize()
SAC_measures(d_state %>% filter(!is.na(model_4s_resid)), "model_4s_resid", measures = "KNSNR")
#spatial lag
(model_5s = ols(form_5, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA frc_black frc_asian frc_hispanic frc_amerindian
## 1 1 1 1 1
## UV avg_temp lat_z lon_z elevation_z
## 1 1 1 1 1
## CA_lag (weights)
## 1 1
##
## Linear Regression Model
##
## ols(formula = form_5, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 62.91 R2 0.723
## sigma 25.6011 d.f. 10 R2 adj 0.650
## d.f. 38 Pr(> chi2) 0.0000 g 0.910
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.9694 -0.2172 0.0677 0.2924 0.8829
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4010 0.2866 1.40 0.1699
## frc_black -4.3825 0.9687 -4.52 <0.0001
## frc_asian 17.3630 5.3030 3.27 0.0023
## frc_hispanic -1.3673 1.1255 -1.21 0.2319
## frc_amerindian -3.5101 3.2254 -1.09 0.2833
## UV -0.8181 0.2512 -3.26 0.0024
## avg_temp 1.8307 0.5426 3.37 0.0017
## lat_z 1.0687 0.4778 2.24 0.0313
## lon_z 0.8399 0.1751 4.80 <0.0001
## elevation_z 0.9240 0.2199 4.20 0.0002
## CA_lag 0.1103 0.2092 0.53 0.6011
##
#SAC in residuals now?
d_state$model_5s_resid = resid(model_5s) %>% standardize()
SAC_measures(d_state %>% filter(!is.na(model_5s_resid)), "model_5s_resid", measures = "KNSNR")
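#cross-check with a standard Moran's I test (a sketch, not the pipeline's own method;
#assumes d_state carries lon/lat columns and the spdep package is available)
d_sac = d_state %>% filter(!is.na(model_5s_resid), !is.na(lon), !is.na(lat))
sac_nb = spdep::knn2nb(spdep::knearneigh(cbind(d_sac$lon, d_sac$lat), k = 5, longlat = TRUE))
spdep::moran.test(d_sac$model_5s_resid, spdep::nb2listw(sac_nb, style = "W"))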
#nonlinear UV
(model_6s = ols(form_6, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA CA_lag frc_black frc_asian frc_hispanic
## 1 1 1 1 1
## frc_amerindian UV avg_temp lat_z lon_z
## 1 1 1 1 1
## elevation_z (weights)
## 1 1
##
## Linear Regression Model
##
## ols(formula = form_6, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 65.08 R2 0.735
## sigma 26.0919 d.f. 13 R2 adj 0.637
## d.f. 35 Pr(> chi2) 0.0000 g 0.927
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.85503 -0.27715 0.07099 0.26890 0.83428
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3295 0.8160 0.40 0.6888
## CA_lag -0.0253 0.2432 -0.10 0.9179
## frc_black -4.5325 1.0000 -4.53 <0.0001
## frc_asian 14.4935 5.8904 2.46 0.0190
## frc_hispanic -0.6532 1.3079 -0.50 0.6206
## frc_amerindian -3.9672 3.3849 -1.17 0.2491
## UV -0.6174 0.8143 -0.76 0.4534
## UV' -2.2669 27.4190 -0.08 0.9346
## UV'' 5.4466 41.0354 0.13 0.8952
## UV''' -5.9599 17.6434 -0.34 0.7375
## avg_temp 1.8167 0.5897 3.08 0.0040
## lat_z 1.4342 0.6023 2.38 0.0228
## lon_z 0.9417 0.2077 4.54 <0.0001
## elevation_z 0.8046 0.2487 3.24 0.0027
##
#UV x race
(model_7s = ols(form_7, data = d_state, weights = d_state$pop_weight))
## Frequencies of Missing Values Due to Each Variable
## CA CA_lag UV frc_black frc_asian
## 1 1 1 1 1
## frc_hispanic frc_amerindian avg_temp lat_z lon_z
## 1 1 1 1 1
## elevation_z (weights)
## 1 1
##
## Linear Regression Model
##
## ols(formula = form_7, data = d_state, weights = d_state$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 49 LR chi2 83.29 R2 0.817
## sigma 21.9833 d.f. 14 R2 adj 0.742
## d.f. 34 Pr(> chi2) 0.0000 g 0.974
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.31880 -0.23782 0.04668 0.23206 0.74668
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5546 0.2743 2.02 0.0511
## CA_lag -0.0809 0.2059 -0.39 0.6966
## UV 0.9556 0.5687 1.68 0.1021
## frc_black -4.9719 0.8734 -5.69 <0.0001
## frc_asian 13.0377 5.1196 2.55 0.0156
## frc_hispanic -0.4472 1.2957 -0.35 0.7321
## frc_amerindian 1.7082 3.6537 0.47 0.6431
## avg_temp 0.1981 0.6405 0.31 0.7590
## lat_z 0.3427 0.5266 0.65 0.5196
## lon_z 0.6677 0.1867 3.58 0.0011
## elevation_z 0.2202 0.2728 0.81 0.4252
## UV * frc_black -0.5415 1.4087 -0.38 0.7031
## UV * frc_asian -10.8260 3.5178 -3.08 0.0041
## UV * frc_hispanic -0.7802 0.7706 -1.01 0.3185
## UV * frc_amerindian -10.4274 4.0766 -2.56 0.0152
##
#effect size comparisons
#proportion R2
map_df(mget("model_" + 1:7 + "s"), model_partialR2s)
#numerical values
(model_5s_partialR2s = plot(anova(model_5s), what='proportion R2', pl = F))
## lon_z frc_black elevation_z avg_temp frc_asian
## 0.2319 0.2063 0.1779 0.1148 0.1081
## UV lat_z frc_hispanic frc_amerindian CA_lag
## 0.1069 0.0504 0.0149 0.0119 0.0028
sum(model_5s_partialR2s[c(race_vars2[-1])])
## [1] 0.34
sum(model_5s_partialR2s[climate_vars_model])
## [1] 0.68
(model_7s_partialR2s = plot(anova(model_7s), what='proportion R2', pl = F))
## frc_black UV frc_asian lon_z
## 0.21573 0.20987 0.09793 0.08414
## UV * frc_asian frc_amerindian UV * frc_amerindian frc_hispanic
## 0.06227 0.05794 0.04302 0.01384
## UV * frc_hispanic elevation_z lat_z CA_lag
## 0.00674 0.00428 0.00278 0.00102
## UV * frc_black avg_temp
## 0.00097 0.00063
sum(model_7s_partialR2s[c(race_vars2[-1])])
## [1] 0.39
sum(model_7s_partialR2s[climate_vars_model])
## [1] 0.3
sum(model_7s_partialR2s[str_subset(names(model_7s_partialR2s), "\\*")])
## [1] 0.11
#summarize model results
list(model_1s, model_2s, model_3s, model_4s, model_5s, model_6s, model_7s) %>%
summarize_models()
#supplement models
list(
ols(form_3b, data = d, weights = d$pop_weight)
) %>%
walk(function(x) {
print(x)
cat("Relative importance: partial R2s\n")
print(model_partialR2s(x))
})
## Frequencies of Missing Values Due to Each Variable
## CA UV frc_white (weights)
## 90 118 100 84
##
## Linear Regression Model
##
## ols(formula = form_3b, data = d, weights = d$pop_weight)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3093 LR chi2 999.59 R2 0.276
## sigma 12.5504 d.f. 2 R2 adj 0.276
## d.f. 3090 Pr(> chi2) 0.0000 g 0.550
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.8842 -0.7157 -0.1845 0.3050 2.8013
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -1.0168 0.0481 -21.15 <0.0001
## UV -0.1241 0.0165 -7.53 <0.0001
## frc_white 1.6926 0.0693 24.41 <0.0001
##
## Relative importance: partial R2s
## # A tibble: 1 x 2
## frc_white UV
## <dbl> <dbl>
## 1 0.505 0.0481
Standardized
list(
ols(form_1, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_2, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_3, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_4, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_5, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_6, data = d_state_std, weights = d_state_std$pop_weight),
ols(form_7, data = d_state_std, weights = d_state_std$pop_weight)
) %>% summarize_models()
Pseudo-states
Examples
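The algorithm seeds 48 random counties, one per pseudo-state, then repeatedly picks a pseudo-state at random and grows it by one unassigned neighboring county, stopping once every county is assigned or no pseudo-state has free neighbors left.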
#algorithm
assign_random_states = function(x, x_neighbors = NULL, n_states = 48, var = "new_state", verbose = F) {
#pick random counties as seeds
#set state status
var_s = sym(var)
x$..rowi = 1:nrow(x)
x[[var]] = NA
x[[var]][sample(1:nrow(x), size = n_states)] = 1:n_states
#loop while adding counties until none left
#construct neighbor matrix if needed
if (is.null(x_neighbors)) x_neighbors = spdep::poly2nb(as(x, "Spatial"), queen = FALSE)
#states without free neighbors remaining
states_nofree_nbs = c()
#itooliketolivedangerously.png
stuck_count = 0
last_unit = -1
states_tried = c()
while (T) {
#status
if (verbose) message(str_glue("Units done {sum(!is.na(x[[var]]))}"))
# browser()
#check if done
#all units assigned
if (!any(is.na(x[[var]]))) {
break
}
#no states have any county to expand to
if (length(states_nofree_nbs) == n_states) {
break
}
#pick a random state to expand (with free units remaining)
state_i = sample(setdiff(1:n_states, states_nofree_nbs), size = 1)
states_tried = c(states_tried, state_i)
#stuck?
if (last_unit == state_i) {
stuck_count = stuck_count + 1
}
if (stuck_count >= 10) browser()
#units in state
state_i_counties = x %>% filter(!!var_s == state_i) %>% as.data.frame() %>% pull(..rowi)
#find neighboring units
state_i_neighbors = x_neighbors[state_i_counties] %>% unlist() %>% unique()
#filter to those not already owned
state_i_neighbors = x[state_i_neighbors, ] %>% filter(is.na(!!var_s))
#any of them not taken?
#if all taken, try again
if (nrow(state_i_neighbors) == 0) {
#add state to list of those without more free neighbors
states_nofree_nbs = c(states_nofree_nbs, state_i)
next
}
#assign a random one
x[[var]][sample(state_i_neighbors$..rowi, size = 1)] = state_i
#last unit
last_unit = state_i
#stuck count
stuck_count = 0
}
x
}
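#sanity check (sketch): every pseudo-state should form one connected block;
#subset() dispatches to spdep::subset.nb here, and n.comp.nb counts the components
states_contiguous = function(x, x_neighbors, var = "new_state") {
  map_lgl(sort(unique(x[[var]])), function(s) {
    in_state = !is.na(x[[var]]) & x[[var]] == s
    spdep::n.comp.nb(subset(x_neighbors, in_state))$nc == 1
  })
}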
#without outlying states
county_sf_data_no = county_sf_data %>%
# filter(!State.x %in% c("Alaska", "Hawaii")) %>%
#remove Alaska and Hawaii, and outlying islands
filter(
!STATEFP %in% c("02", "15"),
#state FIPS codes above 56 belong to outlying territories, so this removes them
as.numeric(as.character(STATEFP)) <= 56
)
#find neighbors
#https://github.com/r-spatial/sf/issues/234
#this method is slower
county_sf_data_no_nbs = spdep::poly2nb(as(county_sf_data_no, "Spatial"), queen = FALSE)
## Registered S3 method overwritten by 'spdep':
## method from
## plot.mst ape
#fast, but includes corner-touching neighbors and self-links, which messes up the results for us
# county_sf_data_no_nbs2 = st_intersects(county_sf_data_no, county_sf_data_no)
#inspect
county_sf_data_no_nbs
## Neighbour list object:
## Number of regions: 3108
## Number of nonzero links: 17744
## Percentage nonzero weights: 0.18
## Average number of links: 5.7
#table of counts of neighbors
county_sf_data_no_nbs_counts = county_sf_data_no_nbs %>% map_int(function(x) {
if (identical(x, 0L)) return(0L)
length(x)
})
(county_sf_data_no_nbs_counts_table = county_sf_data_no_nbs_counts %>% table())
## .
## 1 2 3 4 5 6 7 8 9 10 11 13
## 13 26 68 343 860 1066 515 166 40 9 1 1
#the neighborless units are all independent cities in Virginia: the counties that
#enclose them lack IQ data and were filtered out earlier, leaving these cities
#with no neighbors, so we need to remove them as well
county_sf_data_no = county_sf_data_no[county_sf_data_no_nbs %>% map_lgl(~!identical(., 0L)), ]
county_sf_data_no_nbs = spdep::poly2nb(as(county_sf_data_no, "Spatial"), queen = FALSE)
#assign new states
set.seed(1)
new_states_example = assign_random_states(county_sf_data_no, county_sf_data_no_nbs, verbose = F)
#merge into state geometries
new_states_example_states = tibble(
new_state = new_states_example$new_state %>% unique()
)
st_geometry(new_states_example_states) = st_union_by(new_states_example$geometry, new_states_example$new_state)
#plot states
new_states_example_states %>%
ggplot() +
#plot by color, with borders since some are hard to see
geom_sf(aes(fill = factor(new_state)), lwd = 1) +
scale_fill_discrete("State", guide = F) +
#add a label on the center
geom_sf_text(aes(label = new_state)) +
theme_classic()

GG_save("figs/example_new_states.png")
#real states
#merge
real_states = tibble(
real_state = county_sf_data_no$State.x %>% unique()
)
st_geometry(real_states) = st_union_by(county_sf_data_no$geometry, county_sf_data_no$State.x)
real_states %>%
ggplot() +
#plot by color, with borders since some are hard to see
geom_sf(aes(fill = factor(real_state)), lwd = 1) +
scale_fill_discrete("State", guide = F) +
#add a label on the center
geom_sf_text(aes(label = real_state), check_overlap = T, size = 3) +
theme_classic()
## Warning: Removed 1 rows containing missing values (geom_text).

GG_save("figs/real_states.png")
## Warning: Removed 1 rows containing missing values (geom_text).
Analysis
We generate many new state maps as above and cache the results, since generation takes a few hours to complete.
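The cache_object helper is from the kirkegaard package; as a sketch, the read-or-compute pattern it implements could be written in base R like this (hypothetical equivalent):
cache_sketch = function(expr, filename, renew = FALSE) {
  #read from disk if a cache exists and renewal is not forced
  if (!renew && file.exists(filename)) {
    message("Cache found, reading object from disk")
    return(readRDS(filename))
  }
  #lazy argument: the expensive expression only runs on a cache miss
  obj = expr
  saveRDS(obj, filename)
  obj
}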
#settings
set.seed(1)
random_states_n = 1000
#generate
random_states = cache_object({
plyr::llply(1:random_states_n, .parallel = T, .fun = function(i) {
new_states = assign_random_states(county_sf_data_no, county_sf_data_no_nbs, verbose = F)
new_states$new_state
})
}, filename = "cache/new_states.rds", renew = F)
## Cache found, reading object from disk
#aggregate datasets
random_states_datasets = cache_object({plyr::llply(random_states, .parallel = T, .fun = function(x) {
#add vector
county_sf_data_no %>%
mutate(new_state = x) %>%
#aggregate
plyr::ddply("new_state", .fun = function(dd) {
# browser()
#weighted mean of main variables
#sum population
y = tibble(
population = sum(dd$Total.Population),
pop_weight = sqrt(population)
)
#loop vars
for (v in c(main_vars, "lon")) y[[v]] = wtd_mean(dd[[v]], w = dd$Total.Population)
y
})
})}, filename = "cache/random_states_datasets.rds")
## Cache found, reading object from disk
#fit regressions
random_states_results = plyr::ldply(seq_along(random_states_datasets), .fun = function(i) {
#fetch
i_state = random_states_datasets[[i]]
#rebuild each formula from text to strip its bound (global) environment
#formula.tools:::as.character.formula is a convenience function that avoids base R's deparse line-wrapping of long formulas
#run regressions
#UV only
model_1 = ols(as.formula(formula.tools:::as.character.formula(form_1)), data = i_state, weights = i_state$pop_weight)
#demographics only
model_2 = ols(as.formula(formula.tools:::as.character.formula(form_2)), data = i_state, weights = i_state$pop_weight)
#demo + UV
model_3 = ols(as.formula(formula.tools:::as.character.formula(form_3)), data = i_state, weights = i_state$pop_weight)
#white + UV
model_3b = ols(as.formula(formula.tools:::as.character.formula(form_3b)), data = i_state, weights = i_state$pop_weight)
#save stats
bind_rows(
model_1 %>%
summary.lm() %>%
tidy() %>%
mutate(model = "1",
adj_r2 = model_1 %>% summary.lm() %>%
.$adj.r.squared),
model_2 %>%
summary.lm() %>%
tidy() %>%
mutate(model = "2",
adj_r2 = model_2 %>%
summary.lm() %>%
.$adj.r.squared,
partial_r2 = c(NA, rms:::plot.anova.rms(anova(model_2), what = 'partial R2', pl = F, sort = "none"))
),
model_3 %>%
summary.lm() %>%
tidy() %>%
mutate(model = "3", adj_r2 = model_3 %>% summary.lm() %>%
.$adj.r.squared,
partial_r2 = c(NA, rms:::plot.anova.rms(anova(model_3), what = 'partial R2', pl = F, sort = "none"))
),
model_3b %>%
summary.lm() %>%
tidy() %>%
mutate(model = "3b",
adj_r2 = model_3b %>% summary.lm() %>%
.$adj.r.squared,
partial_r2 = c(NA, rms:::plot.anova.rms(anova(model_3b), what = 'partial R2', pl = F, sort = "none")))
) %>%
mutate(model = ordered(model),
i = i)
})
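#the four near-identical refits above could share a small helper (a sketch with a
#hypothetical name, same behavior assumed):
refit_ols = function(form, data) {
  ols(as.formula(formula.tools:::as.character.formula(form)),
      data = data, weights = data$pop_weight)
}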
#plots
random_states_results %>%
filter(term == "Intercept") %>%
ggplot(aes(model, adj_r2)) +
geom_boxplot() +
theme_bw()

GG_save("figs/pseudostates_model_r2s.png")
#partial_r2
#all demos
random_states_results %>%
filter(term != "Intercept", model == 3) %>%
ggplot(aes(term, partial_r2, fill = term)) +
geom_boxplot() +
theme_bw()

GG_save("figs/pseudostates_model3_partialR2s.png")
#demos vs. UV
random_states_results %>%
filter(term != "Intercept", model == 3) %>%
select(i, term, partial_r2) %>%
spread(key = term, value = partial_r2) %>%
mutate(
demo = frc_black + frc_asian + frc_hispanic + frc_amerindian,
ratio = demo / UV,
demo_greater = (demo > UV) %>% as.numeric()
) %>%
select(demo, UV, ratio, demo_greater) %>%
describe()
#white vs. UV
random_states_results %>%
filter(term != "Intercept", model == "3b") %>%
ggplot(aes(term, partial_r2, fill = term)) +
geom_boxplot() +
theme_bw()

GG_save("figs/pseudostates_model3b_partialR2s.png")
Gap analysis
Within-county gaps.
#gap sizes within units
#white-black
seda_CS %>%
filter(subgroup == "wbg") %>%
{
wtd_mean(.$mn_avg_ol) %>% print()
wtd_mean(.$mn_avg_ol, w = 1/.$mn_avg_ol_se) %>% print()
}
## [1] 0.53
## [1] 0.6
#white hispanic
seda_CS %>%
filter(subgroup == "whg") %>%
{
wtd_mean(.$mn_avg_ol) %>% print()
wtd_mean(.$mn_avg_ol, w = 1/.$mn_avg_ol_se) %>% print()
}
## [1] 0.36
## [1] 0.44
#white asian
seda_CS %>%
filter(subgroup == "wag") %>%
{
wtd_mean(.$mn_avg_ol) %>% print()
wtd_mean(.$mn_avg_ol, w = 1/.$mn_avg_ol_se) %>% print()
}
## [1] -0.17
## [1] -0.16
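#the weighted means above use 1/SE as the precision weight; classical
#inverse-variance weighting would use 1/SE^2 instead (sketch for the white-black gap):
seda_CS %>%
  filter(subgroup == "wbg") %>%
  {wtd_mean(.$mn_avg_ol, w = 1/.$mn_avg_ol_se^2) %>% print()}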
#gap relationships
d %>%
select(FIPS, CA, CA_asian:CA_white, CA_d_bw:CA_d_aw, SES_all:SES_d_hw, UV, frc_white, dem16_frac, pop_weight) %>%
select(-FIPS) %>%
{
combine_upperlower(
#upper triangle: population-weighted correlations
wtd.cors(., w = .$pop_weight),
#lower triangle: unweighted correlations
wtd.cors(.),
.diag = 1
) %>%
.[-nrow(.), -ncol(.)]
}
## CA CA_asian CA_black CA_hispanic CA_white CA_d_bw CA_d_hw
## CA 1.0000 4.8e-01 0.647 0.607 0.687 0.129 0.150
## CA_asian 0.4473 1.0e+00 0.343 0.386 0.587 0.252 0.224
## CA_black 0.6717 3.1e-01 1.000 0.588 0.405 -0.457 -0.089
## CA_hispanic 0.5978 3.9e-01 0.540 1.000 0.350 -0.165 -0.481
## CA_white 0.7570 5.3e-01 0.470 0.421 1.000 0.626 0.649
## CA_d_bw 0.1655 2.2e-01 -0.458 -0.088 0.565 1.000 0.710
## CA_d_hw 0.1909 1.5e-01 -0.024 -0.498 0.569 0.608 1.000
## CA_d_aw -0.0067 -7.9e-01 -0.047 -0.159 0.102 0.147 0.241
## SES_all 0.7348 2.5e-01 0.455 0.243 0.581 0.221 0.346
## SES_black 0.4349 2.5e-01 0.496 0.236 0.395 -0.063 0.176
## SES_hispanic 0.2843 2.4e-01 0.222 0.278 0.285 0.089 0.021
## SES_white 0.3689 3.3e-01 0.198 0.099 0.646 0.491 0.541
## SES_d_bw -0.2673 -7.1e-02 -0.452 -0.218 -0.074 0.348 0.127
## SES_d_hw -0.0245 5.2e-02 -0.076 -0.221 0.196 0.271 0.386
## UV -0.3271 -6.5e-02 -0.252 -0.167 -0.101 0.055 0.015
## frc_white 0.6020 -6.5e-05 0.397 0.321 0.029 -0.296 -0.259
## dem16_frac -0.2049 2.4e-01 -0.117 -0.080 0.224 0.383 0.327
## CA_d_aw SES_all SES_black SES_hispanic SES_white SES_d_bw SES_d_hw
## CA -0.0646 0.748 0.419 0.337 0.401 -0.2372 -0.039
## CA_asian -0.7572 0.331 0.308 0.288 0.436 -0.0812 0.068
## CA_black -0.1003 0.432 0.554 0.269 0.209 -0.5343 -0.108
## CA_hispanic -0.1883 0.249 0.242 0.352 0.083 -0.2459 -0.319
## CA_white 0.0820 0.605 0.413 0.326 0.756 0.0011 0.281
## CA_d_bw 0.1762 0.242 -0.072 0.084 0.566 0.4577 0.365
## CA_d_hw 0.2327 0.363 0.192 0.017 0.645 0.2029 0.523
## CA_d_aw 1.0000 0.060 -0.076 -0.083 0.097 0.1396 0.147
## SES_all 0.1339 1.000 0.646 0.559 0.728 -0.3311 -0.064
## SES_black -0.0274 0.646 1.000 0.553 0.558 -0.8427 -0.149
## SES_hispanic -0.0371 0.502 0.496 1.000 0.498 -0.3538 -0.685
## SES_white 0.1514 0.726 0.495 0.461 1.000 -0.0571 0.223
## SES_d_bw 0.0884 -0.360 -0.865 -0.314 -0.048 1.0000 0.346
## SES_d_hw 0.1159 -0.064 -0.162 -0.717 0.163 0.3055 1.000
## UV -0.0129 -0.190 -0.022 0.048 0.076 -0.0167 -0.049
## frc_white -0.0517 0.442 0.118 0.065 -0.226 -0.2232 -0.209
## dem16_frac 0.0025 -0.196 0.038 0.011 0.292 0.1752 0.254
## UV frc_white dem16_frac
## CA -0.3674 0.515 -0.1020
## CA_asian -0.0542 -0.069 0.2629
## CA_black -0.1711 0.307 -0.1090
## CA_hispanic -0.1609 0.323 -0.1527
## CA_white -0.1045 -0.156 0.4479
## CA_d_bw 0.0156 -0.387 0.5334
## CA_d_hw 0.0067 -0.399 0.5473
## CA_d_aw -0.0300 -0.049 0.0550
## SES_all -0.1952 0.305 0.0039
## SES_black 0.1162 -0.050 0.1341
## SES_hispanic 0.1009 0.019 0.0438
## SES_white 0.0580 -0.349 0.5035
## SES_d_bw -0.1500 -0.140 0.1836
## SES_d_hw -0.1061 -0.283 0.3808
## UV 1.0000 -0.530 -0.0393
## frc_white -0.5315 1.000 -0.6488
## dem16_frac -0.0913 -0.583 1.0000
#pairwise sample sizes
d %>%
select(CA, CA_asian:CA_white, CA_d_bw:CA_d_aw, SES_all:SES_d_hw, UV, frc_white, dem16_frac) %>%
pairwiseCount()
## CA CA_asian CA_black CA_hispanic CA_white CA_d_bw CA_d_hw
## CA 3134 1483 2135 2647 3112 2116 2639
## CA_asian 1483 1483 1394 1475 1483 1393 1475
## CA_black 2135 1394 2135 2015 2120 2116 2012
## CA_hispanic 2647 1475 2015 2647 2643 2011 2639
## CA_white 3112 1483 2120 2643 3112 2116 2639
## CA_d_bw 2116 1393 2116 2011 2116 2116 2009
## CA_d_hw 2639 1475 2012 2639 2639 2009 2639
## CA_d_aw 1483 1483 1394 1475 1483 1393 1475
## SES_all 3124 1473 2125 2637 3102 2106 2629
## SES_black 2108 1373 2108 1989 2093 2091 1986
## SES_hispanic 2624 1465 1998 2624 2620 1994 2618
## SES_white 3099 1473 2109 2631 3099 2106 2627
## SES_d_bw 2092 1373 2092 1988 2092 2091 1986
## SES_d_hw 2618 1465 1997 2618 2618 1994 2616
## UV 3099 1465 2125 2629 3080 2106 2621
## frc_white 3124 1473 2125 2637 3102 2106 2629
## dem16_frac 3103 1469 2129 2633 3084 2110 2625
## CA_d_aw SES_all SES_black SES_hispanic SES_white SES_d_bw SES_d_hw
## CA 1483 3124 2108 2624 3099 2092 2618
## CA_asian 1483 1473 1373 1465 1473 1373 1465
## CA_black 1394 2125 2108 1998 2109 2092 1997
## CA_hispanic 1475 2637 1989 2624 2631 1988 2618
## CA_white 1483 3102 2093 2620 3099 2092 2618
## CA_d_bw 1393 2106 2091 1994 2106 2091 1994
## CA_d_hw 1475 2629 1986 2618 2627 1986 2616
## CA_d_aw 1483 1473 1373 1465 1473 1373 1465
## SES_all 1473 3124 2108 2624 3099 2092 2618
## SES_black 1373 2108 2108 1982 2092 2092 1981
## SES_hispanic 1465 2624 1982 2624 2618 1981 2618
## SES_white 1473 3099 2092 2618 3099 2092 2618
## SES_d_bw 1373 2092 2092 1981 2092 2092 1981
## SES_d_hw 1465 2618 1981 2618 2618 1981 2618
## UV 1465 3093 2102 2610 3071 2086 2604
## frc_white 1473 3124 2108 2624 3099 2092 2618
## dem16_frac 1469 3094 2103 2611 3072 2087 2605
## UV frc_white dem16_frac
## CA 3099 3124 3103
## CA_asian 1465 1473 1469
## CA_black 2125 2125 2129
## CA_hispanic 2629 2637 2633
## CA_white 3080 3102 3084
## CA_d_bw 2106 2106 2110
## CA_d_hw 2621 2629 2625
## CA_d_aw 1465 1473 1469
## SES_all 3093 3124 3094
## SES_black 2102 2108 2103
## SES_hispanic 2610 2624 2611
## SES_white 3071 3099 3072
## SES_d_bw 2086 2092 2087
## SES_d_hw 2604 2618 2605
## UV 3106 3093 3106
## frc_white 3093 3124 3094
## dem16_frac 3106 3094 3111
Within SIRE regressions
An alternative method is to run the regressions within SIRE.
#regression function
do_regs = function(group, data = d, y = "CA") {
# browser()
#models
# wts = data[[str_glue("{str_to_sentence(y)}_pop")]] %>% sqrt()
wts = data[[str_glue("pop_{group}")]] %>% sqrt()
mods = list(
#1: UV only
ols(as.formula(str_glue("{y}_{group} ~ UV")), data = data, weights = wts),
#2: spatial lag + UV + temperature
ols(as.formula(str_glue("{y}_{group} ~ {y}_{group}_lag + UV + avg_temp")), data = data, weights = wts),
#3: climate variables
ols(as.formula(str_glue("{y}_{group} ~ {str_c(climate_vars_model, collapse = ' + ')}")), data = data, weights = wts),
#4: climate variables + spatial lag
ols(as.formula(str_glue("{y}_{group} ~ {str_c(climate_vars_model, collapse = ' + ')} + {y}_{group}_lag")), data = data, weights = wts),
#5: climate + water variables + spatial lag
ols(as.formula(str_glue("{y}_{group} ~ {str_c(c(climate_vars_model, water_vars), collapse = ' + ')} + {y}_{group}_lag")), data = data, weights = wts)
)
mods
}
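#for reference: with group = "black" and y = "CA", model 3 above expands to
#CA_black ~ UV + avg_temp + lat_z + lon_z + elevation_z
#(climate_vars_model inferred from the model 4/5 coefficient tables earlier)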
#do regs and summarize
do_regs("black") %>% summarize_models()
do_regs("hispanic") %>% summarize_models()
do_regs("asian") %>% summarize_models()
do_regs("white") %>% summarize_models()
do_regs("all") %>% summarize_models() #just for comparison
#state level
do_regs("black", data = d_state) %>% summarize_models()
do_regs("hispanic", data = d_state) %>% summarize_models()
do_regs("asian", data = d_state) %>% summarize_models()
do_regs("white", data = d_state) %>% summarize_models()
do_regs("all", data = d_state) %>% summarize_models() #just for comparison
Standardized
#do regs and summarize
do_regs("black", data = d_std) %>% summarize_models()
do_regs("hispanic", data = d_std) %>% summarize_models()
do_regs("asian", data = d_std) %>% summarize_models()
do_regs("white", data = d_std) %>% summarize_models()
do_regs("all", data = d_std) %>% summarize_models() #just for comparison
#state level
do_regs("black", data = d_state_std) %>% summarize_models()
do_regs("hispanic", data = d_state_std) %>% summarize_models()
do_regs("asian", data = d_state_std) %>% summarize_models()
do_regs("white", data = d_state_std) %>% summarize_models()
do_regs("all", data = d_state_std) %>% summarize_models() #just for comparison
Diseases
#correlations
combine_upperlower(
#no weights (upper triangle)
d %>% select(CA_all:CA_white, !!disease_vars) %>% wtd.cors(),
#weights (lower triangle)
d %>% select(CA_all:CA_white, !!disease_vars) %>% wtd.cors(weight = d$pop_weight)
)
## CA_all CA_asian CA_black CA_hispanic CA_white
## CA_all NA 0.44730 0.6717 0.598 0.757
## CA_asian 0.478 NA 0.3058 0.389 0.529
## CA_black 0.647 0.34316 NA 0.540 0.470
## CA_hispanic 0.607 0.38592 0.5881 NA 0.421
## CA_white 0.687 0.58697 0.4050 0.350 NA
## Diarrheal_diseases_z 0.024 0.09960 -0.0081 0.061 -0.043
## Hepatitis_z -0.352 -0.14011 -0.2081 -0.265 -0.072
## HIV_AIDS_z -0.335 0.11489 -0.2141 -0.076 0.122
## Lower_respiratory_infections_z -0.358 -0.08421 -0.2033 -0.043 -0.367
## Meningitis_z -0.632 -0.11404 -0.3970 -0.173 -0.288
## Tuberculosis_z -0.575 0.00068 -0.3409 -0.208 -0.068
## Diarrheal_diseases_z Hepatitis_z HIV_AIDS_z
## CA_all -0.0461 -0.278 -0.353
## CA_asian 0.1739 -0.126 0.103
## CA_black 0.0404 -0.168 -0.260
## CA_hispanic 0.0853 -0.177 -0.060
## CA_white -0.0864 -0.117 -0.054
## Diarrheal_diseases_z NA 0.102 0.099
## Hepatitis_z 0.0169 NA 0.583
## HIV_AIDS_z 0.0225 0.503 NA
## Lower_respiratory_infections_z 0.2242 0.013 0.193
## Meningitis_z 0.1287 0.295 0.556
## Tuberculosis_z -0.0075 0.407 0.595
## Lower_respiratory_infections_z Meningitis_z
## CA_all -0.401 -0.684
## CA_asian -0.033 -0.077
## CA_black -0.274 -0.471
## CA_hispanic -0.065 -0.179
## CA_white -0.369 -0.371
## Diarrheal_diseases_z 0.291 0.203
## Hepatitis_z 0.123 0.308
## HIV_AIDS_z 0.290 0.558
## Lower_respiratory_infections_z NA 0.639
## Meningitis_z 0.608 NA
## Tuberculosis_z 0.340 0.722
## Tuberculosis_z
## CA_all -0.5899
## CA_asian -0.0051
## CA_black -0.4057
## CA_hispanic -0.1801
## CA_white -0.1679
## Diarrheal_diseases_z 0.0821
## Hepatitis_z 0.2857
## HIV_AIDS_z 0.5209
## Lower_respiratory_infections_z 0.3906
## Meningitis_z 0.7441
## Tuberculosis_z NA
Predict SES instead
Requested by reviewer. These are models 1-7 from “Main regressions” above, but with the outcome swapped to SES_all.
#update forms
SES_y_forms = mget("form_" + 1:7) %>%
#change the dependent variable to SES_all
map(~update(., SES_all ~ .))
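#update() swaps in the new left-hand side and keeps the old right-hand side,
#e.g. update(CA ~ UV, SES_all ~ .) yields SES_all ~ UV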
#fits
SES_y_forms %>%
map(function(x) {
ols(x, data = d, weights = d$pop_weight)
}) %>%
summarize_models()
Standardized
#fits
SES_y_forms %>%
map(function(x) {
ols(x, data = d_std, weights = d_std$pop_weight)
}) %>%
summarize_models()
Within SIRE
Predict SES within SIRE. Note there is no SES_asian variable in SEDA.
#do regs and summarize
do_regs("black", y = "SES") %>% summarize_models()
do_regs("hispanic", y = "SES") %>% summarize_models()
#do_regs("asian", y = "SES") %>% summarize_models()
do_regs("white", y = "SES") %>% summarize_models()
do_regs("all", y = "SES") %>% summarize_models() #just for comparison
#state level
do_regs("black", data = d_state, y = "SES") %>% summarize_models()
do_regs("hispanic", data = d_state, y = "SES") %>% summarize_models()
# do_regs("asian", data = d_state, y = "SES") %>% summarize_models()
do_regs("white", data = d_state, y = "SES") %>% summarize_models()
do_regs("all", data = d_state, y = "SES") %>% summarize_models() #just for comparison
Standardized
#do regs and summarize
do_regs("black", y = "SES", data = d_std) %>% summarize_models()
do_regs("hispanic", y = "SES", data = d_std) %>% summarize_models()
#do_regs("asian", y = "SES") %>% summarize_models()
do_regs("white", y = "SES", data = d_std) %>% summarize_models()
do_regs("all", y = "SES", data = d_std) %>% summarize_models() #just for comparison
#state level
do_regs("black", data = d_state_std, y = "SES") %>% summarize_models()
do_regs("hispanic", data = d_state_std, y = "SES") %>% summarize_models()
# do_regs("asian", data = d_state_std, y = "SES") %>% summarize_models()
do_regs("white", data = d_state_std, y = "SES") %>% summarize_models()
do_regs("all", data = d_state_std, y = "SES") %>% summarize_models() #just for comparison
Maps / plots
We make a map for each main variable and a bivariate scatterplot for each pair of variables.
#plot maps
#we plot one for each main variable for future use
var_to_plot = tibble(
var = main_vars[-1] %>% str_subset("_z", negate = T) %>% setdiff(c("lat", "lon")),
nice_name = c("Cognitive ability (All)",
"Cognitive ability (Asian)",
"Cognitive ability (Black)",
"Cognitive ability (Hispanic)",
"Cognitive ability (White)",
"Socioeconomic status (All)",
"Socioeconomic status (Black)",
"Socioeconomic status (Hispanic)",
"Socioeconomic status (White)",
"Proportion White",
"Proportion Black",
"Proportion Asian",
"Proportion Hispanic",
"Proportion Amerindian",
"Ultraviolet Radiation",
"Average temperature",
"Elevation",
"Fluoride concentration in water",
"Lead index")
)
#plot!
for (i in seq_along_rows(var_to_plot)) {
i_var = var_to_plot$var[i]
message(i_var)
i_var2 = as.symbol(i_var)
#limits
lims = c(NA, NA)
local_data = county_sf_data
if (str_detect(i_var, "^(SES|CA)")) {
lims = c(-3, 3)
#winsorize data to limits
local_data[[i_var]] = local_data[[i_var]] %>% winsorise(lower = -3, 3)
}
if (i_var %in% race_vars2) {
lims = c(0, 1)
}
#plot
local_data %>%
# filter(!State.x %in% c("Alaska", "Hawaii")) %>%
#remove Alaska and Hawaii, and outlying islands
filter(
!STATEFP %in% c("02", "15"),
#state FIPS codes above 56 belong to outlying territories, so this removes them
as.numeric(as.character(STATEFP)) <= 56
) %>%
ggplot() +
geom_sf(aes(fill = !!i_var2), lwd = 0) +
scale_fill_gradient2(var_to_plot$nice_name[i], low = "red", high = "blue", mid = "green", limits = lims, na.value = "lightgrey") +
theme_classic() ->
i_plot
GG_save(plot = i_plot, str_glue("figs/maps/{i_var}_map.png"))
}
## CA_all
## CA_asian
## CA_black
## CA_hispanic
## CA_white
## SES_all
## SES_black
## SES_hispanic
## SES_white
## frc_white
## frc_black
## frc_asian
## frc_hispanic
## frc_amerindian
## UV
## avg_temp
## elevation
## fluoride_conc
## lead_index
#bivariate scatterplots
#pairs
pairs_to_plot = gtools::combinations(nrow(var_to_plot), r = 2, v = var_to_plot$var)
for (i in 1:nrow(pairs_to_plot)) {
message(str_glue("{i} of {nrow(pairs_to_plot)}"))
#var names
i_var1 = pairs_to_plot[i, 1]
i_var2 = pairs_to_plot[i, 2]
#weights are tricky: we use group-specific weights when possible,
#and a harmonic-mean combination when two group variables are involved
i_groups = tibble(
asian = str_detect(c(i_var1, i_var2), "_asian") %>% any(),
black = str_detect(c(i_var1, i_var2), "_black") %>% any(),
hispanic = str_detect(c(i_var1, i_var2), "_hispanic") %>% any(),
white = str_detect(c(i_var1, i_var2), "_white") %>% any()
)
#weights defaults to standard, but changes to group specific if needed
i_wt = d$pop_weight
#if one group
if (sum(as.matrix(i_groups)) == 1) {
i_group = which(as.matrix(i_groups))
#switch() only matches names for character input; numeric input selects by position
i_wt = switch(as.character(i_group),
"1" = d$pop_asian %>% sqrt(),
"2" = d$pop_black %>% sqrt(),
"3" = d$pop_hispanic %>% sqrt(),
"4" = d$pop_white %>% sqrt()
)
}
#if two
if (sum(as.matrix(i_groups)) == 2) {
i_group = which(as.matrix(i_groups)) %>% as.numeric()
#if then for each of the 6 combos
if (identical(i_group, c(1, 2))) i_wt = matrix(c(d$pop_asian %>% sqrt(),
d$pop_black %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
if (identical(i_group, c(1, 3))) i_wt = matrix(c(d$pop_asian %>% sqrt(),
d$pop_hispanic %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
if (identical(i_group, c(1, 4))) i_wt = matrix(c(d$pop_asian %>% sqrt(),
d$pop_white %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
if (identical(i_group, c(2, 3))) i_wt = matrix(c(d$pop_black %>% sqrt(),
d$pop_hispanic %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
if (identical(i_group, c(2, 4))) i_wt = matrix(c(d$pop_black %>% sqrt(),
d$pop_white %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
if (identical(i_group, c(3, 4))) i_wt = matrix(c(d$pop_hispanic %>% sqrt(),
d$pop_white %>% sqrt()),
ncol = 2) %>% t() %>% harmonic.mean()
}
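#note (sketch): the six branches above each take the pairwise harmonic mean of the
#two group weights; a single hypothetical helper would do the same:
#hmean2 = function(a, b) 2 / (1/a + 1/b)
#e.g. the asian-black case is hmean2(sqrt(d$pop_asian), sqrt(d$pop_black))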
#plot with good labels and correct weights!
#we hope
GG_scatter(d, i_var1, i_var2, weights = i_wt) +
xlab(var_to_plot$nice_name[var_to_plot$var==i_var1]) +
ylab(var_to_plot$nice_name[var_to_plot$var==i_var2])
GG_save(str_glue("figs/pairs/{i_var1} x {i_var2}.png"))
}
## 1 of 171
## `geom_smooth()` using formula 'y ~ x'
## (the same two-line progress message repeats for plots 2 of 171 through 171 of 171)
##old code
# county_sf_data %>%
# filter(!State.x %in% c("Alaska", "Hawaii")) %>%
# ggplot() +
# geom_sf(aes(fill = CA), lwd = 0) +
# scale_fill_gradient2("Intelligence", low = "red", high = "blue", mid = "green") +
# theme_classic()
# GG_save("figs/CA_map.png")
#
# county_sf_data %>%
# filter(!State.x %in% c("Alaska", "Hawaii")) %>%
# ggplot() +
# geom_sf(aes(fill = UV), lwd = 0) +
# scale_fill_gradient2(low = "red", high = "blue", mid = "green") +
# theme_classic()
# GG_save("figs/UV_map.png")
#
# county_sf_data %>%
# filter(!State.x %in% c("Alaska", "Hawaii")) %>%
# ggplot() +
# geom_sf(aes(fill = avg_temp), lwd = 0) +
# scale_fill_gradient2(low = "red", high = "blue", mid = "green") +
# theme_classic()
# GG_save("figs/avg_temp_map.png")