library(nortest)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(classInt)
library(sandwich)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(sf)
library(dplyr)
#Load Data
daturl<-"https://github.com/coreysparks/data/blob/master/uscounty_data.Rdata?raw=true"
load(url(daturl))
summary(usco)
## pblack phisp prural rucc
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Length:3101
## 1st Qu.: 0.4603 1st Qu.: 1.154 1st Qu.: 33.60 Class :character
## Median : 2.1182 Median : 2.353 Median : 59.50 Mode :character
## Mean : 9.0072 Mean : 7.114 Mean : 58.62
## 3rd Qu.:10.3887 3rd Qu.: 6.478 3rd Qu.: 87.20
## Max. :85.9945 Max. :97.571 Max. :100.00
##
## ppov pfemhh unemp medhouseval
## Min. : 2.50 Min. : 2.7 Min. : 1.800 Min. : 31500
## 1st Qu.:10.80 1st Qu.:12.7 1st Qu.: 4.100 1st Qu.: 84500
## Median :14.20 Median :15.5 Median : 5.100 Median :108900
## Mean :15.33 Mean :16.7 Mean : 5.408 Mean :130283
## 3rd Qu.:18.60 3rd Qu.:19.3 3rd Qu.: 6.200 3rd Qu.:152900
## Max. :51.00 Max. :47.8 Max. :16.000 Max. :912600
## NA's :1
## popdensity gini County Deaths
## Min. :9.269e+04 Min. :0.2070 Length:3101 Min. : 12
## 1st Qu.:1.715e+07 1st Qu.:0.4070 Class :character 1st Qu.: 646
## Median :4.405e+07 Median :0.4290 Mode :character Median : 1402
## Mean :2.279e+08 Mean :0.4313 Mean : 4182
## 3rd Qu.:1.058e+08 3rd Qu.:0.4530 3rd Qu.: 3368
## Max. :6.979e+10 Max. :0.6450 Max. :298108
## NA's :2
## Population Age.Adjusted.Rate state geoid
## Min. : 481 Min. : 300.9 Length:3101 Length:3101
## 1st Qu.: 56089 1st Qu.: 721.6 Class :character Class :character
## Median : 131260 Median : 806.4 Mode :character Mode :character
## Mean : 511096 Mean : 815.3
## 3rd Qu.: 344108 3rd Qu.: 906.4
## Max. :50036337 Max. :1435.7
## NA's :7
## geometry metro_cut
## MULTIPOLYGON :3101 Metropolitan :2049
## epsg:9311 : 0 Non-Metropolitan:1052
## +proj=laea...: 0
##
##
##
##
head(usco)
## Simple feature collection with 6 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1259088 ymin: -1452583 xmax: 1390058 ymax: -1107765
## Projected CRS: NAD27 / US National Atlas Equal Area
## pblack phisp prural rucc ppov pfemhh unemp medhouseval popdensity
## 1 3.810512 1.8326418 100.0 08 15.5 14.5 3.5 101000 25816818
## 2 18.489258 3.2830777 47.2 04 15.1 18.5 3.5 137000 67111751
## 3 31.616198 1.5051066 100.0 08 18.7 20.5 4.6 73100 17147879
## 4 12.566549 0.9458693 69.7 06 18.4 19.2 4.0 90600 35909345
## 5 24.557442 0.7867706 100.0 08 23.5 22.7 4.3 68100 22546153
## 6 20.917781 3.3047510 50.9 04 16.0 21.4 4.0 103300 86871603
## gini County Deaths Population Age.Adjusted.Rate state geoid
## 1 0.424 Cleburne County, AL 926 75007 979.53 01 01029
## 2 0.452 Coffee County, AL 2484 254213 851.78 01 01031
## 3 0.443 Coosa County, AL 687 55109 954.81 01 01037
## 4 0.475 Covington County, AL 2578 189254 963.49 01 01039
## 5 0.466 Crenshaw County, AL 876 69471 989.55 01 01041
## 6 0.424 Dale County, AL 2392 248927 876.17 01 01045
## geometry metro_cut
## 1 MULTIPOLYGON (((1349148 -11... Non-Metropolitan
## 2 MULTIPOLYGON (((1327060 -13... Metropolitan
## 3 MULTIPOLYGON (((1305589 -12... Non-Metropolitan
## 4 MULTIPOLYGON (((1306630 -14... Metropolitan
## 5 MULTIPOLYGON (((1315155 -13... Non-Metropolitan
## 6 MULTIPOLYGON (((1354333 -14... Metropolitan
usco <- st_transform(usco, crs = 2163)
#DV = Age adjusted mortality rates, IVs = the Gini coefficient, % persons in poverty, States
uscosub <- subset(usco, state == "06"|state == "48")
uscosub$state <- as.numeric(uscosub$state)
uscosub$state<-Recode(uscosub$state,"48=0")
uscosub$state<-Recode(uscosub$state,"06=1")
uscosub$state<-as.factor(uscosub$state)
summary(uscosub)
## pblack phisp prural rucc
## Min. : 0.000 Min. : 2.448 Min. : 0.00 Length:312
## 1st Qu.: 1.429 1st Qu.:13.091 1st Qu.: 20.40 Class :character
## Median : 3.652 Median :21.816 Median : 46.40 Mode :character
## Mean : 6.254 Mean :28.941 Mean : 50.53
## 3rd Qu.: 9.035 3rd Qu.:41.245 3rd Qu.: 75.85
## Max. :35.297 Max. :97.571 Max. :100.00
##
## ppov pfemhh unemp medhouseval
## Min. : 5.00 Min. : 4.90 Min. : 2.700 Min. : 31500
## 1st Qu.:13.50 1st Qu.:13.70 1st Qu.: 4.300 1st Qu.: 72975
## Median :17.20 Median :16.30 Median : 5.000 Median : 91900
## Mean :17.63 Mean :16.69 Mean : 5.469 Mean :134313
## 3rd Qu.:20.60 3rd Qu.:19.52 3rd Qu.: 6.000 3rd Qu.:153025
## Max. :45.70 Max. :31.00 Max. :15.800 Max. :785100
##
## popdensity gini County Deaths
## Min. :9.269e+04 Min. :0.2070 Length:312 Min. : 12.0
## 1st Qu.:7.606e+06 1st Qu.:0.4220 Class :character 1st Qu.: 425.5
## Median :2.575e+07 Median :0.4440 Mode :character Median : 1324.0
## Mean :1.947e+08 Mean :0.4448 Mean : 6876.0
## 3rd Qu.:9.195e+07 3rd Qu.:0.4642 3rd Qu.: 4285.2
## Max. :1.578e+10 Max. :0.6260 Max. :298108.0
## NA's :2
## Population Age.Adjusted.Rate state geoid
## Min. : 481 Min. : 414.2 0:254 Length:312
## 1st Qu.: 41301 1st Qu.: 724.8 1: 58 Class :character
## Median : 115794 Median : 815.1 Mode :character
## Mean : 1039307 Mean : 803.1
## 3rd Qu.: 442529 3rd Qu.: 892.4
## Max. :50036337 Max. :1168.2
## NA's :3
## geometry metro_cut
## MULTIPOLYGON :312 Metropolitan :215
## epsg:9311 : 0 Non-Metropolitan: 97
## +proj=laea...: 0
##
##
##
##
#OLS Model
fit.0<-lm(Age.Adjusted.Rate~state+scale(ppov)+scale(gini),
data=uscosub)
summary(fit.0)
##
## Call:
## lm(formula = Age.Adjusted.Rate ~ state + scale(ppov) + scale(gini),
## data = uscosub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -448.97 -67.88 3.76 76.12 291.47
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 828.108 7.296 113.501 < 2e-16 ***
## state1 -129.693 17.499 -7.412 1.24e-12 ***
## scale(ppov) 26.806 7.451 3.597 0.000375 ***
## scale(gini) -25.119 7.638 -3.288 0.001125 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 114.6 on 305 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.2353, Adjusted R-squared: 0.2278
## F-statistic: 31.28 on 3 and 305 DF, p-value: < 2.2e-16
Results: California counties have lower mortality rates than Texas counties, based on the first coefficient in the model -128.693. County characteristics associated with higher mortality rates include % poverty. County characteristics associated with lower mortality rates include the Gini coefficient.
#Spatial regime model building through interactions
fit.a<-lm(Age.Adjusted.Rate~state+scale(ppov)+scale(gini),
data=uscosub)
fit.b<-lm(Age.Adjusted.Rate~state/scale(ppov)+scale(gini),
data=uscosub)
anova(fit.a, fit.b, test="F")
## Analysis of Variance Table
##
## Model 1: Age.Adjusted.Rate ~ state + scale(ppov) + scale(gini)
## Model 2: Age.Adjusted.Rate ~ state/scale(ppov) + scale(gini)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 305 4006849
## 2 304 3843009 1 163839 12.96 0.0003714 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:The test is significant an that the coefficients for California and Texas models are not the same. Thus, I can split the data into regimes and examine the results.
#k= 4 nearest neighbors for the whole dataset
nbs<-knearneigh(coordinates(as_Spatial(uscosub)) , k=4)
nbs<-knn2nb(nbs,sym=T)
wts<-nb2listw(nbs)
#k = 4 nearest neighbors for the Texas counties
nbsmg_m<-knearneigh(coordinates(as_Spatial(uscosub[uscosub$state=="0",])), k=4)
nbs_m<-knn2nb(nbsmg_m, sym=T)
wts_m<-nb2listw(nbs_m)
#k = 4 nearest neighbors for the California counties
nbsmg_n<-knearneigh(coordinates(as_Spatial(uscosub[uscosub$state=="1",])), k=4)
nbs_n<-knn2nb(nbsmg_n, sym=T)
wts_n<-nb2listw(nbs_n)
#Global Model
efit<-errorsarlm(Age.Adjusted.Rate~scale(ppov)+scale(gini),
data=uscosub,
listw=wts,
method="MC")
#saturated model
efit0<-errorsarlm(Age.Adjusted.Rate~state/scale(ppov)+scale(gini),
data=uscosub,
listw=wts,
method="MC")
anova(efit, efit0)
## Model df AIC logLik Test L.Ratio p-value
## efit 1 5 3733.5 -1861.8 1
## efit0 2 7 3727.1 -1856.5 2 10.464 0.0053432
Results indicate that the regimes have different regression effects.
efit1<-errorsarlm(Age.Adjusted.Rate~(scale(ppov)+scale(gini)),
data=uscosub[uscosub$state=="0",],
listw=wts_m)
summary(efit1, Nagelkerke=T)
##
## Call:errorsarlm(formula = Age.Adjusted.Rate ~ (scale(ppov) + scale(gini)),
## data = uscosub[uscosub$state == "0", ], listw = wts_m)
##
## Residuals:
## Min 1Q Median 3Q Max
## -328.3039 -49.0400 2.5076 63.6914 293.6625
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 830.3706 15.3596 54.0620 < 2.2e-16
## scale(ppov) 39.4573 9.0974 4.3372 1.443e-05
## scale(gini) -13.5799 7.0872 -1.9161 0.05535
##
## Lambda: 0.59871, LR test value: 67.401, p-value: 2.2204e-16
## Asymptotic standard error: 0.061609
## z-value: 9.718, p-value: < 2.22e-16
## Wald statistic: 94.44, p-value: < 2.22e-16
##
## Log likelihood: -1518.268 for error model
## ML residual variance (sigma squared): 9534.9, (sigma: 97.647)
## Nagelkerke pseudo-R-squared: 0.25492
## Number of observations: 251
## Number of parameters estimated: 5
## AIC: 3046.5, (AIC for lm: 3111.9)
efit1<-errorsarlm(Age.Adjusted.Rate~(scale(ppov)+scale(gini)),
data=uscosub[uscosub$state=="1",],
listw=wts_n)
summary(efit1, Nagelkerke=T)
##
## Call:errorsarlm(formula = Age.Adjusted.Rate ~ (scale(ppov) + scale(gini)),
## data = uscosub[uscosub$state == "1", ], listw = wts_n)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.5134 -31.7734 -3.7029 25.4867 193.0247
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 683.739 16.997 40.2259 < 2.2e-16
## scale(ppov) 60.054 11.457 5.2419 1.589e-07
## scale(gini) -23.210 10.856 -2.1380 0.03251
##
## Lambda: 0.42279, LR test value: 6.0728, p-value: 0.013728
## Asymptotic standard error: 0.15553
## z-value: 2.7184, p-value: 0.0065606
## Wald statistic: 7.3895, p-value: 0.0065606
##
## Log likelihood: -333.7237 for error model
## ML residual variance (sigma squared): 5582.9, (sigma: 74.719)
## Nagelkerke pseudo-R-squared: 0.51023
## Number of observations: 58
## Number of parameters estimated: 5
## AIC: 677.45, (AIC for lm: 681.52)
Results: Only one factor in the Texas counties model is significant. Percent of poverty is associated with higher mortality rates. Although the Gini coefficient is associated with lower mortality rates, it was not statistically significant, p = 0.06. This contrasts with the global model that showed the Gini coefficient is statistically significantly associated with lower mortality rates, p < 0.01.
Both factors in the California counties model are significant. Percent of poverty is associated with higher mortality rates. Also, the Gini coefficient is associated with lower mortality rates. This aligns with the global model that showed percent of poverty is statistically significantly associated with higher mortality rates and the Gini coefficient is statistically significantly associated with lower mortality rates.
Between both models, the California counties model has a higher pseudo r-squared compared to the Texas counties model indicating a better model fit.