library(nortest)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(classInt)
library(sandwich)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
##     create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, sacsarlm,
##     SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
##     set.ClusterOption, set.coresOption, set.mcOption,
##     set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
##     spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
##     spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(sf)
library(dplyr)
#Load Data
daturl<-"https://github.com/coreysparks/data/blob/master/uscounty_data.Rdata?raw=true"
load(url(daturl))
summary(usco)
##      pblack            phisp            prural           rucc          
##  Min.   : 0.0000   Min.   : 0.000   Min.   :  0.00   Length:3101       
##  1st Qu.: 0.4603   1st Qu.: 1.154   1st Qu.: 33.60   Class :character  
##  Median : 2.1182   Median : 2.353   Median : 59.50   Mode  :character  
##  Mean   : 9.0072   Mean   : 7.114   Mean   : 58.62                     
##  3rd Qu.:10.3887   3rd Qu.: 6.478   3rd Qu.: 87.20                     
##  Max.   :85.9945   Max.   :97.571   Max.   :100.00                     
##                                                                        
##       ppov           pfemhh         unemp         medhouseval    
##  Min.   : 2.50   Min.   : 2.7   Min.   : 1.800   Min.   : 31500  
##  1st Qu.:10.80   1st Qu.:12.7   1st Qu.: 4.100   1st Qu.: 84500  
##  Median :14.20   Median :15.5   Median : 5.100   Median :108900  
##  Mean   :15.33   Mean   :16.7   Mean   : 5.408   Mean   :130283  
##  3rd Qu.:18.60   3rd Qu.:19.3   3rd Qu.: 6.200   3rd Qu.:152900  
##  Max.   :51.00   Max.   :47.8   Max.   :16.000   Max.   :912600  
##                                                  NA's   :1       
##    popdensity             gini           County              Deaths      
##  Min.   :9.269e+04   Min.   :0.2070   Length:3101        Min.   :    12  
##  1st Qu.:1.715e+07   1st Qu.:0.4070   Class :character   1st Qu.:   646  
##  Median :4.405e+07   Median :0.4290   Mode  :character   Median :  1402  
##  Mean   :2.279e+08   Mean   :0.4313                      Mean   :  4182  
##  3rd Qu.:1.058e+08   3rd Qu.:0.4530                      3rd Qu.:  3368  
##  Max.   :6.979e+10   Max.   :0.6450                      Max.   :298108  
##                                                          NA's   :2       
##    Population       Age.Adjusted.Rate    state              geoid          
##  Min.   :     481   Min.   : 300.9    Length:3101        Length:3101       
##  1st Qu.:   56089   1st Qu.: 721.6    Class :character   Class :character  
##  Median :  131260   Median : 806.4    Mode  :character   Mode  :character  
##  Mean   :  511096   Mean   : 815.3                                         
##  3rd Qu.:  344108   3rd Qu.: 906.4                                         
##  Max.   :50036337   Max.   :1435.7                                         
##                     NA's   :7                                              
##           geometry               metro_cut   
##  MULTIPOLYGON :3101   Metropolitan    :2049  
##  epsg:9311    :   0   Non-Metropolitan:1052  
##  +proj=laea...:   0                          
##                                              
##                                              
##                                              
## 
head(usco)
## Simple feature collection with 6 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1259088 ymin: -1452583 xmax: 1390058 ymax: -1107765
## Projected CRS: NAD27 / US National Atlas Equal Area
##      pblack     phisp prural rucc ppov pfemhh unemp medhouseval popdensity
## 1  3.810512 1.8326418  100.0   08 15.5   14.5   3.5      101000   25816818
## 2 18.489258 3.2830777   47.2   04 15.1   18.5   3.5      137000   67111751
## 3 31.616198 1.5051066  100.0   08 18.7   20.5   4.6       73100   17147879
## 4 12.566549 0.9458693   69.7   06 18.4   19.2   4.0       90600   35909345
## 5 24.557442 0.7867706  100.0   08 23.5   22.7   4.3       68100   22546153
## 6 20.917781 3.3047510   50.9   04 16.0   21.4   4.0      103300   86871603
##    gini               County Deaths Population Age.Adjusted.Rate state geoid
## 1 0.424  Cleburne County, AL    926      75007            979.53    01 01029
## 2 0.452    Coffee County, AL   2484     254213            851.78    01 01031
## 3 0.443     Coosa County, AL    687      55109            954.81    01 01037
## 4 0.475 Covington County, AL   2578     189254            963.49    01 01039
## 5 0.466  Crenshaw County, AL    876      69471            989.55    01 01041
## 6 0.424      Dale County, AL   2392     248927            876.17    01 01045
##                         geometry        metro_cut
## 1 MULTIPOLYGON (((1349148 -11... Non-Metropolitan
## 2 MULTIPOLYGON (((1327060 -13...     Metropolitan
## 3 MULTIPOLYGON (((1305589 -12... Non-Metropolitan
## 4 MULTIPOLYGON (((1306630 -14...     Metropolitan
## 5 MULTIPOLYGON (((1315155 -13... Non-Metropolitan
## 6 MULTIPOLYGON (((1354333 -14...     Metropolitan
usco <- st_transform(usco, crs = 2163)
#DV = Age adjusted mortality rates, IVs = the Gini coefficient, % persons in poverty, States
uscosub <- subset(usco, state == "06"|state == "48")
uscosub$state <- as.numeric(uscosub$state)
uscosub$state<-Recode(uscosub$state,"48=0")
uscosub$state<-Recode(uscosub$state,"06=1")
uscosub$state<-as.factor(uscosub$state)
summary(uscosub)
##      pblack           phisp            prural           rucc          
##  Min.   : 0.000   Min.   : 2.448   Min.   :  0.00   Length:312        
##  1st Qu.: 1.429   1st Qu.:13.091   1st Qu.: 20.40   Class :character  
##  Median : 3.652   Median :21.816   Median : 46.40   Mode  :character  
##  Mean   : 6.254   Mean   :28.941   Mean   : 50.53                     
##  3rd Qu.: 9.035   3rd Qu.:41.245   3rd Qu.: 75.85                     
##  Max.   :35.297   Max.   :97.571   Max.   :100.00                     
##                                                                       
##       ppov           pfemhh          unemp         medhouseval    
##  Min.   : 5.00   Min.   : 4.90   Min.   : 2.700   Min.   : 31500  
##  1st Qu.:13.50   1st Qu.:13.70   1st Qu.: 4.300   1st Qu.: 72975  
##  Median :17.20   Median :16.30   Median : 5.000   Median : 91900  
##  Mean   :17.63   Mean   :16.69   Mean   : 5.469   Mean   :134313  
##  3rd Qu.:20.60   3rd Qu.:19.52   3rd Qu.: 6.000   3rd Qu.:153025  
##  Max.   :45.70   Max.   :31.00   Max.   :15.800   Max.   :785100  
##                                                                   
##    popdensity             gini           County              Deaths        
##  Min.   :9.269e+04   Min.   :0.2070   Length:312         Min.   :    12.0  
##  1st Qu.:7.606e+06   1st Qu.:0.4220   Class :character   1st Qu.:   425.5  
##  Median :2.575e+07   Median :0.4440   Mode  :character   Median :  1324.0  
##  Mean   :1.947e+08   Mean   :0.4448                      Mean   :  6876.0  
##  3rd Qu.:9.195e+07   3rd Qu.:0.4642                      3rd Qu.:  4285.2  
##  Max.   :1.578e+10   Max.   :0.6260                      Max.   :298108.0  
##                                                          NA's   :2         
##    Population       Age.Adjusted.Rate state      geoid          
##  Min.   :     481   Min.   : 414.2    0:254   Length:312        
##  1st Qu.:   41301   1st Qu.: 724.8    1: 58   Class :character  
##  Median :  115794   Median : 815.1            Mode  :character  
##  Mean   : 1039307   Mean   : 803.1                              
##  3rd Qu.:  442529   3rd Qu.: 892.4                              
##  Max.   :50036337   Max.   :1168.2                              
##                     NA's   :3                                   
##           geometry              metro_cut  
##  MULTIPOLYGON :312   Metropolitan    :215  
##  epsg:9311    :  0   Non-Metropolitan: 97  
##  +proj=laea...:  0                         
##                                            
##                                            
##                                            
## 
#OLS Model
fit.0<-lm(Age.Adjusted.Rate~state+scale(ppov)+scale(gini),
          data=uscosub)

summary(fit.0)
## 
## Call:
## lm(formula = Age.Adjusted.Rate ~ state + scale(ppov) + scale(gini), 
##     data = uscosub)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -448.97  -67.88    3.76   76.12  291.47 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  828.108      7.296 113.501  < 2e-16 ***
## state1      -129.693     17.499  -7.412 1.24e-12 ***
## scale(ppov)   26.806      7.451   3.597 0.000375 ***
## scale(gini)  -25.119      7.638  -3.288 0.001125 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 114.6 on 305 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.2353, Adjusted R-squared:  0.2278 
## F-statistic: 31.28 on 3 and 305 DF,  p-value: < 2.2e-16

Results: California counties have lower mortality rates than Texas counties, based on the first coefficient in the model -128.693. County characteristics associated with higher mortality rates include % poverty. County characteristics associated with lower mortality rates include the Gini coefficient.

#Spatial regime model building through interactions
fit.a<-lm(Age.Adjusted.Rate~state+scale(ppov)+scale(gini),
          data=uscosub)
fit.b<-lm(Age.Adjusted.Rate~state/scale(ppov)+scale(gini),
          data=uscosub)

anova(fit.a, fit.b, test="F")
## Analysis of Variance Table
## 
## Model 1: Age.Adjusted.Rate ~ state + scale(ppov) + scale(gini)
## Model 2: Age.Adjusted.Rate ~ state/scale(ppov) + scale(gini)
##   Res.Df     RSS Df Sum of Sq     F    Pr(>F)    
## 1    305 4006849                                 
## 2    304 3843009  1    163839 12.96 0.0003714 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:The test is significant an that the coefficients for California and Texas models are not the same. Thus, I can split the data into regimes and examine the results.

#k= 4 nearest neighbors for the whole dataset
nbs<-knearneigh(coordinates(as_Spatial(uscosub)) , k=4)
nbs<-knn2nb(nbs,sym=T)
wts<-nb2listw(nbs)
#k = 4  nearest neighbors for the Texas counties
nbsmg_m<-knearneigh(coordinates(as_Spatial(uscosub[uscosub$state=="0",])), k=4)
nbs_m<-knn2nb(nbsmg_m, sym=T)
wts_m<-nb2listw(nbs_m)
#k = 4  nearest neighbors for the California counties
nbsmg_n<-knearneigh(coordinates(as_Spatial(uscosub[uscosub$state=="1",])), k=4)
nbs_n<-knn2nb(nbsmg_n, sym=T)
wts_n<-nb2listw(nbs_n)
#Global Model
efit<-errorsarlm(Age.Adjusted.Rate~scale(ppov)+scale(gini),
                 data=uscosub,
                 listw=wts,
                 method="MC")
#saturated model
efit0<-errorsarlm(Age.Adjusted.Rate~state/scale(ppov)+scale(gini),
                 data=uscosub,
                 listw=wts,
                 method="MC")

anova(efit, efit0)
##       Model df    AIC  logLik Test L.Ratio   p-value
## efit      1  5 3733.5 -1861.8    1                  
## efit0     2  7 3727.1 -1856.5    2  10.464 0.0053432

Results indicate that the regimes have different regression effects.

efit1<-errorsarlm(Age.Adjusted.Rate~(scale(ppov)+scale(gini)),
                  data=uscosub[uscosub$state=="0",],
                  listw=wts_m)

summary(efit1, Nagelkerke=T)
## 
## Call:errorsarlm(formula = Age.Adjusted.Rate ~ (scale(ppov) + scale(gini)), 
##     data = uscosub[uscosub$state == "0", ], listw = wts_m)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -328.3039  -49.0400    2.5076   63.6914  293.6625 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 830.3706    15.3596 54.0620 < 2.2e-16
## scale(ppov)  39.4573     9.0974  4.3372 1.443e-05
## scale(gini) -13.5799     7.0872 -1.9161   0.05535
## 
## Lambda: 0.59871, LR test value: 67.401, p-value: 2.2204e-16
## Asymptotic standard error: 0.061609
##     z-value: 9.718, p-value: < 2.22e-16
## Wald statistic: 94.44, p-value: < 2.22e-16
## 
## Log likelihood: -1518.268 for error model
## ML residual variance (sigma squared): 9534.9, (sigma: 97.647)
## Nagelkerke pseudo-R-squared: 0.25492 
## Number of observations: 251 
## Number of parameters estimated: 5 
## AIC: 3046.5, (AIC for lm: 3111.9)
efit1<-errorsarlm(Age.Adjusted.Rate~(scale(ppov)+scale(gini)),
                  data=uscosub[uscosub$state=="1",],
                  listw=wts_n)

summary(efit1, Nagelkerke=T)
## 
## Call:errorsarlm(formula = Age.Adjusted.Rate ~ (scale(ppov) + scale(gini)), 
##     data = uscosub[uscosub$state == "1", ], listw = wts_n)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -212.5134  -31.7734   -3.7029   25.4867  193.0247 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  683.739     16.997 40.2259 < 2.2e-16
## scale(ppov)   60.054     11.457  5.2419 1.589e-07
## scale(gini)  -23.210     10.856 -2.1380   0.03251
## 
## Lambda: 0.42279, LR test value: 6.0728, p-value: 0.013728
## Asymptotic standard error: 0.15553
##     z-value: 2.7184, p-value: 0.0065606
## Wald statistic: 7.3895, p-value: 0.0065606
## 
## Log likelihood: -333.7237 for error model
## ML residual variance (sigma squared): 5582.9, (sigma: 74.719)
## Nagelkerke pseudo-R-squared: 0.51023 
## Number of observations: 58 
## Number of parameters estimated: 5 
## AIC: 677.45, (AIC for lm: 681.52)

Results: Only one factor in the Texas counties model is significant. Percent of poverty is associated with higher mortality rates. Although the Gini coefficient is associated with lower mortality rates, it was not statistically significant, p = 0.06. This contrasts with the global model that showed the Gini coefficient is statistically significantly associated with lower mortality rates, p < 0.01.

Both factors in the California counties model are significant. Percent of poverty is associated with higher mortality rates. Also, the Gini coefficient is associated with lower mortality rates. This aligns with the global model that showed percent of poverty is statistically significantly associated with higher mortality rates and the Gini coefficient is statistically significantly associated with lower mortality rates.

Between both models, the California counties model has a higher pseudo r-squared compared to the Texas counties model indicating a better model fit.