library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(MASS)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::select() masks MASS::select()
## x tidyr::unpack() masks Matrix::unpack()
library(ggplot2)
library(rio)
library(sas7bdat)
library(haven)
library(dplyr)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(sf)
library(MASS)
For this analysis,I am going to use Area Health Resource Files for year 2020 from the Health Resource and Service Administration (HRSA). I am going to use Crude Death Rate and the total population as the numerator and denominator. Total Population is the denominator/offset variable.
There are four predictors in this analysis: poverty rate in 2018, the rural urban continuum code (RUCC) from USDA, child poverty in 2018, and medium house value in 2018.
ahrf2020 <- read_sas("C:/Users/chris/OneDrive - University of Texas at San Antonio/FALL 2021/FALL 2021/SPATIAL DEMOGRAPHY DEM 7263/ACS DATA/ahrf2020.sas7bdat")
ahrf2020<-ahrf2020%>%
mutate(cofips=f00004,
coname=f00010,
state = f00011,
pop = f1198418,
death = f1255818,
pov = f1332118,
childpov= f1332218,
medhouvl=f1461314,
rucc= as.factor(f0002013))%>%
dplyr::select(pop, death, state, cofips, coname, rucc, pov, childpov, rucc, medhouvl)%>%
filter(complete.cases(.))
#as.data.frame()
head(ahrf2020)
## # A tibble: 6 x 9
## pop death state cofips coname rucc pov childpov medhouvl
## <dbl> <dbl> <chr> <chr> <chr> <fct> <dbl> <dbl> <dbl>
## 1 55601 541 01 01001 Autauga 02 13.8 19.3 147900
## 2 218022 2326 01 01003 Baldwin 03 9.8 13.9 189800
## 3 24881 312 01 01005 Barbour 06 30.9 43.9 92900
## 4 22400 252 01 01007 Bibb 01 21.8 27.8 96500
## 5 57840 657 01 01009 Blount 01 13.2 18 124700
## 6 10138 109 01 01011 Bullock 06 42.5 68.3 77500
summary(ahrf2020)
## pop death state cofips
## Min. : 277 Min. : 0.0 Length:3140 Length:3140
## 1st Qu.: 10963 1st Qu.: 120.0 Class :character Class :character
## Median : 25800 Median : 282.0 Mode :character Mode :character
## Mean : 104193 Mean : 902.9
## 3rd Qu.: 67913 3rd Qu.: 699.5
## Max. :10105518 Max. :68164.0
##
## coname rucc pov childpov
## Length:3140 06 :593 Min. : 2.60 Min. : 2.50
## Class :character 07 :433 1st Qu.:10.80 1st Qu.:14.50
## Mode :character 01 :432 Median :14.10 Median :20.10
## 09 :423 Mean :15.17 Mean :21.11
## 02 :378 3rd Qu.:18.30 3rd Qu.:26.30
## 03 :355 Max. :54.00 Max. :68.30
## (Other):526
## medhouvl
## Min. : 20700
## 1st Qu.: 93775
## Median : 122600
## Mean : 147161
## 3rd Qu.: 168300
## Max. :1056500
##
options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))
ahrf2020_m<-geo_join(usco, ahrf2020, by_sp="cofips", by_df="cofips",how="left" )
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ahrf2020_m%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
mutate(cdr=(death/pop)*1000)%>% #Calculating Crude Birth Rate
mutate(cdr_range = cut(cdr, breaks=quantile(cdr, p=seq(0,1,length.out = 6), na.rm=T ), include.lowest=T ))%>% #Making quantile map
ggplot()+
geom_sf(aes(fill=cdr_range, color=NA))+
scale_color_brewer(palette = "Blues")+
scale_fill_brewer(palette = "Blues",na.value = "grey50")+
geom_sf(data=sts, color="black")+
coord_sf(crs = 2163)+
ggtitle(label = "Crude Death Rates, United States, 2018")
ahrf2020wadt<-filter(ahrf2020_m, is.na(death)==F)
fit_pois<- glm(death ~ offset(log(pop)) + pov + childpov + medhouvl + rucc,
family=poisson,
data=ahrf2020wadt)
summary(fit_pois)
##
## Call:
## glm(formula = death ~ offset(log(pop)) + pov + childpov + medhouvl +
## rucc, family = poisson, data = ahrf2020wadt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -70.691 -1.784 0.154 2.311 49.490
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.758e+00 2.526e-03 -1883.41 <2e-16 ***
## pov -4.195e-02 4.046e-04 -103.68 <2e-16 ***
## childpov 3.180e-02 2.658e-04 119.61 <2e-16 ***
## medhouvl -3.732e-07 4.635e-09 -80.53 <2e-16 ***
## rucc02 9.784e-02 1.604e-03 61.00 <2e-16 ***
## rucc03 1.792e-01 2.187e-03 81.94 <2e-16 ***
## rucc04 2.345e-01 2.875e-03 81.57 <2e-16 ***
## rucc05 2.058e-01 4.637e-03 44.39 <2e-16 ***
## rucc06 2.715e-01 2.750e-03 98.71 <2e-16 ***
## rucc07 2.770e-01 3.566e-03 77.68 <2e-16 ***
## rucc08 2.922e-01 6.434e-03 45.42 <2e-16 ***
## rucc09 2.786e-01 6.019e-03 46.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 175447 on 3072 degrees of freedom
## Residual deviance: 97235 on 3061 degrees of freedom
## AIC: 120395
##
## Number of Fisher Scoring iterations: 4
The result show that there are significant effects on the death rates from the predictors with the RUCC categories showing similar levels of results, except counties in Metropolitan areas with a population of 250,000 to 1 million population.
scale<-sqrt(fit_pois$deviance/fit_pois$df.residual)
scale
## [1] 5.636118
This value of 5.636118 is greater than 1 which means the mean and the variance are not equal, which suggests that the variance is greater than the mean. Therefore, there’s an over-dispersion.
1-pchisq(fit_pois$deviance, df = fit_pois$df.residual)
## [1] 0
In this case, the statistic has a p-value of 0, indicating the model does not fit the data well at all. When overdispersion is present in a model, the model results cannot be trusted, because the test statistics of the model rely on the assumptions to be correct.
fit_q_pois<- glm(death ~ offset(log(pop)) + pov + childpov + medhouvl + rucc,
family=quasipoisson,
data=ahrf2020wadt)
summary(fit_q_pois)
##
## Call:
## glm(formula = death ~ offset(log(pop)) + pov + childpov + medhouvl +
## rucc, family = quasipoisson, data = ahrf2020wadt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -70.691 -1.784 0.154 2.311 49.490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.758e+00 1.417e-02 -335.660 < 2e-16 ***
## pov -4.195e-02 2.270e-03 -18.478 < 2e-16 ***
## childpov 3.180e-02 1.492e-03 21.317 < 2e-16 ***
## medhouvl -3.732e-07 2.601e-08 -14.352 < 2e-16 ***
## rucc02 9.784e-02 9.000e-03 10.871 < 2e-16 ***
## rucc03 1.792e-01 1.227e-02 14.603 < 2e-16 ***
## rucc04 2.345e-01 1.613e-02 14.537 < 2e-16 ***
## rucc05 2.058e-01 2.602e-02 7.911 3.54e-15 ***
## rucc06 2.715e-01 1.543e-02 17.593 < 2e-16 ***
## rucc07 2.770e-01 2.001e-02 13.844 < 2e-16 ***
## rucc08 2.922e-01 3.610e-02 8.095 8.19e-16 ***
## rucc09 2.786e-01 3.377e-02 8.247 2.37e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 31.48411)
##
## Null deviance: 175447 on 3072 degrees of freedom
## Residual deviance: 97235 on 3061 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
This output shows that the new test statistics are significantly lower compared to the values under the Poisson assumptions, and the dispersion parameter is 31.48, in the regular Poisson model, it was assumed to be 1.
While the substantive differences of the two models are the same, the quasi-Poisson model should be used in this case.
fit_nb<- glm.nb(death ~ offset(log(pop)) + pov + childpov + medhouvl + rucc,
data=ahrf2020wadt)
summary(fit_nb)
##
## Call:
## glm.nb(formula = death ~ offset(log(pop)) + pov + childpov +
## medhouvl + rucc, data = ahrf2020wadt, init.theta = 30.48715508,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.2944 -0.6286 0.0281 0.5848 3.5413
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.663e+00 1.807e-02 -258.038 < 2e-16 ***
## pov -2.911e-02 1.700e-03 -17.117 < 2e-16 ***
## childpov 2.698e-02 1.179e-03 22.883 < 2e-16 ***
## medhouvl -7.320e-07 4.742e-08 -15.436 < 2e-16 ***
## rucc02 3.373e-02 1.368e-02 2.466 0.0137 *
## rucc03 5.791e-02 1.423e-02 4.070 4.71e-05 ***
## rucc04 8.645e-02 1.630e-02 5.303 1.14e-07 ***
## rucc05 5.271e-02 2.207e-02 2.388 0.0169 *
## rucc06 1.139e-01 1.340e-02 8.498 < 2e-16 ***
## rucc07 1.086e-01 1.430e-02 7.595 3.07e-14 ***
## rucc08 1.269e-01 1.799e-02 7.053 1.75e-12 ***
## rucc09 8.068e-02 1.592e-02 5.069 4.00e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(30.4872) family taken to be 1)
##
## Null deviance: 5182.7 on 3072 degrees of freedom
## Residual deviance: 3292.4 on 3061 degrees of freedom
## AIC: 34084
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 30.487
## Std. Err.: 0.925
##
## 2 x log-likelihood: -34058.013
gain, we see the same overall interpretation of the model effects, but the risk ratios are different compared to the Poisson model:
exp(coef(fit_nb))
## (Intercept) pov childpov medhouvl rucc02 rucc03
## 0.009435983 0.971313737 1.027348122 0.999999268 1.034305404 1.059615183
## rucc04 rucc05 rucc06 rucc07 rucc08 rucc09
## 1.090301421 1.054128784 1.120595276 1.114733270 1.135309366 1.084022565
The Negative Binomial model shows even higher levels of relative risk in the undeserved areas, with a 6.35% increase in risk for counties are totally under-served, and 5.16% difference for counties that are partially under-served. This actually makes more sense that the Poisson model results:
exp(coef(fit_pois))
## (Intercept) pov childpov medhouvl rucc02 rucc03
## 0.008584031 0.958919571 1.032307822 0.999999627 1.102788388 1.196220379
## rucc04 rucc05 rucc06 rucc07 rucc08 rucc09
## 1.264322631 1.228521660 1.311941374 1.319205587 1.339400331 1.321218045
The counties that have higher poverty, also have higher child poverty and show greater risk of death.
nbs<-knearneigh(coordinates(as(ahrf2020wadt, "Spatial")), k = 4, longlat = T)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
lm.morantest(fit_pois, listw = us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm(formula = death ~ offset(log(pop)) + pov + childpov +
## medhouvl + rucc, family = poisson, data = ahrf2020wadt)
## weights: us.wt4
##
## Moran I statistic standard deviate = 30.064, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 3.620865e-01 -3.902831e-06 1.450614e-04
lm.morantest(fit_nb, listw = us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = death ~ offset(log(pop)) + pov + childpov +
## medhouvl + rucc, data = ahrf2020wadt, init.theta = 30.48715508, link =
## log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 33.957, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.4088221368 -0.0000517893 0.0001449800
Moran’s I for poisson model is extremely bigger than the negative binomial models value.
#we have to provide the theta value for the NB family, I got this from the model fit above (fit_nb)
me.fit<-ME(death ~ offset(log(pop)) + pov + childpov + medhouvl + rucc, family=negative.binomial(30.487),
data=ahrf2020wadt, listw = us.wt4, verbose=T,alpha=.05 )
## eV[,3], I: 0.1547179 ZI: NA, pr(ZI): 0.01
## eV[,15], I: 0.1444257 ZI: NA, pr(ZI): 0.01
## eV[,1], I: 0.137544 ZI: NA, pr(ZI): 0.01
## eV[,864], I: 0.1302322 ZI: NA, pr(ZI): 0.01
## eV[,2728], I: 0.1229486 ZI: NA, pr(ZI): 0.01
## eV[,2607], I: 0.1117532 ZI: NA, pr(ZI): 0.01
## eV[,2702], I: 0.09729095 ZI: NA, pr(ZI): 0.01
## eV[,1527], I: 0.0888949 ZI: NA, pr(ZI): 0.01
## eV[,2802], I: 0.0807082 ZI: NA, pr(ZI): 0.01
## eV[,2010], I: 0.07341165 ZI: NA, pr(ZI): 0.01
## eV[,504], I: 0.06618969 ZI: NA, pr(ZI): 0.01
## eV[,1698], I: 0.05993647 ZI: NA, pr(ZI): 0.01
## eV[,2744], I: 0.05424391 ZI: NA, pr(ZI): 0.01
## eV[,2700], I: 0.0488431 ZI: NA, pr(ZI): 0.01
## eV[,1521], I: 0.04348554 ZI: NA, pr(ZI): 0.01
## eV[,506], I: 0.03830265 ZI: NA, pr(ZI): 0.02
## eV[,2604], I: 0.03325588 ZI: NA, pr(ZI): 0.03
## eV[,2603], I: 0.02821804 ZI: NA, pr(ZI): 0.03
## eV[,2011], I: 0.02354713 ZI: NA, pr(ZI): 0.03
## eV[,2788], I: 0.01915904 ZI: NA, pr(ZI): 0.08
me.fit
## Eigenvector ZI pr(ZI)
## 0 NA NA 0.01
## 1 3 NA 0.01
## 2 15 NA 0.01
## 3 1 NA 0.01
## 4 864 NA 0.01
## 5 2728 NA 0.01
## 6 2607 NA 0.01
## 7 2702 NA 0.01
## 8 1527 NA 0.01
## 9 2802 NA 0.01
## 10 2010 NA 0.01
## 11 504 NA 0.01
## 12 1698 NA 0.01
## 13 2744 NA 0.01
## 14 2700 NA 0.01
## 15 1521 NA 0.01
## 16 506 NA 0.02
## 17 2604 NA 0.03
## 18 2603 NA 0.03
## 19 2011 NA 0.03
## 20 2788 NA 0.08
From this above output I can conclude that the autocorrelation is no longer significant.
fits<-data.frame(fitted(me.fit))
ahrf2020wadt$me15<-fits$vec15
ahrf2020wadt$me3<-fits$vec3
ahrf2020wadt%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
ggplot()+
geom_sf(aes(fill=me15, color=me15))+
scale_color_viridis_c()+
scale_fill_viridis_c(na.value = "grey50")+
geom_sf(data=sts, color="black")+
coord_sf(crs = 2163)+
ggtitle(label = "Spatial Distribution Moran Eigenvector 15")
ahrf2020wadt%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
ggplot()+
geom_sf(aes(fill=me3, color=me3))+
scale_color_viridis_c()+
scale_fill_viridis_c(na.value = "grey50")+
geom_sf(data=sts, color="black")+
coord_sf(crs = 2163)+
ggtitle(label = "Spatial Distribution Moran Eigenvector 3")
clean.nb<-glm.nb(death ~ offset(log(pop)) + pov + childpov + medhouvl + rucc + fitted(me.fit), ahrf2020wadt)
summary(clean.nb)
##
## Call:
## glm.nb(formula = death ~ offset(log(pop)) + pov + childpov +
## medhouvl + rucc + fitted(me.fit), data = ahrf2020wadt, init.theta = 34.64891065,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.5304 -0.6492 -0.0039 0.5887 3.7621
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.673e+00 1.718e-02 -271.944 < 2e-16 ***
## pov -2.813e-02 1.621e-03 -17.349 < 2e-16 ***
## childpov 2.678e-02 1.127e-03 23.768 < 2e-16 ***
## medhouvl -8.057e-07 4.506e-08 -17.883 < 2e-16 ***
## rucc02 3.627e-02 1.295e-02 2.802 0.005086 **
## rucc03 6.319e-02 1.354e-02 4.667 3.06e-06 ***
## rucc04 9.314e-02 1.542e-02 6.041 1.53e-09 ***
## rucc05 7.566e-02 2.106e-02 3.593 0.000327 ***
## rucc06 1.263e-01 1.279e-02 9.870 < 2e-16 ***
## rucc07 1.195e-01 1.380e-02 8.661 < 2e-16 ***
## rucc08 1.311e-01 1.729e-02 7.579 3.49e-14 ***
## rucc09 8.578e-02 1.564e-02 5.485 4.13e-08 ***
## fitted(me.fit)vec3 -1.513e+00 1.876e-01 -8.066 7.28e-16 ***
## fitted(me.fit)vec15 1.391e+00 1.921e-01 7.242 4.43e-13 ***
## fitted(me.fit)vec1 -2.689e+00 1.958e-01 -13.735 < 2e-16 ***
## fitted(me.fit)vec864 -2.991e-01 1.909e-01 -1.567 0.117216
## fitted(me.fit)vec2728 2.669e-01 1.882e-01 1.418 0.156195
## fitted(me.fit)vec2607 3.185e-01 1.848e-01 1.723 0.084889 .
## fitted(me.fit)vec2702 -3.392e-01 1.953e-01 -1.737 0.082376 .
## fitted(me.fit)vec1527 -4.180e-01 1.914e-01 -2.184 0.028976 *
## fitted(me.fit)vec2802 4.095e-01 1.939e-01 2.112 0.034685 *
## fitted(me.fit)vec2010 -1.494e-01 1.909e-01 -0.783 0.433685
## fitted(me.fit)vec504 3.775e-01 1.911e-01 1.975 0.048263 *
## fitted(me.fit)vec1698 -5.481e-01 1.976e-01 -2.773 0.005555 **
## fitted(me.fit)vec2744 2.789e-01 1.898e-01 1.470 0.141682
## fitted(me.fit)vec2700 1.108e-01 1.886e-01 0.588 0.556824
## fitted(me.fit)vec1521 3.898e-01 1.945e-01 2.004 0.045115 *
## fitted(me.fit)vec506 -3.193e-01 1.851e-01 -1.726 0.084435 .
## fitted(me.fit)vec2604 -1.869e-01 1.842e-01 -1.015 0.310126
## fitted(me.fit)vec2603 1.667e-01 1.845e-01 0.903 0.366353
## fitted(me.fit)vec2011 -1.716e-01 1.853e-01 -0.926 0.354226
## fitted(me.fit)vec2788 -1.552e-01 1.919e-01 -0.809 0.418701
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(34.6489) family taken to be 1)
##
## Null deviance: 5770.4 on 3072 degrees of freedom
## Residual deviance: 3294.7 on 3041 degrees of freedom
## AIC: 33794
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 34.65
## Std. Err.: 1.07
##
## 2 x log-likelihood: -33728.43
lm.morantest(clean.nb, listw=us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = death ~ offset(log(pop)) + pov + childpov +
## medhouvl + rucc + fitted(me.fit), data = ahrf2020wadt, init.theta =
## 34.64891065, link = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 29.476, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 3.571098e-01 -3.087205e-05 1.468059e-04
The Moran’s I value is still higher, which means the cleaning did not really help, that means it did not filter out much of spatial autocorrelation.
AIC(fit_pois)
## [1] 120395.4
AIC(fit_nb)
## [1] 34084.01
AIC(clean.nb)
## [1] 33794.43
The clean negative binomial model has the lowest AIC, which means we have been able to decrease the autocorrelation in the model, but only marginally.