library(dplyr)
arf2018<-arf2018%>%
mutate(cofips=f00004,
coname=f00010,
state = f00011,
AIDSdeaths12=f1193012,
AIDSdeaths11=f1193011,
TotDeaths12=f1255812,
TotDeaths11=f1255811,
TotPop=f1198411,
HealthIns= f1475112,
Poverty= f1332112,
Unemploy=f0679512,
rucc= as.factor(f0002013),
hpsa10= as.factor(f0978710),
)%>%
select(cofips, AIDSdeaths12, AIDSdeaths11,TotDeaths12, TotDeaths11, TotPop,state, cofips, Unemploy, coname, HealthIns,Poverty, rucc, hpsa10)%>%
filter(complete.cases(.))%>%
as.data.frame()
summary(arf2018)
## cofips AIDSdeaths12 AIDSdeaths11 TotDeaths12
## Length:122 Min. : 11.00 Min. : 11.00 Min. : 1476
## Class :character 1st Qu.: 15.00 1st Qu.: 15.00 1st Qu.: 3787
## Mode :character Median : 20.00 Median : 20.50 Median : 6036
## Mean : 38.48 Mean : 40.49 Mean : 8005
## 3rd Qu.: 41.00 3rd Qu.: 46.25 3rd Qu.: 9883
## Max. :233.00 Max. :233.00 Max. :61856
##
## TotDeaths11 TotPop state Unemploy
## Min. : 1415 Min. : 156433 Length:122 Min. : 5.100
## 1st Qu.: 3634 1st Qu.: 494923 Class :character 1st Qu.: 7.300
## Median : 5872 Median : 799086 Mode :character Median : 8.450
## Mean : 7817 Mean :1068866 Mean : 8.544
## 3rd Qu.: 9529 3rd Qu.:1214999 3rd Qu.: 9.375
## Max. :60611 Max. :9889056 Max. :15.200
##
## coname HealthIns Poverty rucc hpsa10
## Length:122 Min. : 4.00 Min. : 6.60 01 :87 : 0
## Class :character 1st Qu.:13.95 1st Qu.:14.10 02 :34 0: 4
## Mode :character Median :18.20 Median :18.00 03 : 1 1:53
## Mean :17.85 Mean :17.61 : 0 2:65
## 3rd Qu.:20.98 3rd Qu.:20.38 04 : 0
## Max. :38.60 Max. :34.20 05 : 0
## (Other): 0
summary(usco)
## pblack phisp prural rucc
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Length:3101
## 1st Qu.: 0.4603 1st Qu.: 1.154 1st Qu.: 33.60 Class :character
## Median : 2.1182 Median : 2.353 Median : 59.50 Mode :character
## Mean : 9.0072 Mean : 7.114 Mean : 58.62
## 3rd Qu.:10.3887 3rd Qu.: 6.478 3rd Qu.: 87.20
## Max. :85.9945 Max. :97.571 Max. :100.00
##
## ppov pfemhh unemp medhouseval
## Min. : 2.50 Min. : 2.7 Min. : 1.800 Min. : 31500
## 1st Qu.:10.80 1st Qu.:12.7 1st Qu.: 4.100 1st Qu.: 84500
## Median :14.20 Median :15.5 Median : 5.100 Median :108900
## Mean :15.33 Mean :16.7 Mean : 5.408 Mean :130283
## 3rd Qu.:18.60 3rd Qu.:19.3 3rd Qu.: 6.200 3rd Qu.:152900
## Max. :51.00 Max. :47.8 Max. :16.000 Max. :912600
## NA's :1
## popdensity gini County Deaths
## Min. :9.269e+04 Min. :0.2070 Length:3101 Min. : 12
## 1st Qu.:1.715e+07 1st Qu.:0.4070 Class :character 1st Qu.: 646
## Median :4.405e+07 Median :0.4290 Mode :character Median : 1402
## Mean :2.279e+08 Mean :0.4313 Mean : 4182
## 3rd Qu.:1.058e+08 3rd Qu.:0.4530 3rd Qu.: 3368
## Max. :6.979e+10 Max. :0.6450 Max. :298108
## NA's :2
## Population Age.Adjusted.Rate state geoid
## Min. : 481 Min. : 300.9 Length:3101 Length:3101
## 1st Qu.: 56089 1st Qu.: 721.6 Class :character Class :character
## Median : 131260 Median : 806.4 Mode :character Mode :character
## Mean : 511096 Mean : 815.3
## 3rd Qu.: 344108 3rd Qu.: 906.4
## Max. :50036337 Max. :1435.7
## NA's :7
## geometry metro_cut
## MULTIPOLYGON :3101 Metropolitan :2049
## epsg:9311 : 0 Non-Metropolitan:1052
## +proj=laea...: 0
##
##
##
##
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(sf)
library(ggplot2)
options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))
arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
arf2018_m%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
mutate(lbrate=AIDSdeaths12/TotPop)%>%
mutate(lb_group = cut(lbrate, breaks=quantile(lbrate, p=seq(0,1,length.out = 6), na.rm=T ), include.lowest=T ))%>%
ggplot()+
geom_sf(aes(fill=lb_group, color=NA))+
scale_color_brewer(palette = "Blues")+
scale_fill_brewer(palette = "Blues",na.value = "grey50")+
geom_sf(data=sts, color="black")+
coord_sf(crs = 2163)+
ggtitle(label = "AIDS Mortality, 2012-2014")
arf2018sub<-filter(arf2018_m, is.na(AIDSdeaths12)==F)
#Poisson Analysis
fit_pois<- glm(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy),
family=poisson,
data= arf2018sub)
summary(fit_pois)
##
## Call:
## glm(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) +
## scale(Poverty) + scale(Unemploy), family = poisson, data = arf2018sub)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.6343 -2.1259 0.0249 1.6064 12.0154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.280910 0.015564 -660.538 <2e-16 ***
## scale(HealthIns) -0.008689 0.014743 -0.589 0.5556
## scale(Poverty) 0.374350 0.018933 19.772 <2e-16 ***
## scale(Unemploy) -0.062538 0.016077 -3.890 0.0001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1934.5 on 118 degrees of freedom
## Residual deviance: 1468.0 on 115 degrees of freedom
## AIC: 2085
##
## Number of Fisher Scoring iterations: 5
exp(coef(fit_pois))
## (Intercept) scale(HealthIns) scale(Poverty) scale(Unemploy)
## 3.428133e-05 9.913490e-01 1.454047e+00 9.393771e-01
scale<-sqrt(fit_pois$deviance/fit_pois$df.residual)
scale
## [1] 3.57279
1-pchisq(fit_pois$deviance, df = fit_pois$df.residual)
## [1] 0
library(MASS)
fit_nb<- glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy),
data=arf2018sub)
summary(fit_nb)
##
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) +
## scale(Poverty) + scale(Unemploy), data = arf2018sub, init.theta = 3.811051054,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8952 -0.8713 -0.1871 0.4250 3.0372
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.018e+01 5.040e-02 -201.963 < 2e-16 ***
## scale(HealthIns) 9.349e-04 5.754e-02 0.016 0.987
## scale(Poverty) 3.455e-01 6.110e-02 5.656 1.55e-08 ***
## scale(Unemploy) -7.191e-03 5.492e-02 -0.131 0.896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
##
## Null deviance: 159.93 on 118 degrees of freedom
## Residual deviance: 118.96 on 115 degrees of freedom
## AIC: 991.97
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.811
## Std. Err.: 0.528
##
## 2 x log-likelihood: -981.969
exp(coef(fit_nb))
## (Intercept) scale(HealthIns) scale(Poverty) scale(Unemploy)
## 0.0000379558 1.0009353770 1.4127496300 0.9928346623
library(spdep)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
nbs<-knearneigh(coordinates(as_Spatial(arf2018sub)), k = 4)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
lm.morantest(fit_pois, listw = us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm(formula = AIDSdeaths12 ~ offset(log(TotPop)) +
## scale(HealthIns) + scale(Poverty) + scale(Unemploy), family = poisson,
## data = arf2018sub)
## weights: us.wt4
##
## Moran I statistic standard deviate = 6.9342, p-value = 2.042e-12
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.4110844638 -0.0004950083 0.0035229881
lm.morantest(fit_nb, listw = us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) +
## scale(HealthIns) + scale(Poverty) + scale(Unemploy), data = arf2018sub,
## init.theta = 3.811051054, link = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 7.5535, p-value = 2.118e-14
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.431457945 -0.005534537 0.003346928
library(spdep)
arf2018sub<-arf2018sub%>%
filter(!STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))
nbs<-knearneigh(coordinates(as(arf2018sub, "Spatial")), k = 4, longlat = T)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
fitnb<-glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy,
data=arf2018sub)
fitnb$theta
## [1] 3.811051
summary(fit_nb)
##
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) +
## scale(Poverty) + scale(Unemploy), data = arf2018sub, init.theta = 3.811051054,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8952 -0.8713 -0.1871 0.4250 3.0372
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.018e+01 5.040e-02 -201.963 < 2e-16 ***
## scale(HealthIns) 9.349e-04 5.754e-02 0.016 0.987
## scale(Poverty) 3.455e-01 6.110e-02 5.656 1.55e-08 ***
## scale(Unemploy) -7.191e-03 5.492e-02 -0.131 0.896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
##
## Null deviance: 159.93 on 118 degrees of freedom
## Residual deviance: 118.96 on 115 degrees of freedom
## AIC: 991.97
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.811
## Std. Err.: 0.528
##
## 2 x log-likelihood: -981.969
me.fit<-ME(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy,
data=arf2018sub,
family=poisson(),
listw = us.wt4,
verbose=T,alpha=.05 )
## eV[,3], I: 0.1703164 ZI: NA, pr(ZI): 0.01
## eV[,8], I: 0.1120488 ZI: NA, pr(ZI): 0.01
## eV[,7], I: 0.0625373 ZI: NA, pr(ZI): 0.12
me.fit
## Eigenvector ZI pr(ZI)
## 0 NA NA 0.01
## 1 3 NA 0.01
## 2 8 NA 0.01
## 3 7 NA 0.12
fits<-data.frame(fitted(me.fit))
arf2018sub$me3<-fits$vec3
arf2018sub$me8<-fits$vec8
arf2018sub%>%
ggplot()+
geom_sf(aes(fill=me3))+
scale_fill_viridis_c()+
coord_sf(crs = 2163)+
ggtitle("First Moran Eigenvector")
arf2018sub%>%
ggplot()+
geom_sf(aes(fill=me8))+
scale_fill_viridis_c()+
coord_sf(crs = 2163)+
ggtitle("First Moran Eigenvector")
clean.nb<-glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy +fitted(me.fit), arf2018sub)
summary(clean.nb)
##
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns +
## Poverty + Unemploy + fitted(me.fit), data = arf2018sub, init.theta = 6.526874198,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3256 -0.6016 -0.1877 0.3391 2.9554
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.159974 0.212220 -52.587 < 2e-16 ***
## HealthIns -0.011915 0.008611 -1.384 0.1665
## Poverty 0.070601 0.009169 7.700 1.37e-14 ***
## Unemploy -0.009790 0.025196 -0.389 0.6976
## fitted(me.fit)vec3 -2.574509 0.436538 -5.898 3.69e-09 ***
## fitted(me.fit)vec8 1.127609 0.460287 2.450 0.0143 *
## fitted(me.fit)vec7 -2.681995 0.475365 -5.642 1.68e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(6.5269) family taken to be 1)
##
## Null deviance: 254.06 on 118 degrees of freedom
## Residual deviance: 115.25 on 112 degrees of freedom
## AIC: 939.94
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 6.527
## Std. Err.: 0.997
##
## 2 x log-likelihood: -923.939
lm.morantest(fitnb, listw=us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns
## + Poverty + Unemploy, data = arf2018sub, init.theta = 3.811051054, link
## = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 7.8868, p-value = 1.55e-15
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.446788188 -0.005499588 0.003288750
lm.morantest(clean.nb, listw=us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns
## + Poverty + Unemploy + fitted(me.fit), data = arf2018sub, init.theta =
## 6.526874198, link = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 2.8282, p-value = 0.00234
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.156353716 -0.008295555 0.003389114
Denominator/offset variable: Total Population (2012) Numerator: AIDS Deaths (2012) IV: Persons in Poverty (2012) IV: < 64 with Health Insurance (2012) IV: Unemployment (2012)
Poisson model results: As Poverty increases by one unit, the number of AIDs deaths increases by 0.37 units. p < .0001 As Health Insurance increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .556 As Unemployment rate increases by one unit, the number of AIDs deaths decreases by 0.001 units. p < .0001 The model is over-dispersed, p < .05 Negative binomial model results: As Poverty increases by one unit, the number of AIDs deaths increases by 0.37 units. p < .0001 As Health Insurance increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .987 As Unemployment rate increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .896. Yes, the filtering approach reduced the special autocorrelation in the model. Original Moran’s I = .44, Clean Moran’s I = .16.