Homework 4

library(dplyr)
arf2018<-arf2018%>%
  mutate(cofips=f00004, 
         coname=f00010,
         state = f00011,
         AIDSdeaths12=f1193012,
         AIDSdeaths11=f1193011,
         TotDeaths12=f1255812,
         TotDeaths11=f1255811,
         TotPop=f1198411,
         HealthIns= f1475112,
         Poverty= f1332112,
         Unemploy=f0679512,
         rucc= as.factor(f0002013),
         hpsa10= as.factor(f0978710),
         )%>%
  select(cofips, AIDSdeaths12, AIDSdeaths11,TotDeaths12, TotDeaths11, TotPop,state, cofips, Unemploy, coname, HealthIns,Poverty, rucc, hpsa10)%>%
  filter(complete.cases(.))%>%
  as.data.frame()


summary(arf2018)

##     cofips           AIDSdeaths12     AIDSdeaths11     TotDeaths12   
##  Length:122         Min.   : 11.00   Min.   : 11.00   Min.   : 1476  
##  Class :character   1st Qu.: 15.00   1st Qu.: 15.00   1st Qu.: 3787  
##  Mode  :character   Median : 20.00   Median : 20.50   Median : 6036  
##                     Mean   : 38.48   Mean   : 40.49   Mean   : 8005  
##                     3rd Qu.: 41.00   3rd Qu.: 46.25   3rd Qu.: 9883  
##                     Max.   :233.00   Max.   :233.00   Max.   :61856  
##                                                                      
##   TotDeaths11        TotPop           state              Unemploy     
##  Min.   : 1415   Min.   : 156433   Length:122         Min.   : 5.100  
##  1st Qu.: 3634   1st Qu.: 494923   Class :character   1st Qu.: 7.300  
##  Median : 5872   Median : 799086   Mode  :character   Median : 8.450  
##  Mean   : 7817   Mean   :1068866                      Mean   : 8.544  
##  3rd Qu.: 9529   3rd Qu.:1214999                      3rd Qu.: 9.375  
##  Max.   :60611   Max.   :9889056                      Max.   :15.200  
##                                                                       
##     coname            HealthIns        Poverty           rucc    hpsa10
##  Length:122         Min.   : 4.00   Min.   : 6.60   01     :87    : 0  
##  Class :character   1st Qu.:13.95   1st Qu.:14.10   02     :34   0: 4  
##  Mode  :character   Median :18.20   Median :18.00   03     : 1   1:53  
##                     Mean   :17.85   Mean   :17.61          : 0   2:65  
##                     3rd Qu.:20.98   3rd Qu.:20.38   04     : 0         
##                     Max.   :38.60   Max.   :34.20   05     : 0         
##                                                     (Other): 0

summary(usco)

##      pblack            phisp            prural           rucc          
##  Min.   : 0.0000   Min.   : 0.000   Min.   :  0.00   Length:3101       
##  1st Qu.: 0.4603   1st Qu.: 1.154   1st Qu.: 33.60   Class :character  
##  Median : 2.1182   Median : 2.353   Median : 59.50   Mode  :character  
##  Mean   : 9.0072   Mean   : 7.114   Mean   : 58.62                     
##  3rd Qu.:10.3887   3rd Qu.: 6.478   3rd Qu.: 87.20                     
##  Max.   :85.9945   Max.   :97.571   Max.   :100.00                     
##                                                                        
##       ppov           pfemhh         unemp         medhouseval    
##  Min.   : 2.50   Min.   : 2.7   Min.   : 1.800   Min.   : 31500  
##  1st Qu.:10.80   1st Qu.:12.7   1st Qu.: 4.100   1st Qu.: 84500  
##  Median :14.20   Median :15.5   Median : 5.100   Median :108900  
##  Mean   :15.33   Mean   :16.7   Mean   : 5.408   Mean   :130283  
##  3rd Qu.:18.60   3rd Qu.:19.3   3rd Qu.: 6.200   3rd Qu.:152900  
##  Max.   :51.00   Max.   :47.8   Max.   :16.000   Max.   :912600  
##                                                  NA's   :1       
##    popdensity             gini           County              Deaths      
##  Min.   :9.269e+04   Min.   :0.2070   Length:3101        Min.   :    12  
##  1st Qu.:1.715e+07   1st Qu.:0.4070   Class :character   1st Qu.:   646  
##  Median :4.405e+07   Median :0.4290   Mode  :character   Median :  1402  
##  Mean   :2.279e+08   Mean   :0.4313                      Mean   :  4182  
##  3rd Qu.:1.058e+08   3rd Qu.:0.4530                      3rd Qu.:  3368  
##  Max.   :6.979e+10   Max.   :0.6450                      Max.   :298108  
##                                                          NA's   :2       
##    Population       Age.Adjusted.Rate    state              geoid          
##  Min.   :     481   Min.   : 300.9    Length:3101        Length:3101       
##  1st Qu.:   56089   1st Qu.: 721.6    Class :character   Class :character  
##  Median :  131260   Median : 806.4    Mode  :character   Mode  :character  
##  Mean   :  511096   Mean   : 815.3                                         
##  3rd Qu.:  344108   3rd Qu.: 906.4                                         
##  Max.   :50036337   Max.   :1435.7                                         
##                     NA's   :7                                              
##           geometry               metro_cut   
##  MULTIPOLYGON :3101   Metropolitan    :2049  
##  epsg:9311    :   0   Non-Metropolitan:1052  
##  +proj=laea...:   0                          
##                                              
##                                              
##                                              
##

library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

library(sf)
library(ggplot2)

options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
  filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))

arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )

## Warning: We recommend using the dplyr::*_join() family of functions instead.

## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help

arf2018_m%>%
  filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
  mutate(lbrate=AIDSdeaths12/TotPop)%>%
  mutate(lb_group = cut(lbrate, breaks=quantile(lbrate, p=seq(0,1,length.out = 6), na.rm=T ), include.lowest=T ))%>%
  ggplot()+
  geom_sf(aes(fill=lb_group, color=NA))+
  scale_color_brewer(palette = "Blues")+
  scale_fill_brewer(palette = "Blues",na.value = "grey50")+
  geom_sf(data=sts, color="black")+
  coord_sf(crs = 2163)+
  ggtitle(label = "AIDS Mortality, 2012-2014")

arf2018sub<-filter(arf2018_m, is.na(AIDSdeaths12)==F)

#Poisson Analysis

fit_pois<- glm(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy),
               family=poisson, 
               data= arf2018sub)
summary(fit_pois)

## 
## Call:
## glm(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) + 
##     scale(Poverty) + scale(Unemploy), family = poisson, data = arf2018sub)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -9.6343  -2.1259   0.0249   1.6064  12.0154  
## 
## Coefficients:
##                    Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)      -10.280910   0.015564 -660.538   <2e-16 ***
## scale(HealthIns)  -0.008689   0.014743   -0.589   0.5556    
## scale(Poverty)     0.374350   0.018933   19.772   <2e-16 ***
## scale(Unemploy)   -0.062538   0.016077   -3.890   0.0001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1934.5  on 118  degrees of freedom
## Residual deviance: 1468.0  on 115  degrees of freedom
## AIC: 2085
## 
## Number of Fisher Scoring iterations: 5

exp(coef(fit_pois))

##      (Intercept) scale(HealthIns)   scale(Poverty)  scale(Unemploy) 
##     3.428133e-05     9.913490e-01     1.454047e+00     9.393771e-01

scale<-sqrt(fit_pois$deviance/fit_pois$df.residual)
scale

## [1] 3.57279

1-pchisq(fit_pois$deviance, df = fit_pois$df.residual)

## [1] 0

library(MASS)
fit_nb<- glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy), 
               data=arf2018sub)
summary(fit_nb)

## 
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) + 
##     scale(Poverty) + scale(Unemploy), data = arf2018sub, init.theta = 3.811051054, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8952  -0.8713  -0.1871   0.4250   3.0372  
## 
## Coefficients:
##                    Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)      -1.018e+01  5.040e-02 -201.963  < 2e-16 ***
## scale(HealthIns)  9.349e-04  5.754e-02    0.016    0.987    
## scale(Poverty)    3.455e-01  6.110e-02    5.656 1.55e-08 ***
## scale(Unemploy)  -7.191e-03  5.492e-02   -0.131    0.896    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
## 
##     Null deviance: 159.93  on 118  degrees of freedom
## Residual deviance: 118.96  on 115  degrees of freedom
## AIC: 991.97
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.811 
##           Std. Err.:  0.528 
## 
##  2 x log-likelihood:  -981.969

exp(coef(fit_nb))

##      (Intercept) scale(HealthIns)   scale(Poverty)  scale(Unemploy) 
##     0.0000379558     1.0009353770     1.4127496300     0.9928346623

library(spdep)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

nbs<-knearneigh(coordinates(as_Spatial(arf2018sub)), k = 4)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
lm.morantest(fit_pois, listw = us.wt4)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm(formula = AIDSdeaths12 ~ offset(log(TotPop)) +
## scale(HealthIns) + scale(Poverty) + scale(Unemploy), family = poisson,
## data = arf2018sub)
## weights: us.wt4
## 
## Moran I statistic standard deviate = 6.9342, p-value = 2.042e-12
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.4110844638    -0.0004950083     0.0035229881

lm.morantest(fit_nb, listw = us.wt4)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) +
## scale(HealthIns) + scale(Poverty) + scale(Unemploy), data = arf2018sub,
## init.theta = 3.811051054, link = log)
## weights: us.wt4
## 
## Moran I statistic standard deviate = 7.5535, p-value = 2.118e-14
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.431457945     -0.005534537      0.003346928

library(spdep)
arf2018sub<-arf2018sub%>%
  filter(!STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))

nbs<-knearneigh(coordinates(as(arf2018sub, "Spatial")), k = 4, longlat = T)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")

fitnb<-glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy,
           data=arf2018sub)
fitnb$theta

## [1] 3.811051

summary(fit_nb)

## 
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) + 
##     scale(Poverty) + scale(Unemploy), data = arf2018sub, init.theta = 3.811051054, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8952  -0.8713  -0.1871   0.4250   3.0372  
## 
## Coefficients:
##                    Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)      -1.018e+01  5.040e-02 -201.963  < 2e-16 ***
## scale(HealthIns)  9.349e-04  5.754e-02    0.016    0.987    
## scale(Poverty)    3.455e-01  6.110e-02    5.656 1.55e-08 ***
## scale(Unemploy)  -7.191e-03  5.492e-02   -0.131    0.896    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
## 
##     Null deviance: 159.93  on 118  degrees of freedom
## Residual deviance: 118.96  on 115  degrees of freedom
## AIC: 991.97
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.811 
##           Std. Err.:  0.528 
## 
##  2 x log-likelihood:  -981.969

me.fit<-ME(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy,
           data=arf2018sub,
           family=poisson(),
           listw = us.wt4,
           verbose=T,alpha=.05 )

## eV[,3], I: 0.1703164 ZI: NA, pr(ZI): 0.01
## eV[,8], I: 0.1120488 ZI: NA, pr(ZI): 0.01
## eV[,7], I: 0.0625373 ZI: NA, pr(ZI): 0.12

me.fit

##   Eigenvector ZI pr(ZI)
## 0          NA NA   0.01
## 1           3 NA   0.01
## 2           8 NA   0.01
## 3           7 NA   0.12

fits<-data.frame(fitted(me.fit))
arf2018sub$me3<-fits$vec3
arf2018sub$me8<-fits$vec8

arf2018sub%>%
  ggplot()+
  geom_sf(aes(fill=me3))+
  scale_fill_viridis_c()+
  coord_sf(crs = 2163)+
  ggtitle("First Moran Eigenvector")

arf2018sub%>%
  ggplot()+
  geom_sf(aes(fill=me8))+
  scale_fill_viridis_c()+
  coord_sf(crs = 2163)+
  ggtitle("First Moran Eigenvector")

clean.nb<-glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns+ Poverty + Unemploy +fitted(me.fit), arf2018sub)
summary(clean.nb)

## 
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns + 
##     Poverty + Unemploy + fitted(me.fit), data = arf2018sub, init.theta = 6.526874198, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3256  -0.6016  -0.1877   0.3391   2.9554  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -11.159974   0.212220 -52.587  < 2e-16 ***
## HealthIns           -0.011915   0.008611  -1.384   0.1665    
## Poverty              0.070601   0.009169   7.700 1.37e-14 ***
## Unemploy            -0.009790   0.025196  -0.389   0.6976    
## fitted(me.fit)vec3  -2.574509   0.436538  -5.898 3.69e-09 ***
## fitted(me.fit)vec8   1.127609   0.460287   2.450   0.0143 *  
## fitted(me.fit)vec7  -2.681995   0.475365  -5.642 1.68e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(6.5269) family taken to be 1)
## 
##     Null deviance: 254.06  on 118  degrees of freedom
## Residual deviance: 115.25  on 112  degrees of freedom
## AIC: 939.94
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  6.527 
##           Std. Err.:  0.997 
## 
##  2 x log-likelihood:  -923.939

lm.morantest(fitnb, listw=us.wt4)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns
## + Poverty + Unemploy, data = arf2018sub, init.theta = 3.811051054, link
## = log)
## weights: us.wt4
## 
## Moran I statistic standard deviate = 7.8868, p-value = 1.55e-15
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.446788188     -0.005499588      0.003288750

lm.morantest(clean.nb, listw=us.wt4)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + HealthIns
## + Poverty + Unemploy + fitted(me.fit), data = arf2018sub, init.theta =
## 6.526874198, link = log)
## weights: us.wt4
## 
## Moran I statistic standard deviate = 2.8282, p-value = 0.00234
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.156353716     -0.008295555      0.003389114

Denominator/offset variable: Total Population (2012) Numerator: AIDS Deaths (2012) IV: Persons in Poverty (2012) IV: < 64 with Health Insurance (2012) IV: Unemployment (2012)

Poisson model results: As Poverty increases by one unit, the number of AIDs deaths increases by 0.37 units. p < .0001 As Health Insurance increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .556 As Unemployment rate increases by one unit, the number of AIDs deaths decreases by 0.001 units. p < .0001 The model is over-dispersed, p < .05 Negative binomial model results: As Poverty increases by one unit, the number of AIDs deaths increases by 0.37 units. p < .0001 As Health Insurance increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .987 As Unemployment rate increases by one unit, the number of AIDs deaths decreases by 0.001 units. p = .896. Yes, the filtering approach reduced the special autocorrelation in the model. Original Moran’s I = .44, Clean Moran’s I = .16.