HW6

library(spdep)

## Loading required package: sp

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: sf

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(MASS)
library(spatialreg)

## Loading required package: Matrix

## 
## Attaching package: 'spatialreg'

## The following objects are masked from 'package:spdep':
## 
##     as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
##     create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, sacsarlm,
##     SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
##     set.ClusterOption, set.coresOption, set.mcOption,
##     set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
##     spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
##     spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::select() masks MASS::select()
## x tidyr::unpack() masks Matrix::unpack()

library(ggplot2)
library(INLA)

## Loading required package: foreach

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

## Loading required package: parallel

## This is INLA_21.02.23 built 2021-08-23 15:24:01 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - Save 379.7Mb of storage running 'inla.prune()'

library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

library(tidycensus)

arf2018<-"https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"
load(url(arf2018))



library(dplyr)
df<-arf2018%>%
  mutate(cofips=f00004, 
         coname=f00010,
         state = f00011,
         AIDSdeaths12=f1193012,
         AIDSdeaths11=f1193011,
         TotDeaths12=f1255812, #you need total population, not total deaths
         TotDeaths11=f1255811,
         TotPop=f1198411,
         HealthIns= f1475112,
         Poverty= f1332112,
         Unemploy=f0679512,
         rucc= as.factor(f0002013),
         hpsa10= as.factor(f0978710),
         )%>%
  select(cofips, AIDSdeaths12, AIDSdeaths11,TotDeaths12, TotDeaths11, TotPop,state, cofips, Unemploy, coname, HealthIns,Poverty, rucc, hpsa10)%>%
  filter(complete.cases(.))%>%
  as.data.frame()


head(df)

##   cofips AIDSdeaths12 AIDSdeaths11 TotDeaths12 TotDeaths11  TotPop state
## 1  01073           28           28        6902        6895  658931    01
## 2  01097           27           24        4239        4149  412577    01
## 3  01101           16           14        2040        2046  232032    01
## 4  04013           66           65       27139       26041 3880244    04
## 5  04019           17           16        8990        8611  989569    04
## 6  05119           18           17        3447        3377  386299    05
##   Unemploy     coname HealthIns Poverty rucc hpsa10
## 1      6.8  Jefferson      14.5    18.6   01      2
## 2      8.4     Mobile      17.7    21.0   02      2
## 3      7.7 Montgomery      15.4    21.7   02      2
## 4      7.1   Maricopa      20.0    17.4   01      1
## 5      7.3       Pima      18.8    19.9   02      1
## 6      6.6    Pulaski      16.8    18.6   02      1

library(sf)
library(ggplot2)
options(tigris_class="sf")
cov_dat<-counties(cb=T, year=2010)
cov_dat$cofips<-substr(cov_dat$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
  filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))

cov_dat$struct<-1:dim(cov_dat)[1]  
nbs<-knearneigh(st_centroid(cov_dat), k = 5, longlat = T)

## Warning in st_centroid.sf(cov_dat): st_centroid assumes attributes are constant
## over geometries of x

## Warning in knearneigh(st_centroid(cov_dat), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object

nbs<-knn2nb(nbs, row.names = cov_dat$struct, sym = T) 
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat) 
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])


nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")


final.dat<-merge(cov_dat, df, by="cofips") #join spatial data first
head(final.dat)

final.dat$E_d<-final.dat$TotPop*(sum(final.dat$AIDSdeaths12)/sum(final.dat$TotPop))
final.dat<-final.dat[order(final.dat$cofips),]
#final.dat$id<-1:dim(final.dat)[1]

head(final.dat)

## Simple feature collection with 6 features and 23 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -113.3344 ymin: 30.22803 xmax: -85.91883 ymax: 35.01448
## Geodetic CRS:  NAD83
##   cofips         GEO_ID STATE COUNTY       NAME   LSAD CENSUSAREA COUNTYFP
## 1  01073 0500000US01073    01    073  Jefferson County   1111.276      073
## 2  01097 0500000US01097    01    097     Mobile County   1229.435      097
## 3  01101 0500000US01101    01    101 Montgomery County    784.247      101
## 4  04013 0500000US04013    04    013   Maricopa County   9200.143      013
## 5  04019 0500000US04019    04    019       Pima County   9187.036      019
## 6  05119 0500000US05119    05    119    Pulaski County    759.763      119
##   STATEFP struct AIDSdeaths12 AIDSdeaths11 TotDeaths12 TotDeaths11  TotPop
## 1      01     34           28           28        6902        6895  658931
## 2      01     40           27           24        4239        4149  412577
## 3      01     60           16           14        2040        2046  232032
## 4      04    104           66           65       27139       26041 3880244
## 5      04    107           17           16        8990        8611  989569
## 6      05    179           18           17        3447        3377  386299
##   state Unemploy     coname HealthIns Poverty rucc hpsa10
## 1    01      6.8  Jefferson      14.5    18.6   01      2
## 2    01      8.4     Mobile      17.7    21.0   02      2
## 3    01      7.7 Montgomery      15.4    21.7   02      2
## 4    04      7.1   Maricopa      20.0    17.4   01      1
## 5    04      7.3       Pima      18.8    19.9   02      1
## 6    05      6.6    Pulaski      16.8    18.6   02      1
##                         geometry        E_d
## 1 MULTIPOLYGON (((-86.75257 3...  23.176040
## 2 MULTIPOLYGON (((-88.02399 3...  14.511233
## 3 MULTIPOLYGON (((-86.08611 3...   8.161071
## 4 MULTIPOLYGON (((-112.9158 3... 136.476641
## 5 MULTIPOLYGON (((-112.3881 3...  34.805299
## 6 MULTIPOLYGON (((-92.65256 3...  13.586978

library(tigris)
us_co<-counties( cb = T)
us_co<-us_co%>%
  subset(!STATEFP%in%c("02", "15", "60", "66", "69", "72", "78"))

ggplot(data = final.dat)+geom_histogram(aes(x =(AIDSdeaths12/TotPop) ,
                                            y=0.5*..density..))+

  ggtitle(label = "Distribution of Crude HIV Death rate",
          subtitle = "Southern US Counties, 2012")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = final.dat)+geom_histogram(aes(x =AIDSdeaths12/E_d ,
                                            y=0.5*..density..))+
  
  ggtitle(label = "Distribution of HIV SMR", 
          subtitle = "Southern US Counties, 2012")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

sts<- states(cb=T)%>%
  st_transform(crs= 2163)%>%
  st_boundary()%>%
  subset(!STATEFP%in%c("02", "15", "60", "66", "69", "72", "78"))

final.dat%>%
 
   ggplot()+
  geom_sf(aes(fill=AIDSdeaths12/E_d))+
 

  scale_fill_viridis_c()+
  guides(fill=guide_legend(title="Relative Risk"))+
   geom_sf(data=sts, color="black")+
  ggtitle(label="Relative Risk")+
  coord_sf(crs = 2163)

f0<-glm.nb(AIDSdeaths12~ scale (HealthIns) + scale (Poverty) +scale(Unemploy)+offset(log(E_d)), data=final.dat)
summary(f0)

## 
## Call:
## glm.nb(formula = AIDSdeaths12 ~ scale(HealthIns) + scale(Poverty) + 
##     scale(Unemploy) + offset(log(E_d)), data = final.dat, init.theta = 3.811051054, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8952  -0.8713  -0.1871   0.4250   3.0372  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.0761668  0.0504006   1.511    0.131    
## scale(HealthIns)  0.0009349  0.0575426   0.016    0.987    
## scale(Poverty)    0.3455379  0.0610951   5.656 1.55e-08 ***
## scale(Unemploy)  -0.0071911  0.0549176  -0.131    0.896    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
## 
##     Null deviance: 159.93  on 118  degrees of freedom
## Residual deviance: 118.96  on 115  degrees of freedom
## AIC: 991.97
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.811 
##           Std. Err.:  0.528 
## 
##  2 x log-likelihood:  -981.969

f1<-AIDSdeaths12~ scale (HealthIns) + scale (Poverty) +scale(Unemploy)

mod1<-inla(formula = f1,data = final.dat, 
           family = "nbinomial", E = E_d,  
           control.compute = list(waic=T), 
           control.predictor = list(link=1), 
           num.threads = 3, 
               verbose = F)
summary(mod1)

## 
## Call:
##    c("inla(formula = f1, family = \"nbinomial\", data = final.dat, E = 
##    E_d, ", " verbose = F, control.compute = list(waic = T), 
##    control.predictor = list(link = 1), ", " num.threads = 3)") 
## Time used:
##     Pre = 0.653, Running = 0.211, Post = 0.146, Total = 1.01 
## Fixed effects:
##                    mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)       0.079 0.051     -0.020    0.079      0.181  0.078   0
## scale(HealthIns)  0.001 0.057     -0.111    0.000      0.112  0.000   0
## scale(Poverty)    0.346 0.062      0.225    0.345      0.468  0.345   0
## scale(Unemploy)  -0.007 0.060     -0.125   -0.007      0.112 -0.008   0
## 
## Model hyperparameters:
##                                                        mean    sd 0.025quant
## size for the nbinomial observations (1/overdispersion) 3.80 0.524       2.86
##                                                        0.5quant 0.975quant mode
## size for the nbinomial observations (1/overdispersion)     3.77       4.91 3.71
## 
## Expected number of effective parameters(stdev): 4.00(0.00)
## Number of equivalent replicates : 29.73 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 994.20
## Effective number of parameters .................: 6.66
## 
## Marginal log-Likelihood:  -516.10 
## Posterior marginals for the linear predictor and
##  the fitted values are computed

f2<-AIDSdeaths12~ scale (HealthIns) + scale (Poverty) +scale(Unemploy)+ #fixed effects
  f(struct, model = "iid")  #random effects

mod2<-inla(formula = f2,
           data = final.dat,
           family = "nbinomial",
           E = E_d, 
           control.compute = list(waic=T), 
           control.predictor = list(link=1),
           num.threads = 3, 
               verbose = F)

#total model summary
summary(mod2)

## 
## Call:
##    c("inla(formula = f2, family = \"nbinomial\", data = final.dat, E = 
##    E_d, ", " verbose = F, control.compute = list(waic = T), 
##    control.predictor = list(link = 1), ", " num.threads = 3)") 
## Time used:
##     Pre = 0.362, Running = 0.91, Post = 0.144, Total = 1.42 
## Fixed effects:
##                    mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)       0.078 0.051     -0.022    0.077      0.180  0.077   0
## scale(HealthIns)  0.001 0.057     -0.111    0.001      0.113  0.001   0
## scale(Poverty)    0.345 0.062      0.225    0.345      0.468  0.345   0
## scale(Unemploy)  -0.008 0.060     -0.125   -0.008      0.111 -0.008   0
## 
## Random effects:
##   Name     Model
##     struct IID model
## 
## Model hyperparameters:
##                                                            mean       sd
## size for the nbinomial observations (1/overdispersion)     3.80 5.25e-01
## Precision for struct                                   19880.28 2.03e+04
##                                                        0.025quant 0.5quant
## size for the nbinomial observations (1/overdispersion)       2.86     3.77
## Precision for struct                                      1383.92 13850.83
##                                                        0.975quant    mode
## size for the nbinomial observations (1/overdispersion)       4.93    3.71
## Precision for struct                                     73837.76 3789.64
## 
## Expected number of effective parameters(stdev): 4.83(3.39)
## Number of equivalent replicates : 24.61 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 993.76
## Effective number of parameters .................: 7.59
## 
## Marginal log-Likelihood:  -516.32 
## Posterior marginals for the linear predictor and
##  the fitted values are computed

m2<- inla.tmarginal(function(x) (1/x), #invert the precision to be on variance scale
        mod2$marginals.hyperpar$`Precision for struct`)
#95% credible interval for the variance
inla.hpdmarginal(.95, marginal=m2)

##                    low         high
## level:0.95 5.02374e-06 0.0004805839

plot(m2,
     type="l",
     main=c("Posterior distibution for between county variance", "- IID model -"), 
     xlim=c(0, .5))

final.dat$fitted_m2<-mod2$summary.fitted.values$mean

f3<-AIDSdeaths12~ scale (HealthIns) + scale (Poverty) +scale(Unemploy)+
  f(struct, model = "bym",scale.model = T, constr = T, graph = H)

mod3<-inla(formula = f3,data = final.dat,
           family = "nbinomial",
           E = E_d, 
           control.compute = list(waic=T), 
           control.predictor = list(link=1), 
           num.threads = 3, 
               verbose = F)

#total model summary
summary(mod3)

## 
## Call:
##    c("inla(formula = f3, family = \"nbinomial\", data = final.dat, E = 
##    E_d, ", " verbose = F, control.compute = list(waic = T), 
##    control.predictor = list(link = 1), ", " num.threads = 3)") 
## Time used:
##     Pre = 0.464, Running = 5.62, Post = 0.349, Total = 6.44 
## Fixed effects:
##                    mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)      -0.190 0.073     -0.335   -0.189     -0.048 -0.188   0
## scale(HealthIns) -0.112 0.078     -0.267   -0.112      0.040 -0.111   0
## scale(Poverty)    0.382 0.057      0.270    0.382      0.495  0.382   0
## scale(Unemploy)   0.010 0.061     -0.109    0.010      0.129  0.010   0
## 
## Random effects:
##   Name     Model
##     struct BYM model
## 
## Model hyperparameters:
##                                                           mean      sd
## size for the nbinomial observations (1/overdispersion)   22.49   10.38
## Precision for struct (iid component)                   1725.16 1701.10
## Precision for struct (spatial component)                  4.56    1.29
##                                                        0.025quant 0.5quant
## size for the nbinomial observations (1/overdispersion)       9.25    20.18
## Precision for struct (iid component)                       105.23  1218.28
## Precision for struct (spatial component)                     2.43     4.43
##                                                        0.975quant   mode
## size for the nbinomial observations (1/overdispersion)      49.35  16.51
## Precision for struct (iid component)                      6206.43 279.13
## Precision for struct (spatial component)                     7.46   4.18
## 
## Expected number of effective parameters(stdev): 63.18(12.11)
## Number of equivalent replicates : 1.88 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 896.86
## Effective number of parameters .................: 47.28
## 
## Marginal log-Likelihood:  631.84 
## Posterior marginals for the linear predictor and
##  the fitted values are computed

mod1$waic$waic

## [1] 994.1984

mod2$waic$waic

## [1] 993.7588

mod3$waic$waic

## [1] 896.8632

b1 <- inla.emarginal(exp, mod2$marginals.fixed[[2]])
b1

## [1] 1.002458

b1ci <-inla.qmarginal(c(.025, .975), 
                      inla.tmarginal(exp, mod2$marginals.fixed[[2]]))
b1ci

## [1] 0.8955809 1.1189290

m3a<- inla.tmarginal(
        function(x) (1/x),
        mod3$marginals.hyperpar$`Precision for struct (iid component)`)
m3b<- inla.tmarginal(
        function(x) (1/x),
        mod3$marginals.hyperpar$`Precision for struct (spatial component)`)

plot(m3a, type="l",
     main=c("Posterior distibution for between county variance", "- BYM model -"),
     xlim=c(0, .1)) # you may need to change this
#lines(m3b, col="red")
lines(m3b, col="green")

inla.hpdmarginal(.95,m3a)

##                     low        high
## level:0.95 6.464208e-05 0.006196699

inla.hpdmarginal(.95,m3b)

##                 low     high
## level:0.95 0.120256 0.380015

Denominator/offset variable: Total Population (2012) Numerator: AIDS Deaths (2012) IV: [scaled] Persons in Poverty (2012) IV: [scaled] < 64 with Health Insurance (2012) IV: [scaled] Unemployment (2012). Poisson model results: As Poverty increases by one unit, the number of AIDs deaths increases by 0.37 units, p < .0001. As Health Insurance increases by one unit, the number of AIDs deaths decreases by 0.01 units, p = .556. As Unemployment rate increases by one unit, the number of AIDs deaths decreases by 0.1 units, p < .0001. The model is over-dispersed, p < .001, with the variance 3 times as large as the mean. Negative binomial model results: Calculated risk ratios show that Poverty has a 41.3% increase in risk in the number of AIDs deaths, p < .0001. Health Insurance has a .09% increase in risk in the number of AIDs deaths, p = .987. Unemployment has a 1% decrease in risk in the number of AIDs deaths, p = .896. BYM model results: based on the model AICs, the spatial structured model fits better for the data (Mod1 994 > Mod2 993 > Mod3 896). Mean estimates revealed that Poverty (M = .382) significantly predict risk of HIV deaths (95% CI: .27, .50); Health Insurance (M = -.112) does not significantly predict risk of HIV deaths (95% CI: -.27, .04); and Unemployment does not significantly predict risk of HIV deaths (95% CI: -.11, .13).

#Initial Poisson & Negative Binomial Model

library(dplyr)
arf2018<-arf2018%>%
  mutate(cofips=f00004, 
         coname=f00010,
         state = f00011,
         AIDSdeaths12=f1193012,
         AIDSdeaths11=f1193011,
         TotDeaths12=f1255812,
         TotDeaths11=f1255811,
         TotPop=f1198411,
         HealthIns= f1475112,
         Poverty= f1332112,
         Unemploy=f0679512,
         rucc= as.factor(f0002013),
         hpsa10= as.factor(f0978710),
         )%>%
  select(cofips, AIDSdeaths12, AIDSdeaths11,TotDeaths12, TotDeaths11, TotPop,state, cofips, Unemploy, coname, HealthIns,Poverty, rucc, hpsa10)%>%
  filter(complete.cases(.))%>%
  as.data.frame()


summary(arf2018)

##     cofips           AIDSdeaths12     AIDSdeaths11     TotDeaths12   
##  Length:122         Min.   : 11.00   Min.   : 11.00   Min.   : 1476  
##  Class :character   1st Qu.: 15.00   1st Qu.: 15.00   1st Qu.: 3787  
##  Mode  :character   Median : 20.00   Median : 20.50   Median : 6036  
##                     Mean   : 38.48   Mean   : 40.49   Mean   : 8005  
##                     3rd Qu.: 41.00   3rd Qu.: 46.25   3rd Qu.: 9883  
##                     Max.   :233.00   Max.   :233.00   Max.   :61856  
##                                                                      
##   TotDeaths11        TotPop           state              Unemploy     
##  Min.   : 1415   Min.   : 156433   Length:122         Min.   : 5.100  
##  1st Qu.: 3634   1st Qu.: 494923   Class :character   1st Qu.: 7.300  
##  Median : 5872   Median : 799086   Mode  :character   Median : 8.450  
##  Mean   : 7817   Mean   :1068866                      Mean   : 8.544  
##  3rd Qu.: 9529   3rd Qu.:1214999                      3rd Qu.: 9.375  
##  Max.   :60611   Max.   :9889056                      Max.   :15.200  
##                                                                       
##     coname            HealthIns        Poverty           rucc    hpsa10
##  Length:122         Min.   : 4.00   Min.   : 6.60   01     :87    : 0  
##  Class :character   1st Qu.:13.95   1st Qu.:14.10   02     :34   0: 4  
##  Mode  :character   Median :18.20   Median :18.00   03     : 1   1:53  
##                     Mean   :17.85   Mean   :17.61          : 0   2:65  
##                     3rd Qu.:20.98   3rd Qu.:20.38   04     : 0         
##                     Max.   :38.60   Max.   :34.20   05     : 0         
##                                                     (Other): 0

library(sf)
library(ggplot2)

options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
  filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))

arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )

## Warning: We recommend using the dplyr::*_join() family of functions instead.

## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help

arf2018sub<-filter(arf2018_m, is.na(AIDSdeaths12)==F)

#Poisson Analysis

fit_pois<- glm(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy),
               family=poisson, 
               data= arf2018sub)
summary(fit_pois)

## 
## Call:
## glm(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) + 
##     scale(Poverty) + scale(Unemploy), family = poisson, data = arf2018sub)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -9.6343  -2.1259   0.0249   1.6064  12.0154  
## 
## Coefficients:
##                    Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)      -10.280910   0.015564 -660.538   <2e-16 ***
## scale(HealthIns)  -0.008689   0.014743   -0.589   0.5556    
## scale(Poverty)     0.374350   0.018933   19.772   <2e-16 ***
## scale(Unemploy)   -0.062538   0.016077   -3.890   0.0001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1934.5  on 118  degrees of freedom
## Residual deviance: 1468.0  on 115  degrees of freedom
## AIC: 2085
## 
## Number of Fisher Scoring iterations: 5

exp(coef(fit_pois))

##      (Intercept) scale(HealthIns)   scale(Poverty)  scale(Unemploy) 
##     3.428133e-05     9.913490e-01     1.454047e+00     9.393771e-01

#Assess overdispersion

scale<-sqrt(fit_pois$deviance/fit_pois$df.residual)
scale

## [1] 3.57279

1-pchisq(fit_pois$deviance, df = fit_pois$df.residual)

## [1] 0

library(MASS)
fit_nb<- glm.nb(AIDSdeaths12 ~ offset(log(TotPop)) + scale (HealthIns) + scale (Poverty) +scale(Unemploy), 
               data=arf2018sub)
summary(fit_nb)

## 
## Call:
## glm.nb(formula = AIDSdeaths12 ~ offset(log(TotPop)) + scale(HealthIns) + 
##     scale(Poverty) + scale(Unemploy), data = arf2018sub, init.theta = 3.811051054, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8952  -0.8713  -0.1871   0.4250   3.0372  
## 
## Coefficients:
##                    Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)      -1.018e+01  5.040e-02 -201.963  < 2e-16 ***
## scale(HealthIns)  9.349e-04  5.754e-02    0.016    0.987    
## scale(Poverty)    3.455e-01  6.110e-02    5.656 1.55e-08 ***
## scale(Unemploy)  -7.191e-03  5.492e-02   -0.131    0.896    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.8111) family taken to be 1)
## 
##     Null deviance: 159.93  on 118  degrees of freedom
## Residual deviance: 118.96  on 115  degrees of freedom
## AIC: 991.97
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.811 
##           Std. Err.:  0.528 
## 
##  2 x log-likelihood:  -981.969