Use a data set of your choice.
Consider any of the models discussed in the lecture for the outcome. You MUST use: Either of the Binomial, Poisson or Negative Binomial models
What is your denominator/offset variable, or if you wish to construct a SMR/SIR/Expected Value, describe how you do this.
Construct at least 4 predictors for your outcome, I would z-score these so they are on common scale.
Interpret your models.
Is your outcome over-dispersed?
Use INLA to estimate a spatially correlated (besag or bym model) for your count outcome.
Data: 2015-2019 5 Year ACS
For this assignment I am attempting a Poisson regression with my count data as the number of mobile homes per census tract.
My offset variable is the total number of households per tract.
Predictor variables:
Percent white homeowners
Percent renters
MHI - Median Household Income
Percent of children under 18
(I didn’t feel like sorting out which variables were needed and which were extra so I downloaded too many)
#Download ACS data for the year 2015 for Los Angeles County, California census tracts in R using the tidycensus library
#In this extract, request both the proportion of the population that is Non-Hispanic Black and the proportion of the population that is Hispanic
#Use this to search for variables
#acs2019 <- load_variables(2019 , "acs5", cache = TRUE)
#ec2019 <- load_variables(2019 , "census of governments", cache = TRUE)
#demographic profile tables
#Use this to search for variables
#acs2019 <- load_variables(2019 , "acs5", cache = TRUE) #demographic profile tables
tracts<-get_acs(geography = "tract",
state="TX",
year = 2019,
variables=c("B25024_010E",
"B01003_001E",
"DP03_0062E",
"B25003_003E",
"B25003H_003E",
"B25003I_003E",
"B25003B_003E",
"B25003_002E",
"B25003H_002E",
"B25003I_002E",
"B25003B_002E",
"B05003_003E",
"B05003_014E") ,
geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Fetching data by table type ("B/C", "S", "DP") and combining the result.
#rename variables and filter missing cases
tracts<-tracts%>%
mutate(mobilehomes=B25024_010E,
totpop=B01003_001E,
MHI=DP03_0062E,
totRent = B25003_003E,
whiteRent = B25003H_003E,
hispRent = B25003I_003E,
blackRent = B25003B_003E,
totOwn = B25003_002E,
whiteOwn = B25003H_002E,
hispOwn = B25003I_002E,
blackOwn = B25003B_002E,
totRentOwn = totRent + totOwn,
pRent = totRent/totRentOwn,
pOwn = totOwn/totRentOwn,
pwhiteOwn = whiteOwn/totRentOwn,
phispOwn = hispOwn/totRentOwn,
pblackOwn = blackOwn/totRentOwn,
pwhiteRent = whiteRent/totRentOwn,
phispRent = hispRent/totRentOwn,
pblackRent = blackRent/totRentOwn,
totUnder18 = B05003_014E + B05003_003E,
pchildren = totUnder18 / totpop)
tracts<-na.omit(tracts)
#pull texas shapefile for context
state<-get_acs(geography = "state", variables=c("DP05_0001E"), state="TX", year=2019, geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
library(tigris)
MSAs<-core_based_statistical_areas(cb=T,year=2019)
SANB<-MSAs[MSAs$GEOID=="41700",]
DFW<-MSAs[MSAs$GEOID=="19100",]
HOU<-MSAs[MSAs$GEOID=="26420",]
AUS<-MSAs[MSAs$GEOID=="12420",]
#Merge MSAs into new shapefile (for some reason couldn't query them all together)
TX_MSAs <- rbind(SANB, DFW, HOU, AUS)
#% Mobile Homes
tm_shape(tracts)+
tm_polygons("mobilehomes",
title="Mobile Homes by Tract, 2019",
palette="YlGnBu",
border.alpha = 0,
style="pretty",
legend.hist=T )+
tm_shape(state)+
tm_polygons(alpha=0, border.col="black")+
tm_shape(TX_MSAs)+
tm_polygons(alpha=0, border.col = "black")+
tm_format("World", title="Mobile Homes by Tract, Texas", legend.outside=T)+
tm_scale_bar()+
tm_compass()
I believe this model is showing a positive association between the percentages of white homeowners, renters, and children and the number of mobile homes in an area, while the median household income has a negative association.
fit_pois<- glm(mobilehomes ~ offset(scale(totpop)) + scale(pwhiteOwn) + scale(pRent) + scale(MHI) + scale(pchildren),
family=poisson,
data=tracts)
summary(fit_pois)
##
## Call:
## glm(formula = mobilehomes ~ offset(scale(totpop)) + scale(pwhiteOwn) +
## scale(pRent) + scale(MHI) + scale(pchildren), family = poisson,
## data = tracts)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -458.87 -4.71 -0.14 15.74 137.00
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.014894 0.001548 1301.59 <2e-16 ***
## scale(pwhiteOwn) 2.039050 0.001807 1128.56 <2e-16 ***
## scale(pRent) 0.101005 0.002156 46.85 <2e-16 ***
## scale(MHI) -4.364476 0.001452 -3005.24 <2e-16 ***
## scale(pchildren) 0.110216 0.001457 75.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 18040876 on 5161 degrees of freedom
## Residual deviance: 2978632 on 5157 degrees of freedom
## AIC: 2999524
##
## Number of Fisher Scoring iterations: 15
Since this measure calculated below is 24, this suggests the variance is much larger than the mean, suggesting high overdispersion.
scale<-sqrt(fit_pois$deviance/fit_pois$df.residual)
scale
## [1] 24.0331
From the readings, the results can be interpreted as “relative risk” - so I believe that would mean that the percent of white homeowners and the percent of children have positive associations with the number of mobile homes in an area, while the percent of renters and the median household income have negative associaitons with the number of mobile homes in an area.
# Numeric Identifiers for tracts
tracts$struct<-1:dim(tracts)[1]
nbs<-knearneigh(st_centroid(tracts), k = 5, longlat = T) #k=5 nearest neighbors
## Warning in st_centroid.sf(tracts): st_centroid assumes attributes are constant
## over geometries of x
## Warning in knearneigh(st_centroid(tracts), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object
nbs<-knn2nb(nbs, row.names = tracts$struct, sym = T) #force symmetry!!
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat)
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])
nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")
f3<-mobilehomes ~ offset(scale(totpop)) + scale(pwhiteOwn) + scale(pRent) + scale(MHI) + scale(pchildren)+ f(struct, model = "bym", scale.model = T, constr = T, graph = H)
mod3<-inla(formula = f3,data = tracts,
family = "poisson",
control.compute = list(waic=T,return.marginals.predictor=TRUE),
control.predictor = list(link=1),
num.threads = 3,
verbose = F)
#total model summary
summary(mod3)
##
## Call:
## c("inla(formula = f3, family = \"poisson\", data = tracts, verbose = F,
## ", " control.compute = list(waic = T, return.marginals.predictor =
## TRUE), ", " control.predictor = list(link = 1), num.threads = 3)")
## Time used:
## Pre = 3.39, Running = 28.2, Post = 0.606, Total = 32.2
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 2.000 0.037 1.925 2.000 2.073 2.000 0
## scale(pwhiteOwn) 0.891 0.089 0.716 0.891 1.067 0.891 0
## scale(pRent) -0.739 0.075 -0.886 -0.739 -0.593 -0.739 0
## scale(MHI) -1.577 0.081 -1.737 -1.577 -1.417 -1.576 0
## scale(pchildren) 0.267 0.050 0.169 0.267 0.365 0.267 0
##
## Random effects:
## Name Model
## struct BYM model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for struct (iid component) 0.221 0.007 0.208 0.221
## Precision for struct (spatial component) 0.141 0.012 0.121 0.141
## 0.975quant mode
## Precision for struct (iid component) 0.236 0.220
## Precision for struct (spatial component) 0.166 0.139
##
## Watanabe-Akaike information criterion (WAIC) ...: 77409.05
## Effective number of parameters .................: 26297.81
##
## Marginal log-Likelihood: -23229.67
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')