# Get county socio-economic variables from Area resource file 2019-2020
arf2020<-import("ahrf2020.sas7bdat")
arf2020<-arf2020%>%
filter(f00011=="48") %>%
mutate(cofips=as.factor(f00004),
coname=f00010,
state = f00011,
medhouvl=f1461314,
pctcrpop= round(100*(f1492010/f0453010),2),
medhinc= f1434514,
pctctyunemp=round(100*(f1451214/f1451014),2),
ctypopdes= (f0453010/f1387410),
rucc= as.numeric(f0002013),
pctperpov= f1332118,
obgyn10_pc= 1000*(f1168410/ f0453010) )%>%
dplyr::select(medhouvl, pctcrpop,medhinc, pctctyunemp,state, cofips, coname, ctypopdes, rucc, obgyn10_pc,pctperpov)%>%
mutate(rurality= car::Recode(rucc, recodes ="1:3 ='Urban';4:9='Rural'"))%>%
filter(complete.cases(.))
#load and calculate maternal mortality rates for counties in Texas for a 5year period (2015-2019)
alldat<-readRDS("~/OneDrive - University of Texas at San Antonio/maternalmortality/aggregate/alldat.rds") %>%
filter(State=="TX")
countyMMR <- alldat %>%
group_by(cofips)%>%
summarise(nbir=sum(nbirths, na.rm = T), ndea = sum(ndeaths, na.rm = T)) %>%
mutate(mmrate =100000*(ndea/nbir))
#Merge the socioeconomic variables with Texas counties maternal mortality rates for a 5- years period(2015-2019)
Txmmr<- merge(countyMMR, arf2020, by= "cofips", all.x =TRUE, sort = FALSE) %>%
rename(GEOID=cofips) %>%
mutate(rmmrate=ifelse(mmrate>=1,mmrate,NA))
# Get Texas boundary data from tigris
txcounty<-counties(year=2018, state = "tx", refresh=T, cb = T, progress_bar=FALSE)
# Geo join the boundary data with Texas maternal mortality rate dataset to give it geometries for map making
Txmmrg<-geo_join(txcounty, Txmmr, by_sp="GEOID", by_df="GEOID",how="left" )
# Assign CRS to the dataset for proper map projection,
Txcountymmr<- st_transform(Txmmrg, 3083)
# Make Texas county Maternal Mortality Rate Map
Texasmmr <- Txcountymmr%>%
mutate(MMR_group = cut(rmmrate, breaks=quantile(rmmrate, na.rm=TRUE), include.lowest=T ))%>%
ggplot()+
geom_sf(aes(fill=MMR_group))+
scale_fill_brewer(name="Maternal Mortality Rate", palette = "YlGn"
)+
ggtitle("Texas Counties Maternal Mortality Rate Per 100,000, 2015-2019")+
theme(plot.title = element_text(size=10, hjust = 0.2)) +
theme(
legend.key.width = unit(0.25, "in"),
legend.key.height = unit(0.2, "in"),
legend.text = element_text(size=8),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.background = element_rect(fill = "white", color = NA))
The dependent variable in this analysis is maternal mortality rates in Texas counties. Predictors for this analysis include; percentage of county population that is rural; county unemployment rate; the median household income in the county; the county’s median house value; the population density setsquare mile in the county. The percentage of persons in the county living in poverty; per capital number of OB-GYN in the county. All variables were Z scored
##
## Call:
## lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz + pctperpovz +
## rurality + ctypopdesz + obgyn10_pcz + pctctyunempz + ruccz,
## data = Txcountymmr2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1981 -0.4875 -0.2702 0.1226 6.5594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.095528 0.120261 0.794 0.4278
## medhincz -0.038526 0.116312 -0.331 0.7408
## medhouvlz -0.210752 0.102760 -2.051 0.0413 *
## pctcrpopz 0.126033 0.085952 1.466 0.1439
## pctperpovz -0.017231 0.108777 -0.158 0.8743
## ruralityUrban -0.288965 0.317924 -0.909 0.3643
## ctypopdesz 0.002196 0.074147 0.030 0.9764
## obgyn10_pcz 0.001540 0.072187 0.021 0.9830
## pctctyunempz 0.077940 0.070414 1.107 0.2694
## ruccz -0.373528 0.165277 -2.260 0.0247 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9863 on 243 degrees of freedom
## Multiple R-squared: 0.06484, Adjusted R-squared: 0.03021
## F-statistic: 1.872 on 9 and 243 DF, p-value: 0.05674
#Creating a good representative set of neighbor types and spatial weights
Txcty.nb6<-knearneigh(st_centroid(Txcountymmr2), k=6)
Txcty.nb6<-knn2nb(Txcty.nb6)
Txcty.wt6<-nb2listw(Txcty.nb6, style="W")
Txcty.nb5<-knearneigh(st_centroid(Txcountymmr2), k=5)
Txcty.nb5<-knn2nb(Txcty.nb5)
Txcty.wt5<-nb2listw(Txcty.nb5, style="W")
Txcty.nb4<-knearneigh(st_centroid(Txcountymmr2), k=4)
Txcty.nb4<-knn2nb(Txcty.nb4)
Txcty.wt4<-nb2listw(Txcty.nb4, style="W")
Txcty.nb3<-knearneigh(st_centroid(Txcountymmr2), k=3)
Txcty.nb3<-knn2nb(Txcty.nb3)
Txcty.wt3<-nb2listw(Txcty.nb3,style="W")
Txcty.nb2<-knearneigh(st_centroid(Txcountymmr2) , k=2)
Txcty.nb2<-knn2nb(Txcty.nb2)
Txcty.wt2<-nb2listw(Txcty.nb2,style="W")
Txcty.nbr<-poly2nb(Txcountymmr2, queen=F)
Txcty.wtr<-nb2listw(Txcty.nbr, zero.policy=T)
Txcty.nbq<-poly2nb(Txcountymmr2, queen=T)
Txcty.wtq<-nb2listw(Txcty.nbr, style="W", zero.policy=T)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2)
## weights: Txcty.wt2
##
## Moran I statistic standard deviate = -0.36792, p-value = 0.7129
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## -0.034003530 -0.012765477 0.003332139
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2)
## weights: Txcty.wt3
##
## LMerr = 0.026624, df = 1, p-value = 0.8704
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2)
## weights: Txcty.wt3
##
## LMlag = 0.0034126, df = 1, p-value = 0.9534
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2)
## weights: Txcty.wt3
##
## RLMerr = 0.29358, df = 1, p-value = 0.5879
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2)
## weights: Txcty.wt3
##
## RLMlag = 0.27037, df = 1, p-value = 0.6031
##
## Call:errorsarlm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2, listw = Txcty.wt3, method = "MC")
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19385 -0.49303 -0.27493 0.11954 6.55218
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0966394 0.1185304 0.8153 0.41489
## medhincz -0.0372954 0.1144553 -0.3259 0.74454
## medhouvlz -0.2109170 0.1013703 -2.0807 0.03747
## pctcrpopz 0.1255281 0.0843860 1.4875 0.13687
## pctperpovz -0.0146040 0.1069107 -0.1366 0.89135
## ruralityUrban -0.2924009 0.3122494 -0.9364 0.34905
## ctypopdesz 0.0012247 0.0728440 0.0168 0.98659
## obgyn10_pcz 0.0019413 0.0707090 0.0275 0.97810
## pctctyunempz 0.0767349 0.0690879 1.1107 0.26670
## ruccz -0.3758847 0.1624592 -2.3137 0.02068
##
## Lambda: 0.015192, LR test value: 0.030791, p-value: 0.86071
## Asymptotic standard error: 0.08042
## z-value: 0.18891, p-value: 0.85016
## Wald statistic: 0.035688, p-value: 0.85016
##
## Log likelihood: -350.3832 for error model
## ML residual variance (sigma squared): 0.93415, (sigma: 0.96652)
## Nagelkerke pseudo-R-squared: 0.064954
## Number of observations: 253
## Number of parameters estimated: 12
## AIC: 724.77, (AIC for lm: 722.8)
##
## t test of coefficients:
##
## Estimate Std. Error t value
## fit.err$tarXI(x - lambda * WX)(Intercept) 0.0966394 0.1039002 0.9301
## fit.err$tarXI(x - lambda * WX)medhincz -0.0372954 0.1226169 -0.3042
## fit.err$tarXI(x - lambda * WX)medhouvlz -0.2109170 0.0850499 -2.4799
## fit.err$tarXI(x - lambda * WX)pctcrpopz 0.1255281 0.0764051 1.6429
## fit.err$tarXI(x - lambda * WX)pctperpovz -0.0146040 0.1022612 -0.1428
## fit.err$tarXI(x - lambda * WX)ruralityUrban -0.2924009 0.2618163 -1.1168
## fit.err$tarXI(x - lambda * WX)ctypopdesz 0.0012247 0.0262940 0.0466
## fit.err$tarXI(x - lambda * WX)obgyn10_pcz 0.0019413 0.0410905 0.0472
## fit.err$tarXI(x - lambda * WX)pctctyunempz 0.0767349 0.0653500 1.1742
## fit.err$tarXI(x - lambda * WX)ruccz -0.3758847 0.1638886 -2.2935
## Pr(>|t|)
## fit.err$tarXI(x - lambda * WX)(Intercept) 0.35323
## fit.err$tarXI(x - lambda * WX)medhincz 0.76126
## fit.err$tarXI(x - lambda * WX)medhouvlz 0.01382 *
## fit.err$tarXI(x - lambda * WX)pctcrpopz 0.10169
## fit.err$tarXI(x - lambda * WX)pctperpovz 0.88656
## fit.err$tarXI(x - lambda * WX)ruralityUrban 0.26518
## fit.err$tarXI(x - lambda * WX)ctypopdesz 0.96289
## fit.err$tarXI(x - lambda * WX)obgyn10_pcz 0.96236
## fit.err$tarXI(x - lambda * WX)pctctyunempz 0.24146
## fit.err$tarXI(x - lambda * WX)ruccz 0.02267 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:lagsarlm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2, listw = Txcty.wt3, type = "lag",
## method = "MC")
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19547 -0.48774 -0.27078 0.12384 6.55628
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0957590 0.1178595 0.8125 0.41651
## medhincz -0.0383548 0.1139955 -0.3365 0.73652
## medhouvlz -0.2105432 0.1008469 -2.0878 0.03682
## pctcrpopz 0.1257854 0.0842394 1.4932 0.13539
## pctperpovz -0.0171720 0.1067105 -0.1609 0.87216
## ruralityUrban -0.2896759 0.3115752 -0.9297 0.35252
## ctypopdesz 0.0020609 0.0726665 0.0284 0.97737
## obgyn10_pcz 0.0015454 0.0707509 0.0218 0.98257
## pctctyunempz 0.0776983 0.0690106 1.1259 0.26021
## ruccz -0.3736710 0.1620066 -2.3065 0.02108
##
## Rho: 0.0050412, LR test value: 0.0037281, p-value: 0.95131
## Asymptotic standard error: 0.078961
## z-value: 0.063845, p-value: 0.94909
## Wald statistic: 0.0040762, p-value: 0.94909
##
## Log likelihood: -350.3967 for lag model
## ML residual variance (sigma squared): 0.93431, (sigma: 0.9666)
## Nagelkerke pseudo-R-squared: 0.064854
## Number of observations: 253
## Number of parameters estimated: 12
## AIC: 724.79, (AIC for lm: 722.8)
## LM test for residual autocorrelation
## test value: 0.3285, p-value: 0.56654
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## fit.lag$tarXx(Intercept) 0.0957590 0.1030938 0.9289 0.35389
## fit.lag$tarXxmedhincz -0.0383548 0.1221992 -0.3139 0.75389
## fit.lag$tarXxmedhouvlz -0.2105432 0.0843444 -2.4962 0.01322 *
## fit.lag$tarXxpctcrpopz 0.1257854 0.0762133 1.6504 0.10014
## fit.lag$tarXxpctperpovz -0.0171720 0.1020966 -0.1682 0.86657
## fit.lag$tarXxruralityUrban -0.2896759 0.2615221 -1.1077 0.26911
## fit.lag$tarXxctypopdesz 0.0020609 0.0264533 0.0779 0.93797
## fit.lag$tarXxobgyn10_pcz 0.0015454 0.0412509 0.0375 0.97015
## fit.lag$tarXxpctctyunempz 0.0776983 0.0654956 1.1863 0.23666
## fit.lag$tarXxruccz -0.3736710 0.1627800 -2.2956 0.02255 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:errorsarlm(formula = mmratez ~ medhincz + medhouvlz + pctcrpopz +
## pctperpovz + rurality + ctypopdesz + obgyn10_pcz + pctctyunempz +
## ruccz, data = Txcountymmr2, listw = Txcty.wt3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19384 -0.49307 -0.27490 0.11953 6.55217
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0966421 0.1185320 0.8153 0.41489
## medhincz -0.0372924 0.1144564 -0.3258 0.74456
## medhouvlz -0.2109174 0.1013719 -2.0806 0.03747
## pctcrpopz 0.1255269 0.0843864 1.4875 0.13688
## pctperpovz -0.0145975 0.1069114 -0.1365 0.89140
## ruralityUrban -0.2924093 0.3122510 -0.9365 0.34904
## ctypopdesz 0.0012224 0.0728445 0.0168 0.98661
## obgyn10_pcz 0.0019423 0.0707089 0.0275 0.97809
## pctctyunempz 0.0767320 0.0690881 1.1106 0.26672
## ruccz -0.3758905 0.1624604 -2.3137 0.02068
##
## Lambda: 0.01523, LR test value: 0.030864, p-value: 0.86054
## Asymptotic standard error: 0.08042
## z-value: 0.18937, p-value: 0.8498
## Wald statistic: 0.035863, p-value: 0.8498
##
## Log likelihood: -350.3831 for error model
## ML residual variance (sigma squared): 0.93415, (sigma: 0.96652)
## Nagelkerke pseudo-R-squared: 0.064955
## Number of observations: 253
## Number of parameters estimated: 12
## AIC: 724.77, (AIC for lm: 722.8)
After fitting the OLS model and checking for autocorrelation in the residuals, i observed that the model has no auto correlation. However, to fulfill the requirements of the assignment , I did the SAR specification, the AICs from the model show that the OLS best fit the model.