library(nortest)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(classInt)
library(sandwich)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(sf)
adi_tx <- read_csv("C:/Users/maman/OneDrive/DEM Fall 2021/Spatial Demography/adi-download.zip")
## Multiple files in zip: reading 'TX_2019_ADI_Census Block Group_v3.1.txt'
## New names:
## * `` -> ...1
## Rows: 15811 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (3): GISJOIN, ADI_NATRANK, ADI_STATERNK
## dbl (4): ...1, V1, FIPS, STATE
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
adi_tract<-adi_tx%>%
mutate(adi_staterank= as.numeric(ADI_STATERNK)) %>%
mutate(adi_nationrank= as.numeric(ADI_NATRANK)) %>%
mutate(trfips = substr(FIPS, 1,11))%>%
group_by(trfips)%>%
summarise(adi_st_avg=mean(adi_staterank, na.rm=T), adi_nat_avg=mean(adi_nationrank, na.rm=T))%>%
ungroup()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
save(adi_tract, file="tx_adi_tract_avg.Rdata")
places_2020 <- read_csv("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7093 GIS/GIS Data/PLACES__Census_Tract_Data__GIS_Friendly_Format___2020_release.csv")
## Rows: 72337 Columns: 63
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (34): StateAbbr, StateDesc, CountyName, CountyFIPS, TractFIPS, ACCESS2_C...
## dbl (29): TotalPopulation, ACCESS2_CrudePrev, ARTHRITIS_CrudePrev, BINGE_Cru...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
places_2020$obesity <- places_2020$OBESITY_CrudePrev
places_2020$high_chol <- places_2020$HIGHCHOL_CrudePrev
places_2020$curr_smk <- places_2020$CSMOKING_CrudePrev
places_2020$high_bp <- places_2020$BPHIGH_CrudePrev
places_2020$uninsured <- places_2020$ACCESS2_CrudePrev
places_2020$diabetes <- places_2020$DIABETES_CrudePrev
places_2020$trfips <- places_2020$TractFIPS
places_2020$pop <- places_2020$TotalPopulation
plcs<-places_2020[, c("diabetes","uninsured","high_bp","curr_smk","high_chol","obesity", "trfips", "pop")]
library(tidycensus)
options(tigris_class = "sf")
tracts<-get_acs(geography = "tract",
year = 2019,
state = "TX",
county = c("029", "013","255","091","187","493","163","311","325","019","265","171","259"),
variables = c("DP05_0071PE", "DP05_0078PE"),
output ="wide",
geometry = TRUE)%>%
rename(hispanic =DP05_0071PE,
nh_black =DP05_0078PE) %>%
filter(complete.cases(nh_black, hispanic))
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
tracts$trfips<-substr(tracts$GEOID, 1, 5)
dog <- left_join(adi_tract, plcs, by = "trfips")
data_final <- left_join(tracts, dog, by = c("GEOID"="trfips")
)
# The predictors used are "uninsured" which is the portion of the population with no health insurance per census tract, "curr_smk" or the rate of current smokers per tract, and "adi_st_avg" that is the average state-wide rank of the area deprivation index corresponding to each census tract.
# The outcome variable is "obesity" which is the obesity rate per census tract.
data_final$hisp_maj_cut <- ifelse(data_final$hispanic>= 50, "Hispanic Majority",
ifelse(data_final$hispanic < 50, "Hispanic Minority", NA))
data_final %>%
ggplot()+
geom_sf(aes( fill=hisp_maj_cut, group=hisp_maj_cut))+
#coord_sf(crs = 2163)+
scale_colour_viridis_d()+
scale_fill_viridis_d()+
guides(title="Tract Type",fill = guide_legend(nrow = 4, byrow = TRUE ))+
guides(fill=guide_legend(title="Tract Type"))+
ggtitle(label = "2020 Hispanic Majority Tracts")+theme(legend.position="bottom")
fit.a <- lm( obesity ~ uninsured + curr_smk + adi_st_avg,
data=data_final)
summary(fit.a)
##
## Call:
## lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data = data_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0155 -0.8804 0.0995 0.9916 5.7612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.58630 0.40767 55.40 < 2e-16 ***
## uninsured 0.25024 0.01730 14.46 < 2e-16 ***
## curr_smk 0.38376 0.03733 10.28 < 2e-16 ***
## adi_st_avg 0.32792 0.07419 4.42 1.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.648 on 469 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8909, Adjusted R-squared: 0.8902
## F-statistic: 1276 on 3 and 469 DF, p-value: < 2.2e-16
fit.maj <- lm( obesity ~ uninsured + curr_smk + adi_st_avg,
data=data_final, subset=hisp_maj_cut=="Hispanic Majority")
fit.min <- lm( obesity ~ uninsured + curr_smk + adi_st_avg,
data=data_final, subset=hisp_maj_cut=="Hispanic Minority")
Chow test
RSS2<-sum(resid(fit.min)^2) + sum(resid(fit.maj)^2)
RSS0<-sum(resid(fit.a)^2)
k<-length(coef(fit.a))
n1<-as.numeric(table(data_final$hisp_maj_cut)[1])
n2<-as.numeric(table(data_final$hisp_maj_cut)[2])
ctest<-((RSS0 - RSS2)/k) / (RSS2 / (n1+n2- 2*k))
ctest
## [1] 3.691206
df(ctest, df1 = k, df2=n1+n2-2*k)
## [1] 0.009716178
Significant p-value indicates suggests that the predictors behave differently within the regimes compared to the pooled data meaning that the model is non-stationary. Results from both regimes will be interpreted seperately.
summary(fit.maj)
##
## Call:
## lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data = data_final,
## subset = hisp_maj_cut == "Hispanic Majority")
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3849 -0.6831 0.1413 0.8172 3.1730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.21593 0.47138 47.129 < 2e-16 ***
## uninsured 0.20177 0.02101 9.601 < 2e-16 ***
## curr_smk 0.54332 0.04670 11.634 < 2e-16 ***
## adi_st_avg 0.26953 0.08232 3.274 0.00121 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.321 on 249 degrees of freedom
## Multiple R-squared: 0.8963, Adjusted R-squared: 0.895
## F-statistic: 717.2 on 3 and 249 DF, p-value: < 2.2e-16
summary(fit.min)
##
## Call:
## lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data = data_final,
## subset = hisp_maj_cut == "Hispanic Minority")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4137 -1.0820 0.2255 1.0900 6.1111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.40936 0.71652 32.671 < 2e-16 ***
## uninsured 0.24426 0.04899 4.986 1.26e-06 ***
## curr_smk 0.31018 0.07110 4.362 1.99e-05 ***
## adi_st_avg 0.38709 0.12866 3.009 0.00294 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.925 on 216 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.6726, Adjusted R-squared: 0.668
## F-statistic: 147.9 on 3 and 216 DF, p-value: < 2.2e-16
Results of both regimes
# The three predictor variables( uninsured, curr_smk, and adi_st_avg) retained positive coefficients and significant p-values across the pooled and both subset models. Out of all the predictors, current smoking status fluctuated the most, with a coefficient of 0.54 and 0.31 among the hispanic majority and hispanic minority regimes respectively. The area deprivation index (ADI) appears to have a larger positive relationship to tract-level obesity rates in the hispanic minority regime (beta = 0.39) than the hispanic majority regime (beta=0.27)