#Libraries
library(tidycensus)
library(tidyverse)
## -- Attaching packages ----------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.7
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts -------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(classInt)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(dplyr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggsn)
library(openxlsx)
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.6.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
Renaming
L48acs2016mothership<-L48acs2016mothership%>%
mutate(
#Family Income
#Total responses
FIncTotal=DP03_0075E,
#Percentage Estimates
FInc10k_or_less=DP03_0076PE, #<10k
FInc10kto14.9k=DP03_0077PE, #10k-14999
FInc15kto24.9k=DP03_0078PE, #15k-24999
FInc25kto34.9k=DP03_0079PE, #25k-34999
FInc35kto49.9k=DP03_0080PE, #35k-49999
FInc50kto74.9k=DP03_0081PE, #50k-74999
FInc75kto99.9k=DP03_0082PE, #75k-99999
FInc100kto149.9k=DP03_0083PE, #100k-149999
FInc150kto199.9k=DP03_0084PE, #150k-199999
FInc200kPlus=DP03_0085PE, #200Plus
FIncMedian=DP03_0086E, #Median 86
FIncMean=DP03_0087E, #Mean
FIncPerCapita=DP03_0088E, #per capita
#Non-Family Households
NFInc=DP03_0089E, #Total responses
NFInc=DP03_0090E, #Median
NFInc=DP03_0091E, #Mean
#Families and People Percentage Below Poverty
PercentPoverty=DP03_0128PE,
#Housing
VacancyAll=DP04_0003PE, #Vacancy of housing
VacancyRental=DP04_0005PE, #Rental vacancy
OccupiedOwner=DP04_0046PE, #Owner occupied
OccupiedRental=DP04_0047PE, #Renter occupied
VacancyOwner=(100-OccupiedOwner), #Homeowner vacancy - All NAs in ACS data; workaround
VacancyRental=(100-OccupiedRental), #Rental vacancy - All NAs in ACS data; workaround
#Vehicle availability
VehicleTotalP=DP04_0057PE,#Totals
VehicleTotal=DP04_0057E,#Totals
VehicleNone=DP04_0058PE,#none
VehicleOne=DP04_0059PE,#1
VehicleTwo=DP04_0060PE,#2
VehicleThreePlus=DP04_0061PE,#3Plus
#SMOCAPI Selected monthly owner costs as a percentage of household income with mortgage
HomeCostOwned_less_20P=DP04_0111PE,#<20%
HomeCostOwned_20P_24.9P=DP04_0112PE,#20%-24.9%
HomeCostOwned_25P_29.9P=DP04_0113PE,#25%-29.9%
HomeCostOwned_30P_34.9P=DP04_0114PE,#30%-34.9%
HomeCostOwned_35P_Plus=DP04_0115PE,#35%Plus
HomeCostOwned_not_computed=DP04_0116PE,#not computed
#GRAPI Gross Rent as a Percentage of Household Income
HomeCostRented_less_15P=DP04_0137PE,#<15%
HomeCostRented_15P_19.9P=DP04_0138PE,#15%-19.9%
HomeCostRented_20P_24.9P=DP04_0139PE,#20%-24.9%
HomeCostRented_25P_29.9P=DP04_0140PE,#25%-29.9%
HomeCostRented_30P_34.9P=DP04_0141PE,#30%-34.9%
HomeCostRented_35P_Plus=DP04_0142PE,#35%Plus
HomeCostRented_not_computed=DP04_0143PE)#not computed
subsetting one state
hubdistanceHW4<-st_read(dsn="C:/Users/tobik/OneDrive/Documents/_Thesis/Databases", layer="hubdistancesprojectedoct8")
## Reading layer `hubdistancesprojectedoct8' from data source `C:\Users\tobik\OneDrive\Documents\_Thesis\Databases' using driver `ESRI Shapefile'
## Simple feature collection with 72373 features and 17 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: -2231438 ymin: -1692614 xmax: 2121318 ymax: 1326520
## epsg (SRID): NA
## proj4string: +proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
lower48acsdataHW4<-merge(L48acs2016mothership,hubdistanceHW4,by="GEOID")
reducing dataset for the homework to Texas
HubDistHW4<-subset(lower48acsdataHW4,STATEFP=='48')
Running the Bayesian Model
Running 95% CI
#Income
options(mc.cores=parallel::detectCores())
fit_HW4_1<-brm(HubDist~
FInc10k_or_less+
FInc10kto14.9k+
FInc15kto24.9k+
FInc25kto34.9k+
FInc35kto49.9k+
FInc50kto74.9k+
FInc75kto99.9k+
FInc100kto149.9k+
FInc150kto199.9k+
FInc200kPlus
, family=gaussian(), data=HubDistHW4, cores=6, seed=1115,chains=2, iter=5000,warmup=1000)
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Loading required package: raster
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:ggsn':
##
## scalebar
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:tidyr':
##
## extract
## Start sampling
## Warning: There were 487 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
#vacancy rate
fit_HW4_2<-brm(HubDist~
VacancyOwner+
VacancyRental
, family=gaussian(), data=HubDistHW4, cores=6, seed=1115,chains=2, iter=5000,warmup=1000)
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Start sampling
## Warning: There were 7446 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
#Number of vehicles
fit_HW4_3<-brm(HubDist~
VehicleNone+
VehicleOne+
(VehicleTwo+
VehicleThreePlus)
, family=gaussian(), data=HubDistHW4, cores=6, seed=1115,chains=2, iter=5000,warmup=1000)
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Start sampling
## Warning: There were 3132 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
#All 3 above
fit_HW4_All<-brm(HubDist~
FInc10k_or_less+
FInc10kto14.9k+
FInc15kto24.9k+
FInc25kto34.9k+
FInc35kto49.9k+
FInc50kto74.9k+
FInc75kto99.9k+
FInc100kto149.9k+
FInc150kto199.9k+
FInc200kPlus+
VacancyOwner+
VacancyRental+
VehicleNone+
VehicleOne+
(VehicleTwo+
VehicleThreePlus)
, family=gaussian(), data=HubDistHW4, cores=6, seed=1115,chains=2, iter=5000,warmup=1000)
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## recompiling to avoid crashing R session
## Start sampling
## Warning: There were 7160 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
summary(fit_HW4_1) #90%CI
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: HubDist ~ FInc10k_or_less + FInc10kto14.9k + FInc15kto24.9k + FInc25kto34.9k + FInc35kto49.9k + FInc50kto74.9k + FInc75kto99.9k + FInc100kto149.9k + FInc150kto199.9k + FInc200kPlus
## Data: HubDistHW4 (Number of observations: 5210)
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 76.96 265.35 -449.91 587.48 703 1.00
## FInc10k_or_less -0.75 2.65 -5.86 4.55 704 1.00
## FInc10kto14.9k -0.53 2.65 -5.62 4.75 704 1.00
## FInc15kto24.9k -0.67 2.65 -5.78 4.59 703 1.00
## FInc25kto34.9k -0.66 2.65 -5.75 4.63 704 1.00
## FInc35kto49.9k -0.62 2.65 -5.74 4.64 704 1.00
## FInc50kto74.9k -0.58 2.65 -5.66 4.68 704 1.00
## FInc75kto99.9k -0.53 2.66 -5.66 4.74 704 1.00
## FInc100kto149.9k -0.65 2.65 -5.76 4.62 703 1.00
## FInc150kto199.9k -0.96 2.66 -6.07 4.32 703 1.00
## FInc200kPlus -0.81 2.65 -5.90 4.46 704 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 18.00 0.18 17.66 18.36 2037 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(fit_HW4_2) #90%CI
## Warning: The model has not converged (some Rhats are > 1.1). Do not analyse the results!
## We recommend running more iterations and/or setting stronger priors.
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: HubDist ~ VacancyOwner + VacancyRental
## Data: HubDistHW4 (Number of observations: 5212)
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -4501.42 5532.52 -15795.33 5159.38 8 1.50
## VacancyOwner 45.01 55.32 -51.60 157.94 8 1.50
## VacancyRental 45.21 55.32 -51.40 158.15 8 1.50
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 17.77 0.17 17.45 18.10 27 1.03
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(fit_HW4_3) #90%CI
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: HubDist ~ VehicleNone + VehicleOne + (VehicleTwo + VehicleThreePlus)
## Data: HubDistHW4 (Number of observations: 5212)
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 351.73 428.13 -474.85 1197.49 1450 1.00
## VehicleNone -3.40 4.28 -11.86 4.87 1451 1.00
## VehicleOne -3.44 4.28 -11.91 4.83 1451 1.00
## VehicleTwo -3.55 4.28 -11.98 4.73 1450 1.00
## VehicleThreePlus -3.03 4.28 -11.52 5.24 1450 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 17.98 0.18 17.63 18.34 3922 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(fit_HW4_All) #90%CI
## Warning: The model has not converged (some Rhats are > 1.1). Do not analyse the results!
## We recommend running more iterations and/or setting stronger priors.
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: HubDist ~ FInc10k_or_less + FInc10kto14.9k + FInc15kto24.9k + FInc25kto34.9k + FInc35kto49.9k + FInc50kto74.9k + FInc75kto99.9k + FInc100kto149.9k + FInc150kto199.9k + FInc200kPlus + VacancyOwner + VacancyRental + VehicleNone + VehicleOne + (VehicleTwo + VehicleThreePlus)
## Data: HubDistHW4 (Number of observations: 5210)
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -3241.28 7152.39 -20331.07 7425.05 5 1.86
## FInc10k_or_less 0.90 2.36 -3.92 3.27 1 2.73
## FInc10kto14.9k 1.04 2.36 -3.79 3.41 1 2.72
## FInc15kto24.9k 0.89 2.35 -3.95 3.25 1 2.72
## FInc25kto34.9k 0.84 2.36 -4.00 3.22 1 2.72
## FInc35kto49.9k 0.88 2.36 -3.96 3.25 1 2.72
## FInc50kto74.9k 0.86 2.36 -3.97 3.22 1 2.72
## FInc75kto99.9k 0.85 2.36 -3.99 3.22 1 2.72
## FInc100kto149.9k 0.71 2.36 -4.12 3.08 1 2.72
## FInc150kto199.9k 0.48 2.36 -4.35 2.86 1 2.72
## FInc200kPlus 0.64 2.36 -4.19 3.00 1 2.72
## VacancyOwner 30.01 67.69 -73.20 194.18 5 1.77
## VacancyRental 30.36 67.69 -72.88 194.52 5 1.77
## VehicleNone 1.44 4.66 -9.12 7.23 2 1.83
## VehicleOne 1.60 4.66 -8.99 7.41 2 1.83
## VehicleTwo 1.39 4.66 -9.18 7.19 2 1.83
## VehicleThreePlus 1.53 4.66 -9.04 7.33 2 1.83
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 16.88 0.16 16.56 17.17 18 1.09
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).