Homework 4

#Libraries
library(tidycensus)
library(tidyverse)

## -- Attaching packages ----------------- tidyverse 1.2.1 --

## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.7
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts -------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library(classInt)

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`

library(dplyr)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(ggsn)
library(openxlsx)
library(brms)

## Loading required package: Rcpp

## Loading 'brms' package (version 2.6.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.

Renaming

L48acs2016mothership<-L48acs2016mothership%>%
  mutate(
#Family Income
  #Total responses
FIncTotal=DP03_0075E,
  #Percentage Estimates
FInc10k_or_less=DP03_0076PE, #<10k
FInc10kto14.9k=DP03_0077PE, #10k-14999
FInc15kto24.9k=DP03_0078PE, #15k-24999
FInc25kto34.9k=DP03_0079PE, #25k-34999
FInc35kto49.9k=DP03_0080PE, #35k-49999
FInc50kto74.9k=DP03_0081PE, #50k-74999
FInc75kto99.9k=DP03_0082PE, #75k-99999
FInc100kto149.9k=DP03_0083PE, #100k-149999
FInc150kto199.9k=DP03_0084PE, #150k-199999
FInc200kPlus=DP03_0085PE, #200Plus
FIncMedian=DP03_0086E, #Median 86
FIncMean=DP03_0087E, #Mean
FIncPerCapita=DP03_0088E, #per capita
#Non-Family Households
NFInc=DP03_0089E, #Total responses
NFInc=DP03_0090E, #Median
NFInc=DP03_0091E, #Mean
#Families and People Percentage Below Poverty
PercentPoverty=DP03_0128PE,
#Housing
VacancyAll=DP04_0003PE, #Vacancy of housing
VacancyRental=DP04_0005PE, #Rental vacancy
OccupiedOwner=DP04_0046PE, #Owner occupied
OccupiedRental=DP04_0047PE, #Renter occupied
VacancyOwner=(100-OccupiedOwner), #Homeowner vacancy - All NAs in ACS data; workaround
VacancyRental=(100-OccupiedRental), #Rental vacancy - All NAs in ACS data; workaround
#Vehicle availability
VehicleTotalP=DP04_0057PE,#Totals
VehicleTotal=DP04_0057E,#Totals
VehicleNone=DP04_0058PE,#none
VehicleOne=DP04_0059PE,#1
VehicleTwo=DP04_0060PE,#2
VehicleThreePlus=DP04_0061PE,#3Plus
#SMOCAPI Selected monthly owner costs as a percentage of household income with mortgage
HomeCostOwned_less_20P=DP04_0111PE,#<20%
HomeCostOwned_20P_24.9P=DP04_0112PE,#20%-24.9%
HomeCostOwned_25P_29.9P=DP04_0113PE,#25%-29.9%
HomeCostOwned_30P_34.9P=DP04_0114PE,#30%-34.9%
HomeCostOwned_35P_Plus=DP04_0115PE,#35%Plus
HomeCostOwned_not_computed=DP04_0116PE,#not computed
#GRAPI Gross Rent as a Percentage of Household Income
HomeCostRented_less_15P=DP04_0137PE,#<15%
HomeCostRented_15P_19.9P=DP04_0138PE,#15%-19.9%
HomeCostRented_20P_24.9P=DP04_0139PE,#20%-24.9%
HomeCostRented_25P_29.9P=DP04_0140PE,#25%-29.9%
HomeCostRented_30P_34.9P=DP04_0141PE,#30%-34.9%
HomeCostRented_35P_Plus=DP04_0142PE,#35%Plus
HomeCostRented_not_computed=DP04_0143PE)#not computed

subsetting one state

hubdistanceHW4<-st_read(dsn="C:/Users/tobik/OneDrive/Documents/_Thesis/Databases", layer="hubdistancesprojectedoct8")

## Reading layer `hubdistancesprojectedoct8' from data source `C:\Users\tobik\OneDrive\Documents\_Thesis\Databases' using driver `ESRI Shapefile'
## Simple feature collection with 72373 features and 17 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -2231438 ymin: -1692614 xmax: 2121318 ymax: 1326520
## epsg (SRID):    NA
## proj4string:    +proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs

lower48acsdataHW4<-merge(L48acs2016mothership,hubdistanceHW4,by="GEOID")

reducing dataset for the homework to Texas

HubDistHW4<-subset(lower48acsdataHW4,STATEFP=='48')

Running the Bayesian Model

Running 95% CI

#Income
options(mc.cores=parallel::detectCores())
fit_HW4_1<-brm(HubDist~
FInc10k_or_less+
FInc10kto14.9k+
FInc15kto24.9k+
FInc25kto34.9k+
FInc35kto49.9k+
FInc50kto74.9k+
FInc75kto99.9k+
FInc100kto149.9k+
FInc150kto199.9k+
FInc200kPlus
, family=gaussian(), data=HubDistHW4, cores=6,  seed=1115,chains=2, iter=5000,warmup=1000)

## Warning: Rows containing NAs were excluded from the model.

## Compiling the C++ model

## Loading required package: raster

## Loading required package: sp

## 
## Attaching package: 'raster'

## The following object is masked from 'package:ggsn':
## 
##     scalebar

## The following object is masked from 'package:dplyr':
## 
##     select

## The following object is masked from 'package:tidyr':
## 
##     extract

## Start sampling

## Warning: There were 487 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded

## Warning: Examine the pairs() plot to diagnose sampling problems

#vacancy rate
fit_HW4_2<-brm(HubDist~
VacancyOwner+
VacancyRental
, family=gaussian(), data=HubDistHW4, cores=6,  seed=1115,chains=2, iter=5000,warmup=1000)

## Warning: Rows containing NAs were excluded from the model.

## Compiling the C++ model

## recompiling to avoid crashing R session

## Start sampling

## Warning: There were 7446 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded

## Warning: Examine the pairs() plot to diagnose sampling problems

#Number of vehicles
fit_HW4_3<-brm(HubDist~
VehicleNone+
VehicleOne+
(VehicleTwo+
VehicleThreePlus)
, family=gaussian(), data=HubDistHW4, cores=6,  seed=1115,chains=2, iter=5000,warmup=1000)

## Warning: Rows containing NAs were excluded from the model.

## Compiling the C++ model

## recompiling to avoid crashing R session

## Start sampling

## Warning: There were 3132 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded

## Warning: Examine the pairs() plot to diagnose sampling problems

#All 3 above
fit_HW4_All<-brm(HubDist~
FInc10k_or_less+
FInc10kto14.9k+
FInc15kto24.9k+
FInc25kto34.9k+
FInc35kto49.9k+
FInc50kto74.9k+
FInc75kto99.9k+
FInc100kto149.9k+
FInc150kto199.9k+
FInc200kPlus+
VacancyOwner+
VacancyRental+
VehicleNone+
VehicleOne+
(VehicleTwo+
VehicleThreePlus)
, family=gaussian(), data=HubDistHW4, cores=6,  seed=1115,chains=2, iter=5000,warmup=1000)

## Warning: Rows containing NAs were excluded from the model.

## Compiling the C++ model

## recompiling to avoid crashing R session

## Start sampling

## Warning: There were 7160 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded

## Warning: Examine the pairs() plot to diagnose sampling problems

summary(fit_HW4_1) #90%CI

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: HubDist ~ FInc10k_or_less + FInc10kto14.9k + FInc15kto24.9k + FInc25kto34.9k + FInc35kto49.9k + FInc50kto74.9k + FInc75kto99.9k + FInc100kto149.9k + FInc150kto199.9k + FInc200kPlus 
##    Data: HubDistHW4 (Number of observations: 5210) 
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
##          total post-warmup samples = 8000
## 
## Population-Level Effects: 
##                  Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept           76.96    265.35  -449.91   587.48        703 1.00
## FInc10k_or_less     -0.75      2.65    -5.86     4.55        704 1.00
## FInc10kto14.9k      -0.53      2.65    -5.62     4.75        704 1.00
## FInc15kto24.9k      -0.67      2.65    -5.78     4.59        703 1.00
## FInc25kto34.9k      -0.66      2.65    -5.75     4.63        704 1.00
## FInc35kto49.9k      -0.62      2.65    -5.74     4.64        704 1.00
## FInc50kto74.9k      -0.58      2.65    -5.66     4.68        704 1.00
## FInc75kto99.9k      -0.53      2.66    -5.66     4.74        704 1.00
## FInc100kto149.9k    -0.65      2.65    -5.76     4.62        703 1.00
## FInc150kto199.9k    -0.96      2.66    -6.07     4.32        703 1.00
## FInc200kPlus        -0.81      2.65    -5.90     4.46        704 1.00
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma    18.00      0.18    17.66    18.36       2037 1.00
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

summary(fit_HW4_2) #90%CI

## Warning: The model has not converged (some Rhats are > 1.1). Do not analyse the results! 
## We recommend running more iterations and/or setting stronger priors.

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: HubDist ~ VacancyOwner + VacancyRental 
##    Data: HubDistHW4 (Number of observations: 5212) 
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
##          total post-warmup samples = 8000
## 
## Population-Level Effects: 
##               Estimate Est.Error  l-95% CI u-95% CI Eff.Sample Rhat
## Intercept     -4501.42   5532.52 -15795.33  5159.38          8 1.50
## VacancyOwner     45.01     55.32    -51.60   157.94          8 1.50
## VacancyRental    45.21     55.32    -51.40   158.15          8 1.50
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma    17.77      0.17    17.45    18.10         27 1.03
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

summary(fit_HW4_3) #90%CI

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: HubDist ~ VehicleNone + VehicleOne + (VehicleTwo + VehicleThreePlus) 
##    Data: HubDistHW4 (Number of observations: 5212) 
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
##          total post-warmup samples = 8000
## 
## Population-Level Effects: 
##                  Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept          351.73    428.13  -474.85  1197.49       1450 1.00
## VehicleNone         -3.40      4.28   -11.86     4.87       1451 1.00
## VehicleOne          -3.44      4.28   -11.91     4.83       1451 1.00
## VehicleTwo          -3.55      4.28   -11.98     4.73       1450 1.00
## VehicleThreePlus    -3.03      4.28   -11.52     5.24       1450 1.00
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma    17.98      0.18    17.63    18.34       3922 1.00
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

summary(fit_HW4_All) #90%CI

## Warning: The model has not converged (some Rhats are > 1.1). Do not analyse the results! 
## We recommend running more iterations and/or setting stronger priors.

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: HubDist ~ FInc10k_or_less + FInc10kto14.9k + FInc15kto24.9k + FInc25kto34.9k + FInc35kto49.9k + FInc50kto74.9k + FInc75kto99.9k + FInc100kto149.9k + FInc150kto199.9k + FInc200kPlus + VacancyOwner + VacancyRental + VehicleNone + VehicleOne + (VehicleTwo + VehicleThreePlus) 
##    Data: HubDistHW4 (Number of observations: 5210) 
## Samples: 2 chains, each with iter = 5000; warmup = 1000; thin = 1;
##          total post-warmup samples = 8000
## 
## Population-Level Effects: 
##                  Estimate Est.Error  l-95% CI u-95% CI Eff.Sample Rhat
## Intercept        -3241.28   7152.39 -20331.07  7425.05          5 1.86
## FInc10k_or_less      0.90      2.36     -3.92     3.27          1 2.73
## FInc10kto14.9k       1.04      2.36     -3.79     3.41          1 2.72
## FInc15kto24.9k       0.89      2.35     -3.95     3.25          1 2.72
## FInc25kto34.9k       0.84      2.36     -4.00     3.22          1 2.72
## FInc35kto49.9k       0.88      2.36     -3.96     3.25          1 2.72
## FInc50kto74.9k       0.86      2.36     -3.97     3.22          1 2.72
## FInc75kto99.9k       0.85      2.36     -3.99     3.22          1 2.72
## FInc100kto149.9k     0.71      2.36     -4.12     3.08          1 2.72
## FInc150kto199.9k     0.48      2.36     -4.35     2.86          1 2.72
## FInc200kPlus         0.64      2.36     -4.19     3.00          1 2.72
## VacancyOwner        30.01     67.69    -73.20   194.18          5 1.77
## VacancyRental       30.36     67.69    -72.88   194.52          5 1.77
## VehicleNone          1.44      4.66     -9.12     7.23          2 1.83
## VehicleOne           1.60      4.66     -8.99     7.41          2 1.83
## VehicleTwo           1.39      4.66     -9.18     7.19          2 1.83
## VehicleThreePlus     1.53      4.66     -9.04     7.33          2 1.83
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma    16.88      0.16    16.56    17.17         18 1.09
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

Homework 4

Tobias Kuhn

October 26, 2018