#Libraries
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
library(classInt)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(dplyr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggsn)
## Loading required package: grid
library(haven)
library (car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: lme4
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is C:/Users/tobik/OneDrive/Documents/_Thesis
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(lme4)
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
lower48acs2016data1<-lower48acs2016mothership[c("GEOID","NAME")]
lower48acs2016data1$total=lower48acs2016mothership$B02001_001E
lower48acs2016data1$White=(((1+lower48acs2016mothership$B02001_002E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Black=(((1+lower48acs2016mothership$B02001_003E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Native=(((1+lower48acs2016mothership$B02001_004E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Asian=(((1+lower48acs2016mothership$B02001_005E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$HawaiianPI=(((1+lower48acs2016mothership$B02001_006E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Other=(((1+lower48acs2016mothership$B02001_007E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$TwoPlus=(((1+lower48acs2016mothership$B02001_008E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Hispanic=(((1+lower48acs2016mothership$B03001_003E)/lower48acs2016mothership$B03001_001E)-(1/lower48acs2016mothership$B03001_001E))
lower48acs2016data1$NonHispanic=(((1+lower48acs2016mothership$B03001_002E)/lower48acs2016mothership$B03001_001E)-(1/lower48acs2016mothership$B03001_001E))
lower48acs2016data1$BelowPovertyLevel=(((1+lower48acs2016mothership$B17026_002E+lower48acs2016mothership$B17026_003E+lower48acs2016mothership$B17026_004E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
lower48acs2016data1$AtOrAbovePovertyLevel=(1-lower48acs2016data1$BelowPovertyLevel)
lower48acs2016data1$Unemployment=(((1+lower48acs2016mothership$B23025_005E)/lower48acs2016mothership$B23025_002E)-(1/lower48acs2016mothership$B23025_002E))
lower48acs2016data1$NotInLaberForceRate=(((1+lower48acs2016mothership$B23025_007E)/lower48acs2016mothership$B23025_001E)-(1/lower48acs2016mothership$B23025_001E))
#Total Answers
lower48acs2016data1$EDUtotal=lower48acs2016mothership$B15003_001E
#High School
lower48acs2016data1$HS_GED=(((1+lower48acs2016mothership$B15003_017E+lower48acs2016mothership$B15003_018E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Some College
lower48acs2016data1$Some_College_or_AS=(((1+lower48acs2016mothership$B15003_019E+lower48acs2016mothership$B15003_020E+lower48acs2016mothership$B15003_021E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Associates
#lower48acs2016data1$Associates=(((1+lower48acs2016mothership$B15003_021E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Bachelors
lower48acs2016data1$Bachelors=(((1+lower48acs2016mothership$B15003_022E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Masters Plus
lower48acs2016data1$MastersPlus=(((1+lower48acs2016mothership$B15003_023E+lower48acs2016mothership$B15003_024E+lower48acs2016mothership$B15003_025E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Less than HS
lower48acs2016data1$Less_than_HS=(1-(lower48acs2016data1$HS_GED+lower48acs2016data1$Some_College_or_AS+lower48acs2016data1$Bachelors+lower48acs2016data1$MastersPlus))
#POVERTY
#Total Answers
lower48acs2016data1$POVtotal=lower48acs2016mothership$B17026_001E
#Below Poverty Line
lower48acs2016data1$PovBelow1=(((1+lower48acs2016mothership$B17026_002E+lower48acs2016mothership$B17026_003E+lower48acs2016mothership$B17026_004E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#1-1.49
lower48acs2016data1$Pov1to1.49=(((1+lower48acs2016mothership$B17026_005E+lower48acs2016mothership$B17026_006E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#1.5-1.99
lower48acs2016data1$Pov1.5to1.99=(((1+lower48acs2016mothership$B17026_007E+lower48acs2016mothership$B17026_008E+lower48acs2016mothership$B17026_009E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#2-2.99
lower48acs2016data1$Pov2to2.99=(((1+lower48acs2016mothership$B17026_010E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#3-3.99
lower48acs2016data1$Pov3to3.99=(((1+lower48acs2016mothership$B17026_011E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#4-4.99
lower48acs2016data1$Pov4to4.99=(((1+lower48acs2016mothership$B17026_012E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#5 and over
lower48acs2016data1$Pov5Plus=(((1+lower48acs2016mothership$B17026_013E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#vehicles available
#Total Answers
lower48acs2016data1$VEHtotal=lower48acs2016mothership$B08014_001E
#No vehicle
lower48acs2016data1$NoVEH=(((1+lower48acs2016mothership$B08014_002E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#1 vehicle
lower48acs2016data1$VEH1=(((1+lower48acs2016mothership$B08014_003E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#2 vehicles
lower48acs2016data1$VEH2=(((1+lower48acs2016mothership$B08014_004E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#3 vehicles
lower48acs2016data1$VEH3=(((1+lower48acs2016mothership$B08014_005E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#4 vehicles
lower48acs2016data1$VEH4=(((1+lower48acs2016mothership$B08014_006E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#5 vehicles
lower48acs2016data1$VEH5=(((1+lower48acs2016mothership$B08014_007E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
Cleaning a little bit:
lower48acs2016data1<-subset(lower48acs2016data1, total > '100') #deleted all tracts with less than 101 inhabitants
Merging with the hub data (by the way, the ACS data is only for census tracts that have a certain minimum population per county - we loose 436 tracts with low populations):
l48acsdata1<-merge(lower48acs2016data1,hubdistance2,by="GEOID")
Using the data from another source.
homework.fit1<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+White+Black+Native+Asian+HawaiianPI+Other+TwoPlus+(1|STATEFP), data=l48acsdata1, REML=T)
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
homework.fit2<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+White+Black+Native+Asian+HawaiianPI+Other+TwoPlus+Unemployment+(1|STATEFP), data=l48acsdata1, REML=T)
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
summary(homework.fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +
## Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +
## Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +
## MastersPlus + White + Black + Native + Asian + HawaiianPI +
## Other + TwoPlus + (1 | STATEFP)
## Data: l48acsdata1
##
## REML criterion at convergence: 589015.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.1866 -0.4360 -0.1139 0.2120 19.3686
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATEFP (Intercept) 258.0 16.06
## Residual 215.5 14.68
## Number of obs: 71701, groups: STATEFP, 49
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 10.4872 3.7459 2.800
## NoVEH -19.3239 1.7000 -11.367
## VEH1 -31.7745 1.5703 -20.235
## VEH2 -28.8577 1.5562 -18.543
## VEH3 -16.4745 1.7451 -9.440
## VEH4 -18.7742 1.9902 -9.433
## PovBelow1 -5.1933 0.9563 -5.431
## Pov1to1.49 0.5429 1.2953 0.419
## Pov1.5to1.99 3.3138 1.3151 2.520
## Pov2to2.99 2.1764 1.0579 2.057
## Pov3to3.99 -1.3597 1.1711 -1.161
## Pov4to4.99 -9.9870 1.3534 -7.379
## Less_than_HS -1.8904 1.4625 -1.293
## HS_GED 27.2693 1.2677 21.510
## Some_College_or_AS -11.4569 1.3047 -8.781
## Bachelors -24.7511 1.7524 -14.124
## White 33.7550 2.3487 14.372
## Black 23.7988 2.3625 10.074
## Native 74.1775 2.7273 27.198
## Asian 25.2401 2.5272 9.987
## HawaiianPI -6.9427 10.6988 -0.649
## Other 7.6578 2.5408 3.014
##
## Correlation matrix not shown by default, as p = 22 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
summary(homework.fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +
## Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +
## Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +
## MastersPlus + White + Black + Native + Asian + HawaiianPI +
## Other + TwoPlus + Unemployment + (1 | STATEFP)
## Data: l48acsdata1
##
## REML criterion at convergence: 589007.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.2032 -0.4354 -0.1138 0.2116 19.3659
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATEFP (Intercept) 257.1 16.04
## Residual 215.5 14.68
## Number of obs: 71701, groups: STATEFP, 49
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 10.8049 3.7464 2.884
## NoVEH -19.2703 1.7002 -11.334
## VEH1 -31.7378 1.5704 -20.211
## VEH2 -28.8318 1.5562 -18.527
## VEH3 -16.4034 1.7454 -9.398
## VEH4 -18.7200 1.9903 -9.406
## PovBelow1 -4.6738 0.9850 -4.745
## Pov1to1.49 0.6942 1.2971 0.535
## Pov1.5to1.99 3.3685 1.3153 2.561
## Pov2to2.99 2.1574 1.0579 2.039
## Pov3to3.99 -1.4145 1.1713 -1.208
## Pov4to4.99 -10.0881 1.3541 -7.450
## Less_than_HS -1.8268 1.4627 -1.249
## HS_GED 27.3389 1.2681 21.559
## Some_College_or_AS -11.4053 1.3048 -8.741
## Bachelors -24.9248 1.7542 -14.209
## White 33.5364 2.3507 14.266
## Black 23.8264 2.3625 10.085
## Native 74.2744 2.7275 27.231
## Asian 24.9805 2.5299 9.874
## HawaiianPI -7.3725 10.7003 -0.689
## Other 7.3363 2.5449 2.883
## Unemployment -3.3312 1.5144 -2.200
##
## Correlation matrix not shown by default, as p = 23 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
anova(homework.fit1,homework.fit2)
## refitting model(s) with ML (instead of REML)
## Data: l48acsdata1
## Models:
## homework.fit1: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +
## homework.fit1: Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +
## homework.fit1: Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +
## homework.fit1: MastersPlus + White + Black + Native + Asian + HawaiianPI +
## homework.fit1: Other + TwoPlus + (1 | STATEFP)
## homework.fit2: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +
## homework.fit2: Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +
## homework.fit2: Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +
## homework.fit2: MastersPlus + White + Black + Native + Asian + HawaiianPI +
## homework.fit2: Other + TwoPlus + Unemployment + (1 | STATEFP)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## homework.fit1 24 589108 589328 -294530 589060
## homework.fit2 25 589105 589335 -294528 589055 4.8425 1 0.02777 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hypothesis: The aggregate level indicator of unemployment may indicate a cross level interaction. Minorities except Native Americans in states with higher unemployment rates are closer to EVCS than Whites.
homework.fit3<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+(Black+Asian+HawaiianPI+Other+TwoPlus)*Unemployment+(1|STATEFP), data=l48acsdata1, REML=T)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## Warning: Some predictor variables are on very different scales: consider
## rescaling
summary(homework.fit3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +
## Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +
## Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +
## MastersPlus + (Black + Asian + HawaiianPI + Other + TwoPlus) *
## Unemployment + (1 | STATEFP)
## Data: l48acsdata1
##
## REML criterion at convergence: 589871.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.0897 -0.4381 -0.1134 0.2138 19.5533
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATEFP (Intercept) 277.9 16.67
## Residual 218.3 14.77
## Number of obs: 71701, groups: STATEFP, 49
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 45.2175 2.9800 15.174
## NoVEH -20.5647 1.7119 -12.013
## VEH1 -32.8801 1.5816 -20.789
## VEH2 -30.1972 1.5669 -19.271
## VEH3 -17.6823 1.7566 -10.066
## VEH4 -19.7945 2.0030 -9.883
## PovBelow1 -3.3820 0.9910 -3.413
## Pov1to1.49 1.1692 1.3110 0.892
## Pov1.5to1.99 3.6780 1.3292 2.767
## Pov2to2.99 1.9208 1.0705 1.794
## Pov3to3.99 -1.8580 1.1815 -1.573
## Pov4to4.99 -10.3396 1.3645 -7.577
## Less_than_HS -1.7847 1.4740 -1.211
## HS_GED 27.5924 1.2782 21.587
## Some_College_or_AS -10.8405 1.3174 -8.229
## Bachelors -24.8610 1.7708 -14.039
## Black -9.6550 0.5899 -16.368
## Asian -1.8241 1.4954 -1.220
## HawaiianPI -27.4922 19.5430 -1.407
## Other -26.4857 1.7147 -15.446
## TwoPlus -37.0700 3.3367 -11.110
## Unemployment 7.5423 2.3649 3.189
## Black:Unemployment -11.5567 3.7220 -3.105
## Asian:Unemployment -97.7668 17.9094 -5.459
## HawaiianPI:Unemployment -107.0536 184.6722 -0.580
## Other:Unemployment -13.0737 13.5567 -0.964
## TwoPlus:Unemployment 42.5043 22.2250 1.912
##
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## Some predictor variables are on very different scales: consider rescaling
Unemployment 7.5423 2.3649 3.189 Black:Unemployment -11.5567 3.7220 -3.105 Asian:Unemployment -97.7668 17.9094 -5.459
Blacks and Asians who live in states with higher unemployment live significantly closer to EVCS.