Homework

#Libraries
library(tidycensus)
library(tidyverse)

## -- Attaching packages --------------------

## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts ---- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3

library(classInt)

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`

library(dplyr)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(ggsn)

## Loading required package: grid

library(haven)
library (car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(arm)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:tidyr':
## 
##     expand

## Loading required package: lme4

## 
## arm (Version 1.10-1, built: 2018-4-12)

## Working directory is C:/Users/tobik/OneDrive/Documents/_Thesis

## 
## Attaching package: 'arm'

## The following object is masked from 'package:car':
## 
##     logit

library(lme4)
library(dbplyr)

## 
## Attaching package: 'dbplyr'

## The following objects are masked from 'package:dplyr':
## 
##     ident, sql

lower48acs2016data1<-lower48acs2016mothership[c("GEOID","NAME")]
lower48acs2016data1$total=lower48acs2016mothership$B02001_001E
lower48acs2016data1$White=(((1+lower48acs2016mothership$B02001_002E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Black=(((1+lower48acs2016mothership$B02001_003E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Native=(((1+lower48acs2016mothership$B02001_004E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Asian=(((1+lower48acs2016mothership$B02001_005E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$HawaiianPI=(((1+lower48acs2016mothership$B02001_006E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$Other=(((1+lower48acs2016mothership$B02001_007E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
lower48acs2016data1$TwoPlus=(((1+lower48acs2016mothership$B02001_008E)/lower48acs2016mothership$B02001_001E)-(1/lower48acs2016mothership$B02001_001E))
                             
lower48acs2016data1$Hispanic=(((1+lower48acs2016mothership$B03001_003E)/lower48acs2016mothership$B03001_001E)-(1/lower48acs2016mothership$B03001_001E))
lower48acs2016data1$NonHispanic=(((1+lower48acs2016mothership$B03001_002E)/lower48acs2016mothership$B03001_001E)-(1/lower48acs2016mothership$B03001_001E))
                                 
lower48acs2016data1$BelowPovertyLevel=(((1+lower48acs2016mothership$B17026_002E+lower48acs2016mothership$B17026_003E+lower48acs2016mothership$B17026_004E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
lower48acs2016data1$AtOrAbovePovertyLevel=(1-lower48acs2016data1$BelowPovertyLevel)

lower48acs2016data1$Unemployment=(((1+lower48acs2016mothership$B23025_005E)/lower48acs2016mothership$B23025_002E)-(1/lower48acs2016mothership$B23025_002E))
lower48acs2016data1$NotInLaberForceRate=(((1+lower48acs2016mothership$B23025_007E)/lower48acs2016mothership$B23025_001E)-(1/lower48acs2016mothership$B23025_001E))
#Total Answers
lower48acs2016data1$EDUtotal=lower48acs2016mothership$B15003_001E
#High School
lower48acs2016data1$HS_GED=(((1+lower48acs2016mothership$B15003_017E+lower48acs2016mothership$B15003_018E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Some College
lower48acs2016data1$Some_College_or_AS=(((1+lower48acs2016mothership$B15003_019E+lower48acs2016mothership$B15003_020E+lower48acs2016mothership$B15003_021E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Associates
#lower48acs2016data1$Associates=(((1+lower48acs2016mothership$B15003_021E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Bachelors
lower48acs2016data1$Bachelors=(((1+lower48acs2016mothership$B15003_022E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Masters Plus
lower48acs2016data1$MastersPlus=(((1+lower48acs2016mothership$B15003_023E+lower48acs2016mothership$B15003_024E+lower48acs2016mothership$B15003_025E)/lower48acs2016mothership$B15003_001E)-(1/lower48acs2016mothership$B15003_001E))
#Less than HS
lower48acs2016data1$Less_than_HS=(1-(lower48acs2016data1$HS_GED+lower48acs2016data1$Some_College_or_AS+lower48acs2016data1$Bachelors+lower48acs2016data1$MastersPlus))
#POVERTY
#Total Answers
lower48acs2016data1$POVtotal=lower48acs2016mothership$B17026_001E
#Below Poverty Line
lower48acs2016data1$PovBelow1=(((1+lower48acs2016mothership$B17026_002E+lower48acs2016mothership$B17026_003E+lower48acs2016mothership$B17026_004E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#1-1.49
lower48acs2016data1$Pov1to1.49=(((1+lower48acs2016mothership$B17026_005E+lower48acs2016mothership$B17026_006E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#1.5-1.99
lower48acs2016data1$Pov1.5to1.99=(((1+lower48acs2016mothership$B17026_007E+lower48acs2016mothership$B17026_008E+lower48acs2016mothership$B17026_009E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#2-2.99
lower48acs2016data1$Pov2to2.99=(((1+lower48acs2016mothership$B17026_010E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#3-3.99
lower48acs2016data1$Pov3to3.99=(((1+lower48acs2016mothership$B17026_011E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#4-4.99
lower48acs2016data1$Pov4to4.99=(((1+lower48acs2016mothership$B17026_012E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))
#5 and over
lower48acs2016data1$Pov5Plus=(((1+lower48acs2016mothership$B17026_013E)/lower48acs2016mothership$B17026_001E)-(1/lower48acs2016mothership$B17026_001E))

#vehicles available
#Total Answers
lower48acs2016data1$VEHtotal=lower48acs2016mothership$B08014_001E
#No vehicle
lower48acs2016data1$NoVEH=(((1+lower48acs2016mothership$B08014_002E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#1 vehicle
lower48acs2016data1$VEH1=(((1+lower48acs2016mothership$B08014_003E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#2 vehicles
lower48acs2016data1$VEH2=(((1+lower48acs2016mothership$B08014_004E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#3 vehicles
lower48acs2016data1$VEH3=(((1+lower48acs2016mothership$B08014_005E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#4 vehicles
lower48acs2016data1$VEH4=(((1+lower48acs2016mothership$B08014_006E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))
#5 vehicles
lower48acs2016data1$VEH5=(((1+lower48acs2016mothership$B08014_007E)/lower48acs2016mothership$B08014_001E)-(1/lower48acs2016mothership$B08014_001E))

Cleaning a little bit:

lower48acs2016data1<-subset(lower48acs2016data1, total > '100') #deleted all tracts with less than 101 inhabitants

Merging with the hub data (by the way, the ACS data is only for census tracts that have a certain minimum population per county - we loose 436 tracts with low populations):

l48acsdata1<-merge(lower48acs2016data1,hubdistance2,by="GEOID")

Using the data from another source.

homework.fit1<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+White+Black+Native+Asian+HawaiianPI+Other+TwoPlus+(1|STATEFP), data=l48acsdata1, REML=T)

## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients

homework.fit2<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+White+Black+Native+Asian+HawaiianPI+Other+TwoPlus+Unemployment+(1|STATEFP), data=l48acsdata1, REML=T)

## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients

summary(homework.fit1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +  
##     Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +  
##     Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +  
##     MastersPlus + White + Black + Native + Asian + HawaiianPI +  
##     Other + TwoPlus + (1 | STATEFP)
##    Data: l48acsdata1
## 
## REML criterion at convergence: 589015.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.1866 -0.4360 -0.1139  0.2120 19.3686 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATEFP  (Intercept) 258.0    16.06   
##  Residual             215.5    14.68   
## Number of obs: 71701, groups:  STATEFP, 49
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)         10.4872     3.7459   2.800
## NoVEH              -19.3239     1.7000 -11.367
## VEH1               -31.7745     1.5703 -20.235
## VEH2               -28.8577     1.5562 -18.543
## VEH3               -16.4745     1.7451  -9.440
## VEH4               -18.7742     1.9902  -9.433
## PovBelow1           -5.1933     0.9563  -5.431
## Pov1to1.49           0.5429     1.2953   0.419
## Pov1.5to1.99         3.3138     1.3151   2.520
## Pov2to2.99           2.1764     1.0579   2.057
## Pov3to3.99          -1.3597     1.1711  -1.161
## Pov4to4.99          -9.9870     1.3534  -7.379
## Less_than_HS        -1.8904     1.4625  -1.293
## HS_GED              27.2693     1.2677  21.510
## Some_College_or_AS -11.4569     1.3047  -8.781
## Bachelors          -24.7511     1.7524 -14.124
## White               33.7550     2.3487  14.372
## Black               23.7988     2.3625  10.074
## Native              74.1775     2.7273  27.198
## Asian               25.2401     2.5272   9.987
## HawaiianPI          -6.9427    10.6988  -0.649
## Other                7.6578     2.5408   3.014

## 
## Correlation matrix not shown by default, as p = 22 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients

summary(homework.fit2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +  
##     Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +  
##     Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +  
##     MastersPlus + White + Black + Native + Asian + HawaiianPI +  
##     Other + TwoPlus + Unemployment + (1 | STATEFP)
##    Data: l48acsdata1
## 
## REML criterion at convergence: 589007.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.2032 -0.4354 -0.1138  0.2116 19.3659 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATEFP  (Intercept) 257.1    16.04   
##  Residual             215.5    14.68   
## Number of obs: 71701, groups:  STATEFP, 49
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)         10.8049     3.7464   2.884
## NoVEH              -19.2703     1.7002 -11.334
## VEH1               -31.7378     1.5704 -20.211
## VEH2               -28.8318     1.5562 -18.527
## VEH3               -16.4034     1.7454  -9.398
## VEH4               -18.7200     1.9903  -9.406
## PovBelow1           -4.6738     0.9850  -4.745
## Pov1to1.49           0.6942     1.2971   0.535
## Pov1.5to1.99         3.3685     1.3153   2.561
## Pov2to2.99           2.1574     1.0579   2.039
## Pov3to3.99          -1.4145     1.1713  -1.208
## Pov4to4.99         -10.0881     1.3541  -7.450
## Less_than_HS        -1.8268     1.4627  -1.249
## HS_GED              27.3389     1.2681  21.559
## Some_College_or_AS -11.4053     1.3048  -8.741
## Bachelors          -24.9248     1.7542 -14.209
## White               33.5364     2.3507  14.266
## Black               23.8264     2.3625  10.085
## Native              74.2744     2.7275  27.231
## Asian               24.9805     2.5299   9.874
## HawaiianPI          -7.3725    10.7003  -0.689
## Other                7.3363     2.5449   2.883
## Unemployment        -3.3312     1.5144  -2.200

## 
## Correlation matrix not shown by default, as p = 23 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients

anova(homework.fit1,homework.fit2)

## refitting model(s) with ML (instead of REML)

## Data: l48acsdata1
## Models:
## homework.fit1: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 + 
## homework.fit1:     Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 + 
## homework.fit1:     Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors + 
## homework.fit1:     MastersPlus + White + Black + Native + Asian + HawaiianPI + 
## homework.fit1:     Other + TwoPlus + (1 | STATEFP)
## homework.fit2: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 + 
## homework.fit2:     Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 + 
## homework.fit2:     Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors + 
## homework.fit2:     MastersPlus + White + Black + Native + Asian + HawaiianPI + 
## homework.fit2:     Other + TwoPlus + Unemployment + (1 | STATEFP)
##               Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## homework.fit1 24 589108 589328 -294530   589060                           
## homework.fit2 25 589105 589335 -294528   589055 4.8425      1    0.02777 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hypothesis: The aggregate level indicator of unemployment may indicate a cross level interaction. Minorities except Native Americans in states with higher unemployment rates are closer to EVCS than Whites.

homework.fit3<-lmer(HubDist~NoVEH+VEH1+VEH2+VEH3+VEH4+VEH5+PovBelow1+Pov1to1.49+Pov1.5to1.99+Pov2to2.99+Pov3to3.99+Pov4to4.99+Pov5Plus+Less_than_HS+HS_GED+Some_College_or_AS+Bachelors+MastersPlus+(Black+Asian+HawaiianPI+Other+TwoPlus)*Unemployment+(1|STATEFP), data=l48acsdata1, REML=T)

## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients

## Warning: Some predictor variables are on very different scales: consider
## rescaling

summary(homework.fit3)

## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ NoVEH + VEH1 + VEH2 + VEH3 + VEH4 + VEH5 + PovBelow1 +  
##     Pov1to1.49 + Pov1.5to1.99 + Pov2to2.99 + Pov3to3.99 + Pov4to4.99 +  
##     Pov5Plus + Less_than_HS + HS_GED + Some_College_or_AS + Bachelors +  
##     MastersPlus + (Black + Asian + HawaiianPI + Other + TwoPlus) *  
##     Unemployment + (1 | STATEFP)
##    Data: l48acsdata1
## 
## REML criterion at convergence: 589871.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.0897 -0.4381 -0.1134  0.2138 19.5533 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATEFP  (Intercept) 277.9    16.67   
##  Residual             218.3    14.77   
## Number of obs: 71701, groups:  STATEFP, 49
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)               45.2175     2.9800  15.174
## NoVEH                    -20.5647     1.7119 -12.013
## VEH1                     -32.8801     1.5816 -20.789
## VEH2                     -30.1972     1.5669 -19.271
## VEH3                     -17.6823     1.7566 -10.066
## VEH4                     -19.7945     2.0030  -9.883
## PovBelow1                 -3.3820     0.9910  -3.413
## Pov1to1.49                 1.1692     1.3110   0.892
## Pov1.5to1.99               3.6780     1.3292   2.767
## Pov2to2.99                 1.9208     1.0705   1.794
## Pov3to3.99                -1.8580     1.1815  -1.573
## Pov4to4.99               -10.3396     1.3645  -7.577
## Less_than_HS              -1.7847     1.4740  -1.211
## HS_GED                    27.5924     1.2782  21.587
## Some_College_or_AS       -10.8405     1.3174  -8.229
## Bachelors                -24.8610     1.7708 -14.039
## Black                     -9.6550     0.5899 -16.368
## Asian                     -1.8241     1.4954  -1.220
## HawaiianPI               -27.4922    19.5430  -1.407
## Other                    -26.4857     1.7147 -15.446
## TwoPlus                  -37.0700     3.3367 -11.110
## Unemployment               7.5423     2.3649   3.189
## Black:Unemployment       -11.5567     3.7220  -3.105
## Asian:Unemployment       -97.7668    17.9094  -5.459
## HawaiianPI:Unemployment -107.0536   184.6722  -0.580
## Other:Unemployment       -13.0737    13.5567  -0.964
## TwoPlus:Unemployment      42.5043    22.2250   1.912

## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## Some predictor variables are on very different scales: consider rescaling

Unemployment 7.5423 2.3649 3.189 Black:Unemployment -11.5567 3.7220 -3.105 Asian:Unemployment -97.7668 17.9094 -5.459

Blacks and Asians who live in states with higher unemployment live significantly closer to EVCS.

Homework_3

Tobias Kuhn

October 7, 2018