#Libraries
library(haven);
library (car);
## Loading required package: carData
library(lmtest);
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(arm);
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is C:/Users/tobik/OneDrive/Documents/DEM7473 Hirarchical Models
## 
## Attaching package: 'arm'
## The following object is masked from 'package:car':
## 
##     logit
library(lme4);
library(dbplyr)

Reducing the data to CA and TX only (13299 tracts):

#I made sure all values are changed to numbers by savign Homework2MAIN in Excel, so, here I reload and overwrite the other one:
library(readxl)
Homework2MAIN<-read_excel("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/NewHubDistCATX3.xlsx")
names(Homework2MAIN)
##   [1] "GEOID"                     "Tract_Name"               
##   [3] "Pop16Plus"                 "Pop16PlusMoE"             
##   [5] "LaborForceRate16Plus"      "LaborForceRate16PlusMoE"  
##   [7] "EmploymentRatio16Plus"     "EmploymentRatio16PlusMoE" 
##   [9] "UnemploymentRate16Plus"    "UnemploymentRate16PlusMoE"
##  [11] "White"                     "WhiteMoE"                 
##  [13] "LFPWhite"                  "LFPWhiteMoE"              
##  [15] "UnemploymentWhite"         "UnemploymentWhiteMoE"     
##  [17] "Black"                     "BlackMoE"                 
##  [19] "LFPBlack"                  "LFPBlackMoE"              
##  [21] "UnemploymentBlack"         "UnemploymentBlackMoE"     
##  [23] "Native"                    "NativeMoE"                
##  [25] "LFPNative"                 "LFPNativeMoE"             
##  [27] "UnemploymentNative"        "UnemploymentNativeMoE"    
##  [29] "Asian"                     "AsianMoE"                 
##  [31] "LFPAsian"                  "LFPAsianMoE"              
##  [33] "UnemploymentAsian"         "UnemploymentAsianMoE"     
##  [35] "HawaiianPI"                "HawaiianPIMoE"            
##  [37] "LFPHawaiianPI"             "LFPHawaiianPIMoE"         
##  [39] "UnemploymentHawaiianPI"    "UnemploymentHawaiianPIMoE"
##  [41] "Other"                     "OtherMoE"                 
##  [43] "LFPOther"                  "LFPOtherMoE"              
##  [45] "UnemploymentOther"         "UnemploymentOtherMoE"     
##  [47] "TwoPlus"                   "Two+MoE"                  
##  [49] "LFPTwoPluslus"             "LFPTwoPlusMoE"            
##  [51] "UnemploymentTwoPlus"       "UnemploymentTwoPlusMoE"   
##  [53] "Hispanic"                  "HispanicMoE"              
##  [55] "LFPHispanic"               "LFPHispanicMoE"           
##  [57] "UnemploymentHispanic"      "UnemploymentHispanicMoE"  
##  [59] "NH_White"                  "NH_WhiteMoE"              
##  [61] "LFPNH_White"               "LFPNH_WhiteMoE"           
##  [63] "UnemploymentNH_White"      "UnemploymentNH_WhiteMoE"  
##  [65] "PovertyTotal"              "PovertyTotalMoE"          
##  [67] "LFPPoverty"                "LFPPovertyMoE"            
##  [69] "UnemploymentPoverty"       "UnemploymentPovertyMoE"   
##  [71] "NoPovertyTotal"            "NoPovertyTotalMoE"        
##  [73] "LFPNoPoverty"              "LFPNoPovertyMoE"          
##  [75] "UnemploymentNoPoverty"     "UnemploymentNoPovertyMoE" 
##  [77] "Disabled"                  "DisabledMoE"              
##  [79] "LFPDisabled"               "LFPDisabledMoE"           
##  [81] "UnemploymentDisability"    "UnemploymentDisabilityMoE"
##  [83] "EDUTotal"                  "EDUTotalMoE"              
##  [85] "EDU_NoHS"                  "EDU_NoHSMoE"              
##  [87] "EDU_HS"                    "EDU_HSMoE"                
##  [89] "EDU_SC_AS"                 "EDU_SC_ASMoE"             
##  [91] "EDU_Bachelors"             "EDU_BachelorsMoE"         
##  [93] "STATEFP"                   "COUNTYFP"                 
##  [95] "UniqueCounty"              "TRACTCE"                  
##  [97] "NAME"                      "NAMELSAD"                 
##  [99] "MTFCC"                     "FUNCSTAT"                 
## [101] "ALAND"                     "AWATER"                   
## [103] "INTPTLAT"                  "INTPTLON"                 
## [105] "Area"                      "HubName"                  
## [107] "HubDist"

Recap from last week’s homework: Creating ratios in order to make things comparable

#Adding totals
Homework2MAIN$TotalPop=(Homework2MAIN$White + Homework2MAIN$Black + Homework2MAIN$Native + Homework2MAIN$Asian + Homework2MAIN$HawaiianPI + Homework2MAIN$Other + Homework2MAIN$TwoPlus)
#creating ratios - the "1+" is in there so I don't divide by zero - and I subtract it afterwards - and I checked, I get true zeros where they need to be
Homework2MAIN$WhiteRatio=(((1+Homework2MAIN$White)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$BlackRatio=(((1+Homework2MAIN$Black)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$NativeRatio=(((1+Homework2MAIN$Native)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$AsianRatio=(((1+Homework2MAIN$Asian)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$HawaiianPIRatio=(((1+Homework2MAIN$HawaiianPI)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$OtherRatio=(((1+Homework2MAIN$Other)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$TwoPlusRatio=(((1+Homework2MAIN$TwoPlus)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
#Hispanic vs. Non-Hispanic White
Homework2MAIN$HispanicRatio=(((1+Homework2MAIN$Hispanic)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
Homework2MAIN$NH_WhiteRatio=(((1+Homework2MAIN$NH_White)/Homework2MAIN$TotalPop)-(1/Homework2MAIN$TotalPop))
#Poverty relevant data is in ratios.
#Disability seems to have really low response rate and high margins of error, so, I ignore this one.
#Educational Attainment is in total responses, not ratios. Hence, I need to create rates as well.
Homework2MAIN$EDU_NoHSRatio=(((1+Homework2MAIN$EDU_NoHS)/Homework2MAIN$EDUTotal)-(1/Homework2MAIN$EDUTotal))
Homework2MAIN$EDU_HSRatio=(((1+Homework2MAIN$EDU_HS)/Homework2MAIN$EDUTotal)-(1/Homework2MAIN$EDUTotal))
Homework2MAIN$EDU_SC_ASRatio=(((1+Homework2MAIN$EDU_SC_AS)/Homework2MAIN$EDUTotal)-(1/Homework2MAIN$EDUTotal))
Homework2MAIN$EDU_BachelorsRatio=(((1+Homework2MAIN$EDU_Bachelors)/Homework2MAIN$EDUTotal)-(1/Homework2MAIN$EDUTotal))

We have the following population ratios for the CA and TX census tracts:

WhiteRatio (without Hispanic indicator) BlackRatio NativeRatio AsianRatio HawaiianPIRatio OtherRatio TwoPlusRatio

HispanicRatio NH_WhiteRatio (Non-Hispanic)

EDU_NoHSRatio EDU_HSRatio EDU_SC_ASRatio EDU_BachelorsRatio

First, checking for variance

fit0<-lm(HubDist~factor(UniqueCounty), data=Homework2MAIN)
anova(fit0)
## Analysis of Variance Table
## 
## Response: HubDist
##                         Df  Sum Sq Mean Sq F value    Pr(>F)    
## factor(UniqueCounty)   311 181.135 0.58243  243.78 < 2.2e-16 ***
## Residuals            12987  31.027 0.00239                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit1<-lmer(HubDist~WhiteRatio+BlackRatio+NativeRatio+AsianRatio+HawaiianPIRatio+OtherRatio+TwoPlusRatio+HispanicRatio+NH_WhiteRatio+EDU_NoHSRatio+EDU_HSRatio+EDU_SC_ASRatio+EDU_BachelorsRatio+(1|UniqueCounty), data=Homework2MAIN, REML=T,subset=complete.cases(Homework2MAIN))
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio +  
##     HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio +  
##     NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio +  
##     EDU_BachelorsRatio + (1 | UniqueCounty)
##    Data: Homework2MAIN
##  Subset: complete.cases(Homework2MAIN)
## 
## REML criterion at convergence: -41418.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.7669 -0.3785 -0.0877  0.2312 19.3784 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  UniqueCounty (Intercept) 0.07763  0.27862 
##  Residual                 0.00223  0.04722 
## Number of obs: 13234, groups:  UniqueCounty, 312
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      0.336139   0.029286  11.478
## WhiteRatio      -0.066445   0.024545  -2.707
## BlackRatio      -0.079236   0.025239  -3.139
## NativeRatio      0.099192   0.038546   2.573
## AsianRatio      -0.066645   0.025147  -2.650
## HawaiianPIRatio -0.139482   0.056479  -2.470
## OtherRatio      -0.088942   0.024793  -3.587
## HispanicRatio   -0.040621   0.029774  -1.364
## NH_WhiteRatio    0.044418   0.030678   1.448
## EDU_NoHSRatio    0.104006   0.006536  15.912
## EDU_HSRatio      0.103155   0.006155  16.758
## EDU_SC_ASRatio   0.034474   0.005949   5.795
## 
## Correlation of Fixed Effects:
##             (Intr) WhitRt BlckRt NatvRt AsinRt HwnPIR OthrRt HspncR NH_WhR
## WhiteRatio  -0.201                                                        
## BlackRatio  -0.823  0.243                                                 
## NativeRatio -0.440  0.325  0.527                                          
## AsianRatio  -0.828  0.236  0.978  0.523                                   
## HawaiinPIRt -0.375  0.130  0.449  0.231  0.443                            
## OtherRatio  -0.197  0.969  0.239  0.319  0.234  0.129                     
## HispanicRat -0.518 -0.598  0.615  0.173  0.618  0.270 -0.590              
## NH_WhiteRat -0.520 -0.612  0.608  0.165  0.613  0.268 -0.588  0.988       
## EDU_NoHSRat -0.001 -0.079 -0.077 -0.100 -0.039 -0.048 -0.097 -0.065  0.042
## EDU_HSRatio -0.024 -0.029 -0.050 -0.069 -0.017 -0.053 -0.026 -0.024  0.013
## EDU_SC_ASRt -0.121  0.124  0.036  0.015  0.081 -0.055  0.123 -0.061 -0.041
##             EDU_NH EDU_HS
## WhiteRatio               
## BlackRatio               
## NativeRatio              
## AsianRatio               
## HawaiinPIRt              
## OtherRatio               
## HispanicRat              
## NH_WhiteRat              
## EDU_NoHSRat              
## EDU_HSRatio -0.085       
## EDU_SC_ASRt  0.394 -0.176
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

Group effects:

#UniqueCounty (Intercept) 0.07763  0.27862 
#Residual                 0.00223  0.04722 
#Significant individual level fixed effects:
#                Estimate    Std. Error  t value
#WhiteRatio      -0.066445   0.024545  -2.707
#BlackRatio      -0.079236   0.025239  -3.139
#NativeRatio      0.099192   0.038546   2.573
#AsianRatio      -0.066645   0.025147  -2.650
#HawaiianPIRatio -0.139482   0.056479  -2.470
#OtherRatio      -0.088942   0.024793  -3.587
#EDU_NoHSRatio    0.104006   0.006536  15.912
#EDU_HSRatio      0.103155   0.006155  16.758
#EDU_SC_ASRatio   0.034474   0.005949   5.795

Longer distances are measured as positive effects, meaning that negative estimates put populations closer to electric vehicle (EV) charging stations (EVCS). The findings show that Native Americans are further away from EVCS and Hawaiian Natives and Pacific Islanders, Caucasians, African Americans, and “others” are closer to EVCS than the average.

Also, the lower the educational attainment, the further one lives from the next EVCS.

fit2<-lmer(HubDist~WhiteRatio+BlackRatio+NativeRatio+AsianRatio+HawaiianPIRatio+OtherRatio+TwoPlusRatio+HispanicRatio+NH_WhiteRatio+EDU_NoHSRatio+EDU_HSRatio+EDU_SC_ASRatio+EDU_BachelorsRatio+(1+WhiteRatio|UniqueCounty), data=Homework2MAIN, REML=T,subset=complete.cases(Homework2MAIN))
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio +  
##     HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio +  
##     NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio +  
##     EDU_BachelorsRatio + (1 + WhiteRatio | UniqueCounty)
##    Data: Homework2MAIN
##  Subset: complete.cases(Homework2MAIN)
## 
## REML criterion at convergence: -42030.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.3531 -0.3674 -0.0840  0.2329 18.3374 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev. Corr 
##  UniqueCounty (Intercept) 0.284760 0.53363       
##               WhiteRatio  0.178910 0.42298  -0.87
##  Residual                 0.002039 0.04516       
## Number of obs: 13234, groups:  UniqueCounty, 312
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      0.373041   0.042425   8.793
## WhiteRatio      -0.070633   0.040013  -1.765
## BlackRatio      -0.093240   0.025209  -3.699
## NativeRatio      0.131656   0.041414   3.179
## AsianRatio      -0.086003   0.025022  -3.437
## HawaiianPIRatio -0.160211   0.055036  -2.911
## OtherRatio      -0.074179   0.025407  -2.920
## HispanicRatio   -0.062834   0.029657  -2.119
## NH_WhiteRatio    0.014136   0.030609   0.462
## EDU_NoHSRatio    0.094126   0.006463  14.563
## EDU_HSRatio      0.094093   0.006064  15.518
## EDU_SC_ASRatio   0.025522   0.005911   4.318
## 
## Correlation of Fixed Effects:
##             (Intr) WhitRt BlckRt NatvRt AsinRt HwnPIR OthrRt HspncR NH_WhR
## WhiteRatio  -0.660                                                        
## BlackRatio  -0.545  0.145                                                 
## NativeRatio -0.264  0.239  0.437                                          
## AsianRatio  -0.545  0.131  0.971  0.432                                   
## HawaiinPIRt -0.246  0.071  0.438  0.192  0.443                            
## OtherRatio  -0.134  0.593  0.261  0.348  0.255  0.131                     
## HispanicRat -0.334 -0.392  0.582  0.067  0.589  0.259 -0.600              
## NH_WhiteRat -0.334 -0.402  0.578  0.060  0.590  0.259 -0.594  0.988       
## EDU_NoHSRat  0.005 -0.071 -0.063 -0.105 -0.014 -0.035 -0.093 -0.051  0.057
## EDU_HSRatio -0.026 -0.016 -0.023 -0.072  0.014 -0.040 -0.009 -0.021  0.019
## EDU_SC_ASRt -0.069  0.044  0.047  0.005  0.105 -0.039  0.119 -0.051 -0.026
##             EDU_NH EDU_HS
## WhiteRatio               
## BlackRatio               
## NativeRatio              
## AsianRatio               
## HawaiinPIRt              
## OtherRatio               
## HispanicRat              
## NH_WhiteRat              
## EDU_NoHSRat              
## EDU_HSRatio -0.047       
## EDU_SC_ASRt  0.416 -0.137
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
#Random effects:
# Groups       Name        Variance Std.Dev. Corr 
# UniqueCounty (Intercept) 0.284760 0.53363       
#              WhiteRatio  0.178910 0.42298  -0.87
# Residual                 0.002039 0.04516  
#Significant individual level fixed effects:
#Fixed effects:
#                 Estimate Std. Error t value
#(Intercept)      0.373041   0.042425   8.793
#WhiteRatio      -0.070633   0.040013  -1.765*
#BlackRatio      -0.093240   0.025209  -3.699
#NativeRatio      0.131656   0.041414   3.179
#AsianRatio      -0.086003   0.025022  -3.437
#HawaiianPIRatio -0.160211   0.055036  -2.911
#OtherRatio      -0.074179   0.025407  -2.920
#HispanicRatio   -0.062834   0.029657  -2.119*
#NH_WhiteRatio    0.014136   0.030609   0.462*
#EDU_NoHSRatio    0.094126   0.006463  14.563
#EDU_HSRatio      0.094093   0.006064  15.518
#EDU_SC_ASRatio   0.025522   0.005911   4.318

#*= no real effect

The realtions disappeared for a high Caucasian residence ratio as well as NH White / Hispanic population - even though the Hispanic ratio t value was slightly above 2, I would drop it because the random slope of Caucasian ratio artificially raises this one and I would conceptually argue against that due to is natural negative correlation.

ICC for the first model:

library(sjstats)
icc(fit1)
## 
## Linear mixed model
## 
## Family : gaussian (identity)
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio + HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio + NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio + EDU_BachelorsRatio + (1 | UniqueCounty)
## 
##   ICC (UniqueCounty): 0.9721

ICC for the second model:

icc(fit2)
## Caution! ICC for random-slope-intercept models usually not meaningful. See 'Note' in `?icc`.
## 
## Linear mixed model
## 
## Family : gaussian (identity)
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio + HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio + NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio + EDU_BachelorsRatio + (1 + WhiteRatio | UniqueCounty)
## 
##   ICC (UniqueCounty): 0.9929

The second model with the random slope is a better fit.

So, let’s try to plot it:

require(lattice)
## Loading required package: lattice
countymeans<-ranef(fit2, condVar=T)
countymeans$UniqueCounty$`(Intercept)`<-countymeans$UniqueCounty$`(Intercept)`+fixef(fit2)[1]
dotplot(countymeans, main=T, ylab="Counties")
## $UniqueCounty