I am using the database that contains Electric Vehicle Charging Stations (EVCS), all continental U.S. census tracts, and the bub distances between the census tract centroids and EVCS. This one, I will not load or re-run in this Markdown because it is already loaded, and the computation takes hours to calculate.
#Libraries
library(haven);
library (car);
## Loading required package: carData
library(lmtest);
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(arm);
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is C:/Users/tobik/OneDrive/Documents/DEM7473 Hirarchical Models
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(lme4);
library(dbplyr)
My EVCS data set is called “hubdistance2”; here the headers of the 72373 census tracts used:
hubdistance2 = read.csv("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/hubdistance2.csv", na.strings=c("N", "(X)", "-", "**"))
names(hubdistance2)
## [1] "X" "STATEFP" "COUNTYFP" "TRACTCE" "GEOID" "NAME"
## [7] "NAMELSAD" "MTFCC" "FUNCSTAT" "ALAND" "AWATER" "INTPTLAT"
## [13] "INTPTLON" "Area" "HubName" "HubDist" "geometry"
Reducing the data to CA and TX only (13299 tracts):
hubdistCATX<-subset(hubdistance2, STATEFP == '06' | STATEFP == '48') # needed to use ' around California's STATEFP because it starts with a zero.
Loading the census tract data for California and Texas from the ACS 2016, 5 year estimates:
#ctdata1 = read.csv("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/ACS_16_5YR_S1702_with_ann.csv") #for future use
ctdata2 = read.csv("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/ACS_16_5YR_S2301_with_ann.csv",stringsAsFactors = T)
#ctdata3 = read.csv("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/ACS_16_5YR_S2502_with_ann.csv") #for future use
Removing the first line which explains the variables and has no real values
ctdata2 <- ctdata2[-c(1), ]
{r} #names(ctdata1) #names(ctdata2) #names(ctdata3) #Reducing the dataset to only the columns used
ctdata2<-ctdata2[c("GEO.id2","GEO.display.label","HC01_EST_VC01","HC01_MOE_VC01","HC02_EST_VC01","HC02_MOE_VC01","HC03_EST_VC01","HC03_MOE_VC01","HC04_EST_VC01","HC04_MOE_VC01","HC01_EST_VC15","HC01_MOE_VC15","HC02_EST_VC15","HC02_MOE_VC15","HC04_EST_VC15","HC04_MOE_VC15","HC01_EST_VC16","HC01_MOE_VC16","HC02_EST_VC16","HC02_MOE_VC16","HC04_EST_VC16","HC04_MOE_VC16","HC01_EST_VC17","HC01_MOE_VC17","HC02_EST_VC17","HC02_MOE_VC17","HC04_EST_VC17","HC04_MOE_VC17","HC01_EST_VC18","HC01_MOE_VC18","HC02_EST_VC18","HC02_MOE_VC18","HC04_EST_VC18","HC04_MOE_VC18","HC01_EST_VC19","HC01_MOE_VC19","HC02_EST_VC19","HC02_MOE_VC19","HC04_EST_VC19","HC04_MOE_VC19","HC01_EST_VC20","HC01_MOE_VC20","HC02_EST_VC20","HC02_MOE_VC20","HC04_EST_VC20","HC04_MOE_VC20","HC01_EST_VC21","HC01_MOE_VC21","HC02_EST_VC21","HC02_MOE_VC21","HC04_EST_VC21","HC04_MOE_VC21","HC01_EST_VC23","HC01_MOE_VC23","HC02_EST_VC23","HC02_MOE_VC23","HC04_EST_VC23","HC04_MOE_VC23","HC01_EST_VC24","HC01_MOE_VC24","HC02_EST_VC24","HC02_MOE_VC24","HC04_EST_VC24","HC04_MOE_VC24","HC01_EST_VC36","HC01_MOE_VC36","HC02_EST_VC36","HC02_MOE_VC36","HC04_EST_VC36","HC04_MOE_VC36","HC01_EST_VC37","HC01_MOE_VC37","HC02_EST_VC37","HC02_MOE_VC37","HC04_EST_VC37","HC04_MOE_VC37","HC01_EST_VC40","HC01_MOE_VC40","HC02_EST_VC40","HC02_MOE_VC40","HC04_EST_VC40","HC04_MOE_VC40","HC01_EST_VC43","HC01_MOE_VC43","HC01_EST_VC44","HC01_MOE_VC44","HC01_EST_VC45","HC01_MOE_VC45","HC01_EST_VC46","HC01_MOE_VC46","HC01_EST_VC47","HC01_MOE_VC47")]
Renaming variables to be used since the three files use column names that are the same.
#Employment total and by race and y/n Hispanic
colnames(ctdata2)[colnames(ctdata2)=="GEO.id2"] <- "GEOID" #needed to merge with hubdistCATX
colnames(ctdata2)[colnames(ctdata2)=="GEO.display.label"] <- "Tract_Name"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC01"] <- "Pop16+"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC01"] <- "Pop16+MoE" #MoE = Margin of Error
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC01"] <- "LaborForceRate16+"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC01"] <- "LaborForceRate16+MoE"
colnames(ctdata2)[colnames(ctdata2)=="HC03_EST_VC01"] <- "EmploymentRatio16+"
colnames(ctdata2)[colnames(ctdata2)=="HC03_MOE_VC01"] <- "EmploymentRatio16+MoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC01"] <- "UnemploymentRate16+"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC01"] <- "UnemploymentRate16+MoE"
#Following race without Hispanic/Non-Hispanic determination
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC15"] <- "White"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC15"] <- "WhiteMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC15"] <- "LFPWhite"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC15"] <- "LFPWhiteMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC15"] <- "UnemploymentWhite"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC15"] <- "UnemploymentWhiteMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC16"] <- "Black"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC16"] <- "BlackMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC16"] <- "LFPBlack"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC16"] <- "LFPBlackMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC16"] <- "UnemploymentBlack"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC16"] <- "UnemploymentBlackMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC17"] <- "Native"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC17"] <- "NativeMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC17"] <- "LFPNative"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC17"] <- "LFPNativeMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC17"] <- "UnemploymentNative"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC17"] <- "UnemploymentNativeMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC18"] <- "Asian"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC18"] <- "AsianMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC18"] <- "LFPAsian"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC18"] <- "LFPAsianMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC18"] <- "UnemploymentAsian"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC18"] <- "UnemploymentAsianMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC19"] <- "HawaiianPI"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC19"] <- "HawaiianPIMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC19"] <- "LFPHawaiianPI"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC19"] <- "LFPHawaiianPIMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC19"] <- "UnemploymentHawaiianPI"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC19"] <- "UnemploymentHawaiianPIMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC20"] <- "Other"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC20"] <- "OtherMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC20"] <- "LFPOther"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC20"] <- "LFPOtherMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC20"] <- "UnemploymentOther"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC20"] <- "UnemploymentOtherMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC21"] <- "TwoPlus"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC21"] <- "Two+MoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC21"] <- "LFPTwoPluslus"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC21"] <- "LFPTwoPlusMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC21"] <- "UnemploymentTwoPlus"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC21"] <- "UnemploymentTwoPlusMoE"
#Hispanic Identifiers - note: only NH_White and Hispanic data available
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC23"] <- "Hispanic"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC23"] <- "HispanicMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC23"] <- "LFPHispanic"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC23"] <- "LFPHispanicMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC23"] <- "UnemploymentHispanic"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC23"] <- "UnemploymentHispanicMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC24"] <- "NH_White"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC24"] <- "NH_WhiteMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC24"] <- "LFPNH_White"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC24"] <- "LFPNH_WhiteMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC24"] <- "UnemploymentNH_White"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC24"] <- "UnemploymentNH_WhiteMoE"
#Poverty past 12 months (NoPoverty includes "at poverty level")
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC36"] <- "PovertyTotal"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC36"] <- "PovertyTotalMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC36"] <- "LFPPoverty"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC36"] <- "LFPPovertyMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC36"] <- "UnemploymentPoverty"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC36"] <- "UnemploymentPovertyMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC37"] <- "NoPovertyTotal"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC37"] <- "NoPovertyTotalMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC37"] <- "LFPNoPoverty"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC37"] <- "LFPNoPovertyMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC37"] <- "UnemploymentNoPoverty"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC37"] <- "UnemploymentNoPovertyMoE"
#Disability
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC40"] <- "Disabled"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC40"] <- "DisabledMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC02_EST_VC40"] <- "LFPDisabled"
colnames(ctdata2)[colnames(ctdata2)=="HC02_MOE_VC40"] <- "LFPDisabledMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC04_EST_VC40"] <- "UnemploymentDisability"
colnames(ctdata2)[colnames(ctdata2)=="HC04_MOE_VC40"] <- "UnemploymentDisabilityMoE"
#Educational Attainment
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC43"] <- "EDUTotal"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC43"] <- "EDUTotalMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC44"] <- "EDU_NoHS"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC44"] <- "EDU_NoHSMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC45"] <- "EDU_HS"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC45"] <- "EDU_HSMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC46"] <- "EDU_SC_AS"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC46"] <- "EDU_SC_ASMoE"
colnames(ctdata2)[colnames(ctdata2)=="HC01_EST_VC47"] <- "EDU_Bachelors"
colnames(ctdata2)[colnames(ctdata2)=="HC01_MOE_VC47"] <- "EDU_BachelorsMoE"
Merging ctdata2 and hubdistCATX
NewHubDistCATX<-merge(ctdata2,hubdistCATX,by=c("GEOID"))
#I made sure all values are changed to numbers by savign NewHubDistCATX in Excel, so, here I reload and overwrite the other one:
library(readxl)
NewHubDistCATX<-read_excel("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/NewHubDistCATX2.xlsx")
Creating ratios in order to make things comparable
#Adding totals
NewHubDistCATX$TotalPop=(NewHubDistCATX$White + NewHubDistCATX$Black + NewHubDistCATX$Native + NewHubDistCATX$Asian + NewHubDistCATX$HawaiianPI + NewHubDistCATX$Other + NewHubDistCATX$TwoPlus)
#creating ratios - the "1+" is in there so I don't divide by zero - and I subtract it afterwards - and I checked, I get true zeros where they need to be
NewHubDistCATX$WhiteRatio=(((1+NewHubDistCATX$White)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$BlackRatio=(((1+NewHubDistCATX$Black)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$NativeRatio=(((1+NewHubDistCATX$Native)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$AsianRatio=(((1+NewHubDistCATX$Asian)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$HawaiianPIRatio=(((1+NewHubDistCATX$HawaiianPI)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$OtherRatio=(((1+NewHubDistCATX$Other)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$TwoPlusRatio=(((1+NewHubDistCATX$TwoPlus)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
#Hispanic vs. Non-Hispanic White
NewHubDistCATX$HispanicRatio=(((1+NewHubDistCATX$Hispanic)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
NewHubDistCATX$NH_WhiteRatio=(((1+NewHubDistCATX$NH_White)/NewHubDistCATX$TotalPop)-(1/NewHubDistCATX$TotalPop))
#Poverty relevant data is in ratios.
#Disability seems to have really low response rate and high margins of error, so, I ignore this one.
#Educational Attainment is in total responses, not ratios. Hence, I need to create rates as well.
NewHubDistCATX$EDU_NoHSRatio=(((1+NewHubDistCATX$EDU_NoHS)/NewHubDistCATX$EDUTotal)-(1/NewHubDistCATX$EDUTotal))
NewHubDistCATX$EDU_HSRatio=(((1+NewHubDistCATX$EDU_HS)/NewHubDistCATX$EDUTotal)-(1/NewHubDistCATX$EDUTotal))
NewHubDistCATX$EDU_SC_ASRatio=(((1+NewHubDistCATX$EDU_SC_AS)/NewHubDistCATX$EDUTotal)-(1/NewHubDistCATX$EDUTotal))
NewHubDistCATX$EDU_BachelorsRatio=(((1+NewHubDistCATX$EDU_Bachelors)/NewHubDistCATX$EDUTotal)-(1/NewHubDistCATX$EDUTotal))
Now, we have the following population ratios for the CA and TX census tracts:
WhiteRatio (without Hispanic indicator) BlackRatio NativeRatio AsianRatio HawaiianPIRatio OtherRatio TwoPlusRatio
HispanicRatio NH_WhiteRatio (Non-Hispanic)
EDU_NoHSRatio EDU_HSRatio EDU_SC_ASRatio EDU_BachelorsRatio
First, checking for variance
fit0<-lm(HubDist~factor(COUNTYFP), data=NewHubDistCATX)
anova(fit0)
## Analysis of Variance Table
##
## Response: HubDist
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(COUNTYFP) 253 158.579 0.62679 152.59 < 2.2e-16 ***
## Residuals 13045 53.583 0.00411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, the Random Intercept Model
fit1<-lmer(HubDist~1+(1|COUNTYFP), data=NewHubDistCATX, REML=T,subset=complete.cases(NewHubDistCATX))
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ 1 + (1 | COUNTYFP)
## Data: NewHubDistCATX
## Subset: complete.cases(NewHubDistCATX)
##
## REML criterion at convergence: -33891.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.5409 -0.3176 -0.1285 0.0930 16.9136
##
## Random effects:
## Groups Name Variance Std.Dev.
## COUNTYFP (Intercept) 0.07824 0.27972
## Residual 0.00409 0.06396
## Number of obs: 13234, groups: COUNTYFP, 254
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.34359 0.01767 19.44
Next, the variables included
fit2<-lmer(HubDist~WhiteRatio+BlackRatio+NativeRatio+AsianRatio+HawaiianPIRatio+OtherRatio+TwoPlusRatio+HispanicRatio+NH_WhiteRatio+EDU_NoHSRatio+EDU_HSRatio+EDU_SC_ASRatio+EDU_BachelorsRatio+(1|COUNTYFP), data=NewHubDistCATX, REML=T,subset=complete.cases(NewHubDistCATX))
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio +
## HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio +
## NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio +
## EDU_BachelorsRatio + (1 | COUNTYFP)
## Data: NewHubDistCATX
## Subset: complete.cases(NewHubDistCATX)
##
## REML criterion at convergence: -34851.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.8140 -0.3808 -0.0919 0.1955 17.0029
##
## Random effects:
## Groups Name Variance Std.Dev.
## COUNTYFP (Intercept) 0.075689 0.2751
## Residual 0.003782 0.0615
## Number of obs: 13234, groups: COUNTYFP, 254
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.1856594 0.0361464 5.136
## WhiteRatio 0.0732748 0.0316445 2.316
## BlackRatio 0.0605761 0.0324668 1.866
## NativeRatio 0.1861521 0.0494544 3.764
## AsianRatio 0.0558093 0.0324655 1.719
## HawaiianPIRatio -0.1370885 0.0734444 -1.867
## OtherRatio -0.0148104 0.0321139 -0.461
## HispanicRatio -0.0241609 0.0385559 -0.627
## NH_WhiteRatio 0.0463660 0.0396721 1.169
## EDU_NoHSRatio 0.0919198 0.0081652 11.257
## EDU_HSRatio 0.1814661 0.0077195 23.508
## EDU_SC_ASRatio -0.0005172 0.0076422 -0.068
##
## Correlation of Fixed Effects:
## (Intr) WhitRt BlckRt NatvRt AsinRt HwnPIR OthrRt HspncR NH_WhR
## WhiteRatio -0.205
## BlackRatio -0.859 0.236
## NativeRatio -0.470 0.321 0.542
## AsianRatio -0.864 0.232 0.978 0.537
## HawaiinPIRt -0.394 0.132 0.451 0.238 0.445
## OtherRatio -0.202 0.970 0.235 0.315 0.232 0.131
## HispanicRat -0.542 -0.604 0.617 0.187 0.617 0.268 -0.593
## NH_WhiteRat -0.543 -0.615 0.610 0.179 0.614 0.268 -0.591 0.990
## EDU_NoHSRat 0.000 -0.070 -0.075 -0.108 -0.036 -0.047 -0.097 -0.061 0.034
## EDU_HSRatio -0.001 -0.038 -0.076 -0.076 -0.038 -0.054 -0.032 -0.032 -0.002
## EDU_SC_ASRt -0.133 0.134 0.045 0.012 0.089 -0.054 0.127 -0.057 -0.042
## EDU_NH EDU_HS
## WhiteRatio
## BlackRatio
## NativeRatio
## AsianRatio
## HawaiinPIRt
## OtherRatio
## HispanicRat
## NH_WhiteRat
## EDU_NoHSRat
## EDU_HSRatio -0.117
## EDU_SC_ASRt 0.383 -0.177
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
The variancees: COUNTYFP (Intercept) 0.075689 0.2751
Residual 0.003782 0.0615
Most of my variance comes from the county level whilst only a small fraction of the variance comes from the census tract level.
Compare both using Likelihood ratio test
anova(fit1,fit2)
## refitting model(s) with ML (instead of REML)
## Data: NewHubDistCATX
## Subset: complete.cases(NewHubDistCATX)
## Models:
## fit1: HubDist ~ 1 + (1 | COUNTYFP)
## fit2: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio +
## fit2: HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio +
## fit2: NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio +
## fit2: EDU_BachelorsRatio + (1 | COUNTYFP)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit1 3 -33892 -33869 16949 -33898
## fit2 14 -34907 -34802 17468 -34935 1037.5 11 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Second one is a better fit.
ICC for the first model:
library(sjstats)
icc(fit1)
##
## Linear mixed model
##
## Family : gaussian (identity)
## Formula: HubDist ~ 1 + (1 | COUNTYFP)
##
## ICC (COUNTYFP): 0.9503
ICC for the second model:
icc(fit2)
##
## Linear mixed model
##
## Family : gaussian (identity)
## Formula: HubDist ~ WhiteRatio + BlackRatio + NativeRatio + AsianRatio + HawaiianPIRatio + OtherRatio + TwoPlusRatio + HispanicRatio + NH_WhiteRatio + EDU_NoHSRatio + EDU_HSRatio + EDU_SC_ASRatio + EDU_BachelorsRatio + (1 | COUNTYFP)
##
## ICC (COUNTYFP): 0.9524
The ICC including individual levels was larger than the ICC not including individual levels. Hence, individual characteristics had a significant influence on the outcome.
#rm(ctdata3)
#gc()
#write.csv(NewHubDistCATX,file= "C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/NewHubDistCATX2.csv")
#library("readxl")
#NewHubDistCATX<-read_excel("C:/Users/tobik/OneDrive/Documents/_Thesis/CATXCT/NewHubDistCATX2.xlsx")