Please do not cite without permission, since the results are under review. The results are presented here for reference only. Data will be made available upon request once the results are published. Questions to love.borjeson at gmail.com
This forest plot summarizes the full model that will be presented below.
But let's start from the top:
#clean slate...:
rm(list = ls())
library(lme4) #The main package we use; an alternative is 'nlme'.
library(plotly)#...and this one to generate interactive plots...
library(ggplot2)#...and this one to generate static plots...
library(ggExtra)#to adjust plots
library(bbmle) # to get delta AICs and BICs
library(sjPlot) # more plots... especially for coeffs for multilevel models...
library(lattice) #more plots
require(MuMIn) #To get Marginal and Conditional R2
require(AICcmodavg) #To get AICc
require(lmerTest) #To get the Satterthwaite- ...
require(pbkrtest) #... and Kenward-Roger approximations of p-values
#Get the data...
data1full=read.csv("data_type1F.csv")
#We will fit a model with nested random intercepts.
#Random effects: Agency (government agency), GD (general director ~ CEO) and Year.
#GD effects will be nested in Agency, and everything is crossed with Year.
#First we need to get rid of all NA's:
data2<-data1full[complete.cases(data1full),] #because when we compare competing models, they need to be based on data of the same size.
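#A quick sanity check (my addition, not part of the original workflow): how many rows did complete.cases() drop?
nrow(data1full) - nrow(data2) #number of rows removed because of NA's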
#Then run the models on the complete-case dataset, 'data2'..:
#Initial model...
initial.model = lmer(Spendingln ~ Tenure +
(1|Agency/GD) + (1|Year), data=data2)
#Summary
summary(initial.model)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: Spendingln ~ Tenure + (1 | Agency/GD) + (1 | Year)
## Data: data2
##
## REML criterion at convergence: 1133.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9122 -0.4490 -0.0090 0.5528 3.1990
##
## Random effects:
## Groups Name Variance Std.Dev.
## GD:Agency (Intercept) 0.5537 0.7441
## Agency (Intercept) 3.0774 1.7543
## Year (Intercept) 0.2281 0.4776
## Residual 0.8637 0.9294
## Number of obs: 335, groups: GD:Agency, 106; Agency, 49; Year, 9
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.68059 0.33278 53.88000 41.110 < 2e-16 ***
## TenureTenure_1 -0.04363 0.17657 242.43000 -0.247 0.805022
## TenureTenure_2 0.18614 0.18838 268.56000 0.988 0.323982
## TenureTenure_3 0.69516 0.20804 282.57000 3.341 0.000946 ***
## TenureTenure_4 0.67819 0.21928 281.32000 3.093 0.002182 **
## TenureTenure_5 0.32781 0.25072 279.15000 1.307 0.192124
## TenureTenure_6pl 0.12969 0.25258 190.61000 0.513 0.608227
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) TnrT_1 TnrT_2 TnrT_3 TnrT_4 TnrT_5
## TenureTnr_1 -0.243
## TenureTnr_2 -0.255 0.478
## TenureTnr_3 -0.248 0.436 0.500
## TenureTnr_4 -0.243 0.391 0.434 0.456
## TenureTnr_5 -0.221 0.356 0.383 0.402 0.419
## TenrTnr_6pl -0.249 0.348 0.399 0.388 0.431 0.435
#Standard p-values (asymptotic Wald tests based on the t-values). Highly problematic and theoretically not solid,
#plus they rely on assumptions that can make our life difficult (if not met).
#Nonetheless:
coeffs <- coef(summary(initial.model))
p <- pnorm(abs(coeffs[, "t value"]), lower.tail = FALSE) * 2
cbind(coeffs, "p value" = round(p,3))
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.68058834 0.3327801 53.88353 41.1099965 0.000000000
## TenureTenure_1 -0.04363444 0.1765696 242.42956 -0.2471231 0.805022076
## TenureTenure_2 0.18614273 0.1883804 268.56406 0.9881218 0.323982407
## TenureTenure_3 0.69516426 0.2080419 282.56535 3.3414630 0.000945620
## TenureTenure_4 0.67819485 0.2192765 281.32423 3.0928751 0.002181508
## TenureTenure_5 0.32780963 0.2507176 279.14666 1.3074853 0.192123779
## TenureTenure_6pl 0.12968956 0.2525813 190.61221 0.5134567 0.608226556
## p value
## (Intercept) 0.000
## TenureTenure_1 0.805
## TenureTenure_2 0.323
## TenureTenure_3 0.001
## TenureTenure_4 0.002
## TenureTenure_5 0.191
## TenureTenure_6pl 0.608
#Much better, p-values by Satterthwaite- and Kenward-Roger approximations:
#save the coefs to a dataframe
coefsIni <- data.frame(coef(summary(initial.model)))
#get Satterthwaite-approximated degrees of freedom
coefsIni$df.Satt <- coef(summary(initial.model))[, 3]
# get approximate p-values
coefsIni$p.Satt <- coef(summary(initial.model))[, 5]
coefsIni
## Estimate Std..Error df t.value Pr...t..
## (Intercept) 13.68058834 0.3327801 53.88353 41.1099965 0.000000000
## TenureTenure_1 -0.04363444 0.1765696 242.42956 -0.2471231 0.805022076
## TenureTenure_2 0.18614273 0.1883804 268.56406 0.9881218 0.323982407
## TenureTenure_3 0.69516426 0.2080419 282.56535 3.3414630 0.000945620
## TenureTenure_4 0.67819485 0.2192765 281.32423 3.0928751 0.002181508
## TenureTenure_5 0.32780963 0.2507176 279.14666 1.3074853 0.192123779
## TenureTenure_6pl 0.12968956 0.2525813 190.61221 0.5134567 0.608226556
## df.Satt p.Satt
## (Intercept) 53.88353 0.000000000
## TenureTenure_1 242.42956 0.805022076
## TenureTenure_2 268.56406 0.323982407
## TenureTenure_3 282.56535 0.000945620
## TenureTenure_4 281.32423 0.002181508
## TenureTenure_5 279.14666 0.192123779
## TenureTenure_6pl 190.61221 0.608226556
#Let's add Kenward-Roger-approximated p-values:
#get Kenward-Roger approximated degrees of freedom
df.KRIni <- get_ddf_Lb(initial.model, fixef(initial.model))
# get p-values from the t-distribution using the t-values and approximated degrees of freedom..:
coefsIni$p.KR <- 2 * (1 - pt(abs(coefsIni$t.value), df.KRIni))
coefsIni #and there we go!
## Estimate Std..Error df t.value Pr...t..
## (Intercept) 13.68058834 0.3327801 53.88353 41.1099965 0.000000000
## TenureTenure_1 -0.04363444 0.1765696 242.42956 -0.2471231 0.805022076
## TenureTenure_2 0.18614273 0.1883804 268.56406 0.9881218 0.323982407
## TenureTenure_3 0.69516426 0.2080419 282.56535 3.3414630 0.000945620
## TenureTenure_4 0.67819485 0.2192765 281.32423 3.0928751 0.002181508
## TenureTenure_5 0.32780963 0.2507176 279.14666 1.3074853 0.192123779
## TenureTenure_6pl 0.12968956 0.2525813 190.61221 0.5134567 0.608226556
## df.Satt p.Satt p.KR
## (Intercept) 53.88353 0.000000000 0.000000000
## TenureTenure_1 242.42956 0.805022076 0.805773659
## TenureTenure_2 268.56406 0.323982407 0.327610692
## TenureTenure_3 282.56535 0.000945620 0.001538190
## TenureTenure_4 281.32423 0.002181508 0.003168176
## TenureTenure_5 279.14666 0.192123779 0.196726649
## TenureTenure_6pl 190.61221 0.608226556 0.609779132
rand(initial.model)# to measure the significance of the random effects, using a likelihood ratio test
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## Agency/GD 316.0 2 <2e-16 ***
## Year 19.9 1 8e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#To get the estimated means and the CIs:
lsmeansLT(initial.model, test.effs=NULL, method.grad="Richardson", ddf="Kenward-Roger")
## Least Squares Means table:
## Tenure Estimate Standard Error DF t-value Lower CI
## Tenure Tenure_0 1.0 13.681 0.333 57.0 41.070 13.0
## Tenure Tenure_1 2.0 13.637 0.337 59.9 40.460 13.0
## Tenure Tenure_2 3.0 13.867 0.338 60.8 40.990 13.2
## Tenure Tenure_3 4.0 14.376 0.346 66.4 41.490 13.7
## Tenure Tenure_4 5.0 14.359 0.352 70.4 40.840 13.7
## Tenure Tenure_5 6.0 14.008 0.370 85.0 37.840 13.3
## Tenure Tenure_6pl 7.0 13.810 0.365 79.3 37.810 13.1
## Upper CI p-value
## Tenure Tenure_0 14.3 <2e-16 ***
## Tenure Tenure_1 14.3 <2e-16 ***
## Tenure Tenure_2 14.5 <2e-16 ***
## Tenure Tenure_3 15.1 <2e-16 ***
## Tenure Tenure_4 15.1 <2e-16 ***
## Tenure Tenure_5 14.7 <2e-16 ***
## Tenure Tenure_6pl 14.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
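#As an aside (an alternative I'm adding here, assuming the 'emmeans' package is installed):
#the same Kenward-Roger-based estimated marginal means can be obtained with emmeans,
#which can be handy if lsmeansLT is unavailable in your lmerTest version.
library(emmeans)
emmeans(initial.model, ~ Tenure, lmer.df = "kenward-roger")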
#Full model...add control variables
full.model = lmer(Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp + Gender + COFOG + PhD + Business_ed + Age +
(1|Agency/GD) + (1|Year), data=data2)
#save the coefs to a data frame
coefsF <- data.frame(coef(summary(full.model)))
#get Satterthwaite-approximated degrees of freedom
coefsF$df.Satt <- coef(summary(full.model))[, 3]
# get approximate p-values
coefsF$p.Satt <- coef(summary(full.model))[, 5]
coefsF
## Estimate Std..Error df
## (Intercept) 0.799150476 3.26524746 69.31484
## Incomeln 0.566919460 0.15413129 65.05975
## TenureTenure_1 -0.049609057 0.17833357 246.96862
## TenureTenure_2 0.125395758 0.19351192 277.59629
## TenureTenure_3 0.641399199 0.21839199 283.09347
## TenureTenure_4 0.692751155 0.23807874 268.66676
## TenureTenure_5 0.301861592 0.27137518 256.77746
## TenureTenure_6pl 0.107398888 0.28858808 156.24702
## Former_CE_exp_expFormer_CE_exp 0.577913811 0.25395083 55.61298
## GenderMale 0.239620788 0.24987906 55.78052
## COFOGCOFOG_02 1.452309272 0.91025679 38.76889
## COFOGCOFOG_03 -0.463877974 0.88012526 36.65458
## COFOGCOFOG_04 0.925665177 0.59591061 40.24753
## COFOGCOFOG_05 0.491029972 0.84656251 35.33307
## COFOGCOFOG_09 -0.403387015 0.97415868 39.33026
## COFOGCOFOG_10 0.792534010 0.83669243 39.95346
## PhDPhD 0.212339842 0.33228296 51.13817
## Business_edBusiness_ed -0.246690822 0.28538083 87.60860
## Age 0.004040217 0.02177427 75.75493
## t.value Pr...t.. df.Satt
## (Intercept) 0.2447442 0.8073775661 69.31484
## Incomeln 3.6781595 0.0004782737 65.05975
## TenureTenure_1 -0.2781813 0.7811059793 246.96862
## TenureTenure_2 0.6480002 0.5175204238 277.59629
## TenureTenure_3 2.9369173 0.0035873021 283.09347
## TenureTenure_4 2.9097564 0.0039198979 268.66676
## TenureTenure_5 1.1123405 0.2670320925 256.77746
## TenureTenure_6pl 0.3721529 0.7102832802 156.24702
## Former_CE_exp_expFormer_CE_exp 2.2756918 0.0267379199 55.61298
## GenderMale 0.9589471 0.3417236318 55.78052
## COFOGCOFOG_02 1.5954940 0.1187225059 38.76889
## COFOGCOFOG_03 -0.5270590 0.6013281944 36.65458
## COFOGCOFOG_04 1.5533625 0.1281656291 40.24753
## COFOGCOFOG_05 0.5800280 0.5655765082 35.33307
## COFOGCOFOG_09 -0.4140876 0.6810595973 39.33026
## COFOGCOFOG_10 0.9472226 0.3492195063 39.95346
## PhDPhD 0.6390332 0.5256534590 51.13817
## Business_edBusiness_ed -0.8644268 0.3897141801 87.60860
## Age 0.1855500 0.8532935461 75.75493
## p.Satt
## (Intercept) 0.8073775661
## Incomeln 0.0004782737
## TenureTenure_1 0.7811059793
## TenureTenure_2 0.5175204238
## TenureTenure_3 0.0035873021
## TenureTenure_4 0.0039198979
## TenureTenure_5 0.2670320925
## TenureTenure_6pl 0.7102832802
## Former_CE_exp_expFormer_CE_exp 0.0267379199
## GenderMale 0.3417236318
## COFOGCOFOG_02 0.1187225059
## COFOGCOFOG_03 0.6013281944
## COFOGCOFOG_04 0.1281656291
## COFOGCOFOG_05 0.5655765082
## COFOGCOFOG_09 0.6810595973
## COFOGCOFOG_10 0.3492195063
## PhDPhD 0.5256534590
## Business_edBusiness_ed 0.3897141801
## Age 0.8532935461
#get Kenward-Roger approximated degrees of freedom
df.KRF <- get_ddf_Lb(full.model, fixef(full.model))
# get p-values from the t-distribution using the t-values and approximated degrees of freedom..:
coefsF$p.KR <- 2 * (1 - pt(abs(coefsF$t.value), df.KRF))
coefsF #and there we go!
## Estimate Std..Error df
## (Intercept) 0.799150476 3.26524746 69.31484
## Incomeln 0.566919460 0.15413129 65.05975
## TenureTenure_1 -0.049609057 0.17833357 246.96862
## TenureTenure_2 0.125395758 0.19351192 277.59629
## TenureTenure_3 0.641399199 0.21839199 283.09347
## TenureTenure_4 0.692751155 0.23807874 268.66676
## TenureTenure_5 0.301861592 0.27137518 256.77746
## TenureTenure_6pl 0.107398888 0.28858808 156.24702
## Former_CE_exp_expFormer_CE_exp 0.577913811 0.25395083 55.61298
## GenderMale 0.239620788 0.24987906 55.78052
## COFOGCOFOG_02 1.452309272 0.91025679 38.76889
## COFOGCOFOG_03 -0.463877974 0.88012526 36.65458
## COFOGCOFOG_04 0.925665177 0.59591061 40.24753
## COFOGCOFOG_05 0.491029972 0.84656251 35.33307
## COFOGCOFOG_09 -0.403387015 0.97415868 39.33026
## COFOGCOFOG_10 0.792534010 0.83669243 39.95346
## PhDPhD 0.212339842 0.33228296 51.13817
## Business_edBusiness_ed -0.246690822 0.28538083 87.60860
## Age 0.004040217 0.02177427 75.75493
## t.value Pr...t.. df.Satt
## (Intercept) 0.2447442 0.8073775661 69.31484
## Incomeln 3.6781595 0.0004782737 65.05975
## TenureTenure_1 -0.2781813 0.7811059793 246.96862
## TenureTenure_2 0.6480002 0.5175204238 277.59629
## TenureTenure_3 2.9369173 0.0035873021 283.09347
## TenureTenure_4 2.9097564 0.0039198979 268.66676
## TenureTenure_5 1.1123405 0.2670320925 256.77746
## TenureTenure_6pl 0.3721529 0.7102832802 156.24702
## Former_CE_exp_expFormer_CE_exp 2.2756918 0.0267379199 55.61298
## GenderMale 0.9589471 0.3417236318 55.78052
## COFOGCOFOG_02 1.5954940 0.1187225059 38.76889
## COFOGCOFOG_03 -0.5270590 0.6013281944 36.65458
## COFOGCOFOG_04 1.5533625 0.1281656291 40.24753
## COFOGCOFOG_05 0.5800280 0.5655765082 35.33307
## COFOGCOFOG_09 -0.4140876 0.6810595973 39.33026
## COFOGCOFOG_10 0.9472226 0.3492195063 39.95346
## PhDPhD 0.6390332 0.5256534590 51.13817
## Business_edBusiness_ed -0.8644268 0.3897141801 87.60860
## Age 0.1855500 0.8532935461 75.75493
## p.Satt p.KR
## (Intercept) 0.8073775661 0.8074535696
## Incomeln 0.0004782737 0.0004896401
## TenureTenure_1 0.7811059793 0.7817884079
## TenureTenure_2 0.5175204238 0.5193504531
## TenureTenure_3 0.0035873021 0.0046311383
## TenureTenure_4 0.0039198979 0.0049999390
## TenureTenure_5 0.2670320925 0.2702379170
## TenureTenure_6pl 0.7102832802 0.7110337280
## Former_CE_exp_expFormer_CE_exp 0.0267379199 0.0262911688
## GenderMale 0.3417236318 0.3412681920
## COFOGCOFOG_02 0.1187225059 0.1156296757
## COFOGCOFOG_03 0.6013281944 0.6000122477
## COFOGCOFOG_04 0.1281656291 0.1253692837
## COFOGCOFOG_05 0.5655765082 0.5639740706
## COFOGCOFOG_09 0.6810595973 0.6802229073
## COFOGCOFOG_10 0.3492195063 0.3471612662
## PhDPhD 0.5256534590 0.5251287143
## Business_edBusiness_ed 0.3897141801 0.3906481292
## Age 0.8532935461 0.8533964954
rand(full.model)# to measure the significance of the random effects, using a likelihood ratio test
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## Agency/GD 214.0 2 <2e-16 ***
## Year 19.8 1 8e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#To get the estimated means and the CIs:
lsmeansLT(full.model, test.effs=NULL, method.grad="Richardson", ddf="Kenward-Roger")
## Least Squares Means table:
## Tenure Former_CE_exp_exp Gender COFOG
## Tenure Tenure_0 1.0 NA NA NA
## Tenure Tenure_1 2.0 NA NA NA
## Tenure Tenure_2 3.0 NA NA NA
## Tenure Tenure_3 4.0 NA NA NA
## Tenure Tenure_4 5.0 NA NA NA
## Tenure Tenure_5 6.0 NA NA NA
## Tenure Tenure_6pl 7.0 NA NA NA
## Former_CE_exp_exp ano_Former_CE_exp NA 1.0 NA NA
## Former_CE_exp_exp Former_CE_exp NA 2.0 NA NA
## Gender Female NA NA 1.0 NA
## Gender Male NA NA 2.0 NA
## COFOG COFOG_01 NA NA NA 1.0
## COFOG COFOG_02 NA NA NA 2.0
## COFOG COFOG_03 NA NA NA 3.0
## COFOG COFOG_04 NA NA NA 4.0
## COFOG COFOG_05 NA NA NA 5.0
## COFOG COFOG_09 NA NA NA 6.0
## COFOG COFOG_10 NA NA NA 7.0
## PhD ano_PhD NA NA NA NA
## PhD PhD NA NA NA NA
## Business_ed ano_Business_ed NA NA NA NA
## Business_ed Business_ed NA NA NA NA
## PhD Business_ed Estimate
## Tenure Tenure_0 NA NA 13.7
## Tenure Tenure_1 NA NA 13.7
## Tenure Tenure_2 NA NA 13.8
## Tenure Tenure_3 NA NA 14.3
## Tenure Tenure_4 NA NA 14.4
## Tenure Tenure_5 NA NA 14.0
## Tenure Tenure_6pl NA NA 13.8
## Former_CE_exp_exp ano_Former_CE_exp NA NA 13.7
## Former_CE_exp_exp Former_CE_exp NA NA 14.3
## Gender Female NA NA 13.8
## Gender Male NA NA 14.1
## COFOG COFOG_01 NA NA 13.6
## COFOG COFOG_02 NA NA 15.0
## COFOG COFOG_03 NA NA 13.1
## COFOG COFOG_04 NA NA 14.5
## COFOG COFOG_05 NA NA 14.1
## COFOG COFOG_09 NA NA 13.2
## COFOG COFOG_10 NA NA 14.4
## PhD ano_PhD 1.0 NA 13.9
## PhD PhD 2.0 NA 14.1
## Business_ed ano_Business_ed NA 1.0 14.1
## Business_ed Business_ed NA 2.0 13.8
## Standard Error DF t-value Lower CI
## Tenure Tenure_0 0.342 59.3 40.0 13.0
## Tenure Tenure_1 0.345 62.0 39.5 13.0
## Tenure Tenure_2 0.347 62.8 39.8 13.1
## Tenure Tenure_3 0.354 68.3 40.5 13.6
## Tenure Tenure_4 0.361 73.5 39.9 13.7
## Tenure Tenure_5 0.380 86.7 36.9 13.2
## Tenure Tenure_6pl 0.384 84.6 36.0 13.0
## Former_CE_exp_exp ano_Former_CE_exp 0.342 56.6 39.9 13.0
## Former_CE_exp_exp Former_CE_exp 0.351 62.0 40.6 13.5
## Gender Female 0.359 65.3 38.6 13.1
## Gender Male 0.333 52.5 42.3 13.4
## COFOG COFOG_01 0.459 54.3 29.5 12.6
## COFOG COFOG_02 0.796 41.9 18.9 13.4
## COFOG COFOG_03 0.786 41.9 16.7 11.5
## COFOG COFOG_04 0.474 57.6 30.5 13.5
## COFOG COFOG_05 0.767 40.7 18.3 12.5
## COFOG COFOG_09 0.905 43.1 14.5 11.3
## COFOG COFOG_10 0.735 47.2 19.5 12.9
## PhD ano_PhD 0.319 46.7 43.4 13.2
## PhD PhD 0.403 77.9 34.9 13.3
## Business_ed ano_Business_ed 0.336 53.4 41.9 13.4
## Business_ed Business_ed 0.370 68.8 37.4 13.1
## Upper CI p-value
## Tenure Tenure_0 14.4 <2e-16 ***
## Tenure Tenure_1 14.3 <2e-16 ***
## Tenure Tenure_2 14.5 <2e-16 ***
## Tenure Tenure_3 15.1 <2e-16 ***
## Tenure Tenure_4 15.1 <2e-16 ***
## Tenure Tenure_5 14.8 <2e-16 ***
## Tenure Tenure_6pl 14.6 <2e-16 ***
## Former_CE_exp_exp ano_Former_CE_exp 14.4 <2e-16 ***
## Former_CE_exp_exp Former_CE_exp 15.0 <2e-16 ***
## Gender Female 14.6 <2e-16 ***
## Gender Male 14.7 <2e-16 ***
## COFOG COFOG_01 14.5 <2e-16 ***
## COFOG COFOG_02 16.6 <2e-16 ***
## COFOG COFOG_03 14.7 <2e-16 ***
## COFOG COFOG_04 15.4 <2e-16 ***
## COFOG COFOG_05 15.6 <2e-16 ***
## COFOG COFOG_09 15.0 <2e-16 ***
## COFOG COFOG_10 15.8 <2e-16 ***
## PhD ano_PhD 14.5 <2e-16 ***
## PhD PhD 14.9 <2e-16 ***
## Business_ed ano_Business_ed 14.8 <2e-16 ***
## Business_ed Business_ed 14.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#should we wish to save this, a data matrix will do the job, e.g.:
qqq <- data.matrix(lsmeansLT(full.model, test.effs=NULL, method.grad="Richardson", ddf="Kenward-Roger"))
write.csv(qqq, "qqq.csv")
#Parsimonious model, guided by p-values in the full model...
pars.model = lmer(Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp +
(1|Agency/GD) + (1|Year), data=data2)
#save coefs to a data frame
coefsP <- data.frame(coef(summary(pars.model)))
#get Satterthwaite-approximated degrees of freedom
coefsP$df.Satt <- coef(summary(pars.model))[, 3]
#get approximate p-values
coefsP$p.Satt <- coef(summary(pars.model))[, 5]
coefsP
## Estimate Std..Error df t.value
## (Intercept) -0.61168205 2.8194808 65.82661 -0.2169485
## Incomeln 0.66979460 0.1333039 65.83007 5.0245688
## TenureTenure_1 -0.03127041 0.1767809 244.77671 -0.1768879
## TenureTenure_2 0.15352455 0.1884581 270.35709 0.8146350
## TenureTenure_3 0.68923049 0.2079445 282.94817 3.3144921
## TenureTenure_4 0.75836406 0.2196483 284.41619 3.4526295
## TenureTenure_5 0.37475665 0.2506041 281.48942 1.4954130
## TenureTenure_6pl 0.16332171 0.2516351 202.69193 0.6490419
## Former_CE_exp_expFormer_CE_exp 0.48332236 0.2401881 65.07354 2.0122662
## Pr...t.. df.Satt p.Satt
## (Intercept) 8.289195e-01 65.82661 8.289195e-01
## Incomeln 4.110628e-06 65.83007 4.110628e-06
## TenureTenure_1 8.597427e-01 244.77671 8.597427e-01
## TenureTenure_2 4.159983e-01 270.35709 4.159983e-01
## TenureTenure_3 1.037744e-03 282.94817 1.037744e-03
## TenureTenure_4 6.395576e-04 284.41619 6.395576e-04
## TenureTenure_5 1.359269e-01 281.48942 1.359269e-01
## TenureTenure_6pl 5.170461e-01 202.69193 5.170461e-01
## Former_CE_exp_expFormer_CE_exp 4.833462e-02 65.07354 4.833462e-02
#get Kenward-Roger approximated degrees of freedom
df.KRP <- get_ddf_Lb(pars.model, fixef(pars.model))
# get p-values from the t-distribution using the t-values and approximated degrees of freedom..:
coefsP$p.KR <- 2 * (1 - pt(abs(coefsP$t.value), df.KRP))
coefsP #and there we go!
## Estimate Std..Error df t.value
## (Intercept) -0.61168205 2.8194808 65.82661 -0.2169485
## Incomeln 0.66979460 0.1333039 65.83007 5.0245688
## TenureTenure_1 -0.03127041 0.1767809 244.77671 -0.1768879
## TenureTenure_2 0.15352455 0.1884581 270.35709 0.8146350
## TenureTenure_3 0.68923049 0.2079445 282.94817 3.3144921
## TenureTenure_4 0.75836406 0.2196483 284.41619 3.4526295
## TenureTenure_5 0.37475665 0.2506041 281.48942 1.4954130
## TenureTenure_6pl 0.16332171 0.2516351 202.69193 0.6490419
## Former_CE_exp_expFormer_CE_exp 0.48332236 0.2401881 65.07354 2.0122662
## Pr...t.. df.Satt p.Satt
## (Intercept) 8.289195e-01 65.82661 8.289195e-01
## Incomeln 4.110628e-06 65.83007 4.110628e-06
## TenureTenure_1 8.597427e-01 244.77671 8.597427e-01
## TenureTenure_2 4.159983e-01 270.35709 4.159983e-01
## TenureTenure_3 1.037744e-03 282.94817 1.037744e-03
## TenureTenure_4 6.395576e-04 284.41619 6.395576e-04
## TenureTenure_5 1.359269e-01 281.48942 1.359269e-01
## TenureTenure_6pl 5.170461e-01 202.69193 5.170461e-01
## Former_CE_exp_expFormer_CE_exp 4.833462e-02 65.07354 4.833462e-02
## p.KR
## (Intercept) 8.288412e-01
## Incomeln 3.356895e-06
## TenureTenure_1 8.600762e-01
## TenureTenure_2 4.178765e-01
## TenureTenure_3 1.419566e-03
## TenureTenure_4 9.184214e-04
## TenureTenure_5 1.390289e-01
## TenureTenure_6pl 5.183063e-01
## Former_CE_exp_expFormer_CE_exp 4.780555e-02
rand(pars.model)# to measure the significance of the random effects, using a likelihood ratio test
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## Agency/GD 234.5 2 <2e-16 ***
## Year 18.5 1 2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#To get the estimated means and the CIs:
lsmeansLT(pars.model, test.effs=NULL, method.grad="Richardson", ddf="Kenward-Roger")
## Least Squares Means table:
## Tenure Former_CE_exp_exp Estimate
## Tenure Tenure_0 1.0 NA 13.675
## Tenure Tenure_1 2.0 NA 13.644
## Tenure Tenure_2 3.0 NA 13.829
## Tenure Tenure_3 4.0 NA 14.364
## Tenure Tenure_4 5.0 NA 14.434
## Tenure Tenure_5 6.0 NA 14.050
## Tenure Tenure_6pl 7.0 NA 13.839
## Former_CE_exp_exp ano_Former_CE_exp NA 1.0 13.735
## Former_CE_exp_exp Former_CE_exp NA 2.0 14.218
## Standard Error DF t-value Lower CI
## Tenure Tenure_0 0.287 50.2 47.7 13.1
## Tenure Tenure_1 0.292 54.0 46.8 13.1
## Tenure Tenure_2 0.293 55.3 47.1 13.2
## Tenure Tenure_3 0.303 62.3 47.4 13.8
## Tenure Tenure_4 0.309 67.2 46.8 13.8
## Tenure Tenure_5 0.329 84.7 42.7 13.4
## Tenure Tenure_6pl 0.324 78.0 42.7 13.2
## Former_CE_exp_exp ano_Former_CE_exp 0.279 44.4 49.2 13.2
## Former_CE_exp_exp Former_CE_exp 0.303 58.4 46.9 13.6
## Upper CI p-value
## Tenure Tenure_0 14.3 <2e-16 ***
## Tenure Tenure_1 14.2 <2e-16 ***
## Tenure Tenure_2 14.4 <2e-16 ***
## Tenure Tenure_3 15.0 <2e-16 ***
## Tenure Tenure_4 15.0 <2e-16 ***
## Tenure Tenure_5 14.7 <2e-16 ***
## Tenure Tenure_6pl 14.5 <2e-16 ***
## Former_CE_exp_exp ano_Former_CE_exp 14.3 <2e-16 ***
## Former_CE_exp_exp Former_CE_exp 14.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#To get the stuff we've got so far out of R:
write.csv(coefsP, "coefsP.csv")
parsfit <- data.frame(fitted(pars.model))
write.csv(parsfit, "fitted_pars.csv")
fixedef <- data.frame(fixef(pars.model,add.dropped=TRUE))
write.csv(fixedef, "fixedef.csv")
Since p-values are disputed, here are some references:
Luke, S. (2016). Evaluating significance in linear mixed-effects models in R. Behavior Research Methods, 1-9.
Kenward, M. G., & Roger, J. H. (1997). Small sample inference for fixed effects from restricted maximum likelihood. Biometrics, 53, 983-997.
Spilke, J., Piepho, H.-P., & Hu, X. (2005). A simulation study on tests of hypotheses and confidence intervals for fixed effects in mixed models for blocked experiments with missing data. Journal of Agricultural, Biological, and Environmental Statistics, 10, 374-389.
Alnosaier, W. (2007). Kenward-Roger approximate F test for fixed effects in mixed linear models. Dissertation, Oregon State University.
All in all, the Kenward-Roger approximation seems to be preferred. Or some bootstrap or simulation technique...
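As a hedged aside of my own: lme4 itself can produce parametric-bootstrap confidence intervals for the fixed effects, which sidesteps the degrees-of-freedom debate altogether (slow; the number of simulations below is just an example):
confint(pars.model, method = "boot", nsim = 500) #parametric bootstrap CIs; increase nsim for more stable intervals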
#Let's have the models compete:
AIC(initial.model,full.model,pars.model) #lower = better. AIC is a good criterion for selecting models with good predictive performance.
## df AIC
## initial.model 11 1155.270
## full.model 23 1150.521
## pars.model 13 1138.870
bbmle::AICtab(initial.model,full.model,pars.model) #Uses the bbmle package (loaded above); gives delta AIC
## dAIC df
## pars.model 0.0 13
## full.model 11.7 23
## initial.model 16.4 11
#Corrected AIC's, more robust for small samples
AICc(initial.model)
## [1] 1156.088
AICc(full.model)
## [1] 1154.071
AICc(pars.model)
## [1] 1140.004
BIC(initial.model,full.model,pars.model) #lower = better. BIC usually favors simpler models.
## df BIC
## initial.model 11 1197.226
## full.model 23 1238.246
## pars.model 13 1188.454
bbmle::BICtab(initial.model,full.model,pars.model) #gives delta BIC
## dBIC df
## pars.model 0.0 13
## initial.model 8.8 11
## full.model 49.8 23
logLik(initial.model,REML=FALSE) #Higher (i.e., closer to zero) =better
## 'log Lik.' -561.8062 (df=11)
logLik(full.model,REML=FALSE)
## 'log Lik.' -545.4416 (df=23)
logLik(pars.model,REML=FALSE)
## 'log Lik.' -549.795 (df=13)
deviance(initial.model,REML=FALSE) #lower=better
## [1] 1123.612
deviance(full.model,REML=FALSE)
## [1] 1090.883
deviance(pars.model,REML=FALSE)
## [1] 1099.59
#You could easily (or rather, with great effort) write a PhD on which criterion is best...
#The parsimonious model nonetheless seems to come out as the winner.
#Export our favoured models' coeffs to a csv..:
write.csv(coefsP, "coeffs_pars.csv")
write.csv(coefsIni,"coeffs_ini.csv")
write.csv(coefsF,"coeffs_full.csv")
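#The forest plot referred to at the very top of this document can be reproduced with sjPlot
#(loaded above); a minimal sketch, assuming a sjPlot version that provides plot_model():
plot_model(full.model, show.values = TRUE, value.offset = 0.3) #forest plot of the fixed-effect estimates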
#Complementary significance tests for our favoured model (should perhaps be done for the initial and full models as well).
#First the alternative models (the same as the favoured models above, but with 'REML=FALSE' added, thus fitted with ML).
#Why?
#Answer from CrossValidated (Robert Long):
#ML is biased for the estimation of variance components,
#but note that the bias gets smaller for larger sample sizes.
#Hence, in answer to "Under what circumstances may REML be preferred over ML (or vice versa) when fitting a mixed effects model?":
#for small sample sizes REML is preferred.
#However, likelihood ratio tests for REML *****require exactly the same fixed effects specification***** in both models.
#So, to compare models with different fixed effects (a common scenario) with an LR test, ML must be used.
#[Since, by design, we hold out the one fixed effect we're interested in - my addition]
#Null models: one for each variable we remove (i.e. test)...
#For the pars.model
parsALT.model = lmer(Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp +
(1|Agency/GD) + (1|Year), data=data2, REML=FALSE) #Everything included...
parsNULL_ten.model = lmer(Spendingln ~ Incomeln + Former_CE_exp_exp +
(1|Agency/GD) + (1|Year), data=data2, REML=FALSE)
parsNULL_inc.model = lmer(Spendingln ~ Tenure + Former_CE_exp_exp +
(1|Agency/GD) + (1|Year), data=data2, REML=FALSE)
parsNULL_form.model = lmer(Spendingln ~ Incomeln + Tenure +
(1|Agency/GD) + (1|Year), data=data2, REML=FALSE)
#The output below says ANOVA, but these are actually likelihood ratio tests
#The tests for the pars.model variables:
anova(parsNULL_ten.model,parsALT.model) #gives us three stars
## Data: data2
## Models:
## object: Spendingln ~ Incomeln + Former_CE_exp_exp + (1 | Agency/GD) +
## object: (1 | Year)
## ..1: Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp + (1 | Agency/GD) +
## ..1: (1 | Year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 7 1136.4 1163.1 -561.20 1122.4
## ..1 13 1125.6 1175.2 -549.79 1099.6 22.814 6 0.000861 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(parsNULL_inc.model,parsALT.model) #gives us three stars
## Data: data2
## Models:
## object: Spendingln ~ Tenure + Former_CE_exp_exp + (1 | Agency/GD) + (1 |
## object: Year)
## ..1: Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp + (1 | Agency/GD) +
## ..1: (1 | Year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 12 1143.5 1189.3 -559.74 1119.5
## ..1 13 1125.6 1175.2 -549.79 1099.6 19.904 1 8.145e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(parsNULL_form.model,parsALT.model) #gives us one star
## Data: data2
## Models:
## object: Spendingln ~ Incomeln + Tenure + (1 | Agency/GD) + (1 | Year)
## ..1: Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp + (1 | Agency/GD) +
## ..1: (1 | Year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 12 1127.7 1173.4 -551.84 1103.7
## ..1 13 1125.6 1175.2 -549.79 1099.6 4.0898 1 0.04314 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#The likelihood ratio tests can be reported separately in my opinion.
#Then the bootstrap tests, (and now we're talking)
#using bobyqa optimization (Bound Optimization BY Quadratic Approximation)
#ATTENTION! Computationally expensive, these are...
BootstrapParsTen <- PBmodcomp(parsALT.model, parsNULL_ten.model, nsim = 40000) #maybe 20-40 min with 40K...
BootstrapParsInc <- PBmodcomp(parsALT.model, parsNULL_inc.model, nsim = 40000)
BootstrapParsForm <- PBmodcomp(parsALT.model, parsNULL_form.model, nsim = 40000)
#... and then simply call the lists to get the results.
BootstrapParsTen
BootstrapParsInc
BootstrapParsForm
#It will not look good until you increase the no. of simulations. 20K = 1038.24 sec with 8-core parallel processing.
#20K will give you p=0.0010499, 40K p=0.0010250 for the pars.model Tenure variable. What will happen if we run 100K simulations? An extra star? I've omitted the bootstrap procedure here to save time, so you won't see the results.
#Results from the parametric bootstrap procedure can be reported separately in my opinion.
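#If the runtime hurts, PBmodcomp() can spread the simulations over a cluster; a sketch of my own,
#assuming the 'parallel' package and a pbkrtest version that accepts the 'cl' argument:
library(parallel)
cl <- makeCluster(8) #or however many cores you have
BootstrapParsTen <- PBmodcomp(parsALT.model, parsNULL_ten.model, nsim = 40000, cl = cl)
stopCluster(cl)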
#How about the overall R2? Though contested (as are p-values), it could give you a hint of how well your model performs.
#Function suggested by Jarrett Byrnes:
r2.corr.mer <- function(m) {
lmfit <- lm(model.response(model.frame(m)) ~ fitted(m))
summary(lmfit)$r.squared
}
#Then apply the function on the different models...
r2.corr.mer(initial.model)
## [1] 0.8628964
r2.corr.mer(full.model)
## [1] 0.8629538
r2.corr.mer(pars.model) # Could be called R2 corr; i.e. the squared correlation between the fitted and the observed values
## [1] 0.8615724
#Optionally, and also published, we have 'Omega squared' (Omega^2_0: 1 minus the ratio of residual variance to response variance):
1-var(residuals(initial.model))/(var(model.response(model.frame(initial.model))))
## [1] 0.8596971
1-var(residuals(full.model))/(var(model.response(model.frame(full.model))))
## [1] 0.8605329
1-var(residuals(pars.model))/(var(model.response(model.frame(pars.model))))
## [1] 0.8585665
#Even better is to report marginal (for the fixed effects) and conditional (for the whole model) R2's, using the 'MuMIn' package:
r.squaredGLMM(initial.model)
## R2m R2c
## 0.01594819 0.82004155
r.squaredGLMM(full.model)
## R2m R2c
## 0.2996564 0.8259308
r.squaredGLMM(pars.model) #Splitting up R2 shows how relatively effective our favoured model is
## R2m R2c
## 0.2498851 0.8082569
#Let's run some diagnostics, most importantly of the homogeneity-of-variance assumption.
#Fligner-Killeen's test of homogeneity of variance, one per independent variable in each favoured model:
#Less sensitive to (and thus more reliable in the presence of) outliers than Levene's test.
#It can also handle continuous independent variables (such as our variable 'Incomeln').
#Anything above p=0.05 is ok...
fligner.test(residuals(initial.model) ~ data2$Tenure)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(initial.model) by data2$Tenure
## Fligner-Killeen:med chi-squared = 9.8666, df = 6, p-value = 0.1304
fligner.test(residuals(full.model) ~ data2$Tenure)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Tenure
## Fligner-Killeen:med chi-squared = 10.263, df = 6, p-value = 0.114
fligner.test(residuals(full.model) ~ data2$Incomeln)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Incomeln
## Fligner-Killeen:med chi-squared = 334, df = 329, p-value = 0.413
fligner.test(residuals(full.model) ~ data2$Gender)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Gender
## Fligner-Killeen:med chi-squared = 3.2401, df = 1, p-value =
## 0.07186
fligner.test(residuals(full.model) ~ data2$COFOG)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$COFOG
## Fligner-Killeen:med chi-squared = 12.579, df = 6, p-value =
## 0.05023
fligner.test(residuals(full.model) ~ data2$Age)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Age
## Fligner-Killeen:med chi-squared = 18.771, df = 30, p-value =
## 0.9447
fligner.test(residuals(full.model) ~ data2$Business_ed)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Business_ed
## Fligner-Killeen:med chi-squared = 0.22525, df = 1, p-value =
## 0.6351
fligner.test(residuals(full.model) ~ data2$Former_CE_exp_exp)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$Former_CE_exp_exp
## Fligner-Killeen:med chi-squared = 3.5983, df = 1, p-value =
## 0.05784
fligner.test(residuals(full.model) ~ data2$PhD)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(full.model) by data2$PhD
## Fligner-Killeen:med chi-squared = 0.043224, df = 1, p-value =
## 0.8353
fligner.test(residuals(pars.model) ~ data2$Tenure)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(pars.model) by data2$Tenure
## Fligner-Killeen:med chi-squared = 10.838, df = 6, p-value = 0.0935
fligner.test(residuals(pars.model) ~ data2$Incomeln)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(pars.model) by data2$Incomeln
## Fligner-Killeen:med chi-squared = 334, df = 329, p-value = 0.413
fligner.test(residuals(pars.model) ~ data2$Former_CE_exp_exp)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: residuals(pars.model) by data2$Former_CE_exp_exp
## Fligner-Killeen:med chi-squared = 3.3322, df = 1, p-value =
## 0.06793
#Oh, Nelly! We'll accept this, even though it's a close call.
#Check under the extras below for strategies when we're not so lucky...
#It might actually make sense to test for homogeneity directly on the dependent variable, by some independent variable:
fligner.test(Spendingln ~ Tenure, data=data2)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Spendingln by Tenure
## Fligner-Killeen:med chi-squared = 10.143, df = 6, p-value = 0.1188
fligner.test(Spendingln ~ Incomeln, data=data2)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Spendingln by Incomeln
## Fligner-Killeen:med chi-squared = 333.93, df = 329, p-value =
## 0.4141
fligner.test(Spendingln ~ Former_CE_exp_exp, data=data2) # ok, so the Former_CE_exp_exp is causing us problems again. We can ignore it here,
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Spendingln by Former_CE_exp_exp
## Fligner-Killeen:med chi-squared = 8.8894, df = 1, p-value =
## 0.002868
# ... since we passed the test on the residuals.
# Four alternatives to the 'laissez-faire' approach:
# 1. Get rid of problematic outliers
# 2. Weight the model (look at the code snippet under the extras below, or use the package 'robustlmm'; see the sketch right after this list)
# 3. Could we add Former GD as a random variable, even though the categories strictly speaking exhaust the population?
# 4. Akin to no. 3, but with brute force: divide the dataset based on FormerGD and run separate models.
# I would start with 2 or 1.
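# A minimal sketch of option 2 using robustlmm (my addition; assumes the package is installed).
# rlmer() takes the same formula interface as lmer() and downweights outlying observations:
library(robustlmm)
robust.pars.model <- rlmer(Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp +
                             (1|Agency/GD) + (1|Year), data = data2)
summary(robust.pars.model)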
# For some reason, variance inflation factor (VIF) is not supported for lmer. Strange, but I guess I'm missing something.
# From Github "aufrank/R-hacks", we have this life-saver for diagnosing collinearity in mixed models from lme4:
vif.mer <- function (fit) {
## adapted from rms::vif
v <- vcov(fit)
nam <- names(fixef(fit))
## exclude intercepts
ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)"))
if (ns > 0) {
v <- v[-(1:ns), -(1:ns), drop = FALSE]
nam <- nam[-(1:ns)]
}
d <- diag(v)^0.5
v <- diag(solve(v/(d %o% d)))
names(v) <- nam
v
}
#Then just run the function on the model you want to test
vif.mer(initial.model)
## TenureTenure_1 TenureTenure_2 TenureTenure_3 TenureTenure_4
## 1.458765 1.616805 1.591310 1.528405
## TenureTenure_5 TenureTenure_6pl
## 1.435218 1.443401
vif.mer(full.model)
## Incomeln TenureTenure_1
## 1.322994 1.490216
## TenureTenure_2 TenureTenure_3
## 1.707890 1.756560
## TenureTenure_4 TenureTenure_5
## 1.805983 1.683035
## TenureTenure_6pl Former_CE_exp_expFormer_CE_exp
## 1.897091 1.107124
## GenderMale COFOGCOFOG_02
## 1.122647 1.395292
## COFOGCOFOG_03 COFOGCOFOG_04
## 1.310719 1.514845
## COFOGCOFOG_05 COFOGCOFOG_09
## 1.195156 1.168983
## COFOGCOFOG_10 PhDPhD
## 1.328277 1.097509
## Business_edBusiness_ed Age
## 1.095125 1.416182
vif.mer(pars.model) #Golden! Should be <3 (or 2.5, or 5, or whatever the reviewers tell you it should be)
## Incomeln TenureTenure_1
## 1.010971 1.460610
## TenureTenure_2 TenureTenure_3
## 1.615029 1.588399
## TenureTenure_4 TenureTenure_5
## 1.533527 1.432175
## TenureTenure_6pl Former_CE_exp_expFormer_CE_exp
## 1.446241 1.005426
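#As a complement (my addition, and an assumption that the package is installed): the 'performance'
#package offers a ready-made collinearity check for merMod objects:
library(performance)
check_collinearity(pars.model)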
#Time to plot some diagnostics, visual versions of above tests...
#pars.model
plot(fitted(pars.model),residuals(pars.model), family ="serif")
abline(0, 0)
lines(smooth.spline(fitted(pars.model), residuals(pars.model))) #Here we would also like a fitted line
#or simply..:
plot(pars.model)
#density plot of the residuals... We don't really need to check for normality, but hey...
hist(residuals(pars.model), prob=TRUE, ylim=c(0,0.6), family="serif")
lines(density(residuals(pars.model)))
curve(dnorm(x, mean=mean(residuals(pars.model)), sd=sd(residuals(pars.model))), lty=3, col="#FF4500", add=TRUE)
#qq plot
qqnorm(residuals(pars.model), family="serif")
qqline(residuals(pars.model))
#The original variables
densityplot(~Spendingln, data = data2)
densityplot(~Incomeln, data = data2)
par(family = "serif")
boxplot(Spendingln ~ Tenure, data = data2)
boxplot(Spendingln ~ Former_CE_exp_exp, data = data2)
xyplot(Spendingln ~ Incomeln, data = data2, type=c("p","r","smooth"))
xyplot(Spendingln ~ Incomeln | Tenure, data=data2, type=c("p","r","smooth"))
xyplot(Spendingln ~ Incomeln | Former_CE_exp_exp, data=data2, type=c("p","r","smooth"))
#Not everything is diagnostics. We actually have results as well!
#Let's use the coefs to plot our results.
#To get the data right, you can use reshape2 and reshapeGUI (the latter has to be run in R, not RStudio),
#or that other program that begins with ex and ends with cel. I won't tell.
#e.g.
plotFULL=read.csv("coeffs_full1.csv") # You will have to inspect this one to grasp what's going on.
#Tell R the order of the variables
plotFULL$Variable <- factor(plotFULL$Variable, levels = plotFULL$Variable[order(-plotFULL$order_no)])
#plotting the marginal effects
ggplot(plotFULL, aes(x=Variable, y=Estimate, ymin=CIlow, ymax=CIhi))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), text=element_text(family="serif"))+
scale_x_discrete('Variable') +
scale_y_continuous('Marginal Effect',limits=c(-3,5)) +
geom_errorbar(size=.3,width=.2) +
geom_point()+
geom_hline(yintercept = 0, linetype=2)+
coord_flip()+
xlab('Variable')+
ylab('Estimate')+
geom_text(aes(y=3.8,label=p.Krshort),size=3, family="serif")+
geom_text(aes(y=4.3,label=sig),size=3, family="serif")
# Then export, arrange and import coefs for the other models and do the same thing. But change the name.
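#If you'd rather skip the spreadsheet detour, the plotting data frame can be built directly
#from the exported coefficients; a sketch of my own, using simple Wald-type 95% CIs
#(Estimate +/- 1.96*SE). The significance-label columns (p.Krshort, sig) would still have to be added by hand.
plotFULL2 <- data.frame(Variable = rownames(coefsF),
                        Estimate = coefsF$Estimate,
                        CIlow    = coefsF$Estimate - 1.96 * coefsF$Std..Error,
                        CIhi     = coefsF$Estimate + 1.96 * coefsF$Std..Error,
                        order_no = seq_len(nrow(coefsF)))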
#then for diagnostics
#Extract values from lmer objects
fitted.pars <- fitted(pars.model) #Fitted values
residuals.pars <- residuals(pars.model) #residuals
theo_quant.pars <- qqnorm(residuals.pars, plot.it = F)$x #Theoretical Quantiles
stdr.pars <- residuals(pars.model,
type = c("pearson")) #Standardized residuals
# Create data frame
# Will be used as input to plots/plot_ly/ggplot
p_data <- data.frame(fitted.pars,
stdr.pars,
residuals.pars,
theo_quant.pars)
#scatterplot with marginal density plots
#This I like:
scatter <- qplot(fitted.pars,residuals.pars, data=p_data) +
scale_x_continuous(limits=c(min(fitted.pars),max(fitted.pars))) +
scale_y_continuous(limits=c(min(residuals.pars),max(residuals.pars))) +
geom_rug(col=rgb(.5,0,0,alpha=.5))
print(scatter)
#
#below with marginal density plots
p1 <- ggplot(p_data, aes(fitted.pars, residuals.pars))+ geom_point() + theme_classic()
ggExtra::ggMarginal(
p = p1,
type = 'density',
margins = 'both',
size = 5,
col = 'black'
) #This you can add using the addin 'ggplot2 Marginal Plots' (in Rstudio)
#Old school:
#create a function for marginal hist plots
scatterhist = function(x, y, xlab="", ylab=""){
zones=matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(4/5,1/5), heights=c(1/5,4/5))
xhist = hist(x, plot=FALSE)
yhist = hist(y, plot=FALSE)
top = max(c(xhist$counts, yhist$counts))
par(mar=c(3,3,1,1))
plot(x,y)
par(mar=c(0,3,1,1))
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0)
par(mar=c(3,0,1,1))
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
par(oma=c(3,3,0,0))
mtext(xlab, side=1, line=1, outer=TRUE, adj=0,
at=.8 * (mean(x) - min(x))/(max(x)-min(x)))
mtext(ylab, side=2, line=1, outer=TRUE, adj=0,
at=(.8 * (mean(y) - min(y))/(max(y) - min(y))))
}
#run the function with our data..:
with(p_data, scatterhist(fitted.pars, residuals.pars, xlab="fitted", ylab="residuals"))
#useful? Maybe.
PLOT <- xyplot(residuals.pars ~ fitted.pars, data=p_data, type=c("p","r","smooth"), xlab = "Fitted Values",
               ylab = "Residuals")
PLOT
update(PLOT, panel=function(...){
panel.xyplot(...)
panel.abline(h=0)
} ) #This one I actually find useful
#Fancy plotly stuff....:
#Scatter plot
LOESS1 <- loess.smooth(fitted.pars, residuals.pars)
plot_ly(x = fitted.pars, y = residuals.pars,
type = "scatter", mode = "markers", hoverinfo = "x+y", name = "Data",
marker = list(size = 10, opacity = 0.5), showlegend = F) %>%
add_trace(x = LOESS1$x, y = LOESS1$y, type = "scatter", mode = "lines+markers", name = "Smooth",
line = list(width = 2)) %>%
layout(title = "Residuals (y) vs Fitted Values (x)", plot_bgcolor = "#e6e6e6")
#Histogram, density and fitted normal density, all in the same graph
sd(residuals.pars) #which we then manually insert below as the sd:
## [1] 0.7953341
p <- ggplot(p_data, aes(residuals.pars))+
geom_histogram(aes(y = ..density..), alpha = 0.7, fill = "#333333", breaks=seq(-3, 3, by = 0.4)) +
geom_density(fill = "#ff4d4d", alpha = 0.5) +
theme(panel.background = element_rect(fill = '#ffffff')) +
stat_function(fun = dnorm, n = 101, args = list(mean = 0, sd = 0.7953341)) + ylab("") +
scale_y_continuous(breaks = NULL) +
ggtitle("Density with Histogram overlay")
#alternatively, and more elegantly..:
p <- ggplot(p_data, aes(residuals.pars))+
geom_histogram(aes(y = ..density..), alpha = 0.7, fill = "#333333", breaks=seq(-3, 3, by = 0.4)) +
geom_density(fill = "#ff4d4d", alpha = 0.5) +
theme(panel.background = element_rect(fill = '#ffffff')) +
stat_function(fun = dnorm, n = 101, args = list(mean = 0, sd = sd(residuals.pars))) + ylab("") +
scale_y_continuous(breaks = NULL) +
ggtitle("Density with Histogram overlay")
ggplotly(p) # This transforms the ggplot into a plotly object
#Below, we combine frequency and density in the same plot, with two scales (left and right)
fit <- density(residuals.pars)
plot_ly(x = residuals.pars) %>%
add_histogram(nbinsx=25) %>%
add_lines(x = fit$x, y = fit$y, fill = "tozeroy", yaxis = "y2") %>%
layout(yaxis2 = list(overlaying = "y", side = "right")) #it actually works, imagine that!
EXTRAS: REDUNDANT/OPTIONAL CODE BELOW THIS POINT
#########################################
#Since FormerGD is significant, how does it interact with Tenure?
#Interaction model: add an interaction term between Tenure and previous GD experience (a conceptually justified model)
interaction.model = lmer(Spendingln ~ Incomeln + Tenure * Former_CE_exp_exp +
(1|Agency/GD) + (1|Year), data=data2)
# By default, the variables in the interaction term are also included as main effects...
# Then follow the procedures above to test and diagnose this model as well.
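#A quick likelihood ratio test for the interaction term itself, in line with the ML-based tests above
#(a sketch; parsALT.model from above already holds the corresponding main-effects model fitted with ML):
interactionML.model = lmer(Spendingln ~ Incomeln + Tenure * Former_CE_exp_exp +
                             (1|Agency/GD) + (1|Year), data=data2, REML=FALSE)
anova(parsALT.model, interactionML.model) #significant -> the interaction adds explanatory power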
##########################################
##########################################
meanp <- mean(predict(initial.model)) #This one can come in handy as a reference point.
#For the initial model above, the intercept is actually meaningful. For the subsequent models, not so much.
#We can however then use the above mean of the predictions, as it is the same for all models.
meanp
##########################################
##########################################
# The heteroscedasticity corner
#To use Levene's test, first load 'car':
library(car) #then simply run
leveneTest(residuals(pars.model) ~ data2$Tenure)
#One: check for heteroscedasticity with some method.
#Should we run in to problems:
#Two: update the model (if, indeed, the variance of the residuals is uneven):
new.model <- update( old.model, weights = varIdent(form = ~ 1 | the.problematic.variable) ) #note that varIdent() comes from 'nlme', so 'old.model' has to be an nlme::lme fit rather than an lmer fit
#...as suggested on CrossValidated (ToJo).
#You may reference (and read, by all means) 'Mixed-Effects Models in S and S-PLUS' (Pinheiro & Bates, 2000).
#Furthermore, you may have a look at the robustlmm package, which also uses a weighting approach.
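#A concrete sketch of that weighting strategy (my addition): since varIdent() lives in 'nlme', the model
#is refitted with nlme::lme rather than lmer. Note that the crossed Year effect is awkward to specify
#in lme and is simply dropped in this illustration.
library(nlme)
weighted.model <- lme(Spendingln ~ Incomeln + Tenure + Former_CE_exp_exp,
                      random = ~ 1 | Agency/GD,
                      weights = varIdent(form = ~ 1 | Former_CE_exp_exp),
                      data = data2)
summary(weighted.model)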
##########################################