https://bookdown.org/roback/bookdown-BeyondMLR/ch-GLMM.html https://jayrobwilliams.com/files/html/teaching-materials/Multilevel_GLMs.html https://jtools.jacob-long.com/reference/summ.glm
Finch, W. H., Bolin, J. E., & Kelley, K. (2019). Multilevel modeling using r: CRC Press.
bwj <- import("Merged Jordan worker managers audits data Dec2023.sav")%>%as.data.table()
bwj$faid<-bwj$FactoryAssessedID #creating a short name for factory grouping variable for convenience
#coding the main IV
bwj$voice_opp<-ifelse(bwj$H7=="Very dissatisfied",1,ifelse(bwj$H7=="Dissatisfied",2,ifelse(bwj$H7=="Neither satisfied nor dissatisfied",3,ifelse(bwj$H7=="Satisfied",4,ifelse(bwj$H7=="Very satisfied",5,NA)))))
val_labels(bwj$voice_opp) <-c(c("Very dissatisfied"=1, "Dissatisfied"=2, "Neither satisfied nor dissatisfied"=3, "Satisfied"=4, "Very satisfied"=5))
Freq(bwj$voice_opp)
## Frequency Statistics:
## ────────────
## N %
## ────────────
## 1 237 4.6
## 2 353 6.9
## 3 481 9.3
## 4 2340 45.4
## 5 1741 33.8
## ────────────
## Total N = 5,152
#Coding other controls or potential IVs
bwj$weekly_hours<-ifelse(bwj$E6=="NA",NA,bwj$E6) %>% as.numeric(data=bwj)
bwj$PICC_eff<-ifelse(bwj$E10=="Neither effective nor ineffective",3,ifelse(bwj$E10=="Effective",4,ifelse(bwj$E10=="Ineffective",2,ifelse(bwj$E10=="Very effective",5,ifelse(bwj$E10=="Very ineffective",1,NA)))))
val_labels(bwj$PICC_eff) <-c(c("Very ineffective"=1, "Ineffective"=2, "Neither effective nor ineffective"=3, "Effective"=4, "Very effective"=5))
bwj$woman<-ifelse(bwj$D1=="Female",1,0)
bwj$sup_woman<-ifelse(bwj$E11=="Female",1,ifelse(bwj$E11=="Male",0,NA)) # A lot of NAs/1730
bwj$age<-ifelse(bwj$D2=="NA", NA,bwj$D2) %>% as.numeric(data=bwj)
bwj$E4_clean_rec<-ifelse(bwj$E4_clean=="NA", NA,bwj$E4_clean) %>% as.numeric(data=bwj)
bwj$wage_clean<-ifelse(bwj$E4_raw==789366426, NA,log(bwj$E4_raw+1)) #performs much better
bwj$origin<-relevel(as.factor(bwj$D3), ref= "Jordan")
bwj$jordanian<-ifelse(bwj$migrant=="Jordanian",1,0)
bwj$education_r23<-ifelse(bwj$D4=="NA",NA,as.numeric(bwj$D4))
bwj$education_r1<-ifelse(bwj$D4_raw_round1=="NA",NA,as.numeric(bwj$D4_raw_round1))
bwj$education<-ifelse(!is.na(bwj$education_r23),bwj$education_r23,ifelse(!is.na(bwj$education_r1),bwj$education_r1, NA))
#group mean centering
#group mean centering based on factory only
bwj=group_mean_center(bwj, by="faid", add.suffix="_c",
c("voice_opp", "weekly_hours", "age", "tenure", "trust", "absence", "education", "PICC_eff"))
#group mean centering based on both factory and survey round
setDT(bwj)[, voice_opp_2c := voice_opp-mean(voice_opp, na.rm = TRUE), by = .(faid,round)][is.na(voice_opp), voice_opp_2c := NA]
setDT(bwj)[, weekly_hours_2c := weekly_hours-mean(weekly_hours, na.rm = TRUE), by = .(faid,round)][is.na(weekly_hours), weekly_hours_2c := NA]
setDT(bwj)[, age_2c := age - mean(age, na.rm = TRUE), by = .(faid, round)][is.na(age), age_2c := NA]
setDT(bwj)[, tenure_2c := tenure - mean(tenure, na.rm = TRUE), by = .(faid, round)][is.na(tenure), tenure_2c := NA]
setDT(bwj)[, trust_2c := trust - mean(trust, na.rm = TRUE), by = .(faid, round)][is.na(trust), trust_2c := NA]
setDT(bwj)[, absence_2c := absence - mean(absence, na.rm = TRUE), by = .(faid, round)][is.na(absence), absence_2c := NA]
setDT(bwj)[, education_2c := education - mean(education, na.rm = TRUE), by = .(faid, round)][is.na(education), education_2c := NA]
setDT(bwj)[, PICC_eff_2c := PICC_eff - mean(PICC_eff, na.rm = TRUE), by = .(faid, round)][is.na(PICC_eff), PICC_eff_2c := NA]
subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters
Y: abuse is the DV (coded 1,0)
X: main IV is voice_opp which is ordinal (treated as interval), there is one factory level predictor FOA19_20 which is invariant to time period but variant to factories
Cluster: faid is factory group, round2 time period indicator. round also indicates time period.
Notes: There are three rounds, but we are only looking at round 2 and 3, so just round2(1,0) indicator is enough there are other controls which may make sense on their own I am using subset function to take a subset of the data to remove round1 entries. However, this is not strictly needed since some variables just don’t have round 1 entry so listwise deletion has the same effect
#multilevel approach
#without centering
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset(bwj,round!=1), control=glmerControl(optimizer="bobyqa"))) # I get the scaling warning
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|faid/round2),family="binomial",subset(bwj,round!=1))) #I additionally get convergence warning. This likely because some factories are only measured in one round
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|round2)+(1|faid),family="binomial",subset(bwj,round!=1))) #The cross nested approach also produces warning, likely for similar reasons
Freq(bwj$round2)
#with centering
GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
summary(glmer(abuse~voice_opp_2c+FOA19_20+woman+sup_woman+trust_2c+weekly_hours_2c+age_2c+tenure_2c+absence_2c+education_2c+round2+(1|faid),family="binomial",subset(bwj,round!=1)))# using factory and time period based centering produces same error
# a temporary workaround using factory centering, two level nesting, and other tweaking
summary(glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+(1|faid/round2),control=glmerControl(optimizer="bobyqa"),family="binomial",subset(bwj,round!=1)))
#A quick diagnostic by fitting it in lavaan
model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2
Level:2
abuse~FOA19_20"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))
#Lavaan informs me that-
#the gender variable woman is the same in some clusters.
#the time variable round2 is invariant in some clusters.
# I ran a quick diagnostic by removing clusters where those problematic issues don't exist
subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters
#using glmer
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)) #Problems still persists
#using lavaan
summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)
GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)
Please refer to these two articles regarding hybrid model:
- Certo, S. T., Withers, M. C., & Semadeni, M. (2017). A tale of two effects: Using longitudinal data to compare within‐and between‐firm effects. Strategic Management Journal, 38(7), 1536-1556.
- Bliese, P. D., Schepker, D. J., Essman, S. M., & Ployhart, R. E. (2020). Bridging methodological divides between macro-and microresearch: Endogeneity and methods for panel data. Journal of Management, 46(1), 70-99.
Hybrid.GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
+factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
+factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))
Hybrid.GroC2=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
+factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw2=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
+factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))
##
## Model Summary
##
## ───────────────────────────────────────────────────────────────
## (1) abuse (2) abuse (3) abuse
## ───────────────────────────────────────────────────────────────
## (Intercept) 2.618 5.569 ** 5.172 *
## (2.046) (2.122) (2.164)
## voice_opp_c -0.240 *** -0.236 *** -0.253 ***
## (0.041) (0.040) (0.042)
## FOA19_20 -0.038 0.003 0.004
## (0.023) (0.016) (0.017)
## woman -0.070 -0.088 -0.082
## (0.099) (0.098) (0.099)
## sup_woman 0.095 0.120 0.087
## (0.086) (0.085) (0.087)
## trust_c -0.197 *** -0.195 *** -0.206 ***
## (0.031) (0.031) (0.031)
## weekly_hours_c -0.001 -0.002 -0.003
## (0.002) (0.003) (0.003)
## age_c 0.017 * 0.018 * 0.019 **
## (0.007) (0.007) (0.007)
## tenure_c -0.088 * -0.090 ** -0.081 *
## (0.035) (0.034) (0.035)
## absence_c 0.054 *** 0.055 *** 0.058 ***
## (0.014) (0.014) (0.015)
## education_c 0.076 *** 0.077 *** 0.074 ***
## (0.011) (0.011) (0.012)
## round2 0.042
## (0.083)
## voice_opp_mean -0.561 * -0.584 *
## (0.247) (0.252)
## trust_mean -0.282 -0.298
## (0.173) (0.196)
## weekly_hours_mean -0.064 *** -0.055 ***
## (0.014) (0.015)
## age_mean -0.078 * -0.073 *
## (0.031) (0.032)
## tenure_mean -0.080 -0.116
## (0.142) (0.150)
## absence_mean -0.082 -0.041
## (0.094) (0.118)
## education_mean 0.281 *** 0.276 ***
## (0.052) (0.053)
## factor(round2)1 0.031 0.057
## (0.083) (0.085)
## ───────────────────────────────────────────────────────────────
## Marginal R^2 0.071 0.149 0.141
## Conditional R^2 0.179 0.162 0.153
## AIC 3669.144 3604.512 3429.383
## BIC 3747.522 3725.094 3549.038
## Num. obs. 3069 3069 2930
## Num. groups: faid 69 69 62
## Var: faid (Intercept) 0.435 0.049 0.047
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.
##
## Model Summary
##
## ───────────────────────────────────────────────────────────────
## (1) abuse (2) abuse (3) abuse
## ───────────────────────────────────────────────────────────────
## (Intercept) 1.835 5.568 ** 5.172 *
## (1.613) (2.122) (2.158)
## voice_opp -0.273 *** -0.236 *** -0.253 ***
## (0.041) (0.040) (0.042)
## FOA19_20 -0.018 0.003 0.004
## (0.018) (0.016) (0.017)
## woman -0.057 -0.088 -0.082
## (0.100) (0.098) (0.099)
## sup_woman 0.076 0.119 0.086
## (0.088) (0.085) (0.087)
## trust -0.213 *** -0.195 *** -0.206 ***
## (0.031) (0.031) (0.031)
## weekly_hours -0.004 -0.002 -0.003
## (0.003) (0.003) (0.003)
## age 0.017 * 0.018 * 0.019 **
## (0.007) (0.007) (0.007)
## tenure -0.085 * -0.090 ** -0.081 *
## (0.035) (0.034) (0.035)
## absence 0.059 *** 0.055 *** 0.058 ***
## (0.015) (0.014) (0.015)
## education 0.082 *** 0.077 *** 0.074 ***
## (0.011) (0.011) (0.012)
## round2 0.058
## (0.085)
## voice_opp_mean -0.326 -0.332
## (0.250) (0.256)
## trust_mean -0.087 -0.092
## (0.175) (0.198)
## weekly_hours_mean -0.062 *** -0.052 ***
## (0.014) (0.015)
## age_mean -0.096 ** -0.091 **
## (0.031) (0.032)
## tenure_mean 0.010 -0.035
## (0.146) (0.153)
## absence_mean -0.137 -0.098
## (0.095) (0.119)
## education_mean 0.204 *** 0.201 ***
## (0.052) (0.054)
## factor(round2)1 0.030 0.056
## (0.083) (0.085)
## ───────────────────────────────────────────────────────────────
## Marginal R^2 0.099 0.149 0.141
## Conditional R^2 0.144 0.162 0.153
## AIC 3456.183 3604.512 3429.384
## BIC 3533.959 3725.094 3549.039
## Num. obs. 2930 3069 2930
## Num. groups: faid 62 69 62
## Var: faid (Intercept) 0.176 0.049 0.046
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.
I’m not sure whether we should run this in laavan because the MSEM in lavaan may not be able to analyze Y as 0/1 variable. Please confirmed this.
model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c +round2
Level:2
abuse~voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))
## lavaan 0.6.16 ended normally after 70 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 23
##
## Used Total
## Number of observations 3069 3422
## Number of clusters [faid] 69
##
## Model Test User Model:
##
## Test statistic 0.483
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Sandwich
## Information bread Observed
## Observed information based on Hessian
##
##
## Level 1 [within]:
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## abuse ~
## voice_opp_c -0.049 0.010 -4.825 0.000
## woman -0.029 0.026 -1.120 0.263
## sup_woman 0.020 0.025 0.793 0.428
## trust_c -0.040 0.006 -6.222 0.000
## weekly_hours_c -0.000 0.001 -0.365 0.715
## age_c 0.003 0.002 2.113 0.035
## tenure_c -0.017 0.009 -1.864 0.062
## absence_c 0.012 0.003 3.413 0.001
## education_c 0.015 0.002 6.120 0.000
## round2 0.009 0.025 0.377 0.706
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.196 0.005 37.354 0.000
##
##
## Level 2 [faid]:
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## abuse ~
## voice_opp_mean -0.092 0.047 -1.948 0.051
## FOA19_20 0.001 0.003 0.428 0.669
## woman 0.324 0.108 3.012 0.003
## sup_woman 0.009 0.074 0.115 0.909
## trust_mean -0.100 0.035 -2.882 0.004
## weekly_hors_mn -0.009 0.003 -2.987 0.003
## age_mean -0.002 0.009 -0.281 0.779
## tenure_mean -0.017 0.024 -0.695 0.487
## absence_mean -0.011 0.016 -0.679 0.497
## education_mean 0.052 0.010 5.036 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.839 0.544 1.542 0.123
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.001 0.001 1.392 0.164
summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)
## lavaan 0.6.16 ended normally after 68 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 23
##
## Used Total
## Number of observations 2930 3274
## Number of clusters [faid] 62
##
## Model Test User Model:
##
## Test statistic 0.630
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Sandwich
## Information bread Observed
## Observed information based on Hessian
##
##
## Level 1 [within]:
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## abuse ~
## voice_opp_c -0.052 0.011 -4.910 0.000
## woman -0.026 0.026 -1.003 0.316
## sup_woman 0.014 0.026 0.562 0.574
## trust_c -0.042 0.006 -6.722 0.000
## weekly_hours_c -0.000 0.001 -0.596 0.551
## age_c 0.003 0.002 2.191 0.028
## tenure_c -0.016 0.010 -1.626 0.104
## absence_c 0.013 0.003 3.670 0.000
## education_c 0.014 0.002 5.776 0.000
## round2 0.011 0.025 0.444 0.657
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.195 0.005 36.123 0.000
##
##
## Level 2 [faid]:
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## abuse ~
## voice_opp_mean -0.106 0.050 -2.128 0.033
## FOA19_20 0.002 0.004 0.601 0.548
## woman 0.314 0.111 2.840 0.005
## sup_woman -0.014 0.074 -0.193 0.847
## trust_mean -0.095 0.039 -2.408 0.016
## weekly_hors_mn -0.007 0.003 -2.464 0.014
## age_mean -0.004 0.009 -0.410 0.682
## tenure_mean -0.021 0.026 -0.797 0.426
## absence_mean 0.007 0.019 0.340 0.734
## education_mean 0.052 0.010 5.003 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.750 0.569 1.318 0.187
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .abuse 0.001 0.001 1.284 0.199