1 PREPARATION

1.1 Learning sources

https://bookdown.org/roback/bookdown-BeyondMLR/ch-GLMM.html https://jayrobwilliams.com/files/html/teaching-materials/Multilevel_GLMs.html https://jtools.jacob-long.com/reference/summ.glm

Finch, W. H., Bolin, J. E., & Kelley, K. (2019). Multilevel modeling using r: CRC Press.

1.2 Data clean

bwj <- import("Merged Jordan worker managers audits data Dec2023.sav")%>%as.data.table()
bwj$faid<-bwj$FactoryAssessedID #creating a short name for factory grouping variable for convenience
#coding the main IV
bwj$voice_opp<-ifelse(bwj$H7=="Very dissatisfied",1,ifelse(bwj$H7=="Dissatisfied",2,ifelse(bwj$H7=="Neither satisfied nor dissatisfied",3,ifelse(bwj$H7=="Satisfied",4,ifelse(bwj$H7=="Very satisfied",5,NA)))))
val_labels(bwj$voice_opp) <-c(c("Very dissatisfied"=1, "Dissatisfied"=2, "Neither satisfied nor dissatisfied"=3, "Satisfied"=4, "Very satisfied"=5))

Freq(bwj$voice_opp)

## Frequency Statistics:
## ────────────
##       N    %
## ────────────
## 1   237  4.6
## 2   353  6.9
## 3   481  9.3
## 4  2340 45.4
## 5  1741 33.8
## ────────────
## Total N = 5,152

#Coding other controls or potential IVs
bwj$weekly_hours<-ifelse(bwj$E6=="NA",NA,bwj$E6) %>% as.numeric(data=bwj)
bwj$PICC_eff<-ifelse(bwj$E10=="Neither effective nor ineffective",3,ifelse(bwj$E10=="Effective",4,ifelse(bwj$E10=="Ineffective",2,ifelse(bwj$E10=="Very effective",5,ifelse(bwj$E10=="Very ineffective",1,NA)))))
val_labels(bwj$PICC_eff) <-c(c("Very ineffective"=1, "Ineffective"=2, "Neither effective nor ineffective"=3, "Effective"=4, "Very effective"=5))
bwj$woman<-ifelse(bwj$D1=="Female",1,0)
bwj$sup_woman<-ifelse(bwj$E11=="Female",1,ifelse(bwj$E11=="Male",0,NA)) # A lot of NAs/1730
bwj$age<-ifelse(bwj$D2=="NA", NA,bwj$D2) %>% as.numeric(data=bwj)
bwj$E4_clean_rec<-ifelse(bwj$E4_clean=="NA", NA,bwj$E4_clean) %>% as.numeric(data=bwj) 
bwj$wage_clean<-ifelse(bwj$E4_raw==789366426, NA,log(bwj$E4_raw+1)) #performs much better
bwj$origin<-relevel(as.factor(bwj$D3), ref= "Jordan")
bwj$jordanian<-ifelse(bwj$migrant=="Jordanian",1,0)
bwj$education_r23<-ifelse(bwj$D4=="NA",NA,as.numeric(bwj$D4))
bwj$education_r1<-ifelse(bwj$D4_raw_round1=="NA",NA,as.numeric(bwj$D4_raw_round1))
bwj$education<-ifelse(!is.na(bwj$education_r23),bwj$education_r23,ifelse(!is.na(bwj$education_r1),bwj$education_r1, NA))



#group mean centering

#group mean centering based on factory only
bwj=group_mean_center(bwj, by="faid", add.suffix="_c", 
                      c("voice_opp", "weekly_hours", "age", "tenure", "trust", "absence", "education", "PICC_eff"))


#group mean centering based on both factory and survey round

setDT(bwj)[, voice_opp_2c := voice_opp-mean(voice_opp, na.rm = TRUE), by = .(faid,round)][is.na(voice_opp), voice_opp_2c := NA]
setDT(bwj)[, weekly_hours_2c := weekly_hours-mean(weekly_hours, na.rm = TRUE), by = .(faid,round)][is.na(weekly_hours), weekly_hours_2c := NA]
setDT(bwj)[, age_2c := age - mean(age, na.rm = TRUE), by = .(faid, round)][is.na(age), age_2c := NA]
setDT(bwj)[, tenure_2c := tenure - mean(tenure, na.rm = TRUE), by = .(faid, round)][is.na(tenure), tenure_2c := NA]
setDT(bwj)[, trust_2c := trust - mean(trust, na.rm = TRUE), by = .(faid, round)][is.na(trust), trust_2c := NA]
setDT(bwj)[, absence_2c := absence - mean(absence, na.rm = TRUE), by = .(faid, round)][is.na(absence), absence_2c := NA]
setDT(bwj)[, education_2c := education - mean(education, na.rm = TRUE), by = .(faid, round)][is.na(education), education_2c := NA]
setDT(bwj)[, PICC_eff_2c := PICC_eff - mean(PICC_eff, na.rm = TRUE), by = .(faid, round)][is.na(PICC_eff), PICC_eff_2c := NA]

subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters

2 MODELING

Y: abuse is the DV (coded 1,0)

X: main IV is voice_opp which is ordinal (treated as interval), there is one factory level predictor FOA19_20 which is invariant to time period but variant to factories

Cluster: faid is factory group, round2 time period indicator. round also indicates time period.

Notes: There are three rounds, but we are only looking at round 2 and 3, so just round2(1,0) indicator is enough there are other controls which may make sense on their own I am using subset function to take a subset of the data to remove round1 entries. However, this is not strictly needed since some variables just don’t have round 1 entry so listwise deletion has the same effect

2.1 Previous code that are not used

#multilevel approach
#without centering
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset(bwj,round!=1), control=glmerControl(optimizer="bobyqa"))) # I get the scaling warning
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|faid/round2),family="binomial",subset(bwj,round!=1))) #I additionally get convergence warning. This likely because some factories are only measured in one round
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|round2)+(1|faid),family="binomial",subset(bwj,round!=1))) #The cross nested approach also produces warning, likely for similar reasons

Freq(bwj$round2)

#with centering
GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
summary(glmer(abuse~voice_opp_2c+FOA19_20+woman+sup_woman+trust_2c+weekly_hours_2c+age_2c+tenure_2c+absence_2c+education_2c+round2+(1|faid),family="binomial",subset(bwj,round!=1)))# using factory and time period based centering produces same error 

# a temporary workaround using factory centering, two level nesting, and other tweaking
summary(glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+(1|faid/round2),control=glmerControl(optimizer="bobyqa"),family="binomial",subset(bwj,round!=1)))

#A quick diagnostic by fitting it in lavaan
model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2
Level:2
abuse~FOA19_20"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))

#Lavaan informs me that-
#the gender variable woman is the same in some clusters. 
#the time variable round2 is invariant in some clusters. 

# I ran a quick diagnostic by removing clusters where those problematic issues don't exist
subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters
#using glmer
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)) #Problems still persists
#using lavaan
summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)

2.2 The demean based fix-effect model in your code

GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)

2.3 My main question is:

should we remove the clusters where woman and round2 are invariant as I did?
- This should be fine. Please compare the results to be no I didn’t find any big difference.
Should We use a Higher level-factory, and Intermediate Level-round2 approach to clustering? This is relevant for centering decisions
- Yes, two levels should be enough, but we need to use the hybrid model which was clarified in the reference Is mentioned below.
Or should we do cross nested models since individuals sampled in different rounds should be nested under both factories and time period? e.g. (1|faid) + (1|round2)
- No need

3 HYBRID MODEL AS THE MOST APPROPRIATE APPROACH THAT FIT THE CURRENT DATA

Please refer to these two articles regarding hybrid model:

- Certo, S. T., Withers, M. C., & Semadeni, M. (2017). A tale of two effects: Using longitudinal data to compare within‐and between‐firm effects. Strategic Management Journal, 38(7), 1536-1556.

- Bliese, P. D., Schepker, D. J., Essman, S. M., & Ployhart, R. E. (2020). Bridging methodological divides between macro-and microresearch: Endogeneity and methods for panel data. Journal of Management, 46(1), 70-99.

3.1 Full dataset

Hybrid.GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))

3.2 Vaild dataset

Hybrid.GroC2=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw2=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))

3.3 Summary

3.3.1 Results based on group-mean centering data

model_summary(list(GroC,Hybrid.GroC,Hybrid.GroC2))

## 
## Model Summary
## 
## ───────────────────────────────────────────────────────────────
##                        (1) abuse     (2) abuse     (3) abuse   
## ───────────────────────────────────────────────────────────────
## (Intercept)               2.618         5.569 **      5.172 *  
##                          (2.046)       (2.122)       (2.164)   
## voice_opp_c              -0.240 ***    -0.236 ***    -0.253 ***
##                          (0.041)       (0.040)       (0.042)   
## FOA19_20                 -0.038         0.003         0.004    
##                          (0.023)       (0.016)       (0.017)   
## woman                    -0.070        -0.088        -0.082    
##                          (0.099)       (0.098)       (0.099)   
## sup_woman                 0.095         0.120         0.087    
##                          (0.086)       (0.085)       (0.087)   
## trust_c                  -0.197 ***    -0.195 ***    -0.206 ***
##                          (0.031)       (0.031)       (0.031)   
## weekly_hours_c           -0.001        -0.002        -0.003    
##                          (0.002)       (0.003)       (0.003)   
## age_c                     0.017 *       0.018 *       0.019 ** 
##                          (0.007)       (0.007)       (0.007)   
## tenure_c                 -0.088 *      -0.090 **     -0.081 *  
##                          (0.035)       (0.034)       (0.035)   
## absence_c                 0.054 ***     0.055 ***     0.058 ***
##                          (0.014)       (0.014)       (0.015)   
## education_c               0.076 ***     0.077 ***     0.074 ***
##                          (0.011)       (0.011)       (0.012)   
## round2                    0.042                                
##                          (0.083)                               
## voice_opp_mean                         -0.561 *      -0.584 *  
##                                        (0.247)       (0.252)   
## trust_mean                             -0.282        -0.298    
##                                        (0.173)       (0.196)   
## weekly_hours_mean                      -0.064 ***    -0.055 ***
##                                        (0.014)       (0.015)   
## age_mean                               -0.078 *      -0.073 *  
##                                        (0.031)       (0.032)   
## tenure_mean                            -0.080        -0.116    
##                                        (0.142)       (0.150)   
## absence_mean                           -0.082        -0.041    
##                                        (0.094)       (0.118)   
## education_mean                          0.281 ***     0.276 ***
##                                        (0.052)       (0.053)   
## factor(round2)1                         0.031         0.057    
##                                        (0.083)       (0.085)   
## ───────────────────────────────────────────────────────────────
## Marginal R^2              0.071         0.149         0.141    
## Conditional R^2           0.179         0.162         0.153    
## AIC                    3669.144      3604.512      3429.383    
## BIC                    3747.522      3725.094      3549.038    
## Num. obs.              3069          3069          2930        
## Num. groups: faid        69            69            62        
## Var: faid (Intercept)     0.435         0.049         0.047    
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.

3.3.2 Results based on raw centering data

model_summary(list(raw,Hybrid.raw,Hybrid.raw2))

## 
## Model Summary
## 
## ───────────────────────────────────────────────────────────────
##                        (1) abuse     (2) abuse     (3) abuse   
## ───────────────────────────────────────────────────────────────
## (Intercept)               1.835         5.568 **      5.172 *  
##                          (1.613)       (2.122)       (2.158)   
## voice_opp                -0.273 ***    -0.236 ***    -0.253 ***
##                          (0.041)       (0.040)       (0.042)   
## FOA19_20                 -0.018         0.003         0.004    
##                          (0.018)       (0.016)       (0.017)   
## woman                    -0.057        -0.088        -0.082    
##                          (0.100)       (0.098)       (0.099)   
## sup_woman                 0.076         0.119         0.086    
##                          (0.088)       (0.085)       (0.087)   
## trust                    -0.213 ***    -0.195 ***    -0.206 ***
##                          (0.031)       (0.031)       (0.031)   
## weekly_hours             -0.004        -0.002        -0.003    
##                          (0.003)       (0.003)       (0.003)   
## age                       0.017 *       0.018 *       0.019 ** 
##                          (0.007)       (0.007)       (0.007)   
## tenure                   -0.085 *      -0.090 **     -0.081 *  
##                          (0.035)       (0.034)       (0.035)   
## absence                   0.059 ***     0.055 ***     0.058 ***
##                          (0.015)       (0.014)       (0.015)   
## education                 0.082 ***     0.077 ***     0.074 ***
##                          (0.011)       (0.011)       (0.012)   
## round2                    0.058                                
##                          (0.085)                               
## voice_opp_mean                         -0.326        -0.332    
##                                        (0.250)       (0.256)   
## trust_mean                             -0.087        -0.092    
##                                        (0.175)       (0.198)   
## weekly_hours_mean                      -0.062 ***    -0.052 ***
##                                        (0.014)       (0.015)   
## age_mean                               -0.096 **     -0.091 ** 
##                                        (0.031)       (0.032)   
## tenure_mean                             0.010        -0.035    
##                                        (0.146)       (0.153)   
## absence_mean                           -0.137        -0.098    
##                                        (0.095)       (0.119)   
## education_mean                          0.204 ***     0.201 ***
##                                        (0.052)       (0.054)   
## factor(round2)1                         0.030         0.056    
##                                        (0.083)       (0.085)   
## ───────────────────────────────────────────────────────────────
## Marginal R^2              0.099         0.149         0.141    
## Conditional R^2           0.144         0.162         0.153    
## AIC                    3456.183      3604.512      3429.384    
## BIC                    3533.959      3725.094      3549.039    
## Num. obs.              2930          3069          2930        
## Num. groups: faid        62            69            62        
## Var: faid (Intercept)     0.176         0.049         0.046    
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.

3.4 A quick diagnostic by fitting it in lavaan

I’m not sure whether we should run this in laavan because the MSEM in lavaan may not be able to analyze Y as 0/1 variable. Please confirmed this.

3.4.1 Full dataset

model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c +round2
Level:2
abuse~voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))

## lavaan 0.6.16 ended normally after 70 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        23
## 
##                                                   Used       Total
##   Number of observations                          3069        3422
##   Number of clusters [faid]                         69            
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.483
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## 
## Level 1 [within]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_c      -0.049    0.010   -4.825    0.000
##     woman            -0.029    0.026   -1.120    0.263
##     sup_woman         0.020    0.025    0.793    0.428
##     trust_c          -0.040    0.006   -6.222    0.000
##     weekly_hours_c   -0.000    0.001   -0.365    0.715
##     age_c             0.003    0.002    2.113    0.035
##     tenure_c         -0.017    0.009   -1.864    0.062
##     absence_c         0.012    0.003    3.413    0.001
##     education_c       0.015    0.002    6.120    0.000
##     round2            0.009    0.025    0.377    0.706
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.196    0.005   37.354    0.000
## 
## 
## Level 2 [faid]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_mean   -0.092    0.047   -1.948    0.051
##     FOA19_20          0.001    0.003    0.428    0.669
##     woman             0.324    0.108    3.012    0.003
##     sup_woman         0.009    0.074    0.115    0.909
##     trust_mean       -0.100    0.035   -2.882    0.004
##     weekly_hors_mn   -0.009    0.003   -2.987    0.003
##     age_mean         -0.002    0.009   -0.281    0.779
##     tenure_mean      -0.017    0.024   -0.695    0.487
##     absence_mean     -0.011    0.016   -0.679    0.497
##     education_mean    0.052    0.010    5.036    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.839    0.544    1.542    0.123
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.001    0.001    1.392    0.164

3.4.2 Vaild dataset

summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)

## lavaan 0.6.16 ended normally after 68 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        23
## 
##                                                   Used       Total
##   Number of observations                          2930        3274
##   Number of clusters [faid]                         62            
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.630
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## 
## Level 1 [within]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_c      -0.052    0.011   -4.910    0.000
##     woman            -0.026    0.026   -1.003    0.316
##     sup_woman         0.014    0.026    0.562    0.574
##     trust_c          -0.042    0.006   -6.722    0.000
##     weekly_hours_c   -0.000    0.001   -0.596    0.551
##     age_c             0.003    0.002    2.191    0.028
##     tenure_c         -0.016    0.010   -1.626    0.104
##     absence_c         0.013    0.003    3.670    0.000
##     education_c       0.014    0.002    5.776    0.000
##     round2            0.011    0.025    0.444    0.657
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.195    0.005   36.123    0.000
## 
## 
## Level 2 [faid]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_mean   -0.106    0.050   -2.128    0.033
##     FOA19_20          0.002    0.004    0.601    0.548
##     woman             0.314    0.111    2.840    0.005
##     sup_woman        -0.014    0.074   -0.193    0.847
##     trust_mean       -0.095    0.039   -2.408    0.016
##     weekly_hors_mn   -0.007    0.003   -2.464    0.014
##     age_mean         -0.004    0.009   -0.410    0.682
##     tenure_mean      -0.021    0.026   -0.797    0.426
##     absence_mean      0.007    0.019    0.340    0.734
##     education_mean    0.052    0.010    5.003    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.750    0.569    1.318    0.187
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.001    0.001    1.284    0.199

The revised code for ILR/JIBS/ASQ paper