1 PREPARATION

1.2 Data clean

bwj <- import("Merged Jordan worker managers audits data Dec2023.sav")%>%as.data.table()
bwj$faid<-bwj$FactoryAssessedID #creating a short name for factory grouping variable for convenience
#coding the main IV
bwj$voice_opp<-ifelse(bwj$H7=="Very dissatisfied",1,ifelse(bwj$H7=="Dissatisfied",2,ifelse(bwj$H7=="Neither satisfied nor dissatisfied",3,ifelse(bwj$H7=="Satisfied",4,ifelse(bwj$H7=="Very satisfied",5,NA)))))
val_labels(bwj$voice_opp) <-c(c("Very dissatisfied"=1, "Dissatisfied"=2, "Neither satisfied nor dissatisfied"=3, "Satisfied"=4, "Very satisfied"=5))

Freq(bwj$voice_opp)
## Frequency Statistics:
## ────────────
##       N    %
## ────────────
## 1   237  4.6
## 2   353  6.9
## 3   481  9.3
## 4  2340 45.4
## 5  1741 33.8
## ────────────
## Total N = 5,152
#Coding other controls or potential IVs
bwj$weekly_hours<-ifelse(bwj$E6=="NA",NA,bwj$E6) %>% as.numeric(data=bwj)
bwj$PICC_eff<-ifelse(bwj$E10=="Neither effective nor ineffective",3,ifelse(bwj$E10=="Effective",4,ifelse(bwj$E10=="Ineffective",2,ifelse(bwj$E10=="Very effective",5,ifelse(bwj$E10=="Very ineffective",1,NA)))))
val_labels(bwj$PICC_eff) <-c(c("Very ineffective"=1, "Ineffective"=2, "Neither effective nor ineffective"=3, "Effective"=4, "Very effective"=5))
bwj$woman<-ifelse(bwj$D1=="Female",1,0)
bwj$sup_woman<-ifelse(bwj$E11=="Female",1,ifelse(bwj$E11=="Male",0,NA)) # A lot of NAs/1730
bwj$age<-ifelse(bwj$D2=="NA", NA,bwj$D2) %>% as.numeric(data=bwj)
bwj$E4_clean_rec<-ifelse(bwj$E4_clean=="NA", NA,bwj$E4_clean) %>% as.numeric(data=bwj) 
bwj$wage_clean<-ifelse(bwj$E4_raw==789366426, NA,log(bwj$E4_raw+1)) #performs much better
bwj$origin<-relevel(as.factor(bwj$D3), ref= "Jordan")
bwj$jordanian<-ifelse(bwj$migrant=="Jordanian",1,0)
bwj$education_r23<-ifelse(bwj$D4=="NA",NA,as.numeric(bwj$D4))
bwj$education_r1<-ifelse(bwj$D4_raw_round1=="NA",NA,as.numeric(bwj$D4_raw_round1))
bwj$education<-ifelse(!is.na(bwj$education_r23),bwj$education_r23,ifelse(!is.na(bwj$education_r1),bwj$education_r1, NA))



#group mean centering

#group mean centering based on factory only
bwj=group_mean_center(bwj, by="faid", add.suffix="_c", 
                      c("voice_opp", "weekly_hours", "age", "tenure", "trust", "absence", "education", "PICC_eff"))


#group mean centering based on both factory and survey round

setDT(bwj)[, voice_opp_2c := voice_opp-mean(voice_opp, na.rm = TRUE), by = .(faid,round)][is.na(voice_opp), voice_opp_2c := NA]
setDT(bwj)[, weekly_hours_2c := weekly_hours-mean(weekly_hours, na.rm = TRUE), by = .(faid,round)][is.na(weekly_hours), weekly_hours_2c := NA]
setDT(bwj)[, age_2c := age - mean(age, na.rm = TRUE), by = .(faid, round)][is.na(age), age_2c := NA]
setDT(bwj)[, tenure_2c := tenure - mean(tenure, na.rm = TRUE), by = .(faid, round)][is.na(tenure), tenure_2c := NA]
setDT(bwj)[, trust_2c := trust - mean(trust, na.rm = TRUE), by = .(faid, round)][is.na(trust), trust_2c := NA]
setDT(bwj)[, absence_2c := absence - mean(absence, na.rm = TRUE), by = .(faid, round)][is.na(absence), absence_2c := NA]
setDT(bwj)[, education_2c := education - mean(education, na.rm = TRUE), by = .(faid, round)][is.na(education), education_2c := NA]
setDT(bwj)[, PICC_eff_2c := PICC_eff - mean(PICC_eff, na.rm = TRUE), by = .(faid, round)][is.na(PICC_eff), PICC_eff_2c := NA]

subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters

2 MODELING

Y: abuse is the DV (coded 1,0)

X: main IV is voice_opp which is ordinal (treated as interval), there is one factory level predictor FOA19_20 which is invariant to time period but variant to factories

Cluster: faid is factory group, round2 time period indicator. round also indicates time period.

Notes: There are three rounds, but we are only looking at round 2 and 3, so just round2(1,0) indicator is enough there are other controls which may make sense on their own I am using subset function to take a subset of the data to remove round1 entries. However, this is not strictly needed since some variables just don’t have round 1 entry so listwise deletion has the same effect

2.1 Previous code that are not used

#multilevel approach
#without centering
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset(bwj,round!=1), control=glmerControl(optimizer="bobyqa"))) # I get the scaling warning
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|faid/round2),family="binomial",subset(bwj,round!=1))) #I additionally get convergence warning. This likely because some factories are only measured in one round
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+(1|round2)+(1|faid),family="binomial",subset(bwj,round!=1))) #The cross nested approach also produces warning, likely for similar reasons

Freq(bwj$round2)

#with centering
GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
summary(glmer(abuse~voice_opp_2c+FOA19_20+woman+sup_woman+trust_2c+weekly_hours_2c+age_2c+tenure_2c+absence_2c+education_2c+round2+(1|faid),family="binomial",subset(bwj,round!=1)))# using factory and time period based centering produces same error 

# a temporary workaround using factory centering, two level nesting, and other tweaking
summary(glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+(1|faid/round2),control=glmerControl(optimizer="bobyqa"),family="binomial",subset(bwj,round!=1)))

#A quick diagnostic by fitting it in lavaan
model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2
Level:2
abuse~FOA19_20"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))

#Lavaan informs me that-
#the gender variable woman is the same in some clusters. 
#the time variable round2 is invariant in some clusters. 

# I ran a quick diagnostic by removing clusters where those problematic issues don't exist
subset_bwj <- bwj[round != 1 & !(faid %in% c(168, 639, 2565,2975, 2088, 2669, 272)), ] #removing clusters
#using glmer
summary(glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)) #Problems still persists
#using lavaan
summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)

2.2 The demean based fix-effect model in your code

GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+round2+(1|faid),family="binomial",subset(bwj,round!=1)) #using factory level centering produces convergence errors
raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+round2+(1|faid),family="binomial",subset_bwj)

2.3 My main question is:

  • should we remove the clusters where woman and round2 are invariant as I did?
    • This should be fine. Please compare the results to be no I didn’t find any big difference.
  • Should We use a Higher level-factory, and Intermediate Level-round2 approach to clustering? This is relevant for centering decisions
    • Yes, two levels should be enough, but we need to use the hybrid model which was clarified in the reference Is mentioned below.
  • Or should we do cross nested models since individuals sampled in different rounds should be nested under both factories and time period? e.g. (1|faid) + (1|round2)
    • No need

3 HYBRID MODEL AS THE MOST APPROPRIATE APPROACH THAT FIT THE CURRENT DATA

Please refer to these two articles regarding hybrid model:

- Certo, S. T., Withers, M. C., & Semadeni, M. (2017). A tale of two effects: Using longitudinal data to compare within‐and between‐firm effects. Strategic Management Journal, 38(7), 1536-1556.

- Bliese, P. D., Schepker, D. J., Essman, S. M., & Ployhart, R. E. (2020). Bridging methodological divides between macro-and microresearch: Endogeneity and methods for panel data. Journal of Management, 46(1), 70-99.

3.1 Full dataset

Hybrid.GroC=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset(bwj,round!=1), control = glmerControl(optimizer = "bobyqa"))

3.2 Vaild dataset

Hybrid.GroC2=glmer(abuse~voice_opp_c+FOA19_20+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))
Hybrid.raw2=glmer(abuse~voice_opp+FOA19_20+woman+sup_woman+trust+weekly_hours+age+tenure+absence+education+
                    voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean
                  +factor(round2)+(1|faid),family="binomial",subset_bwj, control = glmerControl(optimizer = "bobyqa"))

3.3 Summary

3.3.1 Results based on group-mean centering data

model_summary(list(GroC,Hybrid.GroC,Hybrid.GroC2))
## 
## Model Summary
## 
## ───────────────────────────────────────────────────────────────
##                        (1) abuse     (2) abuse     (3) abuse   
## ───────────────────────────────────────────────────────────────
## (Intercept)               2.618         5.569 **      5.172 *  
##                          (2.046)       (2.122)       (2.164)   
## voice_opp_c              -0.240 ***    -0.236 ***    -0.253 ***
##                          (0.041)       (0.040)       (0.042)   
## FOA19_20                 -0.038         0.003         0.004    
##                          (0.023)       (0.016)       (0.017)   
## woman                    -0.070        -0.088        -0.082    
##                          (0.099)       (0.098)       (0.099)   
## sup_woman                 0.095         0.120         0.087    
##                          (0.086)       (0.085)       (0.087)   
## trust_c                  -0.197 ***    -0.195 ***    -0.206 ***
##                          (0.031)       (0.031)       (0.031)   
## weekly_hours_c           -0.001        -0.002        -0.003    
##                          (0.002)       (0.003)       (0.003)   
## age_c                     0.017 *       0.018 *       0.019 ** 
##                          (0.007)       (0.007)       (0.007)   
## tenure_c                 -0.088 *      -0.090 **     -0.081 *  
##                          (0.035)       (0.034)       (0.035)   
## absence_c                 0.054 ***     0.055 ***     0.058 ***
##                          (0.014)       (0.014)       (0.015)   
## education_c               0.076 ***     0.077 ***     0.074 ***
##                          (0.011)       (0.011)       (0.012)   
## round2                    0.042                                
##                          (0.083)                               
## voice_opp_mean                         -0.561 *      -0.584 *  
##                                        (0.247)       (0.252)   
## trust_mean                             -0.282        -0.298    
##                                        (0.173)       (0.196)   
## weekly_hours_mean                      -0.064 ***    -0.055 ***
##                                        (0.014)       (0.015)   
## age_mean                               -0.078 *      -0.073 *  
##                                        (0.031)       (0.032)   
## tenure_mean                            -0.080        -0.116    
##                                        (0.142)       (0.150)   
## absence_mean                           -0.082        -0.041    
##                                        (0.094)       (0.118)   
## education_mean                          0.281 ***     0.276 ***
##                                        (0.052)       (0.053)   
## factor(round2)1                         0.031         0.057    
##                                        (0.083)       (0.085)   
## ───────────────────────────────────────────────────────────────
## Marginal R^2              0.071         0.149         0.141    
## Conditional R^2           0.179         0.162         0.153    
## AIC                    3669.144      3604.512      3429.383    
## BIC                    3747.522      3725.094      3549.038    
## Num. obs.              3069          3069          2930        
## Num. groups: faid        69            69            62        
## Var: faid (Intercept)     0.435         0.049         0.047    
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.

3.3.2 Results based on raw centering data

model_summary(list(raw,Hybrid.raw,Hybrid.raw2))
## 
## Model Summary
## 
## ───────────────────────────────────────────────────────────────
##                        (1) abuse     (2) abuse     (3) abuse   
## ───────────────────────────────────────────────────────────────
## (Intercept)               1.835         5.568 **      5.172 *  
##                          (1.613)       (2.122)       (2.158)   
## voice_opp                -0.273 ***    -0.236 ***    -0.253 ***
##                          (0.041)       (0.040)       (0.042)   
## FOA19_20                 -0.018         0.003         0.004    
##                          (0.018)       (0.016)       (0.017)   
## woman                    -0.057        -0.088        -0.082    
##                          (0.100)       (0.098)       (0.099)   
## sup_woman                 0.076         0.119         0.086    
##                          (0.088)       (0.085)       (0.087)   
## trust                    -0.213 ***    -0.195 ***    -0.206 ***
##                          (0.031)       (0.031)       (0.031)   
## weekly_hours             -0.004        -0.002        -0.003    
##                          (0.003)       (0.003)       (0.003)   
## age                       0.017 *       0.018 *       0.019 ** 
##                          (0.007)       (0.007)       (0.007)   
## tenure                   -0.085 *      -0.090 **     -0.081 *  
##                          (0.035)       (0.034)       (0.035)   
## absence                   0.059 ***     0.055 ***     0.058 ***
##                          (0.015)       (0.014)       (0.015)   
## education                 0.082 ***     0.077 ***     0.074 ***
##                          (0.011)       (0.011)       (0.012)   
## round2                    0.058                                
##                          (0.085)                               
## voice_opp_mean                         -0.326        -0.332    
##                                        (0.250)       (0.256)   
## trust_mean                             -0.087        -0.092    
##                                        (0.175)       (0.198)   
## weekly_hours_mean                      -0.062 ***    -0.052 ***
##                                        (0.014)       (0.015)   
## age_mean                               -0.096 **     -0.091 ** 
##                                        (0.031)       (0.032)   
## tenure_mean                             0.010        -0.035    
##                                        (0.146)       (0.153)   
## absence_mean                           -0.137        -0.098    
##                                        (0.095)       (0.119)   
## education_mean                          0.204 ***     0.201 ***
##                                        (0.052)       (0.054)   
## factor(round2)1                         0.030         0.056    
##                                        (0.083)       (0.085)   
## ───────────────────────────────────────────────────────────────
## Marginal R^2              0.099         0.149         0.141    
## Conditional R^2           0.144         0.162         0.153    
## AIC                    3456.183      3604.512      3429.384    
## BIC                    3533.959      3725.094      3549.039    
## Num. obs.              2930          3069          2930        
## Num. groups: faid        62            69            62        
## Var: faid (Intercept)     0.176         0.049         0.046    
## ───────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.

3.4 A quick diagnostic by fitting it in lavaan

I’m not sure whether we should run this in laavan because the MSEM in lavaan may not be able to analyze Y as 0/1 variable. Please confirmed this.

3.4.1 Full dataset

model<-"Level:1
abuse~voice_opp_c+woman+sup_woman+trust_c+weekly_hours_c+age_c+tenure_c+absence_c+education_c +round2
Level:2
abuse~voice_opp_mean+FOA19_20+woman+sup_woman+trust_mean+weekly_hours_mean+age_mean+tenure_mean+absence_mean+education_mean"
summary(sem(model,subset(bwj,round!=1),cluster="faid", se="robust"))
## lavaan 0.6.16 ended normally after 70 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        23
## 
##                                                   Used       Total
##   Number of observations                          3069        3422
##   Number of clusters [faid]                         69            
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.483
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## 
## Level 1 [within]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_c      -0.049    0.010   -4.825    0.000
##     woman            -0.029    0.026   -1.120    0.263
##     sup_woman         0.020    0.025    0.793    0.428
##     trust_c          -0.040    0.006   -6.222    0.000
##     weekly_hours_c   -0.000    0.001   -0.365    0.715
##     age_c             0.003    0.002    2.113    0.035
##     tenure_c         -0.017    0.009   -1.864    0.062
##     absence_c         0.012    0.003    3.413    0.001
##     education_c       0.015    0.002    6.120    0.000
##     round2            0.009    0.025    0.377    0.706
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.196    0.005   37.354    0.000
## 
## 
## Level 2 [faid]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_mean   -0.092    0.047   -1.948    0.051
##     FOA19_20          0.001    0.003    0.428    0.669
##     woman             0.324    0.108    3.012    0.003
##     sup_woman         0.009    0.074    0.115    0.909
##     trust_mean       -0.100    0.035   -2.882    0.004
##     weekly_hors_mn   -0.009    0.003   -2.987    0.003
##     age_mean         -0.002    0.009   -0.281    0.779
##     tenure_mean      -0.017    0.024   -0.695    0.487
##     absence_mean     -0.011    0.016   -0.679    0.497
##     education_mean    0.052    0.010    5.036    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.839    0.544    1.542    0.123
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.001    0.001    1.392    0.164

3.4.2 Vaild dataset

summary(sem(model,subset_bwj,cluster="faid", se="robust")) # However, lavaan no longer returns any warning (except that some factory ids have no missing value)
## lavaan 0.6.16 ended normally after 68 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        23
## 
##                                                   Used       Total
##   Number of observations                          2930        3274
##   Number of clusters [faid]                         62            
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.630
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Sandwich
##   Information bread                           Observed
##   Observed information based on                Hessian
## 
## 
## Level 1 [within]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_c      -0.052    0.011   -4.910    0.000
##     woman            -0.026    0.026   -1.003    0.316
##     sup_woman         0.014    0.026    0.562    0.574
##     trust_c          -0.042    0.006   -6.722    0.000
##     weekly_hours_c   -0.000    0.001   -0.596    0.551
##     age_c             0.003    0.002    2.191    0.028
##     tenure_c         -0.016    0.010   -1.626    0.104
##     absence_c         0.013    0.003    3.670    0.000
##     education_c       0.014    0.002    5.776    0.000
##     round2            0.011    0.025    0.444    0.657
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.195    0.005   36.123    0.000
## 
## 
## Level 2 [faid]:
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   abuse ~                                             
##     voice_opp_mean   -0.106    0.050   -2.128    0.033
##     FOA19_20          0.002    0.004    0.601    0.548
##     woman             0.314    0.111    2.840    0.005
##     sup_woman        -0.014    0.074   -0.193    0.847
##     trust_mean       -0.095    0.039   -2.408    0.016
##     weekly_hors_mn   -0.007    0.003   -2.464    0.014
##     age_mean         -0.004    0.009   -0.410    0.682
##     tenure_mean      -0.021    0.026   -0.797    0.426
##     absence_mean      0.007    0.019    0.340    0.734
##     education_mean    0.052    0.010    5.003    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.750    0.569    1.318    0.187
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .abuse             0.001    0.001    1.284    0.199