library(tidyverse)#Misc
library(Hmisc) #Misc
library(MASS) #Regression
#library(foreign)
library(sandwich) #for cluster robust standard error
library(lmtest) #for cluster robust standard error & LR test
library(coxphw) # for wald test
library(stargazer) # for output html'
library(emmeans) # for exploring interaction
load("D:/thesis/data/imp_aggregate.RData")
df <- dat$imputations$imp1 # Take the first set of the five
# Preparing the operation data frame for regression
rdf <- df %>%
dplyr::select(delinquency, mgb, stegb, ltegb, vbs, nrwa,
nrws, nrwp, pf, np, nle, sne, apc,
ats, atp, D45, D47, bcv, ica, delinf, fses,
age, gender, school, lbc, lbc1)
rdf1 <- rdf
rdf1$gender <- as.factor(rdf1$gender)
Based on my understanding, I would have three models: one contains only the interaction terms for social strain variables and gender, on contains only the interaction terms for negative emotions and gender, and one with all interaqction terms. It is shown below. Are the model without any interaction needs to be included? I kept it in the first draft.
df_lbc2 <- rdf1 %>% filter(lbc1==2)
# Model 1, with only interaction terms for social strain variables and gender
fit1_lbc2 <- glm.nb(delinquency ~ mgb + stegb + ltegb + vbs + nrwa + nrws +
nrwp + pf + np + nle + sne +
mgb*gender + stegb*gender +
ltegb*gender + vbs*gender +
nrwa*gender + nrws*gender +
nrwp*gender + pf*gender +
np*gender + nle*gender +
apc + ats + atp + D45 + D47 + bcv + ica +
delinf + fses + age, data = df_lbc2)
summary(fit1_lbc2)
##
## Call:
## glm.nb(formula = delinquency ~ mgb + stegb + ltegb + vbs + nrwa +
## nrws + nrwp + pf + np + nle + sne + mgb * gender + stegb *
## gender + ltegb * gender + vbs * gender + nrwa * gender +
## nrws * gender + nrwp * gender + pf * gender + np * gender +
## nle * gender + apc + ats + atp + D45 + D47 + bcv + ica +
## delinf + fses + age, data = df_lbc2, init.theta = 3.630348231,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6551 -1.1391 -0.2235 0.4966 2.8168
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.154197 0.519677 -0.297 0.766682
## mgb 0.019642 0.016199 1.212 0.225322
## stegb -0.012534 0.037496 -0.334 0.738167
## ltegb -0.003137 0.026068 -0.120 0.904229
## vbs 0.056733 0.036047 1.574 0.115517
## nrwa 0.030602 0.027473 1.114 0.265326
## nrws 0.042510 0.045345 0.937 0.348510
## nrwp 0.035318 0.044408 0.795 0.426443
## pf 0.049075 0.060888 0.806 0.420255
## np 0.016589 0.018598 0.892 0.372391
## nle -0.010463 0.014975 -0.699 0.484732
## sne -0.002313 0.007344 -0.315 0.752773
## gender1 -0.748305 0.290765 -2.574 0.010065 *
## apc -0.012477 0.009196 -1.357 0.174851
## ats -0.043094 0.011656 -3.697 0.000218 ***
## atp 0.033654 0.015986 2.105 0.035276 *
## D45 0.229251 0.045966 4.987 6.12e-07 ***
## D47 -0.052562 0.024230 -2.169 0.030059 *
## bcv -0.061812 0.015636 -3.953 7.71e-05 ***
## ica 0.021259 0.009834 2.162 0.030633 *
## delinf 0.084924 0.008978 9.459 < 2e-16 ***
## fses 0.027701 0.010096 2.744 0.006076 **
## age 0.062339 0.032585 1.913 0.055729 .
## mgb:gender1 -0.053011 0.026269 -2.018 0.043587 *
## stegb:gender1 -0.142896 0.065652 -2.177 0.029514 *
## ltegb:gender1 0.029647 0.039669 0.747 0.454838
## vbs:gender1 -0.063225 0.079959 -0.791 0.429109
## nrwa:gender1 0.041513 0.047222 0.879 0.379337
## nrws:gender1 -0.019035 0.068718 -0.277 0.781777
## nrwp:gender1 0.075209 0.068952 1.091 0.275387
## pf:gender1 0.008902 0.093630 0.095 0.924253
## np:gender1 0.032949 0.031625 1.042 0.297466
## nle:gender1 0.042926 0.024115 1.780 0.075066 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.6303) family taken to be 1)
##
## Null deviance: 1783.5 on 974 degrees of freedom
## Residual deviance: 1103.5 on 942 degrees of freedom
## AIC: 3508.1
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.630
## Std. Err.: 0.481
##
## 2 x log-likelihood: -3440.138
# Model 2, with only the interaction terms for negative emotions and gender
fit2_lbc2 <- glm.nb(delinquency ~ mgb + stegb + ltegb + vbs + nrwa + nrws +
nrwp + pf + np + nle + sne +
sne*gender +
apc + ats + atp + D45 + D47 + bcv + ica +
delinf + fses + age, data = df_lbc2)
summary(fit2_lbc2)
##
## Call:
## glm.nb(formula = delinquency ~ mgb + stegb + ltegb + vbs + nrwa +
## nrws + nrwp + pf + np + nle + sne + sne * gender + apc +
## ats + atp + D45 + D47 + bcv + ica + delinf + fses + age,
## data = df_lbc2, init.theta = 3.443352426, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6635 -1.1531 -0.2078 0.4955 2.8534
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1783397 0.5238035 -0.340 0.73350
## mgb -0.0044954 0.0129348 -0.348 0.72819
## stegb -0.0623888 0.0323340 -1.930 0.05367 .
## ltegb 0.0108060 0.0225183 0.480 0.63132
## vbs 0.0347874 0.0325602 1.068 0.28534
## nrwa 0.0434381 0.0224739 1.933 0.05326 .
## nrws 0.0432984 0.0344544 1.257 0.20887
## nrwp 0.0686912 0.0344053 1.997 0.04588 *
## pf 0.0507858 0.0467936 1.085 0.27778
## np 0.0299704 0.0154466 1.940 0.05235 .
## nle 0.0083062 0.0118047 0.704 0.48166
## sne -0.0023607 0.0086165 -0.274 0.78411
## gender1 -0.6505951 0.1594805 -4.079 4.51e-05 ***
## apc -0.0094363 0.0091983 -1.026 0.30495
## ats -0.0467157 0.0116989 -3.993 6.52e-05 ***
## atp 0.0360912 0.0160024 2.255 0.02411 *
## D45 0.2297334 0.0462094 4.972 6.64e-07 ***
## D47 -0.0515448 0.0243882 -2.114 0.03456 *
## bcv -0.0615186 0.0157160 -3.914 9.06e-05 ***
## ica 0.0211303 0.0098636 2.142 0.03217 *
## delinf 0.0851934 0.0089161 9.555 < 2e-16 ***
## fses 0.0278181 0.0101017 2.754 0.00589 **
## age 0.0577632 0.0326560 1.769 0.07692 .
## sne:gender1 0.0004593 0.0144077 0.032 0.97457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.4434) family taken to be 1)
##
## Null deviance: 1754.4 on 974 degrees of freedom
## Residual deviance: 1100.3 on 951 degrees of freedom
## AIC: 3503.3
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.443
## Std. Err.: 0.442
##
## 2 x log-likelihood: -3453.255
# Model 3, full model
fit3_lbc2 <- glm.nb(delinquency ~ mgb + stegb + ltegb + vbs + nrwa + nrws +
nrwp + pf + np + nle + sne +
mgb*gender + stegb*gender +
ltegb*gender + vbs*gender +
nrwa*gender + nrws*gender +
nrwp*gender + pf*gender +
np*gender + nle*gender + sne*gender +
apc + ats + atp + D45 + D47 + bcv + ica +
delinf + fses + age, data = df_lbc2)
summary(fit3_lbc2)
##
## Call:
## glm.nb(formula = delinquency ~ mgb + stegb + ltegb + vbs + nrwa +
## nrws + nrwp + pf + np + nle + sne + mgb * gender + stegb *
## gender + ltegb * gender + vbs * gender + nrwa * gender +
## nrws * gender + nrwp * gender + pf * gender + np * gender +
## nle * gender + sne * gender + apc + ats + atp + D45 + D47 +
## bcv + ica + delinf + fses + age, data = df_lbc2, init.theta = 3.630240908,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6551 -1.1393 -0.2243 0.4964 2.8165
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1530666 0.5221479 -0.293 0.769409
## mgb 0.0196498 0.0162025 1.213 0.225221
## stegb -0.0125021 0.0375233 -0.333 0.738997
## ltegb -0.0031556 0.0260853 -0.121 0.903712
## vbs 0.0567419 0.0360469 1.574 0.115461
## nrwa 0.0306315 0.0275260 1.113 0.265787
## nrws 0.0424963 0.0453523 0.937 0.348745
## nrwp 0.0353733 0.0444969 0.795 0.426637
## pf 0.0490696 0.0608941 0.806 0.420348
## np 0.0166073 0.0186419 0.891 0.373005
## nle -0.0104572 0.0149778 -0.698 0.485064
## sne -0.0024034 0.0086765 -0.277 0.781779
## gender1 -0.7500024 0.3038239 -2.469 0.013566 *
## apc -0.0124782 0.0092017 -1.356 0.175073
## ats -0.0430861 0.0116723 -3.691 0.000223 ***
## atp 0.0336444 0.0159901 2.104 0.035372 *
## D45 0.2291919 0.0460351 4.979 6.4e-07 ***
## D47 -0.0525372 0.0242666 -2.165 0.030387 *
## bcv -0.0618015 0.0156566 -3.947 7.9e-05 ***
## ica 0.0212595 0.0098349 2.162 0.030646 *
## delinf 0.0849345 0.0089894 9.448 < 2e-16 ***
## fses 0.0276906 0.0101063 2.740 0.006145 **
## age 0.0623081 0.0326146 1.910 0.056077 .
## mgb:gender1 -0.0530457 0.0263244 -2.015 0.043897 *
## stegb:gender1 -0.1431306 0.0667869 -2.143 0.032105 *
## ltegb:gender1 0.0296250 0.0397027 0.746 0.455564
## vbs:gender1 -0.0632305 0.0799601 -0.791 0.429075
## nrwa:gender1 0.0413937 0.0475836 0.870 0.384346
## nrws:gender1 -0.0191469 0.0690614 -0.277 0.781592
## nrwp:gender1 0.0750985 0.0692639 1.084 0.278259
## pf:gender1 0.0088283 0.0937432 0.094 0.924970
## np:gender1 0.0328938 0.0317753 1.035 0.300575
## nle:gender1 0.0428770 0.0242237 1.770 0.076720 .
## sne:gender1 0.0003073 0.0162506 0.019 0.984915
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.6302) family taken to be 1)
##
## Null deviance: 1783.4 on 974 degrees of freedom
## Residual deviance: 1103.5 on 941 degrees of freedom
## AIC: 3510.1
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.630
## Std. Err.: 0.481
##
## 2 x log-likelihood: -3440.137
Regarding your comment on the strange statistical result given the dicrepancy between the model table and the marginal effect table. I am also a little bit confused too. One question I have tried to understand, but still get confusing is this:
So taking the term monetary goal blockage
*gender
for LBC2 as an example. I will just use the model that include the 10 interaction terms for social strain variables and gender.
The model suggests THERE IS AN INTERACTION. But if you take a look at the Estimated Marginal Effects (EMEs), it is kind of wierd, since the 95% CI for the margial effect of both male (0) and female (1) contain zero, while the p value for the z-statistics is significant:
emtrends(fit1_lbc2, pairwise ~ gender, 'mgb')
## $emtrends
## gender mgb.trend SE df asymp.LCL asymp.UCL
## 0 0.0196 0.0162 Inf -0.0121 0.05139
## 1 -0.0334 0.0212 Inf -0.0750 0.00823
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## 0 - 1 0.053 0.0263 Inf 2.018 0.0436
However, if we look at the Estimated Marginal Means (EMMs), we can see that the EMMs of monetary goal blockage at both level of gender is signifianct, the compaison between the Emms at both level of gender is also significant, as it is shown below.
emmeans(fit1_lbc2, pairwise ~ gender, 'mgb')
## $emmeans
## mgb = 2.62:
## gender emmean SE df asymp.LCL asymp.UCL
## 0 0.897 0.0411 Inf 0.817 0.978
## 1 0.232 0.0532 Inf 0.127 0.336
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## mgb = 2.62:
## contrast estimate SE df z.ratio p.value
## 0 - 1 0.666 0.0682 Inf 9.764 <.0001
##
## Results are given on the log (not the response) scale.
This is where I get a little bit confused. How should I interpret? Which metric should I use?
On Pp15 of my Chapter 5, you mentioned the difference between interaction term and conditining term. When I said conditioning term, I am refering to gender
when it comes to my thesis, because I was looking into the gender’s conditioning effect on social strain varables. When I said interaction terms, I mean things like mgb*gender
. I don’t know whether I am stating it correctly.
But I do admit, my writing in the said paragraph is somewaht vague, I will review.
In addition, when I mentioned “constitutive terms” I wanted to refer to things like mgb
and gender
, when they constitute the term mgb*gender
.
On Pp17 Comments[cuhk23], you said the interpretion is incorrect. This comment also applies to the next two subsections regrading LBC1 and NLBC, would you elaborate?
Also, would you elaborate on Pp17 Comments[cuhk24], where you mentioned “the significant marginal effect for female and the insignificant marginal effect for male do not statistically justify the insignificant main effect of negative relationship with peers. Instead, it is only partially because of the insignificant gender differences revealed by z statistics?”
```