JNT Function
Theta Plot Function
Load The Data
JNT results w/ Theta Plot (simultaneous)
Non-simultaneous JNT
Example from Hayes’ Chapter 7
- Example (1) protest is a dummy coded (categorical) predictor
- Example (2) respappr is a continuous predictor

JNT Function

Johnson-Neyman code adapted from: https://rpubs.com/bachl/jn-plot by Marko Bachl

jnt = function(.lm, predictor, moderator, alpha=.05,simultaneous=TRUE) {
  
   require(stringi)
    b1 = coef(.lm)[predictor]
    b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    se_b1 = coef(summary(.lm))[predictor, 2]
    se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2]
    COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    if(simultaneous==TRUE) {
      t_crit <- sqrt(2*qf(1-alpha,df1=2,df2=.lm$df.residual)) 
    } else {
        t_crit = qt(1-alpha/2, .lm$df.residual)
        }
    a = t_crit^2 * se_b3^2 - b3^2
    b = 2 * (t_crit^2 * COV_b1b3 - b1 * b3)
    c = t_crit^2 * se_b1^2 - b1^2
    jn = c(
      (-b - sqrt(b^2 - 4 * a * c)) / (2 * a),
      (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    )
    JN = sort(unname(jn))
    JN = JN[JN>=min(.lm$model[,moderator]) & JN<=max(.lm$model[,moderator])]
    JN
  }

Theta Plot Function

theta_plot = function(.lm, predictor, moderator, alpha=.05, jn=F,simultaneous=TRUE) {
    require(dplyr)
    require(ggplot2); theme_set(theme_minimal())
    require(stringi)
    
  
    if(simultaneous==TRUE) {
    .data = data_frame(b1 = coef(.lm)[predictor],
                       b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       Z = quantile(.lm$model[,moderator], seq(0,1,.01)),
                       theta = b1 + Z * b3,
                       se_b1 = coef(summary(.lm))[predictor, 2],
                       COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2],
                       se_theta = sqrt(se_b1^2 + 2 * Z * COV_b1b3 + Z^2 * se_b3^2),
                        ci.lo_theta = theta-qf(1-alpha,df1=(length(.lm$residuals))-.lm$df.residual,df2=.lm$df.residual)*se_theta, 
                       ci.hi_theta = theta+qf(1-alpha,df1=(length(.lm$residuals))-.lm$df.residual,df2=.lm$df.residual)*se_theta) 
    } else {
  .data = data_frame(b1 = coef(.lm)[predictor],
                       b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       Z = quantile(.lm$model[,moderator], seq(0,1,.01)),
                       theta = b1 + Z * b3,
                       se_b1 = coef(summary(.lm))[predictor, 2],
                       COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2],
                       se_theta = sqrt(se_b1^2 + 2 * Z * COV_b1b3 + Z^2 * se_b3^2),
          ci.lo_theta = theta+qt(alpha/2, .lm$df.residual)*se_theta,
          ci.hi_theta = theta+qt(1-alpha/2, .lm$df.residual)*se_theta)
    }
  
 type <- ifelse(simultaneous,"(simultaneous)","(non-simultaneous)")
      
    if (jn) {
      JN = jnt(.lm=.lm, predictor=predictor, moderator=moderator, alpha=alpha,simultaneous)
      JN_lines = geom_vline(xintercept=JN, linetype=2)
      JN_regions = ifelse(length(JN) == 0, "no significance regions", paste(round(JN,2), collapse = "; "))
      Xlab = paste0(moderator, " (JN Significance Regions: ", JN_regions,")")
    }
    else {
      Xlab = moderator
      JN_lines = NULL
    }
    .data %>%
      ggplot(aes(Z, theta, ymin=ci.lo_theta, ymax=ci.hi_theta)) + geom_ribbon(alpha = .2) + geom_line() + ggtitle(paste("Conditional Effect of", predictor, "as function of", moderator, " ", type)) + geom_hline(yintercept=0, linetype=2) + labs(x = Xlab, y=expression(theta)) + JN_lines
  }

Load The Data

ch9e_3 <- readr::read_csv("~/R/DataHouse/PROJECTS/JNT/ch9e_3.csv")
  ch9e_3$AvPost <- apply(ch9e_3[,c("IQ4","IQ8")],1,mean)
  
  mod.het2 <- lm(AvPost~Treatment*IQPre,data=ch9e_3)
  summary(mod.het2)


Call:
lm(formula = AvPost ~ Treatment * IQPre, data = ch9e_3)

Residuals:
    Min      1Q  Median      3Q     Max 
-24.458  -7.735  -0.539   6.895  42.555 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     32.69121    3.95087   8.274 4.03e-15 ***
Treatment       -8.63690    8.57528  -1.007    0.315    
IQPre            0.72445    0.03972  18.239  < 2e-16 ***
Treatment:IQPre  0.12191    0.08387   1.454    0.147    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 11.45 on 306 degrees of freedom
Multiple R-squared:  0.6101,    Adjusted R-squared:  0.6062 
F-statistic: 159.6 on 3 and 306 DF,  p-value: < 2.2e-16

  head(ch9e_3)

# A tibble: 6 × 7
  Grade Treatment IQPre   IQ4   IQ8 IQGain AvPost
  <int>     <int> <int> <int> <int>  <int>  <dbl>
1     1         0    45    58    76     31   67.0
2     1         0    75    62    85     10   73.5
3     1         0    61    82    87     26   84.5
4     1         0    84    86    86      2   86.0
5     1         0    65    76    78     13   77.0
6     1         0    72    94    94     22   94.0

JNT results w/ Theta Plot (simultaneous)

predictor HAS TO BE A DUMMY CODED numeric variable
moderator is continuous
simultaneous computes CI based on simultaneous comparisons, if false this is non-simultaneous
jn in the theta plot function places lines of significance on the plot
NOTE: the following was used to compute the CV for simultaneous JNT qf(1-alpha,df1=4,df2=(length(.lm$residuals))-4)

jnt(mod.het2,predictor="Treatment",moderator="IQPre",simultaneous = TRUE)

[1] 103.8919 128.6798

theta_plot(mod.het2,predictor="Treatment",moderator="IQPre",jn=T,simultaneous=TRUE)

Non-simultaneous JNT

jnt(mod.het2,predictor="Treatment",moderator="IQPre",simultaneous = FALSE)

[1] 97.21789

theta_plot(mod.het2,predictor="Treatment",moderator="IQPre",jn=T,simultaneous=FALSE)

Example from Hayes’ Chapter 7

The data come from a study by Garcia et al. and were made available as demo data by Hayes as part of his PROCESS macro. The moderation analysis of this data set is described in great detail in Hayes’ Chapter 7.

Example (1) protest is a dummy coded (categorical) predictor

library(readr)
protest <- read_csv("~/R/DataHouse/MISC/DATA/protest.csv")
protest.lm <- lm(liking ~ protest * sexism, protest)
summary(protest.lm)


Call:
lm(formula = liking ~ protest * sexism, data = protest)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9894 -0.6381  0.0478  0.7404  2.3650 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      7.7062     1.0449   7.375 1.99e-11 ***
protest         -3.7727     1.2541  -3.008  0.00318 ** 
sexism          -0.4725     0.2038  -2.318  0.02205 *  
protest:sexism   0.8336     0.2436   3.422  0.00084 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9888 on 125 degrees of freedom
Multiple R-squared:  0.1335,    Adjusted R-squared:  0.1127 
F-statistic: 6.419 on 3 and 125 DF,  p-value: 0.0004439

jnt(protest.lm,predictor="protest",moderator="sexism",alpha=0.05,simultaneous = FALSE)

[1] 3.508712 4.975297

jnt(protest.lm,predictor="protest",moderator="sexism",alpha=0.05,simultaneous = TRUE)

[1] 5.082156

theta_plot(protest.lm,predictor="protest",moderator="sexism",alpha=0.05,jn=TRUE,simultaneous = FALSE)

theta_plot(protest.lm,predictor="protest",moderator="sexism",alpha=0.05,jn=TRUE,simultaneous = TRUE)

Example (2) respappr is a continuous predictor

protest.lm2 <- lm(liking ~ respappr * sexism, protest)
summary(protest.lm2)


Call:
lm(formula = liking ~ respappr * sexism, data = protest)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.8910 -0.5641  0.0329  0.6319  2.4860 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)   
(Intercept)      6.09345    2.04087   2.986   0.0034 **
respappr        -0.19998    0.41005  -0.488   0.6266   
sexism          -0.43395    0.38218  -1.135   0.2584   
respappr:sexism  0.10972    0.07639   1.436   0.1534   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9128 on 125 degrees of freedom
Multiple R-squared:  0.2616,    Adjusted R-squared:  0.2439 
F-statistic: 14.76 on 3 and 125 DF,  p-value: 2.749e-08

jnt(protest.lm2,predictor="respappr",moderator="sexism",alpha=0.05,simultaneous = FALSE)

[1] 3.966377

jnt(protest.lm2,predictor="respappr",moderator="sexism",alpha=0.05,simultaneous = TRUE)

[1] 4.186246

theta_plot(protest.lm2,predictor="respappr",moderator="sexism",alpha=0.05,jn=TRUE,simultaneous = FALSE)

theta_plot(protest.lm2,predictor="respappr",moderator="sexism",alpha=0.05,jn=TRUE,simultaneous = TRUE)