#Load packages

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(haven)

#Load data

CRdata <- read_csv("schoolsmoke.csv")
## Rows: 1600 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): school, thk, prethk, cc, gprethk, d, d2, d3, d4, d5
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(CRdata)
## Rows: 1,600
## Columns: 10
## $ school  <dbl> 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 19…
## $ thk     <dbl> 1, 2, 3, 1, 2, 2, 3, 3, 2, 2, 4, 2, 2, 3, 4, 4, 1, 3, 4, 2, 1,…
## $ prethk  <dbl> 5, 3, 4, 0, 1, 3, 1, 2, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 2, 1, 3,…
## $ cc      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ gprethk <dbl> 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.…
## $ d       <dbl> 1, 1, 2, 3, 0, 2, 2, 2, 1, 2, 2, 4, 0, 2, 1, 0, 2, 1, 2, 4, 3,…
## $ d2      <dbl> 2, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 3, 2, 2, 2, 2, 4, 1, 1, 1, 2,…
## $ d3      <dbl> 2, 0, 3, 2, 3, 1, 4, 2, 3, 3, 3, 0, 2, 1, 2, 2, 2, 3, 2, 1, 3,…
## $ d4      <dbl> 3, 1, 3, 2, 2, 2, 2, 3, 2, 4, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 4,…
## $ d5      <dbl> 0, 3, 3, 2, 2, 0, 0, 4, 1, 0, 2, 1, 2, 4, 3, 4, 1, 1, 2, 0, 2,…
psych::describe(CRdata, 
                fast = TRUE)
##         vars    n   mean     sd    min    max  range   se
## school     1 1600 421.94 112.67 193.00 515.00 322.00 2.82
## thk        2 1600   2.59   1.12   1.00   4.00   3.00 0.03
## prethk     3 1600   2.07   1.26   0.00   6.00   6.00 0.03
## cc         4 1600   0.48   0.50   0.00   1.00   1.00 0.01
## gprethk    5 1600   2.07   0.30   1.59   3.06   1.47 0.01
## d          6 1600   1.99   1.10   0.00   4.00   4.00 0.03
## d2         7 1600   1.98   1.08   0.00   4.00   4.00 0.03
## d3         8 1600   1.96   1.10   0.00   4.00   4.00 0.03
## d4         9 1600   2.02   1.10   0.00   4.00   4.00 0.03
## d5        10 1600   1.99   1.09   0.00   4.00   4.00 0.03

#Calculating scale score & Alpha

peer_items <- CRdata %>% 
  select(., d,
         d2,
         d3,
         d4,
         d5)

alpha(peer_items, check.keys=TRUE)
## Warning in alpha(peer_items, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
##  This is indicated by a negative sign for the variable name.
## 
## Reliability analysis   
## Call: alpha(x = peer_items, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r   S/N   ase mean  sd median_r
##      0.063     0.063   0.054     0.013 0.067 0.037    2 0.5    0.011
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt    -0.01  0.06  0.13
## Duhachek -0.01  0.06  0.14
## 
##  Reliability if an item is dropped:
##     raw_alpha std.alpha G6(smc) average_r    S/N alpha se   var.r   med.r
## d      0.0076    0.0076  0.0081    0.0019 0.0077    0.041 0.00098 -0.0113
## d2     0.0512    0.0508  0.0414    0.0132 0.0536    0.039 0.00113  0.0126
## d3-    0.0201    0.0194  0.0172    0.0049 0.0198    0.040 0.00101 -0.0041
## d4-    0.0844    0.0841  0.0657    0.0224 0.0918    0.037 0.00053  0.0257
## d5     0.0878    0.0875  0.0698    0.0234 0.0959    0.037 0.00113  0.0315
## 
##  Item statistics 
##        n raw.r std.r   r.cor  r.drop mean  sd
## d   1600  0.49  0.49  0.2361  0.0602    2 1.1
## d2  1600  0.45  0.46  0.1055  0.0259    2 1.1
## d3- 1600  0.48  0.48  0.2014  0.0506    2 1.1
## d4- 1600  0.44  0.43  0.0038 -0.0010    2 1.1
## d5  1600  0.43  0.43 -0.0133 -0.0041    2 1.1
## 
## Non missing response frequency for each item
##       0    1    2    3    4 miss
## d  0.10 0.22 0.38 0.20 0.10    0
## d2 0.11 0.18 0.43 0.18 0.09    0
## d3 0.10 0.23 0.38 0.18 0.10    0
## d4 0.10 0.20 0.40 0.20 0.10    0
## d5 0.10 0.20 0.40 0.20 0.10    0
my.keys.list <- list(peer = c("d","d2","d3", "d4","d5"))
                     
my.scales <- scoreItems(my.keys.list, CRdata, impute = "none")
## Warning in sqrt(corrected.var * item.var): NaNs produced
print(my.scales, short = FALSE)
## Call: scoreItems(keys = my.keys.list, items = CRdata, impute = "none")
## 
## (Standardized) Alpha:
##        peer
## alpha -0.07
## 
## Standard errors of unstandardized Alpha:
##        peer
## ASE   0.041
## 
## Standardized Alpha of observed scales:
##       peer
## [1,] -0.07
## 
## Average item correlation:
##             peer
## average.r -0.013
## 
## Median item correlation:
##   peer 
## -0.011 
## 
##  Guttman 6* reliability: 
##            peer
## Lambda.6 -0.052
## 
## Signal/Noise based upon av.r : 
##                peer
## Signal/Noise -0.065
## 
## Scale intercorrelations corrected for attenuation 
##  raw correlations below the diagonal, alpha on the diagonal 
##  corrected correlations above the diagonal:
## 
## Note that these are the correlations of the complete scales based on the correlation matrix,
##  not the observed scales based on the raw items.
##       peer
## peer -0.07
## 
## Item by scale correlations:
##  corrected for item overlap and scale reliability
##    peer
## d   NaN
## d2  NaN
## d3  NaN
## d4  NaN
## d5  NaN
## 
## Non missing response frequency for each item
##       0    1    2    3    4 miss
## d  0.10 0.22 0.38 0.20 0.10    0
## d2 0.11 0.18 0.43 0.18 0.09    0
## d3 0.10 0.23 0.38 0.18 0.10    0
## d4 0.10 0.20 0.40 0.20 0.10    0
## d5 0.10 0.20 0.40 0.20 0.10    0
my.scores <- as_tibble(my.scales$scores)
CRdata.1 <- bind_cols(CRdata, my.scores)

glimpse(CRdata.1)
## Rows: 1,600
## Columns: 11
## $ school  <dbl> 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 19…
## $ thk     <dbl> 1, 2, 3, 1, 2, 2, 3, 3, 2, 2, 4, 2, 2, 3, 4, 4, 1, 3, 4, 2, 1,…
## $ prethk  <dbl> 5, 3, 4, 0, 1, 3, 1, 2, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 2, 1, 3,…
## $ cc      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ gprethk <dbl> 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.…
## $ d       <dbl> 1, 1, 2, 3, 0, 2, 2, 2, 1, 2, 2, 4, 0, 2, 1, 0, 2, 1, 2, 4, 3,…
## $ d2      <dbl> 2, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 3, 2, 2, 2, 2, 4, 1, 1, 1, 2,…
## $ d3      <dbl> 2, 0, 3, 2, 3, 1, 4, 2, 3, 3, 3, 0, 2, 1, 2, 2, 2, 3, 2, 1, 3,…
## $ d4      <dbl> 3, 1, 3, 2, 2, 2, 2, 3, 2, 4, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 4,…
## $ d5      <dbl> 0, 3, 3, 2, 2, 0, 0, 4, 1, 0, 2, 1, 2, 4, 3, 4, 1, 1, 2, 0, 2,…
## $ peer    <dbl> 1.6, 1.8, 2.4, 2.2, 2.0, 1.2, 2.0, 2.4, 1.6, 2.2, 2.6, 1.8, 1.…

Visualizing the Key Variables

hist(CRdata.1$thk, 
     main = "Distribution of Tobacco and Health Knowledge Post-Intervention",
     sub = "N = 1600 students. Data from Flay et al. (1988) and Rabe-Hesketh & Skrondal (2012).",
     xlab = "Average Tobacco and Health Knowledge Score")

##Step 1a: Null Model

models <- list()
model.0 <- lmer(thk ~ (1|school), REML = FALSE, data = CRdata.1)
summary(model.0)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: thk ~ (1 | school)
##    Data: CRdata.1
## 
##      AIC      BIC   logLik deviance df.resid 
##   4833.0   4849.1  -2413.5   4827.0     1597 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9039 -0.7734  0.0693  0.9240  1.7430 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 0.09438  0.3072  
##  Residual             1.16219  1.0780  
## Number of obs: 1600, groups:  school, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.60750    0.06538   39.88

#Step 1b: Calculate ICC

null.ICC <- .09438/(.09438 + 1.16219)
null.ICC
## [1] 0.07510923
lmerTest::rand(model.0)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## thk ~ (1 | school)
##              npar  logLik    AIC   LRT Df Pr(>Chisq)    
## <none>          3 -2413.5 4833.0                        
## (1 | school)    2 -2445.6 4895.2 64.16  1  1.147e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##Adding level 1 covariate - prethk

model.1 <- lmer(thk ~ prethk + (1|school), REML = FALSE, data = CRdata.1)
summary(model.1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: thk ~ prethk + (1 | school)
##    Data: CRdata.1
## 
##      AIC      BIC   logLik deviance df.resid 
##   4733.5   4755.0  -2362.7   4725.5     1596 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.12273 -0.82727  0.03082  0.83756  2.15165 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 0.07519  0.2742  
##  Residual             1.09321  1.0456  
## Number of obs: 1600, groups:  school, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.15164    0.07414   29.02
## prethk       0.21747    0.02122   10.25
## 
## Correlation of Fixed Effects:
##        (Intr)
## prethk -0.598

#Adding level-2 covariates

model.2 <- lmer(thk ~ prethk + cc + (1|school), REML = FALSE, data = CRdata.1)
summary(model.2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: thk ~ prethk + cc + (1 | school)
##    Data: CRdata.1
## 
##      AIC      BIC   logLik deviance df.resid 
##   4721.3   4748.2  -2355.6   4711.3     1595 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.20705 -0.80300  0.04029  0.80179  2.15135 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 0.03333  0.1826  
##  Residual             1.09412  1.0460  
## Number of obs: 1600, groups:  school, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.94507    0.07699  25.265
## prethk       0.22262    0.02113  10.535
## cc           0.39215    0.08944   4.385
## 
## Correlation of Fixed Effects:
##        (Intr) prethk
## prethk -0.585       
## cc     -0.580  0.023
model.3 <- lmer(thk ~ prethk + cc +gprethk + (1|school), REML = FALSE, data = CRdata.1)
summary(model.3)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: thk ~ prethk + cc + gprethk + (1 | school)
##    Data: CRdata.1
## 
##      AIC      BIC   logLik deviance df.resid 
##   4712.6   4744.9  -2350.3   4700.6     1594 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.23332 -0.79567  0.02374  0.79254  2.16704 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 0.01734  0.1317  
##  Residual             1.09334  1.0456  
## Number of obs: 1600, groups:  school, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.07609    0.25478   4.224
## prethk       0.21260    0.02138   9.944
## cc           0.43075    0.07554   5.702
## gprethk      0.41835    0.11930   3.507
## 
## Correlation of Fixed Effects:
##         (Intr) prethk cc    
## prethk   0.000              
## cc      -0.288  0.000       
## gprethk -0.963 -0.179  0.148

#Creating a Table

library(modelsummary)
## 
## Attaching package: 'modelsummary'
## The following object is masked from 'package:psych':
## 
##     SD
library(broom.mixed)

models <- list(model.0, model.1, model.2, model.3)
modelsummary(models)
Model 1 Model 2 Model 3 Model 4
(Intercept) 2.607 2.152 1.945 1.076
(0.065) (0.074) (0.077) (0.255)
prethk 0.217 0.223 0.213
(0.021) (0.021) (0.021)
cc 0.392 0.431
(0.089) (0.076)
gprethk 0.418
(0.119)
SD (Intercept school) 0.307 0.274 0.183 0.132
SD (Observations) 1.078 1.046 1.046 1.046
Num.Obs. 1600 1600 1600 1600
R2 Marg. 0.000 0.060 0.091 0.109
R2 Cond. 0.075 0.121 0.118 0.123
AIC 4836.6 4743.2 4734.5 4729.0
BIC 4852.7 4764.7 4761.4 4761.3
ICC 0.1 0.1 0.0 0.0
RMSE 1.07 1.04 1.04 1.04
modelsummary(models, output = 'modelsummary.html', title = 'MLM Estimates')