#Load packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(haven)
#Load data
CRdata <- read_csv("schoolsmoke.csv")
## Rows: 1600 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): school, thk, prethk, cc, gprethk, d, d2, d3, d4, d5
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(CRdata)
## Rows: 1,600
## Columns: 10
## $ school <dbl> 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 19…
## $ thk <dbl> 1, 2, 3, 1, 2, 2, 3, 3, 2, 2, 4, 2, 2, 3, 4, 4, 1, 3, 4, 2, 1,…
## $ prethk <dbl> 5, 3, 4, 0, 1, 3, 1, 2, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 2, 1, 3,…
## $ cc <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ gprethk <dbl> 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.…
## $ d <dbl> 1, 1, 2, 3, 0, 2, 2, 2, 1, 2, 2, 4, 0, 2, 1, 0, 2, 1, 2, 4, 3,…
## $ d2 <dbl> 2, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 3, 2, 2, 2, 2, 4, 1, 1, 1, 2,…
## $ d3 <dbl> 2, 0, 3, 2, 3, 1, 4, 2, 3, 3, 3, 0, 2, 1, 2, 2, 2, 3, 2, 1, 3,…
## $ d4 <dbl> 3, 1, 3, 2, 2, 2, 2, 3, 2, 4, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 4,…
## $ d5 <dbl> 0, 3, 3, 2, 2, 0, 0, 4, 1, 0, 2, 1, 2, 4, 3, 4, 1, 1, 2, 0, 2,…
psych::describe(CRdata,
fast = TRUE)
## vars n mean sd min max range se
## school 1 1600 421.94 112.67 193.00 515.00 322.00 2.82
## thk 2 1600 2.59 1.12 1.00 4.00 3.00 0.03
## prethk 3 1600 2.07 1.26 0.00 6.00 6.00 0.03
## cc 4 1600 0.48 0.50 0.00 1.00 1.00 0.01
## gprethk 5 1600 2.07 0.30 1.59 3.06 1.47 0.01
## d 6 1600 1.99 1.10 0.00 4.00 4.00 0.03
## d2 7 1600 1.98 1.08 0.00 4.00 4.00 0.03
## d3 8 1600 1.96 1.10 0.00 4.00 4.00 0.03
## d4 9 1600 2.02 1.10 0.00 4.00 4.00 0.03
## d5 10 1600 1.99 1.09 0.00 4.00 4.00 0.03
#Calculating scale score & Alpha
peer_items <- CRdata %>%
select(., d,
d2,
d3,
d4,
d5)
alpha(peer_items, check.keys=TRUE)
## Warning in alpha(peer_items, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
## This is indicated by a negative sign for the variable name.
##
## Reliability analysis
## Call: alpha(x = peer_items, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.063 0.063 0.054 0.013 0.067 0.037 2 0.5 0.011
##
## 95% confidence boundaries
## lower alpha upper
## Feldt -0.01 0.06 0.13
## Duhachek -0.01 0.06 0.14
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## d 0.0076 0.0076 0.0081 0.0019 0.0077 0.041 0.00098 -0.0113
## d2 0.0512 0.0508 0.0414 0.0132 0.0536 0.039 0.00113 0.0126
## d3- 0.0201 0.0194 0.0172 0.0049 0.0198 0.040 0.00101 -0.0041
## d4- 0.0844 0.0841 0.0657 0.0224 0.0918 0.037 0.00053 0.0257
## d5 0.0878 0.0875 0.0698 0.0234 0.0959 0.037 0.00113 0.0315
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## d 1600 0.49 0.49 0.2361 0.0602 2 1.1
## d2 1600 0.45 0.46 0.1055 0.0259 2 1.1
## d3- 1600 0.48 0.48 0.2014 0.0506 2 1.1
## d4- 1600 0.44 0.43 0.0038 -0.0010 2 1.1
## d5 1600 0.43 0.43 -0.0133 -0.0041 2 1.1
##
## Non missing response frequency for each item
## 0 1 2 3 4 miss
## d 0.10 0.22 0.38 0.20 0.10 0
## d2 0.11 0.18 0.43 0.18 0.09 0
## d3 0.10 0.23 0.38 0.18 0.10 0
## d4 0.10 0.20 0.40 0.20 0.10 0
## d5 0.10 0.20 0.40 0.20 0.10 0
my.keys.list <- list(peer = c("d","d2","d3", "d4","d5"))
my.scales <- scoreItems(my.keys.list, CRdata, impute = "none")
## Warning in sqrt(corrected.var * item.var): NaNs produced
print(my.scales, short = FALSE)
## Call: scoreItems(keys = my.keys.list, items = CRdata, impute = "none")
##
## (Standardized) Alpha:
## peer
## alpha -0.07
##
## Standard errors of unstandardized Alpha:
## peer
## ASE 0.041
##
## Standardized Alpha of observed scales:
## peer
## [1,] -0.07
##
## Average item correlation:
## peer
## average.r -0.013
##
## Median item correlation:
## peer
## -0.011
##
## Guttman 6* reliability:
## peer
## Lambda.6 -0.052
##
## Signal/Noise based upon av.r :
## peer
## Signal/Noise -0.065
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
##
## Note that these are the correlations of the complete scales based on the correlation matrix,
## not the observed scales based on the raw items.
## peer
## peer -0.07
##
## Item by scale correlations:
## corrected for item overlap and scale reliability
## peer
## d NaN
## d2 NaN
## d3 NaN
## d4 NaN
## d5 NaN
##
## Non missing response frequency for each item
## 0 1 2 3 4 miss
## d 0.10 0.22 0.38 0.20 0.10 0
## d2 0.11 0.18 0.43 0.18 0.09 0
## d3 0.10 0.23 0.38 0.18 0.10 0
## d4 0.10 0.20 0.40 0.20 0.10 0
## d5 0.10 0.20 0.40 0.20 0.10 0
my.scores <- as_tibble(my.scales$scores)
CRdata.1 <- bind_cols(CRdata, my.scores)
glimpse(CRdata.1)
## Rows: 1,600
## Columns: 11
## $ school <dbl> 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 19…
## $ thk <dbl> 1, 2, 3, 1, 2, 2, 3, 3, 2, 2, 4, 2, 2, 3, 4, 4, 1, 3, 4, 2, 1,…
## $ prethk <dbl> 5, 3, 4, 0, 1, 3, 1, 2, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 2, 1, 3,…
## $ cc <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ gprethk <dbl> 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.153846, 2.…
## $ d <dbl> 1, 1, 2, 3, 0, 2, 2, 2, 1, 2, 2, 4, 0, 2, 1, 0, 2, 1, 2, 4, 3,…
## $ d2 <dbl> 2, 4, 1, 2, 3, 1, 2, 1, 1, 2, 3, 3, 2, 2, 2, 2, 4, 1, 1, 1, 2,…
## $ d3 <dbl> 2, 0, 3, 2, 3, 1, 4, 2, 3, 3, 3, 0, 2, 1, 2, 2, 2, 3, 2, 1, 3,…
## $ d4 <dbl> 3, 1, 3, 2, 2, 2, 2, 3, 2, 4, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 4,…
## $ d5 <dbl> 0, 3, 3, 2, 2, 0, 0, 4, 1, 0, 2, 1, 2, 4, 3, 4, 1, 1, 2, 0, 2,…
## $ peer <dbl> 1.6, 1.8, 2.4, 2.2, 2.0, 1.2, 2.0, 2.4, 1.6, 2.2, 2.6, 1.8, 1.…
hist(CRdata.1$thk,
main = "Distribution of Tobacco and Health Knowledge Post-Intervention",
sub = "N = 1600 students. Data from Flay et al. (1988) and Rabe-Hesketh & Skrondal (2012).",
xlab = "Average Tobacco and Health Knowledge Score")
##Step 1a: Null Model
models <- list()
model.0 <- lmer(thk ~ (1|school), REML = FALSE, data = CRdata.1)
summary(model.0)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: thk ~ (1 | school)
## Data: CRdata.1
##
## AIC BIC logLik deviance df.resid
## 4833.0 4849.1 -2413.5 4827.0 1597
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9039 -0.7734 0.0693 0.9240 1.7430
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 0.09438 0.3072
## Residual 1.16219 1.0780
## Number of obs: 1600, groups: school, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.60750 0.06538 39.88
#Step 1b: Calculate ICC
null.ICC <- .09438/(.09438 + 1.16219)
null.ICC
## [1] 0.07510923
lmerTest::rand(model.0)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## thk ~ (1 | school)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 3 -2413.5 4833.0
## (1 | school) 2 -2445.6 4895.2 64.16 1 1.147e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Adding level 1 covariate - prethk
model.1 <- lmer(thk ~ prethk + (1|school), REML = FALSE, data = CRdata.1)
summary(model.1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: thk ~ prethk + (1 | school)
## Data: CRdata.1
##
## AIC BIC logLik deviance df.resid
## 4733.5 4755.0 -2362.7 4725.5 1596
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.12273 -0.82727 0.03082 0.83756 2.15165
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 0.07519 0.2742
## Residual 1.09321 1.0456
## Number of obs: 1600, groups: school, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.15164 0.07414 29.02
## prethk 0.21747 0.02122 10.25
##
## Correlation of Fixed Effects:
## (Intr)
## prethk -0.598
#Adding level-2 covariates
model.2 <- lmer(thk ~ prethk + cc + (1|school), REML = FALSE, data = CRdata.1)
summary(model.2)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: thk ~ prethk + cc + (1 | school)
## Data: CRdata.1
##
## AIC BIC logLik deviance df.resid
## 4721.3 4748.2 -2355.6 4711.3 1595
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.20705 -0.80300 0.04029 0.80179 2.15135
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 0.03333 0.1826
## Residual 1.09412 1.0460
## Number of obs: 1600, groups: school, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.94507 0.07699 25.265
## prethk 0.22262 0.02113 10.535
## cc 0.39215 0.08944 4.385
##
## Correlation of Fixed Effects:
## (Intr) prethk
## prethk -0.585
## cc -0.580 0.023
model.3 <- lmer(thk ~ prethk + cc +gprethk + (1|school), REML = FALSE, data = CRdata.1)
summary(model.3)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: thk ~ prethk + cc + gprethk + (1 | school)
## Data: CRdata.1
##
## AIC BIC logLik deviance df.resid
## 4712.6 4744.9 -2350.3 4700.6 1594
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.23332 -0.79567 0.02374 0.79254 2.16704
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 0.01734 0.1317
## Residual 1.09334 1.0456
## Number of obs: 1600, groups: school, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.07609 0.25478 4.224
## prethk 0.21260 0.02138 9.944
## cc 0.43075 0.07554 5.702
## gprethk 0.41835 0.11930 3.507
##
## Correlation of Fixed Effects:
## (Intr) prethk cc
## prethk 0.000
## cc -0.288 0.000
## gprethk -0.963 -0.179 0.148
#Creating a Table
library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:psych':
##
## SD
library(broom.mixed)
models <- list(model.0, model.1, model.2, model.3)
modelsummary(models)
| Model 1 | Model 2 | Model 3 | Model 4 | |
|---|---|---|---|---|
| (Intercept) | 2.607 | 2.152 | 1.945 | 1.076 |
| (0.065) | (0.074) | (0.077) | (0.255) | |
| prethk | 0.217 | 0.223 | 0.213 | |
| (0.021) | (0.021) | (0.021) | ||
| cc | 0.392 | 0.431 | ||
| (0.089) | (0.076) | |||
| gprethk | 0.418 | |||
| (0.119) | ||||
| SD (Intercept school) | 0.307 | 0.274 | 0.183 | 0.132 |
| SD (Observations) | 1.078 | 1.046 | 1.046 | 1.046 |
| Num.Obs. | 1600 | 1600 | 1600 | 1600 |
| R2 Marg. | 0.000 | 0.060 | 0.091 | 0.109 |
| R2 Cond. | 0.075 | 0.121 | 0.118 | 0.123 |
| AIC | 4836.6 | 4743.2 | 4734.5 | 4729.0 |
| BIC | 4852.7 | 4764.7 | 4761.4 | 4761.3 |
| ICC | 0.1 | 0.1 | 0.0 | 0.0 |
| RMSE | 1.07 | 1.04 | 1.04 | 1.04 |
modelsummary(models, output = 'modelsummary.html', title = 'MLM Estimates')