Part A
- Standardize ‘bmi’. Check for missing data across all the variables.
If there are, report the number of missing data by variable. Delete
cases with missing data.
df1$bmi_z <- as.numeric(scale(df1$bmi))
colSums(is.na(df1))
## bmi sex race edu health aag msg state bmi_z
## 6129 244 0 466 0 0 0 0 6129
# Conduct listwise deletion
df2 <- na.omit(df1)
# Confirm NAs are removed
colSums(is.na(df2))
## bmi sex race edu health aag msg state bmi_z
## 0 0 0 0 0 0 0 0 0
- Rename ‘sex’ as ‘female’. Code male as 0 and female as 1. Designate
‘female’, ‘race’, and ‘edu’ as factor (categorical) variables and
‘health’ as a numeric (continuous) variable.
df3 <- df2 |>
mutate(female = case_when(sex == "1.male" ~ 0, sex == "2.female" ~ 1))
# Factor 'female', 'race', 'edu'
df3$female <- as.factor(df3$female)
df3$race <- as.factor(df3$race)
df3$edu <- as.factor(df3$edu)
# Designate 'health' as a numeric variable
df3$health <- as.numeric(df3$health)
# Confirm dataframe classes
sapply(df3, class)
## bmi sex race edu health aag
## "numeric" "character" "factor" "factor" "numeric" "numeric"
## msg state bmi_z female
## "numeric" "integer" "numeric" "factor"
- Draw histogram of DV (z-BMI) and visually evaluate if there are
potential outliers. Then check for univariate outliers (use criterion: z
= ±3.3). If there are, delete the cases with univariate outliers. After
deleting outliers, draw a histogram of z-BMI with a normal curve
superimposed to check improvement of data fit to the normality.
# Draw a histogram of bmi_z scores
ggplot(df3, aes(x = bmi_z)) +
geom_histogram(
aes(y = after_stat(density)),
binwidth = 0.5,
fill = "#00BE67",
color = "black"
) +
labs(title = "Histogram of scaled BMI scores")

# Univariate outlier test for bmi_z
df3$outlier <- ifelse(df3$bmi_z > 3.3 | df3$bmi_z < -3.3,
1,
ifelse(df3$bmi_z <= 3.3 &
df3$bmi_z >= -3.3, 0, NA))
# Calculate number of outliers
df3 |>
filter(outlier == 1) |>
summarize(freq_n = n())
## freq_n
## 1 514
# Remove outliers and simplify dataset
df4 <- df3 |>
filter(!(outlier == 1)) |>
dplyr::select(bmi_z, race, edu, health, aag, msg, state, female)
# Draw new histogram after removal of outliers
ggplot(df4, aes(x = bmi_z)) +
geom_histogram(
aes(y = after_stat(density)),
binwidth = 0.5,
fill = "#00BE67",
color = "black"
) +
stat_function(fun = dnorm, args = list(mean = mean(df2$bmi_z), sd = sd(df2$bmi_z))) +
labs(title = "Histogram of scaled BMI scores after outlier removal")

- Report frequencies and proportions for each individual-level factor
IV and create dot plots of z-BMI by ‘female’,‘race’, and ‘edu’. Then,
test mean differences of z-BMI between different levels of these three
factor variables using either t-test or ANOVA. Interpret the
results.
# Run frequencies and proportions for 'female','race', 'edu'
CrossTable(df4$female)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 59552
##
##
## | 0 | 1 |
## |-----------|-----------|
## | 28675 | 30877 |
## | 0.482 | 0.518 |
## |-----------|-----------|
##
##
##
##
CrossTable(df4$race)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 59552
##
##
## | 1.white | 2.black | 3.hispanic | 4.others |
## |------------|------------|------------|------------|
## | 44627 | 5530 | 4750 | 4645 |
## | 0.749 | 0.093 | 0.080 | 0.078 |
## |------------|------------|------------|------------|
##
##
##
##
CrossTable(df4$edu)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 59552
##
##
## | 0.<HS | 1.HS | 2.col attended | 3.col graduated |
## |-----------------|-----------------|-----------------|-----------------|
## | 4178 | 14618 | 15526 | 25230 |
## | 0.070 | 0.245 | 0.261 | 0.424 |
## |-----------------|-----------------|-----------------|-----------------|
##
##
##
##
# Create dot plot of 'bmi_z' by 'female"
dotplot(
female ~ bmi_z,
data = df4,
jitter.y = TRUE,
ylab = "Race",
main = "Dotplot of sex"
)

# Create dot plot of 'bmi_z' by 'race'
dotplot(
race ~ bmi_z,
data = df4,
jitter.y = TRUE,
ylab = "Race",
main = "Dotplot of race"
)

# Create dot plot of 'bmi_z' by 'edu'
dotplot(
edu ~ bmi_z,
data = df4,
jitter.y = TRUE,
ylab = "Race",
main = "Dotplot of education"
)

# Test mean differences using anova
t.test(bmi_z ~ female, data = df4, var.equal = TRUE)
##
## Two Sample t-test
##
## data: bmi_z by female
## t = 13.988, df = 59550, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.08841578 0.11723048
## sample estimates:
## mean in group 0 mean in group 1
## 0.01536388 -0.08745924
anova((lm(bmi_z ~ race, data = df4)))
## Analysis of Variance Table
##
## Response: bmi_z
## Df Sum Sq Mean Sq F value Pr(>F)
## race 3 533 177.664 222.9 < 2.2e-16 ***
## Residuals 59548 47463 0.797
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova((lm(bmi_z ~ edu, data = df4)))
## Analysis of Variance Table
##
## Response: bmi_z
## Df Sum Sq Mean Sq F value Pr(>F)
## edu 3 901 300.377 379.81 < 2.2e-16 ***
## Residuals 59548 47095 0.791
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Draw a scatterplot between z-BMI and ‘health’ (y = z-BMI and x =
‘health’) by state with prediction lines for each sex for the first six
states (Hint: There are six states coded less than 9). Interpret the
plots.
df4 |>
filter(state %in% 1:9) |>
ggplot(aes(
x = health,
y = bmi_z,
color = factor(female)
)) +
geom_jitter(
width = .2,
height = 0,
alpha = 0.15,
size = 1
) +
geom_smooth(method = "lm", se = TRUE) +
facet_wrap(~ state) +
theme_bw() +
scale_color_manual(
values = c("0" = "#2C7FB8", "1" = "#E8743B"),
labels = c("0" = "Male", "1" = "Female"),
name = "Sex"
) +
labs(
title = "z-BMI vs. Self-Reported Health by State",
subtitle = "Linear prediction lines by sex, first six states",
x = "Self-reported health (1 = excellent, 5 = poor)",
y = "z-BMI"
) +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'

- Report the number of state-level units (i.e., the frequency of
‘state’), and the minimum and maximum cell size of the ‘state’ (i.e.,
the smallest and the largest number of state residents). [Although we
already know we have enough numbers of level-2 units and huge sample
sizes in this specific dataset, you are expected to check the frequency
of level-2 units and their cell sizes for a typical MLM analysis].
# Calculate the number of state-level units
df4 |>
count(state) |>
summarize(freq_n = n())
## freq_n
## 1 51
# Calculate the minimum cell size
df4 |>
count(state) |>
summarise(min_n = min(n))
## min_n
## 1 376
# Calculate the maximum cell size
df4 |>
count(state) |>
summarise(max_n = max(n))
## max_n
## 1 5202
Part B
- Fit a cross-sectional two-level multilevel model without predictors
(i.e., null model or Model m0) and interpret the results. Make sure you
calculate and interpret intraclass correlation coefficient (ICC) without
using a package.
# Fit null model
m0 <- lme4::lmer(bmi_z ~ 1 + (1 | state), data = df4)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ 1 + (1 | state)
## Data: df4
##
## REML criterion at convergence: 155728.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4362 -0.7095 -0.1469 0.5441 3.8395
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.008912 0.0944
## Residual 0.798409 0.8935
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.03356 0.01385 -2.424
m0.1 <- lmerTest::lmer(bmi_z ~ 1 + (1 | state), data = df4)
summary(m0.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ 1 + (1 | state)
## Data: df4
##
## REML criterion at convergence: 155728.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4362 -0.7095 -0.1469 0.5441 3.8395
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.008912 0.0944
## Residual 0.798409 0.8935
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.03356 0.01385 49.35874 -2.424 0.0191 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate ICC manually
0.009 / (0.009 + 0.798)
## [1] 0.01115242
# Confirm results using 'performance' package
performance::icc(m0)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.011
## Unadjusted ICC: 0.011
- Do the grand-mean centering for the variable ‘health’ and name it as
‘Chealth’). Fit a random-coefficients regression model with only
‘Chealth’ as the predictor (Model m1). Compute 95% profile confidence
interval. Interpret the results including both fixed and random effects.
Compare model fit between m0 and m1 models.
# Grand mean centering of 'health'
Chealth <- df4$health - mean(df4$health)
# Fit random-coefficients model
m1 <- lme4::lmer(bmi_z ~ Chealth + (1 | state), data = df4)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ Chealth + (1 | state)
## Data: df4
##
## REML criterion at convergence: 151458.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9512 -0.6753 -0.1241 0.5516 4.3643
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.005554 0.07452
## Residual 0.743282 0.86214
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.035848 0.011162 -3.212
## Chealth 0.217502 0.003264 66.640
##
## Correlation of Fixed Effects:
## (Intr)
## Chealth -0.003
m1.1 <- lmerTest::lmer(bmi_z ~ Chealth + (1 | state), data = df4)
summary(m1.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ Chealth + (1 | state)
## Data: df4
##
## REML criterion at convergence: 151458.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9512 -0.6753 -0.1241 0.5516 4.3643
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.005554 0.07452
## Residual 0.743282 0.86214
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -3.585e-02 1.116e-02 4.917e+01 -3.212 0.00233 **
## Chealth 2.175e-01 3.264e-03 5.951e+04 66.640 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Chealth -0.003
# Compute 95% profile confidence intervals
confint(m1, method = c("Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) -0.0577245 -0.01397093
## Chealth 0.2111054 0.22389943
# Compare model fit between m0 and m1
anova(m0, m1)
## refitting model(s) with ML (instead of REML)
## Data: df4
## Models:
## m0: bmi_z ~ 1 + (1 | state)
## m1: bmi_z ~ Chealth + (1 | state)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## m0 3 155728 155755 -77861 155722
## m1 4 151450 151486 -75721 151442 4280 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Fit a random-coefficients regression model with all the
individual-level (level-1) predictors (i.e., ‘Chealth’, ‘female’,
‘race’, and ‘edu’) (Model m2). Compute 95% profile confidence interval.
Interpret the results including both fixed and random effects. Compare
model fit between m1 and m2 models.
# Conduct model fitting
m2 <- lme4::lmer(bmi_z ~ Chealth + female + race + edu + (1 |
state), data = df4)
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ Chealth + female + race + edu + (1 | state)
## Data: df4
##
## REML criterion at convergence: 150496.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0734 -0.6729 -0.1295 0.5418 4.2823
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.004516 0.0672
## Residual 0.730944 0.8550
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.081923 0.017586 -4.658
## Chealth 0.206993 0.003412 60.668
## female1 -0.108255 0.007048 -15.359
## race2.black 0.259984 0.012890 20.169
## race3.hispanic 0.041393 0.014108 2.934
## race4.others -0.064772 0.013802 -4.693
## edu1.HS 0.114337 0.015475 7.389
## edu2.col attended 0.160948 0.015555 10.347
## edu3.col graduated 0.022390 0.015343 1.459
##
## Correlation of Fixed Effects:
## (Intr) Chelth femal1 rc2.bl rc3.hs rc4.th ed1.HS ed2.ca
## Chealth -0.131
## female1 -0.182 -0.020
## race2.black -0.099 -0.047 -0.024
## race3.hspnc -0.257 -0.043 0.000 0.109
## race4.othrs -0.101 -0.060 0.012 0.099 0.129
## edu1.HS -0.716 0.104 0.001 0.002 0.193 0.023
## ed2.clattnd -0.720 0.143 -0.038 0.022 0.218 0.024 0.796
## ed3.clgrdtd -0.749 0.238 -0.045 0.044 0.236 0.027 0.820 0.834
m2.1 <- lmerTest::lmer(bmi_z ~ Chealth + female + race + edu + (1 |
state), data = df4)
summary(m2.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ Chealth + female + race + edu + (1 | state)
## Data: df4
##
## REML criterion at convergence: 150496.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0734 -0.6729 -0.1295 0.5418 4.2823
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.004516 0.0672
## Residual 0.730944 0.8550
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -8.192e-02 1.759e-02 4.230e+02 -4.658 4.28e-06 ***
## Chealth 2.070e-01 3.412e-03 5.954e+04 60.668 < 2e-16 ***
## female1 -1.083e-01 7.048e-03 5.953e+04 -15.359 < 2e-16 ***
## race2.black 2.600e-01 1.289e-02 4.475e+04 20.169 < 2e-16 ***
## race3.hispanic 4.139e-02 1.411e-02 5.128e+04 2.934 0.00335 **
## race4.others -6.477e-02 1.380e-02 4.976e+04 -4.693 2.70e-06 ***
## edu1.HS 1.143e-01 1.547e-02 5.954e+04 7.389 1.50e-13 ***
## edu2.col attended 1.609e-01 1.555e-02 5.954e+04 10.347 < 2e-16 ***
## edu3.col graduated 2.239e-02 1.534e-02 5.952e+04 1.459 0.14450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Chelth femal1 rc2.bl rc3.hs rc4.th ed1.HS ed2.ca
## Chealth -0.131
## female1 -0.182 -0.020
## race2.black -0.099 -0.047 -0.024
## race3.hspnc -0.257 -0.043 0.000 0.109
## race4.othrs -0.101 -0.060 0.012 0.099 0.129
## edu1.HS -0.716 0.104 0.001 0.002 0.193 0.023
## ed2.clattnd -0.720 0.143 -0.038 0.022 0.218 0.024 0.796
## ed3.clgrdtd -0.749 0.238 -0.045 0.044 0.236 0.027 0.820 0.834
# Calculate profile confidence intervals
confint(m2, method = c("Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) -0.116391036 -0.04745466
## Chealth 0.200306189 0.21368054
## female1 -0.122069373 -0.09444105
## race2.black 0.234720032 0.28524807
## race3.hispanic 0.013742167 0.06904325
## race4.others -0.091823934 -0.03772053
## edu1.HS 0.084007323 0.14466690
## edu2.col attended 0.130461180 0.19143404
## edu3.col graduated -0.007682054 0.05246108
# Compare model fit between m1 and m2
anova(m1, m2)
## refitting model(s) with ML (instead of REML)
## Data: df4
## Models:
## m1: bmi_z ~ Chealth + (1 | state)
## m2: bmi_z ~ Chealth + female + race + edu + (1 | state)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## m1 4 151450 151486 -75721 151442
## m2 11 150452 150550 -75215 150430 1012.1 7 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Fit a random-coefficients regression model with only the state-level
(level-2) predictors (i.e.,‘aag’ and ‘msg’) (Model m3). Compute 95%
profile confidence interval. Interpret the results including both fixed
and random effects. Compare model fit between m2 and m3 models.
# Conduct model fitting
m3 <- lme4::lmer(bmi_z ~ aag + msg + (aag + msg |
state), data = df4)
## boundary (singular) fit: see help('isSingular')
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ aag + msg + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 155724.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4362 -0.7090 -0.1484 0.5444 3.8485
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 0.9302915 0.96452
## aag 0.0002193 0.01481 1.00
## msg 0.0034622 0.05884 -1.00 -1.00
## Residual 0.7981317 0.89338
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.877970 0.181802 4.829
## aag -0.006434 0.004344 -1.481
## msg -0.020328 0.010339 -1.966
##
## Correlation of Fixed Effects:
## (Intr) aag
## aag 0.098
## msg -0.670 -0.804
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
m3.1 <- lmerTest::lmer(bmi_z ~ aag + msg + (aag + msg |
state), data = df4)
## boundary (singular) fit: see help('isSingular')
## Warning: Model failed to converge with 3 negative eigenvalues: -2.3e+02
## -4.9e+04 -1.9e+06
summary(m3.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ aag + msg + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 155724.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4362 -0.7090 -0.1484 0.5444 3.8485
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 0.9302915 0.96452
## aag 0.0002193 0.01481 1.00
## msg 0.0034622 0.05884 -1.00 -1.00
## Residual 0.7981317 0.89338
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.877970 0.181802 160.159234 4.829 3.18e-06 ***
## aag -0.006434 0.004344 59.008409 -1.481 0.1440
## msg -0.020328 0.010339 129.944740 -1.966 0.0514 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) aag
## aag 0.098
## msg -0.670 -0.804
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Calculate profile confidence intervals
confint(m3, method = c("Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sig02 NA NA
## .sig03 NA NA
## .sig04 NA NA
## .sig05 NA NA
## .sig06 NA NA
## .sigma NA NA
## (Intercept) 0.52164375 1.234296e+00
## aag -0.01494862 2.081253e-03
## msg -0.04059221 -6.341367e-05
# Compare model fit between m2 and m3
anova(m2, m3)
## refitting model(s) with ML (instead of REML)
## Warning in optwrap(optimizer, devfun, rho$mkPar(rho$pp$theta), lower =
## getLower(x), : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Data: df4
## Models:
## m3: bmi_z ~ aag + msg + (aag + msg | state)
## m2: bmi_z ~ Chealth + female + race + edu + (1 | state)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## m3 10 155691 155781 -77836 155671
## m2 11 150452 150550 -75215 150430 5242 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Fit a random-coefficients regression model with all the level-1 and
level-2 predictors and an interaction between ‘female’ and ‘race’ (Model
m4). Compute 95% profile confidence interval. Interpret the results
including both fixed and random effects. Compare model fit between m2
and m4 models.
# Conduct model fitting
m4 <- lme4::lmer(bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + (aag + msg |
state),
data = df4)
## boundary (singular) fit: see help('isSingular')
summary(m4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ Chealth + aag + msg + female + race + edu + female *
## race + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 150197.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1082 -0.6686 -0.1322 0.5410 4.2659
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 7.833e-01 0.885037
## aag 4.277e-06 0.002068 -1.00
## msg 6.864e-04 0.026200 -1.00 0.99
## Residual 7.268e-01 0.852553
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.539671 0.166499 3.241
## Chealth 0.205470 0.003404 60.369
## aag -0.003540 0.002526 -1.402
## msg -0.014114 0.005736 -2.461
## female1 -0.164701 0.008105 -20.321
## race2.black 0.022058 0.018709 1.179
## race3.hispanic -0.026319 0.019085 -1.379
## race4.others -0.100095 0.018927 -5.289
## edu1.HS 0.113110 0.015433 7.329
## edu2.col attended 0.158771 0.015512 10.235
## edu3.col graduated 0.020305 0.015303 1.327
## female1:race2.black 0.429316 0.024481 17.537
## female1:race3.hispanic 0.139532 0.026047 5.357
## female1:race4.others 0.073973 0.026307 2.812
##
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
m4.1 <- lmerTest::lmer(bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + (aag + msg |
state),
data = df4)
## boundary (singular) fit: see help('isSingular')
## Warning: Model failed to converge with 1 negative eigenvalue: -4.8e+02
summary(m4.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ Chealth + aag + msg + female + race + edu + female *
## race + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 150197.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1082 -0.6686 -0.1322 0.5410 4.2659
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 7.833e-01 0.885037
## aag 4.277e-06 0.002068 -1.00
## msg 6.864e-04 0.026200 -1.00 0.99
## Residual 7.268e-01 0.852553
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.397e-01 1.665e-01 1.965e+02 3.241 0.00140 **
## Chealth 2.055e-01 3.404e-03 5.950e+04 60.369 < 2e-16 ***
## aag -3.540e-03 2.526e-03 1.669e+01 -1.402 0.17935
## msg -1.411e-02 5.736e-03 7.509e+01 -2.461 0.01616 *
## female1 -1.647e-01 8.105e-03 5.951e+04 -20.321 < 2e-16 ***
## race2.black 2.206e-02 1.871e-02 5.748e+04 1.179 0.23840
## race3.hispanic -2.632e-02 1.909e-02 5.665e+04 -1.379 0.16789
## race4.others -1.001e-01 1.893e-02 5.589e+04 -5.289 1.24e-07 ***
## edu1.HS 1.131e-01 1.543e-02 5.951e+04 7.329 2.34e-13 ***
## edu2.col attended 1.588e-01 1.551e-02 5.950e+04 10.235 < 2e-16 ***
## edu3.col graduated 2.031e-02 1.530e-02 5.944e+04 1.327 0.18457
## female1:race2.black 4.293e-01 2.448e-02 5.953e+04 17.537 < 2e-16 ***
## female1:race3.hispanic 1.395e-01 2.605e-02 5.953e+04 5.357 8.49e-08 ***
## female1:race4.others 7.397e-02 2.631e-02 5.952e+04 2.812 0.00493 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Compute 95% profile confidence intervals
confint(m4, method = c("Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sig02 NA NA
## .sig03 NA NA
## .sig04 NA NA
## .sig05 NA NA
## .sig06 NA NA
## .sigma NA NA
## (Intercept) 0.213339157 0.866002264
## Chealth 0.198799233 0.212141060
## aag -0.008491056 0.001410291
## msg -0.025356124 -0.002872411
## female1 -0.180586923 -0.148815789
## race2.black -0.014610718 0.058726917
## race3.hispanic -0.063724505 0.011086857
## race4.others -0.137190727 -0.062998823
## edu1.HS 0.082862750 0.143358058
## edu2.col attended 0.128367626 0.189173493
## edu3.col graduated -0.009688684 0.050298358
## female1:race2.black 0.381333690 0.477297592
## female1:race3.hispanic 0.088481302 0.190581924
## female1:race4.others 0.022412141 0.125533925
# Compare model fit between m2 and m4
anova(m2, m4)
## refitting model(s) with ML (instead of REML)
## Warning in optwrap(optimizer, devfun, rho$mkPar(rho$pp$theta), lower =
## getLower(x), : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Data: df4
## Models:
## m2: bmi_z ~ Chealth + female + race + edu + (1 | state)
## m4: bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + (aag + msg | state)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## m2 11 150452 150550 -75215 150430
## m4 21 150120 150309 -75039 150078 351.43 10 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Fit a random-coefficients regression model with all the variables
you entered in Model m4 and add a cross-level interaction between
‘female’ and ‘aag’ (Model m5). Compute 95% profile confidence interval.
Interpret the results including both fixed and random effects. Compare
model fit between m4 and m5 models.
# Conduct model
m5 <- lme4::lmer(
bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + female * aag + (aag + msg |
state),
data = df4
)
## boundary (singular) fit: see help('isSingular')
summary(m5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bmi_z ~ Chealth + aag + msg + female + race + edu + female *
## race + female * aag + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 150249.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1081 -0.6691 -0.1323 0.5408 4.2697
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 0.852573 0.92335
## aag 0.002169 0.04658 0.97
## msg 0.013148 0.11466 -0.98 -1.00
## Residual 0.726619 0.85242
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.419957 0.191372 2.194
## Chealth 0.205305 0.003404 60.316
## aag -0.006436 0.008305 -0.775
## msg -0.004932 0.018374 -0.268
## female1 0.033680 0.076881 0.438
## race2.black 0.025151 0.018779 1.339
## race3.hispanic -0.028355 0.019161 -1.480
## race4.others -0.102875 0.018931 -5.434
## edu1.HS 0.112919 0.015432 7.317
## edu2.col attended 0.159123 0.015510 10.259
## edu3.col graduated 0.020793 0.015303 1.359
## female1:race2.black 0.421902 0.024650 17.116
## female1:race3.hispanic 0.146146 0.026170 5.585
## female1:race4.others 0.079326 0.026379 3.007
## aag:female1 -0.003865 0.001487 -2.599
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
m5.1 <- lmerTest::lmer(
bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + female * aag + (aag + msg |
state),
data = df4
)
## boundary (singular) fit: see help('isSingular')
## Warning: Model failed to converge with 2 negative eigenvalues: -5.1e+02
## -3.0e+06
summary(m5.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: bmi_z ~ Chealth + aag + msg + female + race + edu + female *
## race + female * aag + (aag + msg | state)
## Data: df4
##
## REML criterion at convergence: 150249.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1081 -0.6691 -0.1323 0.5408 4.2697
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state (Intercept) 0.852573 0.92335
## aag 0.002169 0.04658 0.97
## msg 0.013148 0.11466 -0.98 -1.00
## Residual 0.726619 0.85242
## Number of obs: 59552, groups: state, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.200e-01 1.914e-01 2.317e+02 2.194 0.02920 *
## Chealth 2.053e-01 3.404e-03 5.949e+04 60.316 < 2e-16 ***
## aag -6.436e-03 8.305e-03 2.037e+02 -0.775 0.43929
## msg -4.932e-03 1.837e-02 5.275e+02 -0.268 0.78847
## female1 3.368e-02 7.688e-02 5.953e+04 0.438 0.66133
## race2.black 2.515e-02 1.878e-02 5.869e+04 1.339 0.18049
## race3.hispanic -2.836e-02 1.916e-02 5.828e+04 -1.480 0.13892
## race4.others -1.029e-01 1.893e-02 4.177e+04 -5.434 5.54e-08 ***
## edu1.HS 1.129e-01 1.543e-02 5.949e+04 7.317 2.56e-13 ***
## edu2.col attended 1.591e-01 1.551e-02 5.950e+04 10.259 < 2e-16 ***
## edu3.col graduated 2.079e-02 1.530e-02 5.940e+04 1.359 0.17424
## female1:race2.black 4.219e-01 2.465e-02 5.953e+04 17.116 < 2e-16 ***
## female1:race3.hispanic 1.461e-01 2.617e-02 5.953e+04 5.585 2.35e-08 ***
## female1:race4.others 7.933e-02 2.638e-02 5.952e+04 3.007 0.00264 **
## aag:female1 -3.865e-03 1.487e-03 5.953e+04 -2.599 0.00936 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Calculate profile confidence intervals
confint(m5, method = c("Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sig02 NA NA
## .sig03 NA NA
## .sig04 NA NA
## .sig05 NA NA
## .sig06 NA NA
## .sigma NA NA
## (Intercept) 0.044875428 0.7950383036
## Chealth 0.198633391 0.2119760622
## aag -0.022713340 0.0098420126
## msg -0.040944394 0.0310798662
## female1 -0.117004406 0.1843637957
## race2.black -0.011656552 0.0619576419
## race3.hispanic -0.065909146 0.0091993053
## race4.others -0.139979352 -0.0657697873
## edu1.HS 0.082673187 0.1431638679
## edu2.col attended 0.128723492 0.1895224407
## edu3.col graduated -0.009201126 0.0507865165
## female1:race2.black 0.373589317 0.4702148127
## female1:race3.hispanic 0.094854369 0.1974383055
## female1:race4.others 0.027624843 0.1310269517
## aag:female1 -0.006780642 -0.0009501011
# Compare model fit between m4 and m5
anova(m4, m5)
## refitting model(s) with ML (instead of REML)
## Warning in optwrap(optimizer, devfun, rho$mkPar(rho$pp$theta), lower =
## getLower(x), : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Warning in optwrap(optimizer, devfun, rho$mkPar(rho$pp$theta), lower =
## getLower(x), : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Data: df4
## Models:
## m4: bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + (aag + msg | state)
## m5: bmi_z ~ Chealth + aag + msg + female + race + edu + female * race + female * aag + (aag + msg | state)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## m4 21 150120 150309 -75039 150078
## m5 22 150110 150308 -75033 150066 12.028 1 0.0005242 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Check if residuals are normally distributed for the final model by
plotting a histogram with density curve.
hist(
scale(resid(m5)),
freq = FALSE,
ylim = c(0, .5),
xlim = c(-3.5, 3.5),
main = "Histogram of Standardized Residuals from Model m5",
xlab = "Standardized Residuals"
)
lines(density(scale(resid(m4))))
box()
