## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
##     expand, pack, unpack
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
##     %+%, alpha
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
##     select
##
## arm (Version 1.13-1, built: 2022-8-25)
##
## Working directory is C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 6
##
## Attaching package: 'arm'
##
## The following objects are masked from 'package:psych':
##
##     logit, rescale, sim
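For reference, the startup messages above come from loading the packages used in this lab. The exact setup chunk is not shown here, but a minimal reconstruction (my assumption about the load order, based on the messages) would be:

library(tidyverse)   # dplyr, ggplot2, tidyr, purrr, etc.
library(lme4)        # lmer(); loads Matrix
library(psych)       # describe()
library(arm)         # display(), se.fixef(), se.ranef(); loads MASS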
# Read in the data and create factor versions of the design variables
hs_data <- read.csv("MLM_haptic_example_data.csv", header = TRUE)
hs_data <- as_tibble(hs_data) %>%
  mutate(Sub_Num = factor(Subject_Num),
         Ratio   = factor(Relative_Size),
         Num_Dis = factor(Distractor_Number))
Why might we want to treat some of our variables as factors?
Answer: If the values of a variable are really category labels rather than points on a meaningful numeric scale, we want to estimate a separate effect for each level instead of assuming a linear relationship with the numeric codes.
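To see what this does in practice, compare the design matrix R builds for the numeric variable with the one it builds for its factor version (a quick sketch using the variables created above):

# Numeric predictor: a single column, so the model assumes one linear effect
head(model.matrix(~ Distractor_Number, data = hs_data), 3)
# Factor predictor: one dummy column per non-reference level, so each level
# gets its own estimated effect
head(model.matrix(~ Num_Dis, data = hs_data), 3)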
describe(hs_data)
## vars n mean sd median trimmed mad min max range
## Experiment_Number 1 3370 0.49 0.50 0.00 0.49 0.00 0.00 1.00 1.00
## Subject_Num 2 3370 39.69 23.19 38.00 39.12 28.17 1.00 85.00 84.00
## Blk 3 3370 1.51 0.50 2.00 1.52 0.00 1.00 2.00 1.00
## Distractor_Number 4 3370 2.00 0.82 2.00 2.00 1.48 1.00 3.00 2.00
## Distractor_Lengths 5 3370 1.00 0.54 0.75 1.00 0.74 0.25 1.75 1.50
## Relative_Size 6 3370 0.98 0.94 0.75 0.82 0.37 0.25 3.00 2.75
## Times 7 3370 14.70 10.26 11.65 13.17 7.89 0.55 60.05 59.50
## Sub_Num* 8 3370 37.52 21.91 36.00 37.05 26.69 1.00 79.00 78.00
## Ratio* 9 3370 3.52 1.70 4.00 3.53 1.48 1.00 6.00 5.00
## Num_Dis* 10 3370 2.00 0.82 2.00 2.00 1.48 1.00 3.00 2.00
## skew kurtosis se
## Experiment_Number 0.03 -2.00 0.01
## Subject_Num 0.17 -1.07 0.40
## Blk -0.06 -2.00 0.01
## Distractor_Number -0.01 -1.51 0.01
## Distractor_Lengths 0.01 -1.51 0.01
## Relative_Size 1.50 0.64 0.02
## Times 1.39 1.83 0.18
## Sub_Num* 0.15 -1.08 0.38
## Ratio* -0.02 -1.27 0.03
## Num_Dis* -0.01 -1.51 0.01
#summary(hs_data)
I want to walk through how we visualize multilevel models as a segue from the previous work we have been doing in the course. I am going to refit the kinds of models we have already discussed in class as a recap, and then extend them to transition into the multilevel framework.
hs_mdl0 <- lm("Times~ Distractor_Number", data = hs_data)
display(hs_mdl0)
## lm(formula = "Times~ Distractor_Number", data = hs_data)
## coef.est coef.se
## (Intercept) 12.52 0.47
## Distractor_Number 1.09 0.22
## ---
## n = 3370, k = 2
## residual sd = 10.23, R-Squared = 0.01
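From these estimates, the predicted search time at k distractors is 12.52 + 1.09k; a quick check of the implied predictions (sketch):

# Predicted Times at 1, 2, and 3 distractors from the simple regression
predict(hs_mdl0, newdata = data.frame(Distractor_Number = 1:3))
# 12.52 + 1.09 * (1:3), i.e. roughly 13.6, 14.7, 15.8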
Now plot that relationship:
ggplot(data = hs_data, mapping = aes(x = Distractor_Number,y = Times)) + geom_jitter() + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
hs_mdl0.5 <- lm("Times~ Num_Dis-1 ", data = hs_data)
display(hs_mdl0.5)
## lm(formula = "Times~ Num_Dis-1 ", data = hs_data)
## coef.est coef.se
## Num_Dis1 13.62 0.31
## Num_Dis2 14.69 0.31
## Num_Dis3 15.80 0.30
## ---
## n = 3370, k = 3
## residual sd = 10.23, R-Squared = 0.67
ggplot(data = hs_data, mapping = aes(x = 1:3370, y = Times)) + geom_jitter() + geom_smooth(method = "lm", aes(color = Num_Dis))
## `geom_smooth()` using formula = 'y ~ x'
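Because the -1 removes the intercept, each Num_Dis coefficient above is simply that condition's mean response time; a quick sanity check (sketch):

# Group means of Times by Num_Dis; these should reproduce the
# Num_Dis1-Num_Dis3 coefficients (about 13.6, 14.7, 15.8)
hs_data %>%
  group_by(Num_Dis) %>%
  summarise(mean_time = mean(Times))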
ggplot(data = hs_data, mapping = aes(x = Relative_Size,y = Times)) + geom_jitter(mapping = aes(color = Num_Dis)) + stat_smooth(method = "lm", se=FALSE, color = 'red')
## `geom_smooth()` using formula = 'y ~ x'
How do we write this model using the lm() function?
hs_mdl1 <- lm("Times~ Relative_Size", data = hs_data)
display(hs_mdl1)
## lm(formula = "Times~ Relative_Size", data = hs_data)
## coef.est coef.se
## (Intercept) 17.57 0.25
## Relative_Size -2.92 0.18
## ---
## n = 3370, k = 2
## residual sd = 9.89, R-Squared = 0.07
hs_mdl1.5 <- lm("Times~ Ratio-1", data = hs_data)
display(hs_mdl1.5)
## lm(formula = "Times~ Ratio-1", data = hs_data)
## coef.est coef.se
## Ratio0.25 17.45 0.42
## Ratio0.333333333333333 19.65 0.41
## Ratio0.5 14.88 0.41
## Ratio0.75 14.43 0.41
## Ratio1 12.43 0.41
## Ratio3 9.61 0.41
## ---
## n = 3370, k = 6
## residual sd = 9.74, R-Squared = 0.71
ggplot(data = hs_data, mapping = aes(x = 1:3370 ,y = Times)) + geom_jitter() + stat_smooth(method = "lm",aes(color = Ratio))
## `geom_smooth()` using formula = 'y ~ x'
hs_mdl2 <- lm('Times~ Ratio - 1 + Num_Dis - 1',data= hs_data)
display(hs_mdl2)
## lm(formula = "Times~ Ratio - 1 + Num_Dis - 1", data = hs_data)
## coef.est coef.se
## Ratio0.25 16.35 0.48
## Ratio0.333333333333333 18.53 0.47
## Ratio0.5 13.76 0.47
## Ratio0.75 13.31 0.47
## Ratio1 11.33 0.47
## Ratio3 8.48 0.47
## Num_Dis2 1.13 0.41
## Num_Dis3 2.20 0.41
## ---
## n = 3370, k = 8
## residual sd = 9.71, R-Squared = 0.71
What is being ignored by all of the above models? Answer: None of the above models include mixed, or more specifically random, effects. The lm() function assumes the errors are independent and identically distributed across observations, which may not hold here because each subject (and each experiment) contributes many trials.
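One way to see the dependence is to count how many trials each subject and each experiment contributes; repeated measurements from the same unit are unlikely to have independent errors. A quick look (sketch):

# Trials per subject and per experiment; repeated measures from the same
# unit are what the random-effects models below are designed to handle
hs_data %>% count(Sub_Num) %>% head()
hs_data %>% count(Experiment_Number)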
ggplot(data = hs_data, aes(x = Relative_Size,y = Times, color = Num_Dis)) + geom_jitter() + geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
hs_lm <- lmer(Times ~ 1 + (1 | Experiment_Number),data=hs_data)
display(hs_lm)
## lmer(formula = Times ~ 1 + (1 | Experiment_Number), data = hs_data)
## coef.est coef.se
## 14.70 0.56
##
## Error terms:
## Groups Name Std.Dev.
## Experiment_Number (Intercept) 0.76
## Residual 10.25
## ---
## number of obs: 3370, groups: Experiment_Number, 2
## AIC = 25258.6, DIC = 25253.3
## deviance = 25253.0
What is being included in this model?
Answer: This model includes only a constant term (the intercept) and allows that term to vary by the grouping variable Experiment_Number. We also get separate error terms: the standard deviation of the experiment-level intercepts and the residual (within-experiment) standard deviation.
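From these two standard deviations we can also compute the intraclass correlation, the share of the total variance that lies between experiments (a sketch using VarCorr() from lme4):

# Intraclass correlation: between-experiment variance / total variance
vc <- as.data.frame(VarCorr(hs_lm))
vc$vcov[1] / sum(vc$vcov)   # about 0.76^2 / (0.76^2 + 10.25^2) ~= 0.005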
hs_lm_RI <- lmer(Times ~ 1 + Relative_Size +(1 | Experiment_Number),data=hs_data)
display(hs_lm_RI)
## lmer(formula = Times ~ 1 + Relative_Size + (1 | Experiment_Number),
## data = hs_data)
## coef.est coef.se
## (Intercept) 17.58 0.68
## Relative_Size -2.94 0.18
##
## Error terms:
## Groups Name Std.Dev.
## Experiment_Number (Intercept) 0.90
## Residual 9.87
## ---
## number of obs: 3370, groups: Experiment_Number, 2
## AIC = 25005.8, DIC = 24995.8
## deviance = 24996.8
This model starts with the complete-pooling regression $y \sim x$ (Times regressed on Relative_Size, ignoring the experiment grouping) and then adds the varying intercept from the previous model.
What does adding the varying intercept to this regression allow for?
Answer: It allows the intercept to vary by group (experiment), while the slope for Relative_Size is still estimated as a single value shared across groups, as the coefficients below show.
# From the book
hs_c <- coef(hs_lm_RI)
hs_c
## $Experiment_Number
## (Intercept) Relative_Size
## 0 18.19459 -2.942456
## 1 16.96378 -2.942456
##
## attr(,"class")
## [1] "coef.mer"
Write the estimated regression line for each of the groups.
Answer: Experiment 1 (coded 0): $\hat{y} = 18.19 - 2.94x$; Experiment 2 (coded 1): $\hat{y} = 16.96 - 2.94x$.
Alternatively, we can look at the “fixed effects”:
hs_cF <- fixef(hs_lm_RI)
hs_cF
## (Intercept) Relative_Size
## 17.579188 -2.942456
Write the regression line for the fixed effects and explain in words what this line means.
Answer: This is the estimated regression line averaging over the two experiments: $\hat{y} = 17.58 - 2.94x$.
Now let's look at the experiment-level errors.
hs_cE <-ranef(hs_lm_RI)
hs_cE
## $Experiment_Number
## (Intercept)
## 0 0.6154041
## 1 -0.6154041
##
## with conditional variances for "Experiment_Number"
What do these two numbers tell us?
Answer: They tell us how far each experiment's intercept is shifted above or below the averaged-over-experiments (fixed-effect) intercept: up by 0.62 for the first experiment (coded 0) and down by 0.62 for the second (coded 1).
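Putting the pieces together: each group's intercept in coef() equals the fixed-effect intercept plus that group's random effect, which we can verify directly (sketch):

# 17.579 + 0.615 = 18.194 (first experiment) and 17.579 - 0.615 = 16.964
# (second experiment), matching the coef() output above
fixef(hs_lm_RI)["(Intercept)"] + ranef(hs_lm_RI)$Experiment_Number[["(Intercept)"]]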
Now let's look at the uncertainty in the estimates of our coefficients.
se.fixef(hs_lm_RI)
## (Intercept) Relative_Size
## 0.6825168 0.1802499
se.ranef(hs_lm_RI)
## $Experiment_Number
## (Intercept)
## 0 0.2306844
## 1 0.2339019
What might be the cause of the similarity or difference between the standard errors of our coefficients? Answer: Because the number of observations within each group level (experiment) is similar, the standard errors should be similar. If they were very different, the difference would come from the weighting, or 'shrinkage', that results from having different n's.
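We can check that the two experiments really do contribute similar numbers of trials (sketch):

# Observations per experiment; roughly equal group sizes lead to roughly
# equal standard errors for the group-level intercepts
table(hs_data$Experiment_Number)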
Interpret the fit of the two models and explain which one is better and how you know. Answer: The AIC and BIC are both lower for the model that includes a predictor. More importantly, the likelihood-ratio chi-squared test comparing the two models is highly significant (p < 2.2e-16), so adding Relative_Size clearly improves the fit.
Below is an effective way to test the difference between the two models.
anova(hs_lm, hs_lm_RI)
## refitting model(s) with ML (instead of REML)
## Data: hs_data
## Models:
## hs_lm: Times ~ 1 + (1 | Experiment_Number)
## hs_lm_RI: Times ~ 1 + Relative_Size + (1 | Experiment_Number)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hs_lm 3 25259 25277 -12626 25253
## hs_lm_RI 4 25005 25029 -12498 24997 256.16 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
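The Chisq column is just the drop in (ML) deviance between the two models, and the p-value comes from a chi-squared distribution with 1 degree of freedom for the one extra parameter; a hand computation from the table above (sketch):

# Likelihood-ratio test by hand, using the rounded deviances printed above
chisq_stat <- 25253 - 24997                      # about 256, matching Chisq = 256.16
pchisq(chisq_stat, df = 1, lower.tail = FALSE)   # vanishingly small, i.e. p < 2.2e-16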
hs_lm_RI2 <- lmer(Times ~ 1 + Relative_Size +(1|Distractor_Number),data=hs_data)
summary(hs_lm_RI2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Times ~ 1 + Relative_Size + (1 | Distractor_Number)
## Data: hs_data
##
## REML criterion at convergence: 24987.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7583 -0.7043 -0.2350 0.4268 4.2826
##
## Random effects:
## Groups Name Variance Std.Dev.
## Distractor_Number (Intercept) 1.147 1.071
## Residual 97.022 9.850
## Number of obs: 3370, groups: Distractor_Number, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 17.5685 0.6651 26.42
## Relative_Size -2.9253 0.1798 -16.27
##
## Correlation of Fixed Effects:
## (Intr)
## Relative_Sz -0.265
display(hs_lm_RI2)
## lmer(formula = Times ~ 1 + Relative_Size + (1 | Distractor_Number),
## data = hs_data)
## coef.est coef.se
## (Intercept) 17.57 0.67
## Relative_Size -2.93 0.18
##
## Error terms:
## Groups Name Std.Dev.
## Distractor_Number (Intercept) 1.07
## Residual 9.85
## ---
## number of obs: 3370, groups: Distractor_Number, 3
## AIC = 24995.8, DIC = 24986.1
## deviance = 24986.9
setwd("C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 6")
TLE3new<-read.csv("213outputTLE3_v03.csv")
TLE3m1<- lmer(Reaction.Time ~ 1 + (1 | Teamsize), data = TLE3new)
summary(TLE3m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction.Time ~ 1 + (1 | Teamsize)
## Data: TLE3new
##
## REML criterion at convergence: 56517.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.9975 -0.6486 -0.3507 0.2697 7.6316
##
## Random effects:
## Groups Name Variance Std.Dev.
## Teamsize (Intercept) 0.005463 0.07391
## Residual 0.327694 0.57245
## Number of obs: 32809, groups: Teamsize, 2
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.55158 0.05236 10.53
display(TLE3m1)
## lmer(formula = Reaction.Time ~ 1 + (1 | Teamsize), data = TLE3new)
## coef.est coef.se
## 0.55 0.05
##
## Error terms:
## Groups Name Std.Dev.
## Teamsize (Intercept) 0.07
## Residual 0.57
## ---
## number of obs: 32809, groups: Teamsize, 2
## AIC = 56523.9, DIC = 56509
## deviance = 56513.5
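As with the haptic data, we can ask how much of the reaction-time variance lies between team sizes; a sketch using the variance components printed above:

# Intraclass correlation for Teamsize from the printed variances
0.005463 / (0.005463 + 0.327694)   # about 0.016 -- very little of the
                                   # variance is between team sizes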
TLE3m2<- lmer(Reaction.Time ~ Trial + (1 | Teamsize), data = TLE3new)
summary(TLE3m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction.Time ~ Trial + (1 | Teamsize)
## Data: TLE3new
##
## REML criterion at convergence: 56460.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0685 -0.6449 -0.3511 0.2733 7.6644
##
## Random effects:
## Groups Name Variance Std.Dev.
## Teamsize (Intercept) 0.005442 0.07377
## Residual 0.327001 0.57184
## Number of obs: 32809, groups: Teamsize, 2
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.6001132 0.0525803 11.413
## Trial -0.0042093 0.0005012 -8.399
##
## Correlation of Fixed Effects:
## (Intr)
## Trial -0.110
display(TLE3m2)
## lmer(formula = Reaction.Time ~ Trial + (1 | Teamsize), data = TLE3new)
## coef.est coef.se
## (Intercept) 0.60 0.05
## Trial 0.00 0.00
##
## Error terms:
## Groups Name Std.Dev.
## Teamsize (Intercept) 0.07
## Residual 0.57
## ---
## number of obs: 32809, groups: Teamsize, 2
## AIC = 56468.8, DIC = 56425.2
## deviance = 56443.0
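As with the haptic models, the two TLE3 models can be compared with a likelihood-ratio test (output not shown here):

# Compare the intercept-only model to the model with a Trial slope;
# anova() refits both with ML before computing the likelihood-ratio test
anova(TLE3m1, TLE3m2)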