# Load the packages used in this lab; lme4 provides lmer(), and arm
# provides display(), se.fixef(), and se.ranef()
library(tidyverse)
library(lme4)
library(psych)
library(MASS)
library(arm)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: Matrix
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##     expand, pack, unpack
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##     %+%, alpha
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##     select
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 6
## Attaching package: 'arm'
## The following objects are masked from 'package:psych':
##     logit, rescale, sim

Haptic Example

# Read in the data and create factor versions of the design variables
hs_data <- read.csv("MLM_haptic_example_data.csv", header = TRUE)
hs_data <- as_tibble(hs_data) %>%
  mutate(Sub_Num = factor(Subject_Num),
         Ratio   = factor(Relative_Size),
         Num_Dis = factor(Distractor_Number))

Why might we want to treat some of our variables as factors?

Answer: When a variable's values are category labels rather than points on a meaningful numeric scale, coding it as a factor lets the model estimate a separate effect for each level instead of forcing a single linear trend across the values.
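
As a quick check (a minimal sketch, assuming the chunk above has been run), str() shows which columns R now treats as factors rather than numbers:

# Compare the numeric original with its factor version: the factor has
# discrete levels, so the model will estimate one effect per level
str(hs_data[, c("Distractor_Number", "Num_Dis")])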

describe(hs_data)
##                    vars    n  mean    sd median trimmed   mad  min   max range
## Experiment_Number     1 3370  0.49  0.50   0.00    0.49  0.00 0.00  1.00  1.00
## Subject_Num           2 3370 39.69 23.19  38.00   39.12 28.17 1.00 85.00 84.00
## Blk                   3 3370  1.51  0.50   2.00    1.52  0.00 1.00  2.00  1.00
## Distractor_Number     4 3370  2.00  0.82   2.00    2.00  1.48 1.00  3.00  2.00
## Distractor_Lengths    5 3370  1.00  0.54   0.75    1.00  0.74 0.25  1.75  1.50
## Relative_Size         6 3370  0.98  0.94   0.75    0.82  0.37 0.25  3.00  2.75
## Times                 7 3370 14.70 10.26  11.65   13.17  7.89 0.55 60.05 59.50
## Sub_Num*              8 3370 37.52 21.91  36.00   37.05 26.69 1.00 79.00 78.00
## Ratio*                9 3370  3.52  1.70   4.00    3.53  1.48 1.00  6.00  5.00
## Num_Dis*             10 3370  2.00  0.82   2.00    2.00  1.48 1.00  3.00  2.00
##                     skew kurtosis   se
## Experiment_Number   0.03    -2.00 0.01
## Subject_Num         0.17    -1.07 0.40
## Blk                -0.06    -2.00 0.01
## Distractor_Number  -0.01    -1.51 0.01
## Distractor_Lengths  0.01    -1.51 0.01
## Relative_Size       1.50     0.64 0.02
## Times               1.39     1.83 0.18
## Sub_Num*            0.15    -1.08 0.38
## Ratio*             -0.02    -1.27 0.03
## Num_Dis*           -0.01    -1.51 0.01
#summary(hs_data)

Multilevel Visualization

I want to walk through how we visualize multilevel models as a segue from the previous work we have done in the course. I am going to refit models we have discussed in class as a recap, and then extend them to transition into the multilevel framework.

Start with the group-level model:
hs_mdl0 <- lm("Times~ Distractor_Number", data = hs_data)
display(hs_mdl0)
## lm(formula = "Times~ Distractor_Number", data = hs_data)
##                   coef.est coef.se
## (Intercept)       12.52     0.47  
## Distractor_Number  1.09     0.22  
## ---
## n = 3370, k = 2
## residual sd = 10.23, R-Squared = 0.01

Now plot that

ggplot(data = hs_data, mapping = aes(x = Distractor_Number,y = Times)) + geom_jitter() +  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

hs_mdl0.5 <- lm("Times~ Num_Dis-1 ", data = hs_data)
display(hs_mdl0.5)
## lm(formula = "Times~ Num_Dis-1 ", data = hs_data)
##          coef.est coef.se
## Num_Dis1 13.62     0.31  
## Num_Dis2 14.69     0.31  
## Num_Dis3 15.80     0.30  
## ---
## n = 3370, k = 3
## residual sd = 10.23, R-Squared = 0.67
# Plot Times against the row index, with a separate fit line per Num_Dis level
ggplot(data = hs_data, mapping = aes(x = 1:3370, y = Times)) + geom_jitter() + geom_smooth(method = "lm", aes(color = Num_Dis))
## `geom_smooth()` using formula = 'y ~ x'

Now look at the lower-level model:
ggplot(data = hs_data, mapping = aes(x = Relative_Size,y = Times)) + geom_jitter(mapping = aes(color = Num_Dis)) +  stat_smooth(method = "lm", se=FALSE, color = 'red')
## `geom_smooth()` using formula = 'y ~ x'

How do we write this model using the lm() function?

hs_mdl1 <- lm("Times~ Relative_Size", data = hs_data)
display(hs_mdl1)
## lm(formula = "Times~ Relative_Size", data = hs_data)
##               coef.est coef.se
## (Intercept)   17.57     0.25  
## Relative_Size -2.92     0.18  
## ---
## n = 3370, k = 2
## residual sd = 9.89, R-Squared = 0.07
hs_mdl1.5 <- lm(Times ~ Ratio - 1, data = hs_data)
display(hs_mdl1.5)
## lm(formula = Times ~ Ratio - 1, data = hs_data)
##                        coef.est coef.se
## Ratio0.25              17.45     0.42  
## Ratio0.333333333333333 19.65     0.41  
## Ratio0.5               14.88     0.41  
## Ratio0.75              14.43     0.41  
## Ratio1                 12.43     0.41  
## Ratio3                  9.61     0.41  
## ---
## n = 3370, k = 6
## residual sd = 9.74, R-Squared = 0.71
# Again, x is just the row index, with a separate fit line per Ratio level
ggplot(data = hs_data, mapping = aes(x = 1:3370, y = Times)) + geom_jitter() + stat_smooth(method = "lm", aes(color = Ratio))
## `geom_smooth()` using formula = 'y ~ x'

Now, how can we represent both predictors in one model?
hs_mdl2 <- lm(Times ~ Ratio - 1 + Num_Dis - 1, data = hs_data)
display(hs_mdl2)
## lm(formula = Times ~ Ratio - 1 + Num_Dis - 1, data = hs_data)
##                        coef.est coef.se
## Ratio0.25              16.35     0.48  
## Ratio0.333333333333333 18.53     0.47  
## Ratio0.5               13.76     0.47  
## Ratio0.75              13.31     0.47  
## Ratio1                 11.33     0.47  
## Ratio3                  8.48     0.47  
## Num_Dis2                1.13     0.41  
## Num_Dis3                2.20     0.41  
## ---
## n = 3370, k = 8
## residual sd = 9.71, R-Squared = 0.71

What is being ignored by all of the above models?

Answer: None of the above models include random effects. lm() assumes the errors are independent and identically distributed, which is unlikely to hold here because observations are nested within subjects and experiments.
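
One rough way to see the problem (a sketch, not part of the original lab, assuming hs_mdl1 from above was fit on all 3370 rows): if the errors really were independent of subject, the per-subject mean residuals should hover near zero.

# Average the residuals of the simple regression within each subject;
# systematic deviations from zero suggest the iid-error assumption fails
hs_data %>%
  mutate(res = resid(hs_mdl1)) %>%
  group_by(Sub_Num) %>%
  summarise(mean_res = mean(res)) %>%
  arrange(desc(abs(mean_res)))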

ggplot(data = hs_data, aes(x = Relative_Size,y = Times, color = Num_Dis)) + geom_jitter() +  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Part 2: Chapter 12

Now let's do multilevel modeling using lmer().

Varying Intercept Model

hs_lm <- lmer(Times ~ 1 + (1 | Experiment_Number), data = hs_data)

display(hs_lm)
## lmer(formula = Times ~ 1 + (1 | Experiment_Number), data = hs_data)
## coef.est  coef.se 
##    14.70     0.56 
## 
## Error terms:
##  Groups            Name        Std.Dev.
##  Experiment_Number (Intercept)  0.76   
##  Residual                      10.25   
## ---
## number of obs: 3370, groups: Experiment_Number, 2
## AIC = 25258.6, DIC = 25253.3
## deviance = 25253.0

What is being included in this model?

Answer: This model includes only a constant term (the intercept) and allows it to vary by the grouping variable Experiment_Number. We also get more informative error terms: a between-experiment standard deviation for the intercept and a residual (within-group) standard deviation.
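
One useful summary of this output (a sketch using the standard deviations printed above) is the intraclass correlation, the share of the total variance that lies between experiments:

# ICC from the displayed error terms: between-group variance over total
between_sd <- 0.76   # Experiment_Number (Intercept) Std.Dev.
within_sd  <- 10.25  # Residual Std.Dev.
between_sd^2 / (between_sd^2 + within_sd^2)  # ~0.005: very little variance
                                             # lies between the experiments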

Varying Intercept Model with 1 predictor

hs_lm_RI <- lmer(Times ~ 1 + Relative_Size + (1 | Experiment_Number), data = hs_data)

display(hs_lm_RI)
## lmer(formula = Times ~ 1 + Relative_Size + (1 | Experiment_Number), 
##     data = hs_data)
##               coef.est coef.se
## (Intercept)   17.58     0.68  
## Relative_Size -2.94     0.18  
## 
## Error terms:
##  Groups            Name        Std.Dev.
##  Experiment_Number (Intercept) 0.90    
##  Residual                      9.87    
## ---
## number of obs: 3370, groups: Experiment_Number, 2
## AIC = 25005.8, DIC = 24995.8
## deviance = 24996.8

This model starts with the complete-pooling model $y \sim x$ (a single regression that ignores the grouping) and then adds the varying intercept from the previous model.
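
Written out in standard multilevel notation, the model being fit is

$$y_i = \alpha_{j[i]} + \beta x_i + \epsilon_i, \qquad \alpha_j \sim N(\mu_\alpha, \sigma_\alpha^2), \qquad \epsilon_i \sim N(0, \sigma_y^2)$$

where $j[i]$ indexes the experiment to which observation $i$ belongs.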

What does adding the varying intercept to the complete-pooling model allow for?

Answer: It allows the intercept to vary by group, while the slope for the predictor is held constant across groups (note the identical Relative_Size coefficients in the coef() output below).

# From the book 
hs_c <- coef(hs_lm_RI)
hs_c
## $Experiment_Number
##   (Intercept) Relative_Size
## 0    18.19459     -2.942456
## 1    16.96378     -2.942456
## 
## attr(,"class")
## [1] "coef.mer"

Write the estimated regression line for each of the groups.

Answer: For the experiment coded 0: $y = 18.19 - 2.94x$; for the experiment coded 1: $y = 16.96 - 2.94x$.
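
To see these two lines against the data (a sketch; hs_lines is a name introduced here for illustration):

# Overlay the experiment-specific regression lines extracted by coef()
hs_lines <- coef(hs_lm_RI)$Experiment_Number
ggplot(hs_data, aes(x = Relative_Size, y = Times)) +
  geom_jitter(alpha = 0.2) +
  geom_abline(intercept = hs_lines[1, "(Intercept)"],
              slope = hs_lines[1, "Relative_Size"], color = "blue") +
  geom_abline(intercept = hs_lines[2, "(Intercept)"],
              slope = hs_lines[2, "Relative_Size"], color = "red")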

Alternatively, we can look at the “fixed effects”:

hs_cF <- fixef(hs_lm_RI)
hs_cF
##   (Intercept) Relative_Size 
##     17.579188     -2.942456

Write the regression line for the fixed effect and explain in words what this line means.

Answer: $y = 17.58 - 2.94x$. This is the estimated regression line averaging over the two experiments.
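
As a quick worked example from that line, the average predicted time at a relative size of 1:

# Fixed-effect prediction at Relative_Size = 1, ignoring group deviations
17.58 - 2.94 * 1  # ~14.64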

Now let's look at the experiment-level errors:

hs_cE <-ranef(hs_lm_RI)
hs_cE 
## $Experiment_Number
##   (Intercept)
## 0   0.6154041
## 1  -0.6154041
## 
## with conditional variances for "Experiment_Number"

What do these two numbers tell us?

Answer: These two numbers are the group-level deviations: each experiment's intercept sits above or below the averaged-over-experiments (fixed-effect) intercept by this amount, depending on which experiment an observation came from.
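
A quick consistency check (a sketch): the fixed-effect intercept plus each group's deviation reproduces the per-group intercepts from coef() above.

# 17.58 + 0.62 = 18.19 (experiment 0); 17.58 - 0.62 = 16.96 (experiment 1)
fixef(hs_lm_RI)["(Intercept)"] + ranef(hs_lm_RI)$Experiment_Number[, "(Intercept)"]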

Now let's look at the uncertainty in the estimates of our coefficients:

se.fixef(hs_lm_RI)
##   (Intercept) Relative_Size 
##     0.6825168     0.1802499
se.ranef(hs_lm_RI)
## $Experiment_Number
##   (Intercept)
## 0   0.2306844
## 1   0.2339019

What might be the cause of the similarity or difference between the standard errors of our coefficients?

Answer: Because the number of observations within each group (experiment) is similar, the standard errors should be similar. If the group sizes were very different, the standard errors would differ too, because of the weighting or ‘shrinkage’ that comes with unequal n’s.
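
We can check the group sizes directly:

# Similar numbers of observations per experiment explain the similar SEs
table(hs_data$Experiment_Number)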

Interpret the fit of the two models and explain which one is better and how you know.

Answer: The AIC and BIC are both lower for the model that includes the predictor. More important, the chi-squared (likelihood-ratio) test below is highly significant, so adding Relative_Size clearly improves the fit.

Below is an effective way to test the difference between the two models.

anova(hs_lm, hs_lm_RI)
## refitting model(s) with ML (instead of REML)
## Data: hs_data
## Models:
## hs_lm: Times ~ 1 + (1 | Experiment_Number)
## hs_lm_RI: Times ~ 1 + Relative_Size + (1 | Experiment_Number)
##          npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## hs_lm       3 25259 25277 -12626    25253                         
## hs_lm_RI    4 25005 25029 -12498    24997 256.16  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can also let the intercept vary by a different grouping variable, Distractor_Number:

hs_lm_RI2 <- lmer(Times ~ 1 + Relative_Size + (1 | Distractor_Number), data = hs_data)

summary(hs_lm_RI2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Times ~ 1 + Relative_Size + (1 | Distractor_Number)
##    Data: hs_data
## 
## REML criterion at convergence: 24987.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7583 -0.7043 -0.2350  0.4268  4.2826 
## 
## Random effects:
##  Groups            Name        Variance Std.Dev.
##  Distractor_Number (Intercept)  1.147   1.071   
##  Residual                      97.022   9.850   
## Number of obs: 3370, groups:  Distractor_Number, 3
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)    17.5685     0.6651   26.42
## Relative_Size  -2.9253     0.1798  -16.27
## 
## Correlation of Fixed Effects:
##             (Intr)
## Relative_Sz -0.265
display(hs_lm_RI2)
## lmer(formula = Times ~ 1 + Relative_Size + (1 | Distractor_Number), 
##     data = hs_data)
##               coef.est coef.se
## (Intercept)   17.57     0.67  
## Relative_Size -2.93     0.18  
## 
## Error terms:
##  Groups            Name        Std.Dev.
##  Distractor_Number (Intercept) 1.07    
##  Residual                      9.85    
## ---
## number of obs: 3370, groups: Distractor_Number, 3
## AIC = 24995.8, DIC = 24986.1
## deviance = 24986.9
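
Note that hs_lm_RI and hs_lm_RI2 group by different variables, so they are not nested and the likelihood-ratio test used above does not apply. One rough comparison (a sketch using lme4's refitML() so both AICs are on the maximum-likelihood scale):

# Refit both models with ML, then compare their AICs
AIC(refitML(hs_lm_RI), refitML(hs_lm_RI2))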

Homework

Repeat Part 2 above using your own data. If you do not have data, feel free to use the data from our textbook.

Read in your data

setwd("C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 6")
TLE3new<-read.csv("213outputTLE3_v03.csv")

Varying Intercept Model

TLE3m1 <- lmer(Reaction.Time ~ 1 + (1 | Teamsize), data = TLE3new)
summary(TLE3m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction.Time ~ 1 + (1 | Teamsize)
##    Data: TLE3new
## 
## REML criterion at convergence: 56517.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9975 -0.6486 -0.3507  0.2697  7.6316 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teamsize (Intercept) 0.005463 0.07391 
##  Residual             0.327694 0.57245 
## Number of obs: 32809, groups:  Teamsize, 2
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.55158    0.05236   10.53
display(TLE3m1)
## lmer(formula = Reaction.Time ~ 1 + (1 | Teamsize), data = TLE3new)
## coef.est  coef.se 
##     0.55     0.05 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  Teamsize (Intercept) 0.07    
##  Residual             0.57    
## ---
## number of obs: 32809, groups: Teamsize, 2
## AIC = 56523.9, DIC = 56509
## deviance = 56513.5

Varying Intercept Model with 1 predictor

TLE3m2 <- lmer(Reaction.Time ~ Trial + (1 | Teamsize), data = TLE3new)
summary(TLE3m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction.Time ~ Trial + (1 | Teamsize)
##    Data: TLE3new
## 
## REML criterion at convergence: 56460.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0685 -0.6449 -0.3511  0.2733  7.6644 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teamsize (Intercept) 0.005442 0.07377 
##  Residual             0.327001 0.57184 
## Number of obs: 32809, groups:  Teamsize, 2
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)  0.6001132  0.0525803  11.413
## Trial       -0.0042093  0.0005012  -8.399
## 
## Correlation of Fixed Effects:
##       (Intr)
## Trial -0.110
display(TLE3m2)
## lmer(formula = Reaction.Time ~ Trial + (1 | Teamsize), data = TLE3new)
##             coef.est coef.se
## (Intercept) 0.60     0.05   
## Trial       0.00     0.00   
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  Teamsize (Intercept) 0.07    
##  Residual             0.57    
## ---
## number of obs: 32809, groups: Teamsize, 2
## AIC = 56468.8, DIC = 56425.2
## deviance = 56443.0
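
Because display() rounds to two decimals, the Trial slope prints as 0.00. To see the actual estimate (summary() above shows about -0.0042), pull the fixed effects directly:

# Exact fixed-effect estimates, unrounded
fixef(TLE3m2)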