1 In-class exercises 1:

Assess how similar family members are in their ratings of liberalism in the family example using the intraclass correlation.

1.1 Load data file

#
pacman::p_load(tidyverse, lme4)

#
dta <- read.table("./data/family.txt", header = T)

# data structure
str(dta)

'data.frame':   12 obs. of  2 variables:
 $ family    : int  1 1 1 2 2 2 2 3 3 4 ...
 $ liberalism: int  25 29 34 18 21 21 26 31 35 30 ...

# first 6 lines
head(dta)

  family liberalism
1      1         25
2      1         29
3      1         34
4      2         18
5      2         21
6      2         21

1.2 aov with fixed family effects

# coerce variable family to be of the factor type
dta$family <- factor(dta$family)

# aov with fixed family effects
summary(m0 <- aov(liberalism ~ family, data = dta))

            Df Sum Sq Mean Sq F value Pr(>F)
family       3  244.3    81.4    7.73 0.0095
Residuals    8   84.3    10.5

When we set the family as fixed effect, the results showed that family yielded a significant effect on liberalism.

1.3 aov with random family effects

summary(m1 <- aov(liberalism ~ Error(family), data = dta))


Error: family
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals  3    244    81.4               

Error: Within
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals  8   84.3    10.5

1.4 regression without intercept term

summary(m2 <- lm(liberalism ~ family - 1, data = dta))


Call:
lm(formula = liberalism ~ family - 1, data = dta)

Residuals:
   Min     1Q Median     3Q    Max 
 -4.33  -1.00  -0.50   1.50   4.67 

Coefficients:
        Estimate Std. Error t value Pr(>|t|)
family1    29.33       1.87    15.7  2.8e-07
family2    21.50       1.62    13.2  1.0e-06
family3    33.00       2.30    14.4  5.4e-07
family4    30.67       1.87    16.4  2.0e-07

Residual standard error: 3.25 on 8 degrees of freedom
Multiple R-squared:  0.991, Adjusted R-squared:  0.987 
F-statistic:  224 on 4 and 8 DF,  p-value: 3.06e-08

When we used dummy coding for the different number of members in each family, the results showed that family still yielded a significant effect on liberalism.

1.5

anova(m2)

Analysis of Variance Table

Response: liberalism
          Df Sum Sq Mean Sq F value  Pr(>F)
family     4   9430    2357     224 3.1e-08
Residuals  8     84      11

We got larger SSQ, MSQ, and F value than the model which did not correct for the members

1.6 lme4 package

summary(m3 <- lmer(liberalism ~ 1 + (1 | family), data = dta))

Linear mixed model fit by REML ['lmerMod']
Formula: liberalism ~ 1 + (1 | family)
   Data: dta

REML criterion at convergence: 65.4

Scaled residuals: 
   Min     1Q Median     3Q    Max 
-1.312 -0.385 -0.114  0.600  1.477 

Random effects:
 Groups   Name        Variance Std.Dev.
 family   (Intercept) 21.9     4.68    
 Residual             10.5     3.24    
Number of obs: 12, groups:  family, 4

Fixed effects:
            Estimate Std. Error t value
(Intercept)    28.49       2.53    11.3

1.7 show random effects

ranef(m3)

$family
  (Intercept)
1     0.72715
2    -6.24380
3     3.63949
4     1.87716

with conditional variances for "family"

1.8 Data Manipulation

# augument data with group mean and ID
dta_fy <- dta %>% 
    mutate( ID = paste0("S", 11:22), g_ave = mean(liberalism )) %>% 
    group_by(family) %>% 
    mutate( grp_ave = mean(liberalism) ) %>%
    as.data.frame %>%
    mutate( grp_ave_uw = mean(unique(grp_ave)) )

1.9 output from model m3

dta_m3 <- data.frame(ID = paste0("S",11:22), fe = rep(summary(m3)$coef[1], 12), 
            re = rep(as.numeric(t(as.data.frame(ranef(m3)[1]))), c(3, 4, 2, 3)), 
            pred = fitted(m3))

1.10 join two data frames

merge(dta_fy, dta_m3, by = "ID")

    ID family liberalism  g_ave grp_ave grp_ave_uw    fe       re   pred
1  S11      1         25 27.667  29.333     28.625 28.49  0.72715 29.217
2  S12      1         29 27.667  29.333     28.625 28.49  0.72715 29.217
3  S13      1         34 27.667  29.333     28.625 28.49  0.72715 29.217
4  S14      2         18 27.667  21.500     28.625 28.49 -6.24380 22.246
5  S15      2         21 27.667  21.500     28.625 28.49 -6.24380 22.246
6  S16      2         21 27.667  21.500     28.625 28.49 -6.24380 22.246
7  S17      2         26 27.667  21.500     28.625 28.49 -6.24380 22.246
8  S18      3         31 27.667  33.000     28.625 28.49  3.63949 32.130
9  S19      3         35 27.667  33.000     28.625 28.49  3.63949 32.130
10 S20      4         30 27.667  30.667     28.625 28.49  1.87716 30.367
11 S21      4         30 27.667  30.667     28.625 28.49  1.87716 30.367
12 S22      4         32 27.667  30.667     28.625 28.49  1.87716 30.367

Note. g_ave is the overall average liberalism, grp_ave is the group (family members) average liberalism, grp_ave_uw is the average removed the duplicate observation, fe is the coefficient, re is the random effects, and pred is the fitted values, separately.

2 In-class exercises 2:

Replicate the Chicano example where the numerical results using SAS are available, and add comment for each code chunk.

2.1 Load Data

dta <- read.csv('data/Chicano.csv', stringsAsFactors = TRUE)

pacman::p_load(tidyverse, VCA, lme4, nlme)

2.2 Visualization

VCA::varPlot(Score ~ Trt/Class/Pupil, 
             Data=dta,
             YLabel=list(text="Score", 
                         side=2, 
                         cex=1),
             MeanLine=list(var=c("Trt", "Class"),
                           col=c("darkred", "salmon"), 
                           lwd=c(1, 2)))

2.3 The ANOVA for the differences scores, treatment as fixed effect and class as random effect.

summary(m1 <- aov(Score ~ Trt + Error(Class), data=dta))


Error: Class
          Df Sum Sq Mean Sq F value Pr(>F)
Trt        1    216     216    9.82  0.035
Residuals  4     88      22               

Error: Within
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals 18    198      11

The results revealed that treatment yielded a significant effect on scores.

2.4 Estimated Variances

faraway::sumary(m2 <- lmer(Score ~ Trt + (1 | Class), data=dta))

Fixed Effects:
            coef.est coef.se
(Intercept) 4.00     1.35   
TrtT        6.00     1.91   

Random Effects:
 Groups   Name        Std.Dev.
 Class    (Intercept) 1.66    
 Residual             3.32    
---
number of obs: 24, groups: Class, 6
AIC = 130.9, DIC = 131.8
deviance = 127.4

The mean is 4 for the control group and 6 for the treatment group.

2.5 The Bootstrapping results

confint(m2, method="boot")

             2.5 % 97.5 %
.sig01      0.0000 3.8071
.sigma      2.2827 4.3622
(Intercept) 1.4613 6.8627
TrtT        2.1758 9.5550

3 In-class exercises 3:

Perform the analysis in the finger ridges example to assess the resemblance of the dermal ridge counts among twins. Comment on your code chunks.

3.1 Load data file

pacman::p_load(tidyverse, lme4)

# input data
dta <- read.table("Data/finger_ridges.txt", header = T)

# coerce pair variable to factor type
dta$Pair <- factor(dta$Pair)

# show first 6 lines
head(dta)

  Pair Twin_1 Twin_2
1    1     71     71
2    2     79     82
3    3    105     99
4    4    115    114
5    5     76     70
6    6     83     82

3.2 historical idea of intraclass correlation

cor(c(dta$Twin_1, dta$Twin_2), c(dta$Twin_2, dta$Twin_1))

[1] 0.96256

The correlation coefficient is 0.96

3.3 Turn data from wide format to long format

dtaL <- dta %>% 
        gather(key = Twin, value = Ridges, Twin_1, Twin_2) %>%
        mutate( Twin = as.factor(Twin))

3.4 Show mean and sd of ridges by twin pair

dtaL %>% group_by(Pair) %>%
         summarize( ridge_ave = mean(Ridges), ridge_sd = sd(Ridges)) %>%
         arrange( desc(ridge_ave))

# A tibble: 12 x 3
   Pair  ridge_ave ridge_sd
   <fct>     <dbl>    <dbl>
 1 4         114.     0.707
 2 7         114.     0.707
 3 9         114.     0.707
 4 3         102      4.24 
 5 10         92.5    2.12 
 6 6          82.5    0.707
 7 2          80.5    2.12 
 8 11         79      5.66 
 9 12         74      2.83 
10 5          73      4.24 
11 1          71      0    
12 8          50.5    9.19

3.5 dot plot

ggplot(dtaL, aes(reorder(Pair, Ridges, mean), Ridges, color = Pair)) + 
 geom_point() + 
 geom_hline(yintercept = mean(dtaL$Ridges), linetype = "dashed") +
 labs(y = "Number of finger ridges", x = "Twin ID") +
 theme_bw() +
 theme(legend.position = "NONE")

3.6 random effect anova for balanced designs

summary(m0 <- aov(Ridges ~ Error(Pair), data = dtaL))


Error: Pair
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals 11   8990     817               

Error: Within
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals 12    172    14.3

3.7 Estimated Variances

(817-14.3)/2

[1] 401.35

3.8 The estimate of intraclass correlation

401.35/(401.35+14.3)

[1] 0.9656

3.9 random effects anova

summary(m1 <- lmer(Ridges ~ 1 + (1 | Pair), data = dtaL))

Linear mixed model fit by REML ['lmerMod']
Formula: Ridges ~ 1 + (1 | Pair)
   Data: dtaL

REML criterion at convergence: 174.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8892 -0.3862 -0.0083  0.3796  1.5496 

Random effects:
 Groups   Name        Variance Std.Dev.
 Pair     (Intercept) 401.5    20.04   
 Residual              14.3     3.78   
Number of obs: 24, groups:  Pair, 12

Fixed effects:
            Estimate Std. Error t value
(Intercept)    87.21       5.84    14.9

Variance of pair is 401.5

3.10 Show random effects

ranef(m1)

$Pair
   (Intercept)
1     -15.9249
2      -6.5910
3      14.5330
4      26.8144
5     -13.9599
6      -4.6260
7      25.8319
8     -36.0664
9      25.8319
10      5.1991
11     -8.0648
12    -12.9774

with conditional variances for "Pair"

Analysis of Variance: In-class exercises

Hao-Lun Fu

2020-10-05