0.1 data management

#load library
pacman::p_load(tidyverse, lme4)
#input data
dta <- read.table("family.txt", header = T)
# data structure
str(dta)
## 'data.frame':    12 obs. of  2 variables:
##  $ family    : int  1 1 1 2 2 2 2 3 3 4 ...
##  $ liberalism: int  25 29 34 18 21 21 26 31 35 30 ...
# first 6 lines
head(dta)
##   family liberalism
## 1      1         25
## 2      1         29
## 3      1         34
## 4      2         18
## 5      2         21
## 6      2         21
# coerce variable family to be of the factor type
dta$family <- factor(dta$family)

0.2 anova analysis

# aov with fixed family effects
summary(m0 <- aov(liberalism ~ family, data = dta))
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## family       3 244.33   81.44   7.726 0.00951 **
## Residuals    8  84.33   10.54                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# aov with random family effects
summary(m1 <- aov(liberalism ~ Error(family), data = dta))
## 
## Error: family
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  3  244.3   81.44               
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  8  84.33   10.54
# regression without intercept term
summary(m2 <- lm(liberalism ~ family - 1, data = dta))
## 
## Call:
## lm(formula = liberalism ~ family - 1, data = dta)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.333 -1.000 -0.500  1.500  4.667 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## family1   29.333      1.875   15.65 2.77e-07 ***
## family2   21.500      1.623   13.24 1.01e-06 ***
## family3   33.000      2.296   14.37 5.36e-07 ***
## family4   30.667      1.875   16.36 1.96e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.247 on 8 degrees of freedom
## Multiple R-squared:  0.9911, Adjusted R-squared:  0.9867 
## F-statistic: 223.6 on 4 and 8 DF,  p-value: 3.065e-08
#
anova(m2)
## Analysis of Variance Table
## 
## Response: liberalism
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## family     4 9429.7 2357.42  223.63 3.065e-08 ***
## Residuals  8   84.3   10.54                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# lme4 package
summary(m3 <- lmer(liberalism ~ 1 + (1 | family), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: liberalism ~ 1 + (1 | family)
##    Data: dta
## 
## REML criterion at convergence: 65.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3116 -0.3850 -0.1135  0.5998  1.4772 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  family   (Intercept) 21.92    4.682   
##  Residual             10.48    3.238   
## Number of obs: 12, groups:  family, 4
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    28.49       2.53   11.26
# show random effects
ranef(m3)
## $family
##   (Intercept)
## 1   0.7271474
## 2  -6.2437983
## 3   3.6394896
## 4   1.8771613
## 
## with conditional variances for "family"

顯示family 四個level 的random intercept

# augument data with group mean and ID
dta_fy <- dta %>% 
    mutate( ID = paste0("S", 11:22), g_ave = mean(liberalism )) %>% 
    group_by(family) %>% 
    mutate( grp_ave = mean(liberalism) ) %>%
    as.data.frame %>%
    mutate( grp_ave_uw = mean(unique(grp_ave)) )

創建以下新的欄位

ID從S11到S22
g_ave是liberalism平均欄
grp_ave是by family的liberalism平均欄
grp_ave_uw??

# output from model m3
dta_m3 <- data.frame(ID = paste0("S",11:22), fe = rep(summary(m3)$coef[1], 12), 
            re = rep(as.numeric(t(as.data.frame(ranef(m3)[1]))), c(3, 4, 2, 3)), 
            pred = fitted(m3))

創建以下新的欄位

ID從S11到S22
fe是lmer(liberalism ~ 1 + (1 | family)的Fix effect
re是family的random effect,按照family level 給定
re = rep(as.numeric(t(as.data.frame(ranef(m3)[1]))), c(3, 4, 2, 3))中的c(3, 4, 2, 3)??
pred??

# join two data frames
merge(dta_fy, dta_m3, by = "ID")
##     ID family liberalism    g_ave  grp_ave grp_ave_uw       fe         re
## 1  S11      1         25 27.66667 29.33333     28.625 28.49027  0.7271474
## 2  S12      1         29 27.66667 29.33333     28.625 28.49027  0.7271474
## 3  S13      1         34 27.66667 29.33333     28.625 28.49027  0.7271474
## 4  S14      2         18 27.66667 21.50000     28.625 28.49027 -6.2437983
## 5  S15      2         21 27.66667 21.50000     28.625 28.49027 -6.2437983
## 6  S16      2         21 27.66667 21.50000     28.625 28.49027 -6.2437983
## 7  S17      2         26 27.66667 21.50000     28.625 28.49027 -6.2437983
## 8  S18      3         31 27.66667 33.00000     28.625 28.49027  3.6394896
## 9  S19      3         35 27.66667 33.00000     28.625 28.49027  3.6394896
## 10 S20      4         30 27.66667 30.66667     28.625 28.49027  1.8771613
## 11 S21      4         30 27.66667 30.66667     28.625 28.49027  1.8771613
## 12 S22      4         32 27.66667 30.66667     28.625 28.49027  1.8771613
##        pred
## 1  29.21742
## 2  29.21742
## 3  29.21742
## 4  22.24648
## 5  22.24648
## 6  22.24648
## 7  22.24648
## 8  32.12976
## 9  32.12976
## 10 30.36744
## 11 30.36744
## 12 30.36744
# The end

0.3 the end

ICC

The correlation between any two liberalism ratings from the same family is 21.919/(21.919+10.482) = 0.677

family 4 E[MSQ]

The best linear unbiased prediction of the random effect for family 4 is (30.667 - 28.49)*(21.919/(21.919+(10.482/3))) = 1.877

The predicted mean for family 4 is 28.49 + 1.877 = 30.368 < 30.667

0.4 Question

1.grp_ave_uw = mean(unique(grp_ave))語法的意義是什麼?

2.re = rep(as.numeric(t(as.data.frame(ranef(m3)[1]))), c(3, 4, 2, 3))計算的是什麼??

3.pred = fitted(m3)計算的是什麼?