#Analysis of variance #Problem Description At the end of six weeks of therapy, the changes of the absolute theta power of electroencephalogram (EEG) of 19 depressed patients in nine selected channels of the patients’ brain were recorded.
pacman::p_load(car, tidyverse, lme4, GGally)
dta <- read.table("C:/Users/Ching-Fang Wu/Documents/data/thetaEEG.txt", header=T)
#檢視資料 #data structure
str(dta)
## 'data.frame': 19 obs. of 10 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Ch3 : num -3.54 5.72 0.52 0 2.07 1.67 9.13 -0.43 -0.56 1.28 ...
## $ Ch4 : num -3.11 5.07 -0.18 0.74 0.76 ...
## $ Ch5 : num -0.24 6.87 0.9 1.1 3.51 2.77 3.44 -0.31 -1.22 1.89 ...
## $ Ch6 : num 0.42 5.96 0.6 0.13 0.6 4.55 4.8 -0.61 0.67 1.77 ...
## $ Ch7 : num -0.49 8.2 1.27 0.19 3.71 1.8 0.48 -1.04 -0.97 1.83 ...
## $ Ch8 : num 2.13 4.87 1.28 0.07 1.86 4.79 1.63 -0.13 -0.98 0.91 ...
## $ Ch17: num -4.15 5.48 -0.95 0.8 1.49 4.51 9.94 -0.61 0 1.4 ...
## $ Ch18: num 2.87 5.57 1.74 0.25 3.11 3.24 1.34 -0.61 -1.22 1.1 ...
## $ Ch19: num 1.34 6.33 0.79 -0.66 1.8 3.99 1.53 -0.43 -0.91 -0.12 ...
head(dta)
## ID Ch3 Ch4 Ch5 Ch6 Ch7 Ch8 Ch17 Ch18 Ch19
## 1 1 -3.54 -3.11 -0.24 0.42 -0.49 2.13 -4.15 2.87 1.34
## 2 2 5.72 5.07 6.87 5.96 8.20 4.87 5.48 5.57 6.33
## 3 3 0.52 -0.18 0.90 0.60 1.27 1.28 -0.95 1.74 0.79
## 4 4 0.00 0.74 1.10 0.13 0.19 0.07 0.80 0.25 -0.66
## 5 5 2.07 0.76 3.51 0.60 3.71 1.86 1.49 3.11 1.80
## 6 6 1.67 4.18 2.77 4.55 1.80 4.79 4.51 3.24 3.99
#colMeans
colMeans(dta[2:10])
## Ch3 Ch4 Ch5 Ch6 Ch7 Ch8 Ch17 Ch18
## 0.9010526 1.5894737 1.0373684 1.1457895 0.8510526 0.8526316 1.4026316 0.8542105
## Ch19
## 0.9947368
#Covariance
cov(dta[2:10])
## Ch3 Ch4 Ch5 Ch6 Ch7 Ch8 Ch17 Ch18
## Ch3 8.645877 9.526639 5.625497 5.212582 3.849543 2.910530 9.021380 3.547645
## Ch4 9.526639 12.348605 5.209576 6.170981 2.671112 3.218435 10.720563 2.416258
## Ch5 5.625497 5.209576 5.560076 4.205233 4.540831 3.504274 5.730063 4.215412
## Ch6 5.212582 6.170981 4.205233 5.073637 3.083282 3.707812 6.069184 3.657924
## Ch7 3.849543 2.671112 4.540831 3.083282 5.181343 2.903964 3.451603 3.811479
## Ch8 2.910530 3.218435 3.504274 3.707812 2.903964 4.281098 3.589004 3.189672
## Ch17 9.021380 10.720563 5.730063 6.069184 3.451603 3.589004 10.738754 3.378316
## Ch18 3.547645 2.416258 4.215412 3.657924 3.811479 3.189672 3.378316 6.769626
## Ch19 3.618034 4.035575 4.241524 4.143538 3.239545 3.822715 4.460326 2.636857
## Ch19
## Ch3 3.618034
## Ch4 4.035575
## Ch5 4.241524
## Ch6 4.143538
## Ch7 3.239545
## Ch8 3.822715
## Ch17 4.460326
## Ch18 2.636857
## Ch19 5.566882
channel<-as.factor(colnames(dta[, -1]))
m0 <- lm(as.matrix(dta[, -1]) ~ 1)
m0_aov <- Anova(m0, idata = data.frame(channel), idesign = ~ channel)
## Note: model has only an intercept; equivalent type-III tests substituted.
Note: model has only an intercept; equivalent type-III tests substituted.
summary(m0_aov)
##
## Type III Repeated Measures MANOVA Tests:
##
## ------------------------------------------
##
## Term: (Intercept)
##
## Response transformation matrix:
## (Intercept)
## Ch3 1
## Ch4 1
## Ch5 1
## Ch6 1
## Ch7 1
## Ch8 1
## Ch17 1
## Ch18 1
## Ch19 1
##
## Sum of squares and products for the hypothesis:
## (Intercept)
## (Intercept) 1761.616
##
## Multivariate Tests: (Intercept)
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.2035907 4.601445 1 18 0.045839 *
## Wilks 1 0.7964093 4.601445 1 18 0.045839 *
## Hotelling-Lawley 1 0.2556358 4.601445 1 18 0.045839 *
## Roy 1 0.2556358 4.601445 1 18 0.045839 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: channel
##
## Response transformation matrix:
## channel1 channel2 channel3 channel4 channel5 channel6 channel7 channel8
## Ch3 0 0 0 1 0 0 0 0
## Ch4 0 0 0 0 1 0 0 0
## Ch5 0 0 0 0 0 1 0 0
## Ch6 0 0 0 0 0 0 1 0
## Ch7 0 0 0 0 0 0 0 1
## Ch8 -1 -1 -1 -1 -1 -1 -1 -1
## Ch17 1 0 0 0 0 0 0 0
## Ch18 0 1 0 0 0 0 0 0
## Ch19 0 0 1 0 0 0 0 0
##
## Sum of squares and products for the hypothesis:
## channel1 channel2 channel3 channel4 channel5
## channel1 5.7475 1.650000e-02 1.485000000 0.506000000 7.70000000
## channel2 0.0165 4.736842e-05 0.004263158 0.001452632 0.02210526
## channel3 1.4850 4.263158e-03 0.383684211 0.130736842 1.98947368
## channel4 0.5060 1.452632e-03 0.130736842 0.044547368 0.67789474
## channel5 7.7000 2.210526e-02 1.989473684 0.677894737 10.31578947
## channel6 1.9305 5.542105e-03 0.498789474 0.169957895 2.58631579
## channel7 3.0635 8.794737e-03 0.791526316 0.269705263 4.10421053
## channel8 -0.0165 -4.736842e-05 -0.004263158 -0.001452632 -0.02210526
## channel6 channel7 channel8
## channel1 1.930500000 3.063500000 -1.650000e-02
## channel2 0.005542105 0.008794737 -4.736842e-05
## channel3 0.498789474 0.791526316 -4.263158e-03
## channel4 0.169957895 0.269705263 -1.452632e-03
## channel5 2.586315789 4.104210526 -2.210526e-02
## channel6 0.648426316 1.028984211 -5.542105e-03
## channel7 1.028984211 1.632889474 -8.794737e-03
## channel8 -0.005542105 -0.008794737 4.736842e-05
##
## Multivariate Tests: channel
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.5243665 1.515882 8 11 0.25588
## Wilks 1 0.4756335 1.515882 8 11 0.25588
## Hotelling-Lawley 1 1.1024593 1.515882 8 11 0.25588
## Roy 1 1.1024593 1.515882 8 11 0.25588
##
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
##
## Sum Sq num Df Error SS den Df F value Pr(>F)
## (Intercept) 195.735 1 765.68 18 4.6014 0.04584 *
## channel 10.702 8 389.31 144 0.4948 0.85840
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Mauchly Tests for Sphericity
##
## Test statistic p-value
## channel 0.00037434 9.7367e-11
##
##
## Greenhouse-Geisser and Huynh-Feldt Corrections
## for Departure from Sphericity
##
## GG eps Pr(>F[GG])
## channel 0.31531 0.6559
##
## HF eps Pr(>F[HF])
## channel 0.3709213 0.6854028
dtaL <- dta %>%
gather( key = channel, value = EEG, 2:10) %>%
mutate( channel = factor(channel))%>%
mutate( ID = factor(ID))
str(dtaL)
## 'data.frame': 171 obs. of 3 variables:
## $ ID : Factor w/ 19 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ channel: Factor w/ 9 levels "Ch17","Ch18",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ EEG : num -3.54 5.72 0.52 0 2.07 1.67 9.13 -0.43 -0.56 1.28 ...
ot <- theme_set(theme_bw())
ggplot(dtaL, aes(reorder(channel, EEG, median), EEG)) +
geom_boxplot() +
geom_point(alpha = .3) +
geom_hline(yintercept = mean(dtaL$EEG)) +
labs(x ="channel condition", y = "EEG amplitutde")
summary(m1 <- aov(EEG ~ channel + Error(ID/channel), data = dtaL))
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 18 765.7 42.54
##
## Error: ID:channel
## Df Sum Sq Mean Sq F value Pr(>F)
## channel 8 10.7 1.338 0.495 0.858
## Residuals 144 389.3 2.704
model.tables(m1, "means")
## Tables of means
## Grand mean
##
## 1.069883
##
## channel
## channel
## Ch17 Ch18 Ch19 Ch3 Ch4 Ch5 Ch6 Ch7 Ch8
## 1.4026 0.8542 0.9947 0.9011 1.5895 1.0374 1.1458 0.8511 0.8526
model.tables(m1, se = T)
## Tables of effects
##
## channel
## channel
## Ch17 Ch18 Ch19 Ch3 Ch4 Ch5 Ch6 Ch7 Ch8
## 0.3327 -0.2157 -0.0751 -0.1688 0.5196 -0.0325 0.0759 -0.2188 -0.2173
##
## Standard errors of effects
## channel
## 0.3772
## replic. 19
dtaL$channel <- factor(dtaL$channel, levels(dtaL$channel)[c(3,2,1,4)]) #不懂[c(3,2,1,4)]這一串的功能
#dtaL$channel <- factor(dtaL$channel, levels(dtaL$channel))
ggplot(dtaL, aes(channel, EEG)) +
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange", col = "firebrick") +
geom_hline(yintercept = mean(dtaL$EEG), linetype = "dashed") +
geom_point(alpha = .3, size = rel(.7)) +
labs(x = "channel condition", y = "EEG amplititude")
ggplot(dtaL, aes(reorder(ID, EEG, mean), EEG, color = channel)) +
geom_point() +
geom_hline(yintercept = mean(dtaL$EEG), linetype = "dashed") +
coord_flip() +
scale_color_manual(values = c("gray", "forestgreen", "orangered", "navy"))+
labs(y = "EEG amplitude", x = "Patient ID") +
theme(legend.position = c(.9, .2))
## Warning: Removed 95 rows containing missing values (geom_point).
summary(m2 <- lmer(EEG ~ channel + (1 | ID), data = dtaL))
## Linear mixed model fit by REML ['lmerMod']
## Formula: EEG ~ channel + (1 | ID)
## Data: dtaL
##
## REML criterion at convergence: 338.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.03410 -0.34972 0.01634 0.28895 2.58123
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 4.444 2.108
## Residual 3.487 1.867
## Number of obs: 76, groups: ID, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.99474 0.64605 1.540
## channelCh18 -0.14053 0.60581 -0.232
## channelCh17 0.40789 0.60581 0.673
## channelCh3 -0.09368 0.60581 -0.155
##
## Correlation of Fixed Effects:
## (Intr) chnC18 chnC17
## channelCh18 -0.469
## channelCh17 -0.469 0.500
## channelCh3 -0.469 0.500 0.500
confint(m2)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 1.4048490 3.071326
## .sigma 1.5287199 2.209885
## (Intercept) -0.2699039 2.259378
## channelCh18 -1.3159656 1.034913
## channelCh17 -0.7675445 1.583334
## channelCh3 -1.2691235 1.081755
ranef(m2)
## $ID
## (Intercept)
## 1 -1.5952534
## 2 3.9600828
## 3 -0.4290090
## 4 -0.7864064
## 5 0.9023489
## 6 1.9348305
## 7 3.7176378
## 8 -1.3026472
## 9 -1.4301399
## 10 -0.1029622
## 11 1.1949550
## 12 0.6369133
## 13 -2.2076362
## 14 -0.9682403
## 15 -0.8553779
## 16 2.4594314
## 17 -0.4143787
## 18 -2.3852899
## 19 -2.3288587
##
## with conditional variances for "ID"
#plot(m2, form = resid(., type = "pearson") ~ fitted(.) | channel,
# abline = 0, lty = 2, layout = c(4,1), pch = 20,
# xlab = "Fitted values", ylab = "Residuals")
Error in [[<-.data.frame
(*tmp*
, j, value = c(4L, 4L, 4L, 4L, 4L, 4L, : replacement has 171 rows, data has 76(看不懂這句話的意思)
v_channel <- dtaL %>%
group_by(channel) %>%
summarize(var_channel = var(EEG)) %>%
as.data.frame
## `summarise()` ungrouping output (override with `.groups` argument)
summarise()
ungrouping output (override with .groups
argument)
dtaL2 <- merge(dtaL, v_channel, by = "channel")
str(dtaL2)
## 'data.frame': 171 obs. of 4 variables:
## $ channel : Factor w/ 4 levels "Ch19","Ch18",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ ID : Factor w/ 19 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ EEG : num -4.15 5.48 -0.95 0.8 1.49 4.51 9.94 -0.61 0 1.4 ...
## $ var_channel: num 10.7 10.7 10.7 10.7 10.7 ...
summary(m3 <- lmer(EEG ~ channel + (1 | ID), data = dtaL2,
weights = 1/var_channel))
## Linear mixed model fit by REML ['lmerMod']
## Formula: EEG ~ channel + (1 | ID)
## Data: dtaL2
## Weights: 1/var_channel
##
## REML criterion at convergence: 339.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2933 -0.3407 -0.0196 0.3013 2.6272
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.9336 1.9833
## Residual 0.4728 0.6876
## Number of obs: 76, groups: ID, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.99474 0.58783 1.692
## channelCh18 -0.14053 0.55404 -0.254
## channelCh17 0.40789 0.63697 0.640
## channelCh3 -0.09368 0.59468 -0.158
##
## Correlation of Fixed Effects:
## (Intr) chnC18 chnC17
## channelCh18 -0.425
## channelCh17 -0.370 0.393
## channelCh3 -0.396 0.420 0.366
#plot(m3, form = resid(., type = "pearson") ~ fitted(.) | channel,
# abline = 0, lty = 2, layout = c(4,1), pch = 20,
# xlab = "Fitted values", ylab = "Residuals")
Error in [[<-.data.frame
(*tmp*
, j, value = c(1L, 1L, 1L, 1L, 1L, 1L, : replacement has 171 rows, data has 76 (看不懂這句話的意思)
#dtaL_m3 <- fortify(m3, dtaL2) %>%
#select(1:3, 5, 7) %>%
#mutate(fit3 = .fitted, res3 = .scresid) %>%
#select( - contains("."))
#dtaL_m2 <- fortify(m2, dtaL) %>%
# select(1:4, 6) %>%
# mutate( fit2 = .fitted, res2 = .scresid) %>%
# select( - contains("."))
#dta_2m <- merge(dtaL_m2, dtaL_m3, by = c("ID","channel", "EEG"))
#ggplot(dta_2m, aes(fit2, res2)) +
# geom_point(alpha = .3) +
# geom_point(aes(fit3, res3), alpha = .6) +
# geom_hline(yintercept = 0, linetype="dashed") +
# facet_grid(. ~ Emotion) +
# labs(x = "Fitted values", y = "Standardized Residuals")