library(PairedData)
## Loading required package: MASS
## Loading required package: gld
## Loading required package: mvtnorm
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'PairedData'
## The following object is masked from 'package:base':
##
## summary
library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the Anorexia data set shipped with the PairedData package and inspect
# its structure (one row per subject, weight before and after therapy).
data(Anorexia, package="PairedData")
str(Anorexia)
## 'data.frame': 17 obs. of 2 variables:
## $ Prior: num 83.8 83.3 86 82.5 86.7 79.6 76.9 94.2 73.4 80.5 ...
## $ Post : num 95.2 94.3 91.5 91.9 100.3 ...
# Mean weight in each column (Prior and Post).
colMeans(Anorexia)
## Prior Post
## 83.22941 90.49412
# Standard deviation of each column, printed with 3 significant digits.
# vapply() works column by column on the data frame and guarantees a numeric
# result, whereas apply() would first coerce the data frame to a matrix.
print(vapply(Anorexia, sd, numeric(1)), digits = 3)
## Prior Post
## 5.02 8.48
# Variance-covariance matrix of Prior and Post (variances on the diagonal).
cov(Anorexia)
## Prior Post
## Prior 25.16721 22.88268
## Post 22.88268 71.82684
# Correlation matrix of Prior and Post, printed with 3 significant digits.
print(cor(Anorexia), digits = 3)
## Prior Post
## Prior 1.000 0.538
## Post 0.538 1.000
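$$\operatorname{cov}(\text{Prior}, \text{Post}) = SD_{\text{Prior}} \times SD_{\text{Post}} \times \operatorname{cor}(\text{Prior}, \text{Post}) = 5.02 \times 8.48 \times 0.538 \approx 22.9$$
$$\operatorname{var}(\text{Prior}) = SD_{\text{Prior}}^{2} = 5.02 \times 5.02 \approx 25.2$$
$$\operatorname{var}(\text{Post}) = SD_{\text{Post}}^{2} = 8.48 \times 8.48 \approx 71.9$$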
# Two-sample t-test that ignores the pairing and treats Prior and Post as
# independent samples; with the default var.equal = FALSE this is Welch's test.
t.test(Anorexia$Prior, Anorexia$Post)
##
## Welch Two Sample t-test
##
## data: Anorexia$Prior and Anorexia$Post
## t = -3.0414, df = 25.986, p-value = 0.005324
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -12.17472 -2.35469
## sample estimates:
## mean of x mean of y
## 83.22941 90.49412
# Paired t-test on the within-subject differences (Prior - Post).
# The argument is spelled out as `paired = TRUE`: `pair` only worked through
# partial argument matching, and `T` is an ordinary variable that can be
# reassigned.
t.test(Anorexia$Prior, Anorexia$Post, paired = TRUE)
##
## Paired t-test
##
## data: Anorexia$Prior and Anorexia$Post
## t = -4.1849, df = 16, p-value = 0.0007003
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.94471 -3.58470
## sample estimates:
## mean of the differences
## -7.264706
# Reshape to long format: one row per measurement, with the source column
# name stored in `Therapy` and the value stored in `weight`.
# NOTE(review): gather() is superseded by tidyr::pivot_longer(), but
# pivot_longer() orders rows differently (Prior/Post alternate within each
# subject). Code that labels subjects by row position would have to be
# adjusted before switching.
dtaL <- gather(Anorexia, key = "Therapy", value = "weight", Prior, Post)
# Show the first 6 rows of the long data.
head(dtaL)
## Therapy weight
## 1 Prior 83.8
## 2 Prior 83.3
## 3 Prior 86.0
## 4 Prior 82.5
## 5 Prior 86.7
## 6 Prior 79.6
# Raw weights per therapy phase (open circles) overlaid with the group mean
# and its bootstrap confidence limits; the axes are flipped so the phases are
# listed vertically.
ggplot(data = dtaL, mapping = aes(x = Therapy, y = weight)) +
  geom_point(shape = 1) +
  stat_summary(fun.data = mean_cl_boot, geom = "pointrange") +
  coord_flip() +
  labs(x = "Therapy", y = "weight") +
  theme_bw()
# Linear model that treats all measurements as independent observations,
# i.e. an equal-variance two-group comparison of Prior vs. Post.
summary(lm(weight ~ Therapy, data=dtaL))
##
## Call:
## lm(formula = weight ~ Therapy, data = dtaL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.294 -2.454 1.106 4.004 11.106
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.494 1.689 53.578 < 2e-16 ***
## TherapyPrior -7.265 2.389 -3.041 0.00467 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.964 on 32 degrees of freedom
## Multiple R-squared: 0.2242, Adjusted R-squared: 0.2
## F-statistic: 9.25 on 1 and 32 DF, p-value: 0.004673
# The same between-groups model expressed as a one-way ANOVA table.
summary(aov(weight ~ Therapy, data=dtaL))
## Df Sum Sq Mean Sq F value Pr(>F)
## Therapy 1 448.6 448.6 9.25 0.00467 **
## Residuals 32 1551.9 48.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
若將治療前、後視為兩組獨立樣本,線性迴歸與 ANOVA 的結果相同(F = 9.25,p = 0.00467),治療前後的 weight 有顯著差異。
# Add two subject identifiers to the long data:
#   Subject - S0..S(n-1), recycled over the Prior and the Post rows so that
#             both measurements of one person share an ID (paired design).
#   Sbj     - a unique ID for every row, i.e. every measurement is treated as
#             coming from a different person (independent design).
# The ID ranges are derived from the data instead of hard-coding 0:16 and
# 0:33; integer arithmetic keeps the labels free of scientific notation.
dtaL <- mutate(
  dtaL,
  Subject = rep(paste0("S", seq_len(nrow(Anorexia)) - 1L), length.out = n()),
  Sbj = paste0("S", seq_len(n()) - 1L)
)
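Subject: S0 to S16, repeated twice so that the Prior and Post rows of the same person share one ID.
Sbj: S0 to S33, a unique ID for every row, which treats each measurement as coming from an independent subject.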
# Repeated-measures ANOVA: Therapy is tested against the Subject:Therapy
# error stratum.
summary(aov(weight ~ Therapy + Error(Subject/Therapy), data = dtaL))
##
## Error: Subject
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16 1142 71.38
##
## Error: Subject:Therapy
## Df Sum Sq Mean Sq F value Pr(>F)
## Therapy 1 448.6 448.6 17.51 7e-04 ***
## Residuals 16 409.8 25.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Equivalent fit with Subject entered as an ordinary blocking factor, which
# also yields an F test for Subject itself.
summary(aov(weight ~ Therapy + Subject, data = dtaL))
## Df Sum Sq Mean Sq F value Pr(>F)
## Therapy 1 448.6 448.6 17.513 0.0007 ***
## Subject 16 1142.1 71.4 2.787 0.0240 *
## Residuals 16 409.8 25.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same analysis with Subject alone as the error term; Therapy is tested in
# the Within stratum.
summary(aov(weight ~ Therapy + Error(Subject), data = dtaL))
##
## Error: Subject
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16 1142 71.38
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Therapy 1 448.6 448.6 17.51 7e-04 ***
## Residuals 16 409.8 25.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Error(Subject/Therapy) 表示將 Therapy 巢套(nested)於 Subject 隨機效果之內。
三個模型寫法結果皆相同。
Therapy 解釋的 weight 變異量(Sum Sq)為 448.6,Subject 解釋的變異量為 1142.1,Therapy 及 Subject 皆未解釋到的剩餘變異量為 409.8。
Therapy與Subject具統計顯著,p值分別為0.0007、0.0240
# Sbj has a distinct level for every row, so this model is saturated: no
# residual degrees of freedom remain and no F test can be reported.
summary(aov(weight ~ Therapy + Sbj, data = dtaL))
## Df Sum Sq Mean Sq
## Therapy 1 448.6 448.6
## Sbj 32 1551.9 48.5
若每筆觀察值各有一個獨立的 Sbj(即不考慮 Subject 的隨機效果),Sbj 的 Sum Sq(1551.9)就等於獨立樣本 ANOVA 的殘差平方和,其實就是回到 two-sample t-test 的情形;此模型沒有剩餘自由度,因此無法計算 F 值。
# Profile plot of the paired data with custom axis labels and a
# black-and-white theme.
with(
  Anorexia,
  plot(paired(Prior, Post), type = "profile")
) +
  labs(x = "Therapy", y = "Weight (in lb)") +
  theme_bw()
線代表的是每個人治療前和治療後的體重變化
箱型圖代表的是治療前與治療後各自的中位數、Q1、Q3及離群值
看起來治療前每個人的體重差異不小(SD = 5.02),治療後的變異更大(SD = 8.48);不過治療後每個人的體重普遍增加,治療對體重有提升效果。