0928hw1

0.1 load package

library(PairedData)

## Loading required package: MASS

## Loading required package: gld

## Loading required package: mvtnorm

## Loading required package: lattice

## Loading required package: ggplot2

## 
## Attaching package: 'PairedData'

## The following object is masked from 'package:base':
## 
##     summary

library(Hmisc)

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

0.2 input data

data(Anorexia, package="PairedData")
str(Anorexia)

## 'data.frame':    17 obs. of  2 variables:
##  $ Prior: num  83.8 83.3 86 82.5 86.7 79.6 76.9 94.2 73.4 80.5 ...
##  $ Post : num  95.2 94.3 91.5 91.9 100.3 ...

0.3 compute variable unuvariate

colMeans(Anorexia)

##    Prior     Post 
## 83.22941 90.49412

print(apply(Anorexia, 2, sd), 3)

## Prior  Post 
##  5.02  8.48

cov(Anorexia)

##          Prior     Post
## Prior 25.16721 22.88268
## Post  22.88268 71.82684

print(cor(Anorexia),3)

##       Prior  Post
## Prior 1.000 0.538
## Post  0.538 1.000

covariance(Prior, Post) = SD_PriorSD_Postcor(Prior, Post)=5.028.48.538 = 22.9

5.025.02=25.2
8.488.48=71.9

0.4 2-sample t-test

t.test(Anorexia$Prior, Anorexia$Post)

## 
##  Welch Two Sample t-test
## 
## data:  Anorexia$Prior and Anorexia$Post
## t = -3.0414, df = 25.986, p-value = 0.005324
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12.17472  -2.35469
## sample estimates:
## mean of x mean of y 
##  83.22941  90.49412

0.5 paired t-test

t.test(Anorexia$Prior, Anorexia$Post, pair=T)

## 
##  Paired t-test
## 
## data:  Anorexia$Prior and Anorexia$Post
## t = -4.1849, df = 16, p-value = 0.0007003
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.94471  -3.58470
## sample estimates:
## mean of the differences 
##               -7.264706

0.6 turn wide format to long format

dtaL <- gather(Anorexia, key = "Therapy", value = "weight", Prior, Post)
# show first 6 lines
head(dtaL)

##   Therapy weight
## 1   Prior   83.8
## 2   Prior   83.3
## 3   Prior   86.0
## 4   Prior   82.5
## 5   Prior   86.7
## 6   Prior   79.6

0.7 ggplot for Therapy and weight

ggplot(dtaL, aes(Therapy, weight)) + 
 geom_point(shape = 1) + 
 stat_summary(fun.data = mean_cl_boot, geom = "pointrange") + 
 coord_flip() +
 labs(x = "Therapy", y = "weight") +
 theme_bw()

0.8 show results of anova

summary(lm(weight ~ Therapy, data=dtaL))

## 
## Call:
## lm(formula = weight ~ Therapy, data = dtaL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.294  -2.454   1.106   4.004  11.106 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    90.494      1.689  53.578  < 2e-16 ***
## TherapyPrior   -7.265      2.389  -3.041  0.00467 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.964 on 32 degrees of freedom
## Multiple R-squared:  0.2242, Adjusted R-squared:    0.2 
## F-statistic:  9.25 on 1 and 32 DF,  p-value: 0.004673

summary(aov(weight ~ Therapy, data=dtaL))

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Therapy      1  448.6   448.6    9.25 0.00467 **
## Residuals   32 1551.9    48.5                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

若視為2個獨立的變項，用線性回歸或是anova做出來的結果是一樣的，Therapy前後weight有顯著差異(p=0.00467)

0.9 Create Subject and Sbj variable

dtaL <- mutate(dtaL, Subject=rep(paste0("S", 0:16), 2), Sbj=paste0("S", 0:33))

Subject=repeat S0 to S16 for deviding two groups
sbj= S0 to S33 for creating independent sbj varible

0.10 ANOVA within subject design

summary(aov(weight ~ Therapy + Error(Subject/Therapy), data = dtaL))

## 
## Error: Subject
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16   1142   71.38               
## 
## Error: Subject:Therapy
##           Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy    1  448.6   448.6   17.51  7e-04 ***
## Residuals 16  409.8    25.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(aov(weight ~ Therapy + Subject, data = dtaL))

##             Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy      1  448.6   448.6  17.513 0.0007 ***
## Subject     16 1142.1    71.4   2.787 0.0240 *  
## Residuals   16  409.8    25.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(aov(weight ~ Therapy + Error(Subject), data = dtaL))

## 
## Error: Subject
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16   1142   71.38               
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy    1  448.6   448.6   17.51  7e-04 ***
## Residuals 16  409.8    25.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

將Subject random effect鑲嵌在Therapy中

三個模型寫法結果皆相同。

Therapy對weight變異量為448.6，Subject對weight變異量為1142.1，Therapy及Subject對weight剩餘為解釋到的變異量為409.8。

Therapy與Subject具統計顯著，p值分別為0.0007、0.0240

0.11 ANOVA between design

summary(aov(weight ~ Therapy + Sbj, data = dtaL))

##             Df Sum Sq Mean Sq
## Therapy      1  448.6   448.6
## Sbj         32 1551.9    48.5

不考慮Subject random effect其實就是2 sample ttest

0.12 plot paired-ttest

with(Anorexia, plot(paired(Prior, Post), type="profile")) +
 labs(x="Therapy", y="Weight (in lb)") +
 theme_bw()

線代表的是每個人治療前和治療後的體重變化
箱型圖代表的是治療前的中位數、Q1、Q3及離群值
看起來治療前個人體重變異性大，治療後，每個人的體重普遍增加，治療對體重有提升效果。