1 Input data

#load package

library(PairedData,Hmisc)

## Loading required package: MASS

## Loading required package: gld

## Loading required package: mvtnorm

## Loading required package: lattice

## Loading required package: ggplot2

## 
## Attaching package: 'PairedData'

## The following object is masked from 'package:base':
## 
##     summary

#
data(Anorexia, package="PairedData")
dta1 <- Anorexia
str(dta1)

## 'data.frame':    17 obs. of  2 variables:
##  $ Prior: num  83.8 83.3 86 82.5 86.7 79.6 76.9 94.2 73.4 80.5 ...
##  $ Post : num  95.2 94.3 91.5 91.9 100.3 ...

head(dta1)

##   Prior  Post
## 1  83.8  95.2
## 2  83.3  94.3
## 3  86.0  91.5
## 4  82.5  91.9
## 5  86.7 100.3
## 6  79.6  76.7

2 Data analysis

# variable means (Prior mean weight=83.22941 < Post mean weight =90.49412)
colMeans(dta1)

##    Prior     Post 
## 83.22941 90.49412

# variable sd (Prior SD of weight=5.02 < Post SD of weight =8.48)
print(apply(dta1, 2, sd), 3)

## Prior  Post 
##  5.02  8.48

# co-variances
cov(dta1)

##          Prior     Post
## Prior 25.16721 22.88268
## Post  22.88268 71.82684

# correlations of Post and Prior weight = 0.538
print(cor(dta1),3)

##       Prior  Post
## Prior 1.000 0.538
## Post  0.538 1.000

# 2-sample t-test
t.test(dta1$Post, dta1$Prior)

## 
##  Welch Two Sample t-test
## 
## data:  dta1$Post and dta1$Prior
## t = 3.0414, df = 25.986, p-value = 0.005324
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   2.35469 12.17472
## sample estimates:
## mean of x mean of y 
##  90.49412  83.22941

# paired t-test
t.test(dta1$Post, dta1$Prior, pair=T)

## 
##  Paired t-test
## 
## data:  dta1$Post and dta1$Prior
## t = 4.1849, df = 16, p-value = 0.0007003
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   3.58470 10.94471
## sample estimates:
## mean of the differences 
##                7.264706

3 Visualization

# turn wide format to long format
dta1L <- tidyr::gather(dta1, key = "Therapy", value = "weights", Prior, Post)
# show first 6 lines
head(dta1L)

##   Therapy weights
## 1   Prior    83.8
## 2   Prior    83.3
## 3   Prior    86.0
## 4   Prior    82.5
## 5   Prior    86.7
## 6   Prior    79.6

#

#mean_cl_boot: implementation of the basic nonparametric bootstrap for obtaining confidence limits for the population mean without assuming normality.(x, conf.int=.95, B=1000, na.rm=TRUE, reps=FALSE)??

ggplot(dta1L, aes(Therapy, weights)) + 
  geom_point(shape = 1,
             colour = "coral") + 
  stat_summary(fun.data = mean_cl_boot, 
               geom = "pointrange",
               colour = "deepskyblue4", 
               width = 0.5) + 
  coord_flip() + #Cartesian coordinates with x and y flipped?
  labs(x = "Therapy", y = "weights") +
  theme_bw()

## Warning: Ignoring unknown parameters: width

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

4 Without subject effects

# show results of anova
summary(lm(weights ~ Therapy -1, data=dta1L))

## 
## Call:
## lm(formula = weights ~ Therapy - 1, data = dta1L)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.294  -2.454   1.106   4.004  11.106 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## TherapyPost    90.494      1.689   53.58   <2e-16 ***
## TherapyPrior   83.229      1.689   49.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.964 on 32 degrees of freedom
## Multiple R-squared:  0.994,  Adjusted R-squared:  0.9936 
## F-statistic:  2649 on 2 and 32 DF,  p-value: < 2.2e-16

summary(aov(weights ~ Therapy -1 , data=dta1L))

##           Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy    2 256977  128489    2649 <2e-16 ***
## Residuals 32   1552      48                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# generate sbj id for each group
dta1L <- dplyr::mutate(dta1L, Subject=rep(paste0("S", 1:17), 2), Sbj=paste0("S", 1:34))
# within subject design
summary(aov(weights ~ Therapy + Error(Subject/Therapy), data = dta1L))

## 
## Error: Subject
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16   1142   71.38               
## 
## Error: Subject:Therapy
##           Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy    1  448.6   448.6   17.51  7e-04 ***
## Residuals 16  409.8    25.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# within subject design
summary(aov(weights ~ Therapy + Subject, data = dta1L))

##             Df Sum Sq Mean Sq F value Pr(>F)    
## Therapy      1  448.6   448.6  17.513 0.0007 ***
## Subject     16 1142.1    71.4   2.787 0.0240 *  
## Residuals   16  409.8    25.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# between design
summary(aov(weights ~ Therapy + Sbj, data = dta1L))

##             Df Sum Sq Mean Sq
## Therapy      1  448.6   448.6
## Sbj         32 1551.9    48.5

#
with(Anorexia, plot(paired(Prior, Post), type="profile")) +
 labs(x="Therapy", y="Weight (in lb)") +
 theme_bw()

Homework exercise 1:Anorexia example

Hank Lee

2020-10-04

1 Input data

2 Data analysis

3 Visualization

4 Without subject effects