# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Module 4 Day 10 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Cholesterol Data Example:
# Data was obtained from a study conducted to examine
# the relationship between total serum cholesterol
# levels and heart attacks among subjects who experienced
# a recent ACS event. A total of 28 subjects were
# recruited for the study. Each subject had their
# cholesterol levels measured at 2 days, 4 days,
# and 14 days post heart attack.
# Load the data:
load(url("http://www.duke.edu/~sgrambow/crp241data/cholesterol-paired.RData"))
# Data Set: cholesterol.paired
# Data Dictionary:
# (1) patient patient identifier
# (2) DAY2 total serum cholesterol (mg/dL)
# measured 2 days post-MI
# (3) DAY4 total serum cholesterol (mg/dL)
# measured 4 days post-MI
# (4) DAY14 total serum cholesterol (mg/dL)
# measured 14 days post-MI
# (5) DAY14MSNG binary flag for DAY14 missingness
# (1 = missing vs. 0 = non-missing)
# Compute summary statistics for paired differences:
# What is the difference between the next
# command and the one below it?
# paired.diff <- cholesterol.paired$DAY4-cholesterol.paired$DAY2
cholesterol.paired$paired.diff <- cholesterol.paired$DAY4-
cholesterol.paired$DAY2
summary(cholesterol.paired$paired.diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -108.00 -40.50 -14.00 -18.61 7.50 46.00
# - Histogram of paired differences:
hist(cholesterol.paired$paired.diff,freq=FALSE,
main='4Days vs. 2Days Post-MI Differences',
ylab='Density',
xlab='Differences')

# - Boxplot of paired differences:
boxplot(cholesterol.paired$paired.diff,
main='4Days vs. 2Days Post-MI Differences',
ylab='cholesterol levels')

# - Compute 95% CI for paired mean diff, test stat,and p-value:
t.test(cholesterol.paired$DAY4,cholesterol.paired$DAY2,paired=TRUE)
##
## Paired t-test
##
## data: cholesterol.paired$DAY4 and cholesterol.paired$DAY2
## t = -2.6529, df = 27, p-value = 0.0132
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -32.998364 -4.215922
## sample estimates:
## mean of the differences
## -18.60714
# OR Alternatively (and equivalently)
t.test(cholesterol.paired$paired.diff)
##
## One Sample t-test
##
## data: cholesterol.paired$paired.diff
## t = -2.6529, df = 27, p-value = 0.0132
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -32.998364 -4.215922
## sample estimates:
## mean of x
## -18.60714
# - What if we (incorrectly) used the two sample t-test instead?
t.test(cholesterol.paired$DAY4,cholesterol.paired$DAY2)
##
## Welch Two Sample t-test
##
## data: cholesterol.paired$DAY4 and cholesterol.paired$DAY2
## t = -1.4828, df = 53.943, p-value = 0.1439
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -43.765617 6.551331
## sample estimates:
## mean of x mean of y
## 235.3214 253.9286
mean(cholesterol.paired$DAY4)-mean(cholesterol.paired$DAY2)
## [1] -18.60714
########################################################
# Example: The Acupuncture Dataset -- needle.RData
# Key Variables
# group = 0 is control, 1 is acupuncture
# pk1 = severity score (baseline)
# pk2 = severity score posttreatment
# pk5 = severity score (one year followup)
# ChangeFrom.pk1 = change in severity score (pk1-pk5)
# f1 heache frequency at baseline
# f1cat This is a categorized version of f1 with
# low = frequency <= 33rd percentile = 12
# med = frequency > 33rd percentile = 12
# and <= 67th percentile = 19
# high = frequency > 67th percentile = 19
#
########################################################
# Download and load the file
load(url("http://www.duke.edu/~sgrambow/crp245data/needle.RData"))
# Quick summary of key variables
summary(needle$pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.75 15.00 21.00 26.51 34.25 94.75
summary(needle$pk5)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 8.50 15.00 19.08 25.25 87.25 100
summary(needle$ChangeFrom.pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -29.000 -0.500 6.250 6.486 11.750 55.750 100
# Quick summary of key variables by group
# Subset data into two treatment groups
Treat <- subset(needle,needle$fgroup=="Treat")
Control <- subset(needle,needle$fgroup=="Control")
# Summary of outcome by group at baseline
summary(Treat$pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.75 14.25 20.75 25.61 33.00 87.50
summary(Control$pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.75 16.00 21.88 27.45 35.69 94.75
# Summary of outcome by group at 12 months
summary(Treat$pk5)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 7.25 12.00 16.25 21.50 73.73 44
summary(Control$pk5)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.25 10.44 17.00 22.34 28.75 87.25 56
# Summary of Change from baseline in outcome by group
summary(Treat$ChangeFrom.pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -28.733 0.500 7.750 8.329 13.500 55.750 44
summary(Control$ChangeFrom.pk1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -29.000 -2.000 4.000 4.367 10.062 36.750 56
# - Boxpots of key variables by group:
# - Boxplot of pk1:
boxplot(pk1~fgroup,data=needle,
main='Boxplots of pk1 by Group',
ylab='pk1 -- Baseline Score')

# - Boxplot of pk5:
boxplot(pk5~fgroup,data=needle,
main='Boxplots of pk5 by Group',
ylab='pk5 -- 12 month Score')

# - Boxplot of ChangeFrom.pk1:
boxplot(ChangeFrom.pk1~fgroup,data=needle,
main='Boxplots of Change from Baseline to 12 months by Group',
ylab='pk1 - pk5 Change from Baseline to 12 months')
############################
## Analysis
############################
####Change Score - Two Sample t-test assuming equal variances
t.test(needle$ChangeFrom.pk1~needle$fgroup,var.equal=TRUE)
##
## Two Sample t-test
##
## data: needle$ChangeFrom.pk1 by needle$fgroup
## t = -2.932, df = 299, p-value = 0.003628
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.621294 -1.302774
## sample estimates:
## mean in group Control mean in group Treat
## 4.367262 8.329296
mean(Treat$ChangeFrom.pk1,na.rm=TRUE)-mean(Control$ChangeFrom.pk1,na.rm=TRUE)
## [1] 3.962034
####Follow up score - Two Sample t-test assuming equal variances
t.test(needle$pk5~needle$fgroup,var.equal=TRUE)
##
## Two Sample t-test
##
## data: needle$pk5 by needle$fgroup
## t = 3.4399, df = 299, p-value = 0.0006648
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.608793 9.584530
## sample estimates:
## mean in group Control mean in group Treat
## 22.34345 16.24679
mean(Control$pk5,na.rm=TRUE)-mean(Treat$pk5,na.rm=TRUE)
## [1] 6.096661
####Recall Simple Linear Regression
lm.slr<-lm(needle$pk5~needle$group)
summary(lm.slr)
##
## Call:
## lm(formula = needle$pk5 ~ needle$group)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.093 -9.997 -4.497 5.907 64.907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.343 1.296 17.24 < 2e-16 ***
## needle$group -6.097 1.772 -3.44 0.000665 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.34 on 299 degrees of freedom
## (100 observations deleted due to missingness)
## Multiple R-squared: 0.03807, Adjusted R-squared: 0.03485
## F-statistic: 11.83 on 1 and 299 DF, p-value: 0.0006648
confint(lm.slr)
## 2.5 % 97.5 %
## (Intercept) 19.79257 24.894331
## needle$group -9.58453 -2.608793
####ANCOVA model
lm.ancova<-lm(needle$pk5~needle$pk1+needle$group)
summary(lm.ancova)
##
## Call:
## lm(formula = needle$pk5 ~ needle$pk1 + needle$group)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.881 -6.115 -1.059 5.946 43.041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.45098 1.41672 2.436 0.015441 *
## needle$pk1 0.70730 0.04055 17.444 < 2e-16 ***
## needle$group -4.58684 1.25177 -3.664 0.000294 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.81 on 298 degrees of freedom
## (100 observations deleted due to missingness)
## Multiple R-squared: 0.5241, Adjusted R-squared: 0.5209
## F-statistic: 164.1 on 2 and 298 DF, p-value: < 2.2e-16
confint(lm.ancova)
## 2.5 % 97.5 %
## (Intercept) 0.6629281 6.2390340
## needle$pk1 0.6275041 0.7870946
## needle$group -7.0502728 -2.1234090
# plotting the ancova model
#install.packages("HH")
library(HH)
## Warning: package 'HH' was built under R version 4.0.3
## Loading required package: lattice
## Loading required package: grid
## Loading required package: latticeExtra
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Loading required package: gridExtra

# plotting ancova
ancovaplot(pk5~pk1+fgroup,data=needle)

# check homogeneity of slopes -- include interaction
ancovaplot(pk5~pk1*fgroup,data=needle)

# End of Program