# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 | Module 4 Day 1 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Birth Weight Data Example ---------------------------------
# The goal of this study was to identify risk factors
# associated with giving birth to a low birth weight baby
# (weighing less than 2500 grams). Data was collected on
# 189 women at a single US birthing center. Variables which
# were thought to be of importance to infant birth weight
# were age of the subject, weight of the subject at her
# last menstrual period, race, the subjects smoking status
# during pregnancy, and the number of clinic (MD) visits
# during the first trimester.
#
# Data Dictionary:
# ID Identification Code
# LOW Low Birth Weight (0=Birth Weight>=2500g;1=Birth Weight<2500g)
# AGE Age of the Mother in Years
# LWT Weight in Pounds at the Last Menstrual Period
# RACE Race (1=White,2=Black,3=Other)
# SMOKE Smoking Status During Pregnancy (1=Yes,0=No)
# PTL History of Premature Labor (0=None,1=One, etc.)
# HT History of Hypertension (1=Yes,0=No)
# UI Presence of Uterine Irritability (1=Yes,0=No)
# FTV Number of MD Visits During the First Trimester (0=None,1=One,2=Two,etc)
# BWT Birth Weight in Grams
## Download and load the Birth Weight Data dataset:
download.file("http://www.duke.edu/~sgrambow/crp241data/BirthWeightData.RData",
destfile = "BirthWeightData.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("BirthWeightData.RData")
# Create subset of mothers who smoke:
smoke <- subset(bwdata,bwdata$SMOKE==1)
# Compute descriptive statistics and check assumptions
# Note: Birthweight data is in grams, but the
# hypothesis test is in pounds. Need to
# convert --> 1 pound = 453.592 grams.
summary(smoke$BWT/453.592)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.563 5.226 6.119 6.114 7.156 9.343
# Histogram of birth weights:
hist(smoke$BWT/453.592,freq=FALSE,
main='Birth Weight Among Mothers Who Smoke',
xlab='Birth Weight (lbs)')
box()

# Boxplot of birth weights:
boxplot(smoke$BWT/453.592,
main='Birth Weight of Babies Among Mothers Who Smoke',
ylab='Birth Weight (lbs)')

# One Sample T-test for the Mean:
# (1) Compute 95% CI for the mean
t.test(smoke$BWT/453.592)
##
## One Sample t-test
##
## data: smoke$BWT/453.592
## t = 36.142, df = 73, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 5.776813 6.451106
## sample estimates:
## mean of x
## 6.11396
# (2) Compute test stat and p-value for H1: mu < 7
t.test(smoke$BWT/453.592,alternative="less",mu=7)
##
## One Sample t-test
##
## data: smoke$BWT/453.592
## t = -5.2377, df = 73, p-value = 7.551e-07
## alternative hypothesis: true mean is less than 7
## 95 percent confidence interval:
## -Inf 6.395789
## sample estimates:
## mean of x
## 6.11396
# Heart Failure Trial Data Example --------------------------
# Current AMA guidelines state that exercise is beneficial
# for patients with heart failure. However, there is limited
# clinical data to support this statement, especially among
# patients with chronic heart failure. Previously published
# studies have found evidence of a relationship between
# improvements in exercise tolerance and physiologic markers,
# suggesting potential mechanisms by which exercise training
# might improve survival and decrease morbidity. 2275 patients
# were randomized at 82 centers across the US in a 1:1 ratio
# to either an exercise training arm or to a usual care arm.
# Among many outcomes measured to evaluate the effects of
# exercise training on exercise tolerance, the trial measures
# peak oxygen uptake (peak Vo2) at baseline and at 3, and 12
# months post-randomization.
#
# Data Dictionary:
# SUBJNO Subject ID number
# TRT Randomize treatment arm [Usual Care; Exercise]
# ETIOLOGY Ischemic or non-ischemic etiology [1=Ischemic; 2=Non-ischemic]
# NONADHERE Non-adherence to treatment randomization [1=Yes; 0=NO]
# NYHACL NHYA class at randomization [2=II; 3=III; 4=IV]
# BESTLVEF Best available baseline LVEF
# AGE Age at randomization (years)
# BMI Body mass index at randomization
# SEX Subject sex [1=male; 2=female]
# RACE Self-reported race [1=black; 2=white; 3=other]
# DIABETES History of diabetes [1=Yes; 0=No]
# PEAKVO2B Peak VO2 on baseline CPX test (mL/kg/min)
# PEAKVO23 Peak VO2 on 3-month CPX test (mL/kg/min)
# PEAKVO212 Peak VO2 on 12-month CPX test (mL/kg/min)
## Download and load the Heart Failure Trial Data dataset:
download.file("http://www.duke.edu/~sgrambow/crp241data/HFCT_Data.RData",
destfile = "HFCT_Data.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("HFCT_Data.RData")
# Create subset for each treatment arm:
usual <- subset(hfdata,hfdata$TRT=='Usual Care')
exercise <- subset(hfdata,hfdata$TRT=='Exercise')
# Compute descriptive statistics and check assumptions by treatment arm:
summary(usual$PEAKVO212)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5.20 12.20 15.40 16.04 19.30 38.80 467
summary(exercise$PEAKVO212)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5.40 12.28 15.50 16.07 18.82 37.80 401
# Boxplots of cholesterol by case-control status
boxplot(hfdata$PEAKVO212~hfdata$TRT,
main='Peak Vo2 Levels at 12 Months by Treatment Arm',
ylab='Peak Vo2 on 12-month CPX test (mL/kg/min)')

# Two Sample T-test for Means:
# (1) Compute mean difference between two groups:
# (i.e. the point estimate)
mean(exercise$PEAKVO212,na.rm=TRUE)-mean(usual$PEAKVO212,na.rm=TRUE)
## [1] 0.03914795
# (2) Compute 95% CI, test statistic, and p-value for
# difference in means
# Option 1: Use ~ syntax
t.test(hfdata$PEAKVO212~hfdata$TRT)
##
## Welch Two Sample t-test
##
## data: hfdata$PEAKVO212 by hfdata$TRT
## t = 0.14527, df = 1394.9, p-value = 0.8845
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4894757 0.5677716
## sample estimates:
## mean in group Exercise mean in group Usual Care
## 16.07433 16.03519
# Option 2: Use , syntax
t.test(exercise$PEAKVO212,usual$PEAKVO212)
##
## Welch Two Sample t-test
##
## data: exercise$PEAKVO212 and usual$PEAKVO212
## t = 0.14527, df = 1394.9, p-value = 0.8845
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4894757 0.5677716
## sample estimates:
## mean of x mean of y
## 16.07433 16.03519
# - Note: Default in R is to assume variances are
# NOT equal, but can change that!
t.test(hfdata$PEAKVO212~hfdata$TRT,var.equal=TRUE)
##
## Two Sample t-test
##
## data: hfdata$PEAKVO212 by hfdata$TRT
## t = 0.14522, df = 1405, p-value = 0.8846
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4896537 0.5679496
## sample estimates:
## mean in group Exercise mean in group Usual Care
## 16.07433 16.03519
# End of Program