Importing Data

# Load the DIG demo dataset from its CSV export into `mydata`.
#   header = TRUE : the first row of the file holds the variable names
#   sep = ","     : fields are comma-separated
# No row.names argument is passed, so read.table()'s default row naming
# applies; pass row.names = <id column> if the id should become the row names.
# NOTE(review): the path is absolute and machine-specific -- adjust it to
# wherever dig_demo.csv lives on your system.
mydata <- read.table(
  "C:\\Users\\Administrator\\Dropbox\\SHI\\Course Materials\\Mindy\\Basic Biostatistics\\Lecture 1\\DIG\\dig_demo.csv",
  header = TRUE,
  sep = ","
)

Q1: What is the average (mean) BMI of participants in the DIG trial?

library(psych)
# Summary statistics for BMI over all subjects (n, mean, sd, median, trimmed
# mean, mad, min, max, range, skew, kurtosis, se).
# describe.by() is the deprecated name of the *grouped* summary (describeBy());
# no grouping variable is supplied here, so call describe() directly.
describe(mydata$BMI)
##    vars    n  mean   sd median trimmed  mad   min   max range skew
## X1    1 6799 27.11 5.19   26.5   26.72 4.46 14.45 62.66 48.22 1.04
##    kurtosis   se
## X1     2.55 0.06

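Q2: What is the 95% confidence interval of the estimated mean BMI? (It is reported as the "95 percent confidence interval" in the one-sample t-test output under Q3 below.)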

Q3: \(H_0\): mean BMI = 27 vs \(H_A\): mean BMI \(\neq\) 27

# One-sample, two-sided t-test of H0: mean BMI = 27.
# The defaults are spelled out: two-sided alternative and a 95% confidence
# interval for the mean.
t.test(
  mydata$BMI,
  mu = 27,
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  mydata$BMI
## t = 1.8228, df = 6798, p-value = 0.06838
## alternative hypothesis: true mean is not equal to 27
## 95 percent confidence interval:
##  26.99134 27.23824
## sample estimates:
## mean of x 
##  27.11479

Q4: \(H_0\): mean BMI = 25 vs \(H_A\): mean BMI \(\neq\) 25

# One-sample, two-sided t-test of H0: mean BMI = 25.
# The defaults are spelled out: two-sided alternative and a 95% confidence
# interval for the mean (the interval itself does not depend on mu).
t.test(
  mydata$BMI,
  mu = 25,
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  mydata$BMI
## t = 33.581, df = 6798, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
##  26.99134 27.23824
## sample estimates:
## mean of x 
##  27.11479

Q5: \(H_0\): mean BMI in men = mean BMI in women vs \(H_A\): mean BMI in men \(\neq\) mean BMI in women

# Two-sample, two-sided t-test comparing mean BMI between the SEX groups.
# var.equal = FALSE (the default) gives the Welch test, which does not assume
# equal variances in the two groups.
t.test(
  BMI ~ SEX,
  data = mydata,
  alternative = "two.sided",
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  BMI by SEX
## t = 0.63458, df = 2463.4, p-value = 0.5258
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2001769  0.3917229
## sample estimates:
## mean in group Female   mean in group Male 
##             27.18918             27.09341

Q6: How many subjects are needed in each group to detect a 0.1 difference in BMI between men and women (men=27.09, women=27.19), at a significance level of 5% and with 80% power?

library(pwr)
# Sample size per group for a two-sample t-test.
delta <- 0.1                  # difference in mean BMI to detect
sigma <- 5.19                 # standard deviation of BMI (sd from the Q1 summary)
effectsize <- delta / sigma   # standardized effect size d = delta / sigma
# n is left unspecified, so pwr.t.test() solves for it.
pwr.t.test(
  d = effectsize,
  sig.level = 0.05,
  power = 0.8,
  type = "two.sample",
  alternative = "two.sided"
)
## 
##      Two-sample t test power calculation 
## 
##               n = 42284.5
##               d = 0.01926782
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

Q7: Suppose the DIG trial is designed to detect a 0.1 difference in BMI between men and women (men=27.09, women=27.19), at a significance level of 5%. What is the power of this trial?

# Power of a two-sample t-test with 3400 subjects per group.
delta <- 0.1                  # difference in mean BMI to detect
sigma <- 5.19                 # standard deviation of BMI (sd from the Q1 summary)
effectsize <- delta / sigma   # standardized effect size d = delta / sigma
# power is left unspecified, so pwr.t.test() solves for it.
# NOTE(review): n = 3400 assumes two equally sized groups (about half of the
# 6799 subjects each); if the sex groups are unbalanced, pwr.t2n.test() with
# the actual group sizes would be more appropriate -- confirm.
pwr.t.test(
  n = 3400,
  d = effectsize,
  sig.level = 0.05,
  type = "two.sample",
  alternative = "two.sided"
)
## 
##      Two-sample t test power calculation 
## 
##               n = 3400
##               d = 0.01926782
##       sig.level = 0.05
##           power = 0.1248203
##     alternative = two.sided
## 
## NOTE: n is number in *each* group