Analysis of clinical trial data

# simulated input values
n = 100
mu = 100
sd = 10
mu.d = 20
age.mu = 50
age.sd = 10

# fix the seed for random number generation
set.seed(123)
# use "rnorm" to generate random normal
age = rnorm(n, age.mu, age.sd)
bp.base = rnorm(n,mu,sd)
bp.end = rnorm(n,mu,sd)

# take the difference between endpoint and baseline
bp.diff = bp.end-bp.base
# put the data together using "cbind" to column-bind
dat4placebo = round(cbind(age,bp.base,bp.end,bp.diff))

head(dat4placebo)

##      age bp.base bp.end bp.diff
## [1,]  44      93    122      29
## [2,]  48     103    113      11
## [3,]  66      98     97       0
## [4,]  51      97    105       9
## [5,]  51      90     96       5
## [6,]  67     100     95      -4

age = rnorm(n, age.mu, age.sd)
bp.base = rnorm(n,mu,sd)
bp.end = rnorm(n,mu-mu.d,sd)
bp.diff = bp.end-bp.base
dat4drug = round(cbind(age,bp.base,bp.end,bp.diff))

# make a dataframe to hold all data
dat = data.frame(rbind(dat4placebo,dat4drug))
# make "trt" as a factor for treatment.
dat$trt = as.factor(rep(c("Placebo", "Drug"), each=n))

# check the data dimension
dim(dat)

## [1] 200   5

# print the first 6 observations to see the variable names
head(dat)

##   age bp.base bp.end bp.diff     trt
## 1  44      93    122      29 Placebo
## 2  48     103    113      11 Placebo
## 3  66      98     97       0 Placebo
## 4  51      97    105       9 Placebo
## 5  51      90     96       5 Placebo
## 6  67     100     95      -4 Placebo

# call boxplot
boxplot(dat4placebo, las=1, main="Placebo")

boxplot(dat4drug, las=1, main="Drug")

#load the lattice library
library(lattice)

# call xyplot function and print it
print(xyplot(bp.diff~age|trt, data=dat,xlab="Age", strip=strip.custom(bg="white"),
ylab="Blood Pressure Difference",lwd=3,cex=1.3,pch=20,
type=c("p", "r")))

lm1 = lm(bp.diff~trt*age, data=dat)
summary(lm1)

## 
## Call:
## lm(formula = bp.diff ~ trt * age, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.975  -8.883   0.037   8.488  45.678 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    -17.627094   6.242472  -2.824  0.00524 **
## trtPlacebo      24.153635   9.586926   2.519  0.01255 * 
## age             -0.076284   0.123209  -0.619  0.53654   
## trtPlacebo:age  -0.006311   0.186973  -0.034  0.97311   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.8 on 196 degrees of freedom
## Multiple R-squared:  0.4684, Adjusted R-squared:  0.4602 
## F-statistic: 57.56 on 3 and 196 DF,  p-value: < 2.2e-16

# load the xtable library and call xtable to make the table
library(xtable)
print(xtable(lm1, caption="ANOVA table for simulated clinical trial data", label = "tab4RI.coef"),
table.placement = "htbp",caption.placement = "top")

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Mon Jan 01 22:05:24 2018
## \begin{table}[htbp]
## \centering
## \caption{ANOVA table for simulated clinical trial data} 
## \label{tab4RI.coef}
## \begin{tabular}{rrrrr}
##   \hline
##  & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
##   \hline
## (Intercept) & -17.6271 & 6.2425 & -2.82 & 0.0052 \\ 
##   trtPlacebo & 24.1536 & 9.5869 & 2.52 & 0.0126 \\ 
##   age & -0.0763 & 0.1232 & -0.62 & 0.5365 \\ 
##   trtPlacebo:age & -0.0063 & 0.1870 & -0.03 & 0.9731 \\ 
##    \hline
## \end{tabular}
## \end{table}

layout(matrix(1:4, nrow=2))
plot(lm1)

dat <- read.csv ("dat1.csv", header = TRUE)
# create the "diff"
dat$diff = dat$DBP5-dat$DBP1
# show first 6 observations using function "head"
head(dat)

##   subject TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex diff
## 1       1   A  114  115  113  109  105  43   F   -9
## 2       2   A  116  113  112  103  101  51   M  -15
## 3       3   A  119  115  113  104   98  48   F  -21
## 4       4   A  115  113  112  109  101  42   F  -14
## 5       5   A  116  112  107  104  105  49   M  -11
## 6       6   A  117  112  113  104  102  47   M  -15

# call boxplot
boxplot(diff~TRT, dat, xlab="Treatment", ylab="DBP Changes", las=1)

# call t-test with equal variance
t.test(diff~TRT, dat, var.equal=T)

## 
##  Two Sample t-test
## 
## data:  diff by TRT
## t = 0.70349, df = 38, p-value = 0.486
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.1024 12.6024
## sample estimates:
## mean in group A mean in group B 
##          -15.90          -19.15

t.test(diff~TRT, dat, var.equal=F)

## 
##  Welch Two Sample t-test
## 
## data:  diff by TRT
## t = 0.70349, df = 20.216, p-value = 0.4898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.380234 12.880234
## sample estimates:
## mean in group A mean in group B 
##          -15.90          -19.15

var.test(diff~TRT, dat)

## 
##  F test to compare two variances
## 
## data:  diff by TRT
## F = 0.032042, num df = 19, denom df = 19, p-value = 3.512e-10
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.01268243 0.08095136
## sample estimates:
## ratio of variances 
##         0.03204153

wilcox.test(diff~TRT, dat)

## Warning in wilcox.test.default(x = c(-9L, -15L, -21L, -14L, -11L, -15L, :
## cannot compute exact p-value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  diff by TRT
## W = 186, p-value = 0.7136
## alternative hypothesis: true location shift is not equal to 0

str(dat)

## 'data.frame':    40 obs. of  10 variables:
##  $ subject: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ TRT    : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DBP1   : int  114 116 119 115 116 117 118 120 114 115 ...
##  $ DBP2   : int  115 113 115 113 112 112 111 115 112 113 ...
##  $ DBP3   : int  113 112 113 112 107 113 100 113 113 108 ...
##  $ DBP4   : int  109 103 104 109 104 104 109 102 109 106 ...
##  $ DBP5   : int  105 101 98 101 105 102 99 102 103 97 ...
##  $ Age    : int  43 51 48 42 49 47 50 61 43 51 ...
##  $ Sex    : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
##  $ diff   : int  -9 -15 -21 -14 -11 -15 -19 -18 -11 -18 ...

# data from treatment A
diff.A = dat[dat$TRT=="A",]$diff
# data from treatment B
diff.B = dat[dat$TRT=="B",]$diff
# call t.test for one-sided test
t.test(diff.A, diff.B,alternative="less")

## 
##  Welch Two Sample t-test
## 
## data:  diff.A and diff.B
## t = 0.70349, df = 20.216, p-value = 0.7551
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf 11.21381
## sample estimates:
## mean of x mean of y 
##    -15.90    -19.15

library(bootstrap)
# define a function to calculate the mean difference
# between treatment groups A to B: i.e., A-B
mean.diff = function(bn,dat)
-diff(tapply(dat[bn,]$diff, dat[bn,]$TRT,mean))

# number of bootstrap
nboot = 1000
# call "bootstrap" function
boot.mean = bootstrap(1:dim(dat)[1], nboot, mean.diff,dat)

# extract the mean differences
x = boot.mean$thetastar
# calcualte the bootstrap quantiles
x.quantile = quantile(x, c(0.025,0.5, 0.975))
# show the quantiles
print(x.quantile)

##      2.5%       50%     97.5% 
## -2.980815  2.781313 13.484295

# make a histogram
hist(boot.mean$thetastar, xlab="Mean Differences", main="")
# add the vertical lines for the quantiles
abline(v=x.quantile,lwd=2, lty=c(4,1,4))

aggregate(dat[,3:7], list(TRT=dat$TRT), mean)

##   TRT  DBP1   DBP2   DBP3   DBP4   DBP5
## 1   A 116.9 113.10 111.00 106.00 101.00
## 2   B 116.5 113.65 109.95 105.25  97.35

# call reshape
Dat = reshape(dat, direction="long",
varying=c("DBP1","DBP2","DBP3","DBP4","DBP5"),
idvar = c("subject","TRT","Age","Sex","diff"),sep="")
colnames(Dat) = c("Subject","TRT","Age","Sex","diff","Time","DBP")
Dat$Time = as.factor(Dat$Time)
# show the first 6 observations
head(Dat)

##                Subject TRT Age Sex diff Time DBP
## 1.A.43.F.-9.1        1   A  43   F   -9    1 114
## 2.A.51.M.-15.1       2   A  51   M  -15    1 116
## 3.A.48.F.-21.1       3   A  48   F  -21    1 119
## 4.A.42.F.-14.1       4   A  42   F  -14    1 115
## 5.A.49.M.-11.1       5   A  49   M  -11    1 116
## 6.A.47.M.-15.1       6   A  47   M  -15    1 117

# test treatment "A"
datA = Dat[Dat$TRT=="A",]
test.A = aov(DBP~Time, datA)
summary(test.A)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## Time         4 3088.4   772.1   129.7 <2e-16 ***
## Residuals   95  565.6     6.0                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# test treatment "B"
datB = Dat[Dat$TRT=="B",]
test.B = aov(DBP~Time, datB)
summary(test.B)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Time         4   4550  1137.5    12.8 2.25e-08 ***
## Residuals   95   8441    88.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(test.A)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = DBP ~ Time, data = datA)
## 
## $Time
##      diff        lwr          upr     p adj
## 2-1  -3.8  -5.945718  -1.65428154 0.0000346
## 3-1  -5.9  -8.045718  -3.75428154 0.0000000
## 4-1 -10.9 -13.045718  -8.75428154 0.0000000
## 5-1 -15.9 -18.045718 -13.75428154 0.0000000
## 3-2  -2.1  -4.245718   0.04571846 0.0581512
## 4-2  -7.1  -9.245718  -4.95428154 0.0000000
## 5-2 -12.1 -14.245718  -9.95428154 0.0000000
## 4-3  -5.0  -7.145718  -2.85428154 0.0000000
## 5-3 -10.0 -12.145718  -7.85428154 0.0000000
## 5-4  -5.0  -7.145718  -2.85428154 0.0000000

TukeyHSD(test.B)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = DBP ~ Time, data = datB)
## 
## $Time
##       diff       lwr        upr     p adj
## 2-1  -2.85 -11.13915   5.439147 0.8738641
## 3-1  -6.55 -14.83915   1.739147 0.1896406
## 4-1 -11.25 -19.53915  -2.960853 0.0025373
## 5-1 -19.15 -27.43915 -10.860853 0.0000001
## 3-2  -3.70 -11.98915   4.589147 0.7272848
## 4-2  -8.40 -16.68915  -0.110853 0.0454011
## 5-2 -16.30 -24.58915  -8.010853 0.0000036
## 4-3  -4.70 -12.98915   3.589147 0.5158689
## 5-3 -12.60 -20.88915  -4.310853 0.0005126
## 5-4  -7.90 -16.18915   0.389147 0.0694288

mod2 = aov(DBP~ TRT*Time, Dat)
summary(mod2)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## TRT           1     56    56.2   1.185  0.278    
## Time          4   7540  1885.0  39.767 <2e-16 ***
## TRT:Time      4     98    24.6   0.519  0.722    
## Residuals   190   9006    47.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(2,1),mar=c(5,3,1,1))

with(Dat,interaction.plot(Time,TRT,DBP,las=1,legend=T))
with(Dat,interaction.plot(TRT,Time,DBP,las=1,legend=T))

TukeyHSD(aov(DBP ~ TRT*Time,Dat))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = DBP ~ TRT * Time, data = Dat)
## 
## $TRT
##      diff     lwr       upr    p adj
## B-A -1.06 -2.9806 0.8606005 0.277683
## 
## $Time
##        diff        lwr         upr     p adj
## 2-1  -3.325  -7.565057   0.9150569 0.1996809
## 3-1  -6.225 -10.465057  -1.9849431 0.0007230
## 4-1 -11.075 -15.315057  -6.8349431 0.0000000
## 5-1 -17.525 -21.765057 -13.2849431 0.0000000
## 3-2  -2.900  -7.140057   1.3400569 0.3296466
## 4-2  -7.750 -11.990057  -3.5099431 0.0000109
## 5-2 -14.200 -18.440057  -9.9599431 0.0000000
## 4-3  -4.850  -9.090057  -0.6099431 0.0160706
## 5-3 -11.300 -15.540057  -7.0599431 0.0000000
## 5-4  -6.450 -10.690057  -2.2099431 0.0004080
## 
## $`TRT:Time`
##           diff        lwr          upr     p adj
## B:1-A:1  -0.40  -7.370786   6.57078605 1.0000000
## A:2-A:1  -3.80 -10.770786   3.17078605 0.7679652
## B:2-A:1  -3.25 -10.220786   3.72078605 0.8936255
## A:3-A:1  -5.90 -12.870786   1.07078605 0.1772683
## B:3-A:1  -6.95 -13.920786   0.02078605 0.0513925
## A:4-A:1 -10.90 -17.870786  -3.92921395 0.0000545
## B:4-A:1 -11.65 -18.620786  -4.67921395 0.0000110
## A:5-A:1 -15.90 -22.870786  -8.92921395 0.0000000
## B:5-A:1 -19.55 -26.520786 -12.57921395 0.0000000
## A:2-B:1  -3.40 -10.370786   3.57078605 0.8646594
## B:2-B:1  -2.85  -9.820786   4.12078605 0.9505513
## A:3-B:1  -5.50 -12.470786   1.47078605 0.2614706
## B:3-B:1  -6.55 -13.520786   0.42078605 0.0853270
## A:4-B:1 -10.50 -17.470786  -3.52921395 0.0001239
## B:4-B:1 -11.25 -18.220786  -4.27921395 0.0000261
## A:5-B:1 -15.50 -22.470786  -8.52921395 0.0000000
## B:5-B:1 -19.15 -26.120786 -12.17921395 0.0000000
## B:2-A:2   0.55  -6.420786   7.52078605 0.9999999
## A:3-A:2  -2.10  -9.070786   4.87078605 0.9938368
## B:3-A:2  -3.15 -10.120786   3.82078605 0.9105948
## A:4-A:2  -7.10 -14.070786  -0.12921395 0.0420490
## B:4-A:2  -7.85 -14.820786  -0.87921395 0.0142236
## A:5-A:2 -12.10 -19.070786  -5.12921395 0.0000040
## B:5-A:2 -15.75 -22.720786  -8.77921395 0.0000000
## A:3-B:2  -2.65  -9.620786   4.32078605 0.9688374
## B:3-B:2  -3.70 -10.670786   3.27078605 0.7945858
## A:4-B:2  -7.65 -14.620786  -0.67921395 0.0192331
## B:4-B:2  -8.40 -15.370786  -1.42921395 0.0059328
## A:5-B:2 -12.65 -19.620786  -5.67921395 0.0000012
## B:5-B:2 -16.30 -23.270786  -9.32921395 0.0000000
## B:3-A:3  -1.05  -8.020786   5.92078605 0.9999774
## A:4-A:3  -5.00 -11.970786   1.97078605 0.3961621
## B:4-A:3  -5.75 -12.720786   1.22078605 0.2062506
## A:5-A:3 -10.00 -16.970786  -3.02921395 0.0003336
## B:5-A:3 -13.65 -20.620786  -6.67921395 0.0000001
## A:4-B:3  -3.95 -10.920786   3.02078605 0.7254591
## B:4-B:3  -4.70 -11.670786   2.27078605 0.4888512
## A:5-B:3  -8.95 -15.920786  -1.97921395 0.0023269
## B:5-B:3 -12.60 -19.570786  -5.62921395 0.0000013
## B:4-A:4  -0.75  -7.720786   6.22078605 0.9999988
## A:5-A:4  -5.00 -11.970786   1.97078605 0.3961621
## B:5-A:4  -8.65 -15.620786  -1.67921395 0.0039056
## A:5-B:4  -4.25 -11.220786   2.72078605 0.6334851
## B:5-B:4  -7.90 -14.870786  -0.92921395 0.0131718
## B:5-A:5  -3.65 -10.620786   3.32078605 0.8073222

n = c(168, 182, 165,188)
p4 = c(.41, .62, .73, .77)
x4 = c(69, 113, 120, 145)

prop.test(x4, n)

## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  x4 out of n
## X-squared = 57.799, df = 3, p-value = 1.735e-12
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4 
## 0.4107143 0.6208791 0.7272727 0.7712766

prop.test(x4[c(1,3)], n[c(1,3)])

## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  x4[c(1, 3)] out of n[c(1, 3)]
## X-squared = 32.71, df = 1, p-value = 1.07e-08
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.4233221 -0.2097948
## sample estimates:
##    prop 1    prop 2 
## 0.4107143 0.7272727

prop.test(x4[c(2,3)], n[c(2,3)])

## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  x4[c(2, 3)] out of n[c(2, 3)]
## X-squared = 3.9715, df = 1, p-value = 0.04628
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.210080057 -0.002707156
## sample estimates:
##    prop 1    prop 2 
## 0.6208791 0.7272727

prop.test(x4[c(3,4)], n[c(3,4)])

## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  x4[c(3, 4)] out of n[c(3, 4)]
## X-squared = 0.68929, df = 1, p-value = 0.4064
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.14037146  0.05236373
## sample estimates:
##    prop 1    prop 2 
## 0.7272727 0.7712766

# create a dataframe for the Ulcer trial
Ulcer = data.frame(
# use ``factor" to create the treatment factor
trt = factor(rep(c("0 mg C","400 mg C","800 mg C","1600 mg C"),
each=2),levels=c("0 mg C","400 mg C","800 mg C","1600 mg C") ),
Heal = c("Yes","No","Yes","No","Yes","No","Yes","No"),
y = c(x4[1],n[1]-x4[1],x4[2],n[2]-x4[2],x4[3],
n[3]-x4[3],x4[4],n[4]-x4[4]))
Ulcer

##         trt Heal   y
## 1    0 mg C  Yes  69
## 2    0 mg C   No  99
## 3  400 mg C  Yes 113
## 4  400 mg C   No  69
## 5  800 mg C  Yes 120
## 6  800 mg C   No  45
## 7 1600 mg C  Yes 145
## 8 1600 mg C   No  43

tab.Ulcer = xtabs(y~trt+Heal,Ulcer)
tab.Ulcer

##            Heal
## trt          No Yes
##   0 mg C     99  69
##   400 mg C   69 113
##   800 mg C   45 120
##   1600 mg C  43 145

# layout for the plot
par(mfrow=c(1,2), mar=c(4,2,1,1))
# call ``dotchart"
dotchart(tab.Ulcer)
# call ``mosaicplot"
mosaicplot(tab.Ulcer,color=T,las=1, main=" ", xlab="Treatment",ylab="Heal Status" )

margin.table(tab.Ulcer,1)

## trt
##    0 mg C  400 mg C  800 mg C 1600 mg C 
##       168       182       165       188

summary(tab.Ulcer)

## Call: xtabs(formula = y ~ trt + Heal, data = Ulcer)
## Number of cases in table: 703 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 57.8, df = 3, p-value = 1.735e-12

library(MASS)
library(multcomp)

## Loading required package: mvtnorm

## Loading required package: survival

## Loading required package: TH.data

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

library(scatterplot3d)
library(mvtnorm)

boxplot(DBP1~TRT, dat, las=1, xlab="Treatment", ylab="DBP at Baseline")
t.test(DBP1~TRT, dat)

## 
##  Welch Two Sample t-test
## 
## data:  DBP1 by TRT
## t = 0.43266, df = 29.284, p-value = 0.6684
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.490059  2.290059
## sample estimates:
## mean in group A mean in group B 
##           116.9           116.5

# call function table to make the 2 by 2 table
SexbyTRT = table(dat$TRT,dat$Sex)
# print it
SexbyTRT

##    
##      F  M
##   A  9 11
##   B 11  9

# call prop.test to test the difference
prop.test(SexbyTRT)

## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  SexbyTRT
## X-squared = 0.1, df = 1, p-value = 0.7518
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.4583441  0.2583441
## sample estimates:
## prop 1 prop 2 
##   0.45   0.55

# Fit the main effect model on "Sex" and "Age"
bm1=lm(DBP1~Sex+Age, dat)
# Show the result
summary(bm1)

## 
## Call:
## lm(formula = DBP1 ~ Sex + Age, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1867 -1.7229 -0.4227  2.0864  6.2503 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 118.71611    3.96640  29.930   <2e-16 ***
## SexM         -0.47269    0.93502  -0.506    0.616    
## Age          -0.03641    0.08064  -0.452    0.654    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.951 on 37 degrees of freedom
## Multiple R-squared:  0.0131, Adjusted R-squared:  -0.04025 
## F-statistic: 0.2455 on 2 and 37 DF,  p-value: 0.7836

# plot the Age to DBP1
plot(DBP1~Age,las=1,pch=as.character(Sex), dat,
xlab="Age", ylab="Baseline DBP")
# add the regression lines using ``abline"
abline(bm1$coef[1], bm1$coef[3],lwd=2, lty=1)
abline(bm1$coef[1]+bm1$coef[2], bm1$coef[3],lwd=2, lty=4)

# start with full model
m0 = lm(diff~TRT*Age*Sex, dat)
# stepwise model selection
m1 = step(m0)

## Start:  AIC=225.31
## diff ~ TRT * Age * Sex
## 
##               Df Sum of Sq    RSS    AIC
## - TRT:Age:Sex  1    29.551 7521.8 223.47
## <none>                     7492.3 225.31
## 
## Step:  AIC=223.47
## diff ~ TRT + Age + Sex + TRT:Age + TRT:Sex + Age:Sex
## 
##           Df Sum of Sq    RSS    AIC
## - Age:Sex  1    25.301 7547.1 221.60
## - TRT:Age  1    80.806 7602.6 221.90
## - TRT:Sex  1   250.657 7772.5 222.78
## <none>                 7521.8 223.47
## 
## Step:  AIC=221.6
## diff ~ TRT + Age + Sex + TRT:Age + TRT:Sex
## 
##           Df Sum of Sq    RSS    AIC
## - TRT:Age  1    61.769 7608.9 219.93
## - TRT:Sex  1   234.630 7781.8 220.83
## <none>                 7547.1 221.60
## 
## Step:  AIC=219.93
## diff ~ TRT + Age + Sex + TRT:Sex
## 
##           Df Sum of Sq    RSS    AIC
## - Age      1    42.504 7651.4 218.15
## - TRT:Sex  1   215.892 7824.8 219.05
## <none>                 7608.9 219.93
## 
## Step:  AIC=218.15
## diff ~ TRT + Sex + TRT:Sex
## 
##           Df Sum of Sq    RSS    AIC
## - TRT:Sex  1    218.88 7870.3 217.28
## <none>                 7651.4 218.15
## 
## Step:  AIC=217.28
## diff ~ TRT + Sex
## 
##        Df Sum of Sq    RSS    AIC
## - TRT   1    138.66 8009.0 215.98
## - Sex   1    240.06 8110.3 216.48
## <none>              7870.3 217.28
## 
## Step:  AIC=215.98
## diff ~ Sex
## 
##        Df Sum of Sq  RSS    AIC
## - Sex   1    207.03 8216 215.00
## <none>              8009 215.98
## 
## Step:  AIC=215
## diff ~ 1

# fit the reduced model
m2 = lm(diff~TRT+Age, dat)
# output the anova
anova(m2)

## Analysis of Variance Table
## 
## Response: diff
##           Df Sum Sq Mean Sq F value Pr(>F)
## TRT        1  105.6 105.625  0.4837 0.4911
## Age        1   31.1  31.059  0.1422 0.7082
## Residuals 37 8079.3 218.359

# output the model fit
summary(m2)

## 
## Call:
## lm(formula = diff ~ TRT + Age, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -83.789  -2.192   1.988   4.806  12.279 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23.2793    19.8434  -1.173    0.248
## TRTB         -3.4103     4.6922  -0.727    0.472
## Age           0.1526     0.4047   0.377    0.708
## 
## Residual standard error: 14.78 on 37 degrees of freedom
## Multiple R-squared:  0.01664,    Adjusted R-squared:  -0.03652 
## F-statistic: 0.313 on 2 and 37 DF,  p-value: 0.7332

plot(diff~Age,las=1,pch=as.character(TRT), dat,
xlab="Age", ylab="DBP Change")
abline(m2$coef[1], m2$coef[3],lwd=2, lty=1)
abline(m2$coef[1]+m2$coef[2], m2$coef[3],lwd=2, lty=4)

betablocker <- read.csv("betablocker.csv", header = TRUE)

# fit a logistic regression using glm
str(betablocker)

## 'data.frame':    44 obs. of  4 variables:
##  $ Deaths: int  3 14 11 127 27 6 152 48 37 188 ...
##  $ Total : int  39 116 93 1520 365 52 939 471 282 1921 ...
##  $ Center: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ TRT   : Factor w/ 2 levels "control","treated": 1 1 1 1 1 1 1 1 1 1 ...

betablocker$Center <- factor(betablocker$Center)
beta.glm = glm(cbind(Deaths,Total-Deaths)~TRT+Center, family=binomial,data=betablocker)
# print the model fitting
summary(beta.glm)

## 
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = binomial, 
##     data = betablocker)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.82826  -0.61775   0.00396   0.53502   1.92138  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.34930    0.42597  -5.515 3.48e-08 ***
## TRTtreated  -0.26097    0.04994  -5.225 1.74e-07 ***
## Center2      0.17389    0.48316   0.360   0.7189    
## Center3      0.24281    0.50043   0.485   0.6275    
## Center4     -0.03907    0.43093  -0.091   0.9278    
## Center5     -0.02168    0.44797  -0.048   0.9614    
## Center6      0.16851    0.53947   0.312   0.7548    
## Center7      0.59662    0.43080   1.385   0.1661    
## Center8      0.27148    0.43734   0.621   0.5348    
## Center9      0.38877    0.44625   0.871   0.3836    
## Center10     0.09580    0.42933   0.223   0.8234    
## Center11     0.05200    0.43632   0.119   0.9051    
## Center12     0.91535    0.44064   2.077   0.0378 *  
## Center13    -0.63570    0.47200  -1.347   0.1780    
## Center14    -0.30645    0.43749  -0.700   0.4836    
## Center15     1.00157    0.45052   2.223   0.0262 *  
## Center16     0.87990    0.44493   1.978   0.0480 *  
## Center17     0.39967    0.45728   0.874   0.3821    
## Center18    -0.56348    0.50586  -1.114   0.2653    
## Center19    -1.01443    0.54359  -1.866   0.0620 .  
## Center20     0.87595    0.44465   1.970   0.0488 *  
## Center21     0.19660    0.44355   0.443   0.6576    
## Center22    -0.58122    0.44514  -1.306   0.1917    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 332.993  on 43  degrees of freedom
## Residual deviance:  23.621  on 21  degrees of freedom
## AIC: 287.05
## 
## Number of Fisher Scoring iterations: 4

est.dp = sum(resid(beta.glm, type="pearson")^2)/beta.glm$df.res
est.dp

## [1] 1.121871

summary(beta.glm, dispersion=est.dp)

## 
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = binomial, 
##     data = betablocker)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.82826  -0.61775   0.00396   0.53502   1.92138  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.34930    0.45118  -5.207 1.92e-07 ***
## TRTtreated  -0.26097    0.05290  -4.933 8.09e-07 ***
## Center2      0.17389    0.51176   0.340   0.7340    
## Center3      0.24281    0.53005   0.458   0.6469    
## Center4     -0.03907    0.45643  -0.086   0.9318    
## Center5     -0.02168    0.47448  -0.046   0.9636    
## Center6      0.16851    0.57140   0.295   0.7681    
## Center7      0.59662    0.45630   1.308   0.1910    
## Center8      0.27148    0.46322   0.586   0.5578    
## Center9      0.38877    0.47266   0.823   0.4108    
## Center10     0.09580    0.45474   0.211   0.8331    
## Center11     0.05200    0.46215   0.113   0.9104    
## Center12     0.91535    0.46672   1.961   0.0498 *  
## Center13    -0.63570    0.49994  -1.272   0.2035    
## Center14    -0.30645    0.46338  -0.661   0.5084    
## Center15     1.00157    0.47718   2.099   0.0358 *  
## Center16     0.87990    0.47126   1.867   0.0619 .  
## Center17     0.39967    0.48434   0.825   0.4093    
## Center18    -0.56348    0.53579  -1.052   0.2930    
## Center19    -1.01443    0.57576  -1.762   0.0781 .  
## Center20     0.87595    0.47097   1.860   0.0629 .  
## Center21     0.19660    0.46980   0.418   0.6756    
## Center22    -0.58122    0.47148  -1.233   0.2177    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.121871)
## 
##     Null deviance: 332.993  on 43  degrees of freedom
## Residual deviance:  23.621  on 21  degrees of freedom
## AIC: 287.05
## 
## Number of Fisher Scoring iterations: 4

# fit quasi-likelihood for binomial data
beta.glm2 = glm(cbind(Deaths,Total- Deaths)~TRT+Center,
family=quasibinomial,data=betablocker)
# print the model fit
summary(beta.glm2)

## 
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = quasibinomial, 
##     data = betablocker)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.82826  -0.61775   0.00396   0.53502   1.92138  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.34930    0.45118  -5.207 3.68e-05 ***
## TRTtreated  -0.26097    0.05290  -4.933 7.02e-05 ***
## Center2      0.17389    0.51176   0.340   0.7374    
## Center3      0.24281    0.53005   0.458   0.6516    
## Center4     -0.03907    0.45643  -0.086   0.9326    
## Center5     -0.02168    0.47448  -0.046   0.9640    
## Center6      0.16851    0.57140   0.295   0.7710    
## Center7      0.59662    0.45630   1.308   0.2052    
## Center8      0.27148    0.46322   0.586   0.5641    
## Center9      0.38877    0.47266   0.823   0.4200    
## Center10     0.09580    0.45474   0.211   0.8352    
## Center11     0.05200    0.46215   0.113   0.9115    
## Center12     0.91535    0.46672   1.961   0.0632 .  
## Center13    -0.63570    0.49994  -1.272   0.2174    
## Center14    -0.30645    0.46338  -0.661   0.5156    
## Center15     1.00157    0.47718   2.099   0.0481 *  
## Center16     0.87990    0.47126   1.867   0.0759 .  
## Center17     0.39967    0.48434   0.825   0.4185    
## Center18    -0.56348    0.53579  -1.052   0.3049    
## Center19    -1.01443    0.57576  -1.762   0.0926 .  
## Center20     0.87595    0.47097   1.860   0.0770 .  
## Center21     0.19660    0.46980   0.418   0.6799    
## Center22    -0.58122    0.47148  -1.233   0.2313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.121871)
## 
##     Null deviance: 332.993  on 43  degrees of freedom
## Residual deviance:  23.621  on 21  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

polyps <- read.csv("polyps.csv", header = TRUE)
# Poisson Regression
m0.polyps = glm(number~trt*age, polyps, family=poisson())
# print the model fit
summary(m0.polyps)

## 
## Call:
## glm(formula = number ~ trt * age, family = poisson(), data = polyps)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.2406  -3.0403  -0.0865   1.4392   5.8490  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     3.261861   0.445995   7.314  2.6e-13 ***
## trtplacebo      1.257258   0.471626   2.666  0.00768 ** 
## age            -0.043033   0.019864  -2.166  0.03028 *  
## trtplacebo:age  0.004631   0.020823   0.222  0.82402    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 378.66  on 19  degrees of freedom
## Residual deviance: 179.49  on 16  degrees of freedom
## AIC: 275.83
## 
## Number of Fisher Scoring iterations: 5

est.dp = sum(resid(m0.polyps,
type="pearson")^2)/m0.polyps$df.res
est.dp

## [1] 11.37537

summary(m0.polyps, dispersion=est.dp)

## 
## Call:
## glm(formula = number ~ trt * age, family = poisson(), data = polyps)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.2406  -3.0403  -0.0865   1.4392   5.8490  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)     3.261861   1.504224   2.168   0.0301 *
## trtplacebo      1.257258   1.590673   0.790   0.4293  
## age            -0.043033   0.066997  -0.642   0.5207  
## trtplacebo:age  0.004631   0.070230   0.066   0.9474  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 11.37537)
## 
##     Null deviance: 378.66  on 19  degrees of freedom
## Residual deviance: 179.49  on 16  degrees of freedom
## AIC: 275.83
## 
## Number of Fisher Scoring iterations: 5

# refit the model without interaction
m1.polyps = glm(number~trt+age, polyps, family=poisson())
# estimate the dispersion parameter
est.dp = sum(resid(m1.polyps,
type="pearson")^2)/m1.polyps$df.res
# print the estimated dispersion parameter
est.dp

## [1] 10.72783

# print the model fit adjusting the over-dispersion
summary(m1.polyps, dispersion=est.dp)

## 
## Call:
## glm(formula = number ~ trt + age, family = poisson(), data = polyps)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.2212  -3.0536  -0.1802   1.4459   5.8301  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.16994    0.55094   5.754 8.73e-09 ***
## trtplacebo   1.35908    0.38532   3.527  0.00042 ***
## age         -0.03883    0.01951  -1.991  0.04651 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 10.72783)
## 
##     Null deviance: 378.66  on 19  degrees of freedom
## Residual deviance: 179.54  on 17  degrees of freedom
## AIC: 273.88
## 
## Number of Fisher Scoring iterations: 5

# fit the quasi Poisson
m2.polyps = glm(number~trt+age, polyps, family=quasipoisson())
# print the model fit
summary(m2.polyps)

## 
## Call:
## glm(formula = number ~ trt + age, family = quasipoisson(), data = polyps)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.2212  -3.0536  -0.1802   1.4459   5.8301  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.16994    0.55095   5.754 2.34e-05 ***
## trtplacebo   1.35908    0.38533   3.527  0.00259 ** 
## age         -0.03883    0.01951  -1.991  0.06284 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 10.72805)
## 
##     Null deviance: 378.66  on 19  degrees of freedom
## Residual deviance: 179.54  on 17  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

# load the MASS library
library(MASS)
# fit the negative-binomial model
m3.polyps = glm.nb(number~trt+age, polyps)
# print the model fit
summary(m3.polyps)

## 
## Call:
## glm.nb(formula = number ~ trt + age, data = polyps, init.theta = 1.719491, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.83270  -1.13898  -0.08851   0.33637   1.89785  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.15791    0.55753   5.664 1.48e-08 ***
## trtplacebo   1.36812    0.36903   3.707 0.000209 ***
## age         -0.03856    0.02095  -1.840 0.065751 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.7195) family taken to be 1)
## 
##     Null deviance: 36.734  on 19  degrees of freedom
## Residual deviance: 22.002  on 17  degrees of freedom
## AIC: 164.88
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.719 
##           Std. Err.:  0.607 
## 
##  2 x log-likelihood:  -156.880

Analysis of clinical trial data

Kushan De Silva

January 1, 2018