# simulated input values
n = 100
mu = 100
sd = 10
mu.d = 20
age.mu = 50
age.sd = 10
# fix the seed for random number generation
set.seed(123)
# use "rnorm" to generate random normal
age = rnorm(n, age.mu, age.sd)
bp.base = rnorm(n,mu,sd)
bp.end = rnorm(n,mu,sd)
# take the difference between endpoint and baseline
bp.diff = bp.end-bp.base
# put the data together using "cbind" to column-bind
dat4placebo = round(cbind(age,bp.base,bp.end,bp.diff))
head(dat4placebo)
## age bp.base bp.end bp.diff
## [1,] 44 93 122 29
## [2,] 48 103 113 11
## [3,] 66 98 97 0
## [4,] 51 97 105 9
## [5,] 51 90 96 5
## [6,] 67 100 95 -4
age = rnorm(n, age.mu, age.sd)
bp.base = rnorm(n,mu,sd)
bp.end = rnorm(n,mu-mu.d,sd)
bp.diff = bp.end-bp.base
dat4drug = round(cbind(age,bp.base,bp.end,bp.diff))
# make a dataframe to hold all data
dat = data.frame(rbind(dat4placebo,dat4drug))
# make "trt" as a factor for treatment.
dat$trt = as.factor(rep(c("Placebo", "Drug"), each=n))
# check the data dimension
dim(dat)
## [1] 200 5
# print the first 6 observations to see the variable names
head(dat)
## age bp.base bp.end bp.diff trt
## 1 44 93 122 29 Placebo
## 2 48 103 113 11 Placebo
## 3 66 98 97 0 Placebo
## 4 51 97 105 9 Placebo
## 5 51 90 96 5 Placebo
## 6 67 100 95 -4 Placebo
# call boxplot
boxplot(dat4placebo, las=1, main="Placebo")

boxplot(dat4drug, las=1, main="Drug")
#load the lattice library
library(lattice)

# call xyplot function and print it
print(xyplot(bp.diff~age|trt, data=dat,xlab="Age", strip=strip.custom(bg="white"),
ylab="Blood Pressure Difference",lwd=3,cex=1.3,pch=20,
type=c("p", "r")))

lm1 = lm(bp.diff~trt*age, data=dat)
summary(lm1)
##
## Call:
## lm(formula = bp.diff ~ trt * age, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.975 -8.883 0.037 8.488 45.678
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.627094 6.242472 -2.824 0.00524 **
## trtPlacebo 24.153635 9.586926 2.519 0.01255 *
## age -0.076284 0.123209 -0.619 0.53654
## trtPlacebo:age -0.006311 0.186973 -0.034 0.97311
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.8 on 196 degrees of freedom
## Multiple R-squared: 0.4684, Adjusted R-squared: 0.4602
## F-statistic: 57.56 on 3 and 196 DF, p-value: < 2.2e-16
# load the xtable library and call xtable to make the table
library(xtable)
print(xtable(lm1, caption="ANOVA table for simulated clinical trial data", label = "tab4RI.coef"),
table.placement = "htbp",caption.placement = "top")
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Mon Jan 01 22:05:24 2018
## \begin{table}[htbp]
## \centering
## \caption{ANOVA table for simulated clinical trial data}
## \label{tab4RI.coef}
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -17.6271 & 6.2425 & -2.82 & 0.0052 \\
## trtPlacebo & 24.1536 & 9.5869 & 2.52 & 0.0126 \\
## age & -0.0763 & 0.1232 & -0.62 & 0.5365 \\
## trtPlacebo:age & -0.0063 & 0.1870 & -0.03 & 0.9731 \\
## \hline
## \end{tabular}
## \end{table}
layout(matrix(1:4, nrow=2))
plot(lm1)

dat <- read.csv ("dat1.csv", header = TRUE)
# create the "diff"
dat$diff = dat$DBP5-dat$DBP1
# show first 6 observations using function "head"
head(dat)
## subject TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex diff
## 1 1 A 114 115 113 109 105 43 F -9
## 2 2 A 116 113 112 103 101 51 M -15
## 3 3 A 119 115 113 104 98 48 F -21
## 4 4 A 115 113 112 109 101 42 F -14
## 5 5 A 116 112 107 104 105 49 M -11
## 6 6 A 117 112 113 104 102 47 M -15
# call boxplot
boxplot(diff~TRT, dat, xlab="Treatment", ylab="DBP Changes", las=1)
# call t-test with equal variance
t.test(diff~TRT, dat, var.equal=T)
##
## Two Sample t-test
##
## data: diff by TRT
## t = 0.70349, df = 38, p-value = 0.486
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.1024 12.6024
## sample estimates:
## mean in group A mean in group B
## -15.90 -19.15
t.test(diff~TRT, dat, var.equal=F)
##
## Welch Two Sample t-test
##
## data: diff by TRT
## t = 0.70349, df = 20.216, p-value = 0.4898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.380234 12.880234
## sample estimates:
## mean in group A mean in group B
## -15.90 -19.15
var.test(diff~TRT, dat)
##
## F test to compare two variances
##
## data: diff by TRT
## F = 0.032042, num df = 19, denom df = 19, p-value = 3.512e-10
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.01268243 0.08095136
## sample estimates:
## ratio of variances
## 0.03204153
wilcox.test(diff~TRT, dat)
## Warning in wilcox.test.default(x = c(-9L, -15L, -21L, -14L, -11L, -15L, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: diff by TRT
## W = 186, p-value = 0.7136
## alternative hypothesis: true location shift is not equal to 0
str(dat)
## 'data.frame': 40 obs. of 10 variables:
## $ subject: int 1 2 3 4 5 6 7 8 9 10 ...
## $ TRT : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ DBP1 : int 114 116 119 115 116 117 118 120 114 115 ...
## $ DBP2 : int 115 113 115 113 112 112 111 115 112 113 ...
## $ DBP3 : int 113 112 113 112 107 113 100 113 113 108 ...
## $ DBP4 : int 109 103 104 109 104 104 109 102 109 106 ...
## $ DBP5 : int 105 101 98 101 105 102 99 102 103 97 ...
## $ Age : int 43 51 48 42 49 47 50 61 43 51 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
## $ diff : int -9 -15 -21 -14 -11 -15 -19 -18 -11 -18 ...
# data from treatment A
diff.A = dat[dat$TRT=="A",]$diff
# data from treatment B
diff.B = dat[dat$TRT=="B",]$diff
# call t.test for one-sided test
t.test(diff.A, diff.B,alternative="less")
##
## Welch Two Sample t-test
##
## data: diff.A and diff.B
## t = 0.70349, df = 20.216, p-value = 0.7551
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 11.21381
## sample estimates:
## mean of x mean of y
## -15.90 -19.15
library(bootstrap)
# define a function to calculate the mean difference
# between treatment groups A to B: i.e., A-B
mean.diff = function(bn,dat)
-diff(tapply(dat[bn,]$diff, dat[bn,]$TRT,mean))
# number of bootstrap
nboot = 1000
# call "bootstrap" function
boot.mean = bootstrap(1:dim(dat)[1], nboot, mean.diff,dat)
# extract the mean differences
x = boot.mean$thetastar
# calcualte the bootstrap quantiles
x.quantile = quantile(x, c(0.025,0.5, 0.975))
# show the quantiles
print(x.quantile)
## 2.5% 50% 97.5%
## -2.980815 2.781313 13.484295
# make a histogram
hist(boot.mean$thetastar, xlab="Mean Differences", main="")
# add the vertical lines for the quantiles
abline(v=x.quantile,lwd=2, lty=c(4,1,4))
aggregate(dat[,3:7], list(TRT=dat$TRT), mean)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5
## 1 A 116.9 113.10 111.00 106.00 101.00
## 2 B 116.5 113.65 109.95 105.25 97.35
# call reshape
Dat = reshape(dat, direction="long",
varying=c("DBP1","DBP2","DBP3","DBP4","DBP5"),
idvar = c("subject","TRT","Age","Sex","diff"),sep="")
colnames(Dat) = c("Subject","TRT","Age","Sex","diff","Time","DBP")
Dat$Time = as.factor(Dat$Time)
# show the first 6 observations
head(Dat)
## Subject TRT Age Sex diff Time DBP
## 1.A.43.F.-9.1 1 A 43 F -9 1 114
## 2.A.51.M.-15.1 2 A 51 M -15 1 116
## 3.A.48.F.-21.1 3 A 48 F -21 1 119
## 4.A.42.F.-14.1 4 A 42 F -14 1 115
## 5.A.49.M.-11.1 5 A 49 M -11 1 116
## 6.A.47.M.-15.1 6 A 47 M -15 1 117
# test treatment "A"
datA = Dat[Dat$TRT=="A",]
test.A = aov(DBP~Time, datA)
summary(test.A)
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 4 3088.4 772.1 129.7 <2e-16 ***
## Residuals 95 565.6 6.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# test treatment "B"
datB = Dat[Dat$TRT=="B",]
test.B = aov(DBP~Time, datB)
summary(test.B)
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 4 4550 1137.5 12.8 2.25e-08 ***
## Residuals 95 8441 88.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(test.A)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = DBP ~ Time, data = datA)
##
## $Time
## diff lwr upr p adj
## 2-1 -3.8 -5.945718 -1.65428154 0.0000346
## 3-1 -5.9 -8.045718 -3.75428154 0.0000000
## 4-1 -10.9 -13.045718 -8.75428154 0.0000000
## 5-1 -15.9 -18.045718 -13.75428154 0.0000000
## 3-2 -2.1 -4.245718 0.04571846 0.0581512
## 4-2 -7.1 -9.245718 -4.95428154 0.0000000
## 5-2 -12.1 -14.245718 -9.95428154 0.0000000
## 4-3 -5.0 -7.145718 -2.85428154 0.0000000
## 5-3 -10.0 -12.145718 -7.85428154 0.0000000
## 5-4 -5.0 -7.145718 -2.85428154 0.0000000
TukeyHSD(test.B)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = DBP ~ Time, data = datB)
##
## $Time
## diff lwr upr p adj
## 2-1 -2.85 -11.13915 5.439147 0.8738641
## 3-1 -6.55 -14.83915 1.739147 0.1896406
## 4-1 -11.25 -19.53915 -2.960853 0.0025373
## 5-1 -19.15 -27.43915 -10.860853 0.0000001
## 3-2 -3.70 -11.98915 4.589147 0.7272848
## 4-2 -8.40 -16.68915 -0.110853 0.0454011
## 5-2 -16.30 -24.58915 -8.010853 0.0000036
## 4-3 -4.70 -12.98915 3.589147 0.5158689
## 5-3 -12.60 -20.88915 -4.310853 0.0005126
## 5-4 -7.90 -16.18915 0.389147 0.0694288
mod2 = aov(DBP~ TRT*Time, Dat)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## TRT 1 56 56.2 1.185 0.278
## Time 4 7540 1885.0 39.767 <2e-16 ***
## TRT:Time 4 98 24.6 0.519 0.722
## Residuals 190 9006 47.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,1),mar=c(5,3,1,1))

with(Dat,interaction.plot(Time,TRT,DBP,las=1,legend=T))
with(Dat,interaction.plot(TRT,Time,DBP,las=1,legend=T))

TukeyHSD(aov(DBP ~ TRT*Time,Dat))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = DBP ~ TRT * Time, data = Dat)
##
## $TRT
## diff lwr upr p adj
## B-A -1.06 -2.9806 0.8606005 0.277683
##
## $Time
## diff lwr upr p adj
## 2-1 -3.325 -7.565057 0.9150569 0.1996809
## 3-1 -6.225 -10.465057 -1.9849431 0.0007230
## 4-1 -11.075 -15.315057 -6.8349431 0.0000000
## 5-1 -17.525 -21.765057 -13.2849431 0.0000000
## 3-2 -2.900 -7.140057 1.3400569 0.3296466
## 4-2 -7.750 -11.990057 -3.5099431 0.0000109
## 5-2 -14.200 -18.440057 -9.9599431 0.0000000
## 4-3 -4.850 -9.090057 -0.6099431 0.0160706
## 5-3 -11.300 -15.540057 -7.0599431 0.0000000
## 5-4 -6.450 -10.690057 -2.2099431 0.0004080
##
## $`TRT:Time`
## diff lwr upr p adj
## B:1-A:1 -0.40 -7.370786 6.57078605 1.0000000
## A:2-A:1 -3.80 -10.770786 3.17078605 0.7679652
## B:2-A:1 -3.25 -10.220786 3.72078605 0.8936255
## A:3-A:1 -5.90 -12.870786 1.07078605 0.1772683
## B:3-A:1 -6.95 -13.920786 0.02078605 0.0513925
## A:4-A:1 -10.90 -17.870786 -3.92921395 0.0000545
## B:4-A:1 -11.65 -18.620786 -4.67921395 0.0000110
## A:5-A:1 -15.90 -22.870786 -8.92921395 0.0000000
## B:5-A:1 -19.55 -26.520786 -12.57921395 0.0000000
## A:2-B:1 -3.40 -10.370786 3.57078605 0.8646594
## B:2-B:1 -2.85 -9.820786 4.12078605 0.9505513
## A:3-B:1 -5.50 -12.470786 1.47078605 0.2614706
## B:3-B:1 -6.55 -13.520786 0.42078605 0.0853270
## A:4-B:1 -10.50 -17.470786 -3.52921395 0.0001239
## B:4-B:1 -11.25 -18.220786 -4.27921395 0.0000261
## A:5-B:1 -15.50 -22.470786 -8.52921395 0.0000000
## B:5-B:1 -19.15 -26.120786 -12.17921395 0.0000000
## B:2-A:2 0.55 -6.420786 7.52078605 0.9999999
## A:3-A:2 -2.10 -9.070786 4.87078605 0.9938368
## B:3-A:2 -3.15 -10.120786 3.82078605 0.9105948
## A:4-A:2 -7.10 -14.070786 -0.12921395 0.0420490
## B:4-A:2 -7.85 -14.820786 -0.87921395 0.0142236
## A:5-A:2 -12.10 -19.070786 -5.12921395 0.0000040
## B:5-A:2 -15.75 -22.720786 -8.77921395 0.0000000
## A:3-B:2 -2.65 -9.620786 4.32078605 0.9688374
## B:3-B:2 -3.70 -10.670786 3.27078605 0.7945858
## A:4-B:2 -7.65 -14.620786 -0.67921395 0.0192331
## B:4-B:2 -8.40 -15.370786 -1.42921395 0.0059328
## A:5-B:2 -12.65 -19.620786 -5.67921395 0.0000012
## B:5-B:2 -16.30 -23.270786 -9.32921395 0.0000000
## B:3-A:3 -1.05 -8.020786 5.92078605 0.9999774
## A:4-A:3 -5.00 -11.970786 1.97078605 0.3961621
## B:4-A:3 -5.75 -12.720786 1.22078605 0.2062506
## A:5-A:3 -10.00 -16.970786 -3.02921395 0.0003336
## B:5-A:3 -13.65 -20.620786 -6.67921395 0.0000001
## A:4-B:3 -3.95 -10.920786 3.02078605 0.7254591
## B:4-B:3 -4.70 -11.670786 2.27078605 0.4888512
## A:5-B:3 -8.95 -15.920786 -1.97921395 0.0023269
## B:5-B:3 -12.60 -19.570786 -5.62921395 0.0000013
## B:4-A:4 -0.75 -7.720786 6.22078605 0.9999988
## A:5-A:4 -5.00 -11.970786 1.97078605 0.3961621
## B:5-A:4 -8.65 -15.620786 -1.67921395 0.0039056
## A:5-B:4 -4.25 -11.220786 2.72078605 0.6334851
## B:5-B:4 -7.90 -14.870786 -0.92921395 0.0131718
## B:5-A:5 -3.65 -10.620786 3.32078605 0.8073222
n = c(168, 182, 165,188)
p4 = c(.41, .62, .73, .77)
x4 = c(69, 113, 120, 145)
prop.test(x4, n)
##
## 4-sample test for equality of proportions without continuity
## correction
##
## data: x4 out of n
## X-squared = 57.799, df = 3, p-value = 1.735e-12
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4
## 0.4107143 0.6208791 0.7272727 0.7712766
prop.test(x4[c(1,3)], n[c(1,3)])
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: x4[c(1, 3)] out of n[c(1, 3)]
## X-squared = 32.71, df = 1, p-value = 1.07e-08
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.4233221 -0.2097948
## sample estimates:
## prop 1 prop 2
## 0.4107143 0.7272727
prop.test(x4[c(2,3)], n[c(2,3)])
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: x4[c(2, 3)] out of n[c(2, 3)]
## X-squared = 3.9715, df = 1, p-value = 0.04628
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.210080057 -0.002707156
## sample estimates:
## prop 1 prop 2
## 0.6208791 0.7272727
prop.test(x4[c(3,4)], n[c(3,4)])
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: x4[c(3, 4)] out of n[c(3, 4)]
## X-squared = 0.68929, df = 1, p-value = 0.4064
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.14037146 0.05236373
## sample estimates:
## prop 1 prop 2
## 0.7272727 0.7712766
# create a dataframe for the Ulcer trial
Ulcer = data.frame(
# use ``factor" to create the treatment factor
trt = factor(rep(c("0 mg C","400 mg C","800 mg C","1600 mg C"),
each=2),levels=c("0 mg C","400 mg C","800 mg C","1600 mg C") ),
Heal = c("Yes","No","Yes","No","Yes","No","Yes","No"),
y = c(x4[1],n[1]-x4[1],x4[2],n[2]-x4[2],x4[3],
n[3]-x4[3],x4[4],n[4]-x4[4]))
Ulcer
## trt Heal y
## 1 0 mg C Yes 69
## 2 0 mg C No 99
## 3 400 mg C Yes 113
## 4 400 mg C No 69
## 5 800 mg C Yes 120
## 6 800 mg C No 45
## 7 1600 mg C Yes 145
## 8 1600 mg C No 43
tab.Ulcer = xtabs(y~trt+Heal,Ulcer)
tab.Ulcer
## Heal
## trt No Yes
## 0 mg C 99 69
## 400 mg C 69 113
## 800 mg C 45 120
## 1600 mg C 43 145
# layout for the plot
par(mfrow=c(1,2), mar=c(4,2,1,1))
# call ``dotchart"
dotchart(tab.Ulcer)
# call ``mosaicplot"
mosaicplot(tab.Ulcer,color=T,las=1, main=" ", xlab="Treatment",ylab="Heal Status" )

margin.table(tab.Ulcer,1)
## trt
## 0 mg C 400 mg C 800 mg C 1600 mg C
## 168 182 165 188
summary(tab.Ulcer)
## Call: xtabs(formula = y ~ trt + Heal, data = Ulcer)
## Number of cases in table: 703
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 57.8, df = 3, p-value = 1.735e-12
library(MASS)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
library(scatterplot3d)
library(mvtnorm)
boxplot(DBP1~TRT, dat, las=1, xlab="Treatment", ylab="DBP at Baseline")
t.test(DBP1~TRT, dat)
##
## Welch Two Sample t-test
##
## data: DBP1 by TRT
## t = 0.43266, df = 29.284, p-value = 0.6684
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.490059 2.290059
## sample estimates:
## mean in group A mean in group B
## 116.9 116.5
# call function table to make the 2 by 2 table
SexbyTRT = table(dat$TRT,dat$Sex)
# print it
SexbyTRT
##
## F M
## A 9 11
## B 11 9
# call prop.test to test the difference
prop.test(SexbyTRT)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: SexbyTRT
## X-squared = 0.1, df = 1, p-value = 0.7518
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.4583441 0.2583441
## sample estimates:
## prop 1 prop 2
## 0.45 0.55
# Fit the main effect model on "Sex" and "Age"
bm1=lm(DBP1~Sex+Age, dat)
# Show the result
summary(bm1)
##
## Call:
## lm(formula = DBP1 ~ Sex + Age, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1867 -1.7229 -0.4227 2.0864 6.2503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 118.71611 3.96640 29.930 <2e-16 ***
## SexM -0.47269 0.93502 -0.506 0.616
## Age -0.03641 0.08064 -0.452 0.654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.951 on 37 degrees of freedom
## Multiple R-squared: 0.0131, Adjusted R-squared: -0.04025
## F-statistic: 0.2455 on 2 and 37 DF, p-value: 0.7836
# plot the Age to DBP1
plot(DBP1~Age,las=1,pch=as.character(Sex), dat,
xlab="Age", ylab="Baseline DBP")
# add the regression lines using ``abline"
abline(bm1$coef[1], bm1$coef[3],lwd=2, lty=1)
abline(bm1$coef[1]+bm1$coef[2], bm1$coef[3],lwd=2, lty=4)

# start with full model
m0 = lm(diff~TRT*Age*Sex, dat)
# stepwise model selection
m1 = step(m0)
## Start: AIC=225.31
## diff ~ TRT * Age * Sex
##
## Df Sum of Sq RSS AIC
## - TRT:Age:Sex 1 29.551 7521.8 223.47
## <none> 7492.3 225.31
##
## Step: AIC=223.47
## diff ~ TRT + Age + Sex + TRT:Age + TRT:Sex + Age:Sex
##
## Df Sum of Sq RSS AIC
## - Age:Sex 1 25.301 7547.1 221.60
## - TRT:Age 1 80.806 7602.6 221.90
## - TRT:Sex 1 250.657 7772.5 222.78
## <none> 7521.8 223.47
##
## Step: AIC=221.6
## diff ~ TRT + Age + Sex + TRT:Age + TRT:Sex
##
## Df Sum of Sq RSS AIC
## - TRT:Age 1 61.769 7608.9 219.93
## - TRT:Sex 1 234.630 7781.8 220.83
## <none> 7547.1 221.60
##
## Step: AIC=219.93
## diff ~ TRT + Age + Sex + TRT:Sex
##
## Df Sum of Sq RSS AIC
## - Age 1 42.504 7651.4 218.15
## - TRT:Sex 1 215.892 7824.8 219.05
## <none> 7608.9 219.93
##
## Step: AIC=218.15
## diff ~ TRT + Sex + TRT:Sex
##
## Df Sum of Sq RSS AIC
## - TRT:Sex 1 218.88 7870.3 217.28
## <none> 7651.4 218.15
##
## Step: AIC=217.28
## diff ~ TRT + Sex
##
## Df Sum of Sq RSS AIC
## - TRT 1 138.66 8009.0 215.98
## - Sex 1 240.06 8110.3 216.48
## <none> 7870.3 217.28
##
## Step: AIC=215.98
## diff ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 207.03 8216 215.00
## <none> 8009 215.98
##
## Step: AIC=215
## diff ~ 1
# fit the reduced model
m2 = lm(diff~TRT+Age, dat)
# output the anova
anova(m2)
## Analysis of Variance Table
##
## Response: diff
## Df Sum Sq Mean Sq F value Pr(>F)
## TRT 1 105.6 105.625 0.4837 0.4911
## Age 1 31.1 31.059 0.1422 0.7082
## Residuals 37 8079.3 218.359
# output the model fit
summary(m2)
##
## Call:
## lm(formula = diff ~ TRT + Age, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.789 -2.192 1.988 4.806 12.279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23.2793 19.8434 -1.173 0.248
## TRTB -3.4103 4.6922 -0.727 0.472
## Age 0.1526 0.4047 0.377 0.708
##
## Residual standard error: 14.78 on 37 degrees of freedom
## Multiple R-squared: 0.01664, Adjusted R-squared: -0.03652
## F-statistic: 0.313 on 2 and 37 DF, p-value: 0.7332
plot(diff~Age,las=1,pch=as.character(TRT), dat,
xlab="Age", ylab="DBP Change")
abline(m2$coef[1], m2$coef[3],lwd=2, lty=1)
abline(m2$coef[1]+m2$coef[2], m2$coef[3],lwd=2, lty=4)
betablocker <- read.csv("betablocker.csv", header = TRUE)
# fit a logistic regression using glm
str(betablocker)
## 'data.frame': 44 obs. of 4 variables:
## $ Deaths: int 3 14 11 127 27 6 152 48 37 188 ...
## $ Total : int 39 116 93 1520 365 52 939 471 282 1921 ...
## $ Center: int 1 2 3 4 5 6 7 8 9 10 ...
## $ TRT : Factor w/ 2 levels "control","treated": 1 1 1 1 1 1 1 1 1 1 ...
betablocker$Center <- factor(betablocker$Center)
beta.glm = glm(cbind(Deaths,Total-Deaths)~TRT+Center, family=binomial,data=betablocker)
# print the model fitting
summary(beta.glm)
##
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = binomial,
## data = betablocker)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.82826 -0.61775 0.00396 0.53502 1.92138
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.34930 0.42597 -5.515 3.48e-08 ***
## TRTtreated -0.26097 0.04994 -5.225 1.74e-07 ***
## Center2 0.17389 0.48316 0.360 0.7189
## Center3 0.24281 0.50043 0.485 0.6275
## Center4 -0.03907 0.43093 -0.091 0.9278
## Center5 -0.02168 0.44797 -0.048 0.9614
## Center6 0.16851 0.53947 0.312 0.7548
## Center7 0.59662 0.43080 1.385 0.1661
## Center8 0.27148 0.43734 0.621 0.5348
## Center9 0.38877 0.44625 0.871 0.3836
## Center10 0.09580 0.42933 0.223 0.8234
## Center11 0.05200 0.43632 0.119 0.9051
## Center12 0.91535 0.44064 2.077 0.0378 *
## Center13 -0.63570 0.47200 -1.347 0.1780
## Center14 -0.30645 0.43749 -0.700 0.4836
## Center15 1.00157 0.45052 2.223 0.0262 *
## Center16 0.87990 0.44493 1.978 0.0480 *
## Center17 0.39967 0.45728 0.874 0.3821
## Center18 -0.56348 0.50586 -1.114 0.2653
## Center19 -1.01443 0.54359 -1.866 0.0620 .
## Center20 0.87595 0.44465 1.970 0.0488 *
## Center21 0.19660 0.44355 0.443 0.6576
## Center22 -0.58122 0.44514 -1.306 0.1917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.993 on 43 degrees of freedom
## Residual deviance: 23.621 on 21 degrees of freedom
## AIC: 287.05
##
## Number of Fisher Scoring iterations: 4
est.dp = sum(resid(beta.glm, type="pearson")^2)/beta.glm$df.res
est.dp
## [1] 1.121871
summary(beta.glm, dispersion=est.dp)
##
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = binomial,
## data = betablocker)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.82826 -0.61775 0.00396 0.53502 1.92138
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.34930 0.45118 -5.207 1.92e-07 ***
## TRTtreated -0.26097 0.05290 -4.933 8.09e-07 ***
## Center2 0.17389 0.51176 0.340 0.7340
## Center3 0.24281 0.53005 0.458 0.6469
## Center4 -0.03907 0.45643 -0.086 0.9318
## Center5 -0.02168 0.47448 -0.046 0.9636
## Center6 0.16851 0.57140 0.295 0.7681
## Center7 0.59662 0.45630 1.308 0.1910
## Center8 0.27148 0.46322 0.586 0.5578
## Center9 0.38877 0.47266 0.823 0.4108
## Center10 0.09580 0.45474 0.211 0.8331
## Center11 0.05200 0.46215 0.113 0.9104
## Center12 0.91535 0.46672 1.961 0.0498 *
## Center13 -0.63570 0.49994 -1.272 0.2035
## Center14 -0.30645 0.46338 -0.661 0.5084
## Center15 1.00157 0.47718 2.099 0.0358 *
## Center16 0.87990 0.47126 1.867 0.0619 .
## Center17 0.39967 0.48434 0.825 0.4093
## Center18 -0.56348 0.53579 -1.052 0.2930
## Center19 -1.01443 0.57576 -1.762 0.0781 .
## Center20 0.87595 0.47097 1.860 0.0629 .
## Center21 0.19660 0.46980 0.418 0.6756
## Center22 -0.58122 0.47148 -1.233 0.2177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.121871)
##
## Null deviance: 332.993 on 43 degrees of freedom
## Residual deviance: 23.621 on 21 degrees of freedom
## AIC: 287.05
##
## Number of Fisher Scoring iterations: 4
# fit quasi-likelihood for binomial data
beta.glm2 = glm(cbind(Deaths,Total- Deaths)~TRT+Center,
family=quasibinomial,data=betablocker)
# print the model fit
summary(beta.glm2)
##
## Call:
## glm(formula = cbind(Deaths, Total - Deaths) ~ TRT + Center, family = quasibinomial,
## data = betablocker)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.82826 -0.61775 0.00396 0.53502 1.92138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.34930 0.45118 -5.207 3.68e-05 ***
## TRTtreated -0.26097 0.05290 -4.933 7.02e-05 ***
## Center2 0.17389 0.51176 0.340 0.7374
## Center3 0.24281 0.53005 0.458 0.6516
## Center4 -0.03907 0.45643 -0.086 0.9326
## Center5 -0.02168 0.47448 -0.046 0.9640
## Center6 0.16851 0.57140 0.295 0.7710
## Center7 0.59662 0.45630 1.308 0.2052
## Center8 0.27148 0.46322 0.586 0.5641
## Center9 0.38877 0.47266 0.823 0.4200
## Center10 0.09580 0.45474 0.211 0.8352
## Center11 0.05200 0.46215 0.113 0.9115
## Center12 0.91535 0.46672 1.961 0.0632 .
## Center13 -0.63570 0.49994 -1.272 0.2174
## Center14 -0.30645 0.46338 -0.661 0.5156
## Center15 1.00157 0.47718 2.099 0.0481 *
## Center16 0.87990 0.47126 1.867 0.0759 .
## Center17 0.39967 0.48434 0.825 0.4185
## Center18 -0.56348 0.53579 -1.052 0.3049
## Center19 -1.01443 0.57576 -1.762 0.0926 .
## Center20 0.87595 0.47097 1.860 0.0770 .
## Center21 0.19660 0.46980 0.418 0.6799
## Center22 -0.58122 0.47148 -1.233 0.2313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.121871)
##
## Null deviance: 332.993 on 43 degrees of freedom
## Residual deviance: 23.621 on 21 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
polyps <- read.csv("polyps.csv", header = TRUE)
# Poisson Regression
m0.polyps = glm(number~trt*age, polyps, family=poisson())
# print the model fit
summary(m0.polyps)
##
## Call:
## glm(formula = number ~ trt * age, family = poisson(), data = polyps)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2406 -3.0403 -0.0865 1.4392 5.8490
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.261861 0.445995 7.314 2.6e-13 ***
## trtplacebo 1.257258 0.471626 2.666 0.00768 **
## age -0.043033 0.019864 -2.166 0.03028 *
## trtplacebo:age 0.004631 0.020823 0.222 0.82402
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 378.66 on 19 degrees of freedom
## Residual deviance: 179.49 on 16 degrees of freedom
## AIC: 275.83
##
## Number of Fisher Scoring iterations: 5
est.dp = sum(resid(m0.polyps,
type="pearson")^2)/m0.polyps$df.res
est.dp
## [1] 11.37537
summary(m0.polyps, dispersion=est.dp)
##
## Call:
## glm(formula = number ~ trt * age, family = poisson(), data = polyps)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2406 -3.0403 -0.0865 1.4392 5.8490
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.261861 1.504224 2.168 0.0301 *
## trtplacebo 1.257258 1.590673 0.790 0.4293
## age -0.043033 0.066997 -0.642 0.5207
## trtplacebo:age 0.004631 0.070230 0.066 0.9474
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 11.37537)
##
## Null deviance: 378.66 on 19 degrees of freedom
## Residual deviance: 179.49 on 16 degrees of freedom
## AIC: 275.83
##
## Number of Fisher Scoring iterations: 5
# refit the model without interaction
m1.polyps = glm(number~trt+age, polyps, family=poisson())
# estimate the dispersion parameter
est.dp = sum(resid(m1.polyps,
type="pearson")^2)/m1.polyps$df.res
# print the estimated dispersion parameter
est.dp
## [1] 10.72783
# print the model fit adjusting the over-dispersion
summary(m1.polyps, dispersion=est.dp)
##
## Call:
## glm(formula = number ~ trt + age, family = poisson(), data = polyps)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2212 -3.0536 -0.1802 1.4459 5.8301
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.16994 0.55094 5.754 8.73e-09 ***
## trtplacebo 1.35908 0.38532 3.527 0.00042 ***
## age -0.03883 0.01951 -1.991 0.04651 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 10.72783)
##
## Null deviance: 378.66 on 19 degrees of freedom
## Residual deviance: 179.54 on 17 degrees of freedom
## AIC: 273.88
##
## Number of Fisher Scoring iterations: 5
# fit the quasi Poisson
m2.polyps = glm(number~trt+age, polyps, family=quasipoisson())
# print the model fit
summary(m2.polyps)
##
## Call:
## glm(formula = number ~ trt + age, family = quasipoisson(), data = polyps)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2212 -3.0536 -0.1802 1.4459 5.8301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.16994 0.55095 5.754 2.34e-05 ***
## trtplacebo 1.35908 0.38533 3.527 0.00259 **
## age -0.03883 0.01951 -1.991 0.06284 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 10.72805)
##
## Null deviance: 378.66 on 19 degrees of freedom
## Residual deviance: 179.54 on 17 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
# load the MASS library
library(MASS)
# fit the negative-binomial model
m3.polyps = glm.nb(number~trt+age, polyps)
# print the model fit
summary(m3.polyps)
##
## Call:
## glm.nb(formula = number ~ trt + age, data = polyps, init.theta = 1.719491,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.83270 -1.13898 -0.08851 0.33637 1.89785
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.15791 0.55753 5.664 1.48e-08 ***
## trtplacebo 1.36812 0.36903 3.707 0.000209 ***
## age -0.03856 0.02095 -1.840 0.065751 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.7195) family taken to be 1)
##
## Null deviance: 36.734 on 19 degrees of freedom
## Residual deviance: 22.002 on 17 degrees of freedom
## AIC: 164.88
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.719
## Std. Err.: 0.607
##
## 2 x log-likelihood: -156.880
