Format: RMarkdown; data are read from an Excel file.
## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, echo = TRUE,
tidy = FALSE, fig.width = 8, fig.height = 7)
options(width = 116, scipen = 10)
setwd("~/statistics/bio208/")
## Read xls file
library(XLConnect)
wb <- loadWorkbook("~/statistics/bio208/survses.xls")
survses <- readWorksheet(wb, sheet = 1)
names(survses) <- tolower(names(survses))
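## XLConnect requires Java; if that is a problem, readxl can read the same
## sheet (a sketch, not part of the original notes):
## library(readxl)
## survses <- as.data.frame(read_excel("~/statistics/bio208/survses.xls", sheet = 1))
## names(survses) <- tolower(names(survses))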
RxC Contingency Tables
## xtabs() function creates a simple cross table
tab.income.educat <- xtabs(data = survses, ~ income + educat)
tab.income.educat
educat
income 1 2 3 4
1 128 104 12 4
2 129 216 34 5
3 24 112 48 14
4 4 27 39 17
## Page 190: Example: CrossTable() function turns a simple table into a SAS-like one
## chisq = TRUE option adds chi-squared test
library(gmodels)
CrossTable(tab.income.educat, chisq = TRUE)
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 917
| educat
income | 1 | 2 | 3 | 4 | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|
1 | 128 | 104 | 12 | 4 | 248 |
| 33.643 | 3.266 | 15.973 | 4.297 | |
| 0.516 | 0.419 | 0.048 | 0.016 | 0.270 |
| 0.449 | 0.227 | 0.090 | 0.100 | |
| 0.140 | 0.113 | 0.013 | 0.004 | |
-------------|-----------|-----------|-----------|-----------|-----------|
2 | 129 | 216 | 34 | 5 | 384 |
| 0.781 | 2.945 | 8.451 | 8.243 | |
| 0.336 | 0.562 | 0.089 | 0.013 | 0.419 |
| 0.453 | 0.471 | 0.256 | 0.125 | |
| 0.141 | 0.236 | 0.037 | 0.005 | |
-------------|-----------|-----------|-----------|-----------|-----------|
3 | 24 | 112 | 48 | 14 | 198 |
| 22.898 | 1.677 | 12.947 | 3.330 | |
| 0.121 | 0.566 | 0.242 | 0.071 | 0.216 |
| 0.084 | 0.244 | 0.361 | 0.350 | |
| 0.026 | 0.122 | 0.052 | 0.015 | |
-------------|-----------|-----------|-----------|-----------|-----------|
4 | 4 | 27 | 39 | 17 | 87 |
| 19.631 | 6.288 | 55.157 | 45.948 | |
| 0.046 | 0.310 | 0.448 | 0.195 | 0.095 |
| 0.014 | 0.059 | 0.293 | 0.425 | |
| 0.004 | 0.029 | 0.043 | 0.019 | |
-------------|-----------|-----------|-----------|-----------|-----------|
Column Total | 285 | 459 | 133 | 40 | 917 |
| 0.311 | 0.501 | 0.145 | 0.044 | |
-------------|-----------|-----------|-----------|-----------|-----------|
Statistics for All Table Factors
Pearson's Chi-squared test
------------------------------------------------------------
Chi^2 = 245.5 d.f. = 9 p = 8.998e-48
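As a cross-check, the chi-squared statistic can be reproduced from first principles (a minimal sketch, not in the original notes):
## Cross-check (sketch): chi-squared from observed and expected counts
obs <- tab.income.educat
expected <- outer(rowSums(obs), colSums(obs)) / sum(obs)  # expected under independence
x2 <- sum((obs - expected)^2 / expected)
x2                                                        # should match Chi^2 = 245.5
pchisq(x2, df = (nrow(obs) - 1) * (ncol(obs) - 1), lower.tail = FALSE)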
Trend Tests
## Page 195: Example: Create maternal smoking and malformation data (fake data?)
malf <-
matrix(c(889,1988,
182, 426,
203, 420,
55, 86,
40, 48),
ncol = 2, byrow = TRUE,
dimnames = list(cig.daily = c("0","1-10","11-20","21-30","above31"),
outcome = c("Malf","No Malf.")))
## Show as table
malf
outcome
cig.daily Malf No Malf.
0 889 1988
1-10 182 426
11-20 203 420
21-30 55 86
above31 40 48
## The addmargins() function does what it sounds like: it adds marginal totals
addmargins(malf)
outcome
cig.daily Malf No Malf. Sum
0 889 1988 2877
1-10 182 426 608
11-20 203 420 623
21-30 55 86 141
above31 40 48 88
Sum 1369 2968 4337
## Usual chi-squared test without continuity correction
result.chisq <- chisq.test(malf, correct = FALSE)
result.chisq
Pearson's Chi-squared test
data: malf
X-squared = 13.11, df = 4, p-value = 0.01075
## Expected values can be obtained via the $expected element
result.chisq$expected
outcome
cig.daily Malf No Malf.
0 908.14 1968.86
1-10 191.92 416.08
11-20 196.65 426.35
21-30 44.51 96.49
above31 27.78 60.22
## Trend test
num.total <- rowSums(malf) # Total number in each bin
num.malf <- malf[,1] # Malformation number in each bin
## Page 196: weights: 0 1 2 3 4
prop.trend.test(x = num.malf, n = num.total, score = c(0:4))
Chi-squared Test for Trend in Proportions
data: num.malf out of num.total ,
using scores: 0 1 2 3 4
X-squared = 7.649, df = 1, p-value = 0.00568
## Page 196: weights: 0 5 15 25 35
prop.trend.test(x = num.malf, n = num.total, score = c(0,5,15,25,35))
Chi-squared Test for Trend in Proportions
data: num.malf out of num.total ,
using scores: 0 5 15 25 35
X-squared = 9.344, df = 1, p-value = 0.002237
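prop.trend.test() implements the Cochran-Armitage trend statistic, which can also be written out by hand (a sketch, shown here with the 0-4 scores):
## Sketch: Cochran-Armitage trend statistic by hand (scores 0-4)
s     <- 0:4
N     <- sum(num.total)
p     <- sum(num.malf) / N                       # overall proportion malformed
s.bar <- sum(num.total * s) / N                  # count-weighted mean score
Tstat <- sum(num.malf * (s - s.bar))
V     <- p * (1 - p) * sum(num.total * (s - s.bar)^2)
Tstat^2 / V                                      # should match X-squared = 7.649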
## Illness (IPS 2,3,4) x income example
survses.dead <- survses[survses$dead != 0,] # Delete rows where dead is 0.
## Recode with library(car)'s recode() function
## Create new variable sick: 1 if ips is 2, 3, or 4; 0 if ips is 0 or 1
## Create new variable newedu by swapping categories 3 and 4 (for the non-linear trend example)
survses.dead <- within(survses.dead, {
sick <- car::recode(ips, "c(0,1) = 0; c(2,3,4) = 1") # 0 and 1 changed to 0
newedu <- car::recode(educat, "4 = 3; 3 = 4") # categories 3 and 4 swapped
})
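A quick cross-tabulation of each new variable against its source confirms the recoding (a sketch):
## Sanity check (sketch): recoded variables against their sources
with(survses.dead, table(ips, sick))       # sick = 1 only for ips 2-4
with(survses.dead, table(educat, newedu))  # categories 3 and 4 swapped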
## Create a table and show it with margins
tab.sick.income <- xtabs(data = survses.dead, ~ sick + income)
addmargins(tab.sick.income)
income
sick 1 2 3 4 Sum
0 131 204 123 62 520
1 90 106 45 13 254
Sum 221 310 168 75 774
## Extract the 2nd row (sick = 1) and the column totals
num.sick <- tab.sick.income[2,]
num.total <- colSums(tab.sick.income)
## Page 199: Example 1: sick vs income linear trend
prop.trend.test(x = num.sick, n = num.total, score = 1:4)
Chi-squared Test for Trend in Proportions
data: num.sick out of num.total ,
using scores: 1 2 3 4
X-squared = 17.32, df = 1, p-value = 0.00003166
## Page 200: Example 2: Linear trend
tab.sick.educat <- xtabs(data = survses.dead, ~ sick + educat)
prop.trend.test(x = tab.sick.educat[2,], n = colSums(tab.sick.educat), score = 1:4)
Chi-squared Test for Trend in Proportions
data: tab.sick.educat[2, ] out of colSums(tab.sick.educat) ,
using scores: 1 2 3 4
X-squared = 7.678, df = 1, p-value = 0.005591
## Page 201: Example 3: Non-linear trend
tab.sick.newedu <- xtabs(data = survses.dead, ~ sick + newedu)
prop.trend.test(x = tab.sick.newedu[2,], n = colSums(tab.sick.newedu), score = 1:4)
Chi-squared Test for Trend in Proportions
data: tab.sick.newedu[2, ] out of colSums(tab.sick.newedu) ,
using scores: 1 2 3 4
X-squared = 12.15, df = 1, p-value = 0.0004913
Stratified Tests
smoking.stratified <-
array(c( 8, 22, 16, 44,
63, 7, 36, 4),
dim = c(2,2,2),
dimnames = list(
alcohol = c("Yes","No"),
MI = c("Yes","No"),
Smoking = c("Non-Smokers", "Smokers")
))
smoking.crude <- addmargins(smoking.stratified, margin = 3)[,,3]
## Page 202: Crude table: OR 2.26
library(epiR)
epi.2by2(smoking.crude, units = 1)
Disease + Disease - Total Inc risk * Odds
Exposed + 71 52 123 0.577 1.365
Exposed - 29 48 77 0.377 0.604
Total 100 100 200 0.500 1.000
Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio 1.53 (1.11, 2.12)
Odds ratio 2.26 (1.26, 4.05)
Attrib risk * 0.2 (0.06, 0.34)
Attrib risk in population * 0.12 (-0.01, 0.25)
Attrib fraction in exposed (%) 34.75 (9.72, 52.85)
Attrib fraction in population (%) 24.68 (4.99, 40.28)
---------------------------------------------------------
* Cases per population unit
## Chi-squared test is significant
chisq.test(smoking.crude, correct = F)
Pearson's Chi-squared test
data: smoking.crude
X-squared = 7.623, df = 1, p-value = 0.005762
## Page 203: Stratified tables: OR 1 in each. alply() applies a function over an Array and returns a List
library(plyr)
res.stratified <-
alply(.data = smoking.stratified,
.margins = 3,
epi.2by2,
units = 1)
Disease + Disease - Total Inc risk * Odds
Exposed + 8 16 24 0.333 0.5
Exposed - 22 44 66 0.333 0.5
Total 30 60 90 0.333 0.5
Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio 1 (0.52, 1.94)
Odds ratio 1 (0.37, 2.69)
Attrib risk * 0 (-0.22, 0.22)
Attrib risk in population * 0 (-0.15, 0.15)
Attrib fraction in exposed (%) 0 (-93.62, 48.35)
Attrib fraction in population (%) 0 (-19.27, 16.15)
---------------------------------------------------------
* Cases per population unit
Disease + Disease - Total Inc risk * Odds
Exposed + 63 36 99 0.636 1.75
Exposed - 7 4 11 0.636 1.75
Total 70 40 110 0.636 1.75
Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio 1 (0.62, 1.6)
Odds ratio 1 (0.27, 3.65)
Attrib risk * 0 (-0.3, 0.3)
Attrib risk in population * 0 (-0.3, 0.3)
Attrib fraction in exposed (%) 0 (-60.14, 37.55)
Attrib fraction in population (%) 0 (-52.77, 34.54)
---------------------------------------------------------
* Cases per population unit
## Chi-squared test is not significant in either stratum
res.chisq <-
alply(.data = smoking.stratified,
.margins = 3,
chisq.test,
correct = F)
lapply(res.chisq, invisible) # returns a plain list, so only the tests print
$`1`
Pearson's Chi-squared test
data: piece
X-squared = 0, df = 1, p-value = 1
$`2`
Pearson's Chi-squared test
data: piece
X-squared = 0, df = 1, p-value = 1
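To make the contrast with the crude analysis explicit, a Mantel-Haenszel test on the stratified array (a sketch; the function is introduced formally below) should show no adjusted association:
## Sketch: Mantel-Haenszel test adjusting for smoking; expect a common OR near 1
mantelhaen.test(smoking.stratified, correct = FALSE)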
## Page 209: Example 1: Dichotomized IPS by dichotomized education stratified by sex
## Create lowedu: 1 if educat < 3, 0 otherwise
survses.dead$lowedu <- as.numeric(survses.dead$educat < 3)
## Create stratified data
tab.lowedu.sick.sex <- xtabs(data = survses.dead, ~ lowedu + sick + sex)
tab.lowedu.sick.sex
, , sex = 1
sick
lowedu 0 1
0 84 19
1 292 156
, , sex = 2
sick
lowedu 0 1
0 39 15
1 161 97
## Page 212: Mantel-Haenszel test
mantelhaen.test(tab.lowedu.sick.sex, correct = FALSE)
Mantel-Haenszel chi-squared test without continuity correction
data: tab.lowedu.sick.sex
Mantel-Haenszel X-squared = 11.5, df = 1, p-value = 0.0006976
alternative hypothesis: true common odds ratio is not equal to 1
95 percent confidence interval:
1.338 3.039
sample estimates:
common odds ratio
2.016
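The common odds ratio is the ratio of the stratum-wise sums of a*d/n and b*c/n; a minimal sketch:
## Sketch: Mantel-Haenszel common OR by hand
t3 <- tab.lowedu.sick.sex
n  <- apply(t3, 3, sum)                # stratum totals
sum(t3[2, 2, ] * t3[1, 1, ] / n) /     # sum of a*d/n over strata
  sum(t3[2, 1, ] * t3[1, 2, ] / n)     # sum of b*c/n over strata; ~2.016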
## Crude and adjusted at a glance
epi.2by2(tab.lowedu.sick.sex, units = 1)
Disease + Disease - Total Inc risk * Odds
Exposed + 123 34 157 0.783 3.62
Exposed - 453 253 706 0.642 1.79
Total 576 287 863 0.667 2.01
Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio (crude) 1.22 (1.11, 1.35)
Inc risk ratio (M-H) 1.22 (1.1, 1.35)
Inc risk ratio (crude:M-H) 1
Odds ratio (crude) 2.02 (1.34, 3.04)
Odds ratio (M-H) 2.02 (1.23, 3.31)
Odds ratio (crude:M-H) 1
Attrib risk (crude) * 0.14 (0.07, 0.22)
Attrib risk (M-H) * 0.14 (-0.04, 0.32)
Attrib risk (crude:M-H) 1.01
---------------------------------------------------------
* Cases per population unit
## Page 213: Example 2: IPS by education, stratified by sex
tab.educat.ips.sex <- xtabs(data = survses.dead, ~ educat + ips + sex)
tab.educat.ips.sex
, , sex = 1
ips
educat 0 1 2 3 4
1 50 75 37 22 8
2 61 106 57 30 2
3 23 43 7 3 2
4 8 10 4 1 2
, , sex = 2
ips
educat 0 1 2 3 4
1 18 38 30 8 2
2 36 69 32 22 3
3 7 25 9 1 1
4 4 3 3 0 1
## More decorative table
junk <-
alply(.data = tab.educat.ips.sex,
.margins = 3,
.fun = CrossTable,
prop.chisq = FALSE,
prop.t = FALSE)
Cell Contents
|-------------------------|
| N |
| N / Row Total |
| N / Col Total |
|-------------------------|
Total Observations in Table: 551
| ips
educat | 0 | 1 | 2 | 3 | 4 | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
1 | 50 | 75 | 37 | 22 | 8 | 192 |
| 0.260 | 0.391 | 0.193 | 0.115 | 0.042 | 0.348 |
| 0.352 | 0.321 | 0.352 | 0.393 | 0.571 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
2 | 61 | 106 | 57 | 30 | 2 | 256 |
| 0.238 | 0.414 | 0.223 | 0.117 | 0.008 | 0.465 |
| 0.430 | 0.453 | 0.543 | 0.536 | 0.143 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
3 | 23 | 43 | 7 | 3 | 2 | 78 |
| 0.295 | 0.551 | 0.090 | 0.038 | 0.026 | 0.142 |
| 0.162 | 0.184 | 0.067 | 0.054 | 0.143 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
4 | 8 | 10 | 4 | 1 | 2 | 25 |
| 0.320 | 0.400 | 0.160 | 0.040 | 0.080 | 0.045 |
| 0.056 | 0.043 | 0.038 | 0.018 | 0.143 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
Column Total | 142 | 234 | 105 | 56 | 14 | 551 |
| 0.258 | 0.425 | 0.191 | 0.102 | 0.025 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
Cell Contents
|-------------------------|
| N |
| N / Row Total |
| N / Col Total |
|-------------------------|
Total Observations in Table: 312
| ips
educat | 0 | 1 | 2 | 3 | 4 | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
1 | 18 | 38 | 30 | 8 | 2 | 96 |
| 0.188 | 0.396 | 0.312 | 0.083 | 0.021 | 0.308 |
| 0.277 | 0.281 | 0.405 | 0.258 | 0.286 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
2 | 36 | 69 | 32 | 22 | 3 | 162 |
| 0.222 | 0.426 | 0.198 | 0.136 | 0.019 | 0.519 |
| 0.554 | 0.511 | 0.432 | 0.710 | 0.429 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
3 | 7 | 25 | 9 | 1 | 1 | 43 |
| 0.163 | 0.581 | 0.209 | 0.023 | 0.023 | 0.138 |
| 0.108 | 0.185 | 0.122 | 0.032 | 0.143 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
4 | 4 | 3 | 3 | 0 | 1 | 11 |
| 0.364 | 0.273 | 0.273 | 0.000 | 0.091 | 0.035 |
| 0.062 | 0.022 | 0.041 | 0.000 | 0.143 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
Column Total | 65 | 135 | 74 | 31 | 7 | 312 |
| 0.208 | 0.433 | 0.237 | 0.099 | 0.022 | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
## General association
mantelhaen.test(tab.educat.ips.sex)
Cochran-Mantel-Haenszel test
data: tab.educat.ips.sex
Cochran-Mantel-Haenszel M^2 = 30.76, df = 12, p-value = 0.002145
Log-Rank Tests
## IPS = 0 only
survses.ips0 <- subset(survses, ips == 0 & !is.na(ips))
library(survival)
survses.fit <- survfit(Surv(time, dead) ~ sex, data = survses.ips0)
## use summary(survses.fit) for more detailed output
survses.fit
Call: survfit(formula = Surv(time, dead) ~ sex, data = survses.ips0)
records n.max n.start events median 0.95LCL 0.95UCL
sex=1 355 355 355 261 451 392 507
sex=2 192 192 192 127 590 468 695
## Log-rank test
survdiff(Surv(time, dead) ~ sex, data = survses.ips0)
Call:
survdiff(formula = Surv(time, dead) ~ sex, data = survses.ips0)
N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 355 261 236 2.71 6.95
sex=2 192 127 152 4.20 6.95
Chisq= 7 on 1 degrees of freedom, p= 0.00838
## Peto & Peto modification of the Gehan-Wilcoxon test: puts more weight on the early (left) part of the curves.
survdiff(Surv(time, dead) ~ sex, data = survses.ips0, rho = 1)
Call:
survdiff(formula = Surv(time, dead) ~ sex, data = survses.ips0,
rho = 1)
N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 355 166.3 148.6 2.09 7.82
sex=2 192 74.2 91.9 3.38 7.82
Chisq= 7.8 on 1 degrees of freedom, p= 0.00517
## Kaplan-Meier plot. Use library(rms)'s survplot
library(rms)
survplot(survses.fit)
## Some tweaking makes the plot more readable.
survplot(survses.fit,
time.inc = 200, # Intervals for number at risk
n.risk = TRUE, # Show number at risk
label.curves = FALSE, # No labeling of curves
conf = "none" # No confidence intervals
)
## Add legend
legend("topright", bty = "n", lty = 1:2, legend = c("Sex = 1","Sex = 2"))
## Page 227: Stratification by study
survses.fit2 <- survfit(Surv(time, dead) ~ study, data = survses)
survses.fit2
Call: survfit.formula(formula = Surv(time, dead) ~ study, data = survses)
records n.max n.start events median 0.95LCL 0.95UCL
study=7751 64 64 64 12 NA NA NA
study=7761 593 593 593 419 813 694 907
study=7781 304 304 304 273 357 332 397
study=7782 310 310 310 303 216 193 246
study=7981 253 253 253 232 199 179 227
study=7982 180 180 180 177 148 128 188
study=8083 390 390 390 265 413 364 455
## Log-rank test
survdiff(Surv(time, dead) ~ study, data = survses)
Call:
survdiff(formula = Surv(time, dead) ~ study, data = survses)
N Observed Expected (O-E)^2/E (O-E)^2/V
study=7751 64 12 109.3 86.579 94.574
study=7761 593 419 723.0 127.839 242.697
study=7781 304 273 241.9 3.994 4.703
study=7782 310 303 150.1 155.720 174.752
study=7981 253 232 114.4 120.823 132.066
study=7982 180 177 69.8 164.736 174.864
study=8083 390 265 272.5 0.206 0.251
Chisq= 735 on 6 degrees of freedom, p= 0
## Peto & Peto modification of the Gehan-Wilcoxon test: puts more weight on the early (left) part of the curves.
survdiff(Surv(time, dead) ~ study, data = survses, rho = 1)
Call:
survdiff(formula = Surv(time, dead) ~ study, data = survses,
rho = 1)
N Observed Expected (O-E)^2/E (O-E)^2/V
study=7751 64 4.03 50.4 42.7029 75.8003
study=7761 593 184.79 365.2 89.1034 221.8289
study=7781 304 146.58 149.5 0.0584 0.0985
study=7782 310 204.90 103.9 98.1621 147.8074
study=7981 253 160.76 80.0 81.5943 118.3381
study=7982 180 127.82 50.7 117.3068 160.6767
study=8083 390 147.78 176.9 4.8016 8.2737
Chisq= 632 on 6 degrees of freedom, p= 0
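Relatedly, strata() inside the survdiff formula gives a log-rank test for one variable adjusted for another; a sketch comparing the sexes while stratifying by study (not in the original notes):
## Sketch: log-rank test for sex, stratified by study
survdiff(Surv(time, dead) ~ sex + strata(study), data = survses)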
## Graphic
par(mar = c(10, 4.1, 4.1, 4.1))
survplot(survses.fit2,
time.inc = 200, # Intervals for number at risk
n.risk = TRUE, # Show number at risk
y.n.risk = -0.5, # modify n.risk position
conf = "none", # No confidence intervals
xlab = "" # No X label
)
McNemar test
## Page 232: Assess discordant cells (b = 22, c = 8)
mat <-
matrix(c(200, 8, 22, 400), ncol = 2,
dimnames = list(
"Treatment B" = c("Success", "Failure"),
"Treatment A" = c("Success", "Failure")
)
)
## McNemar's test with the normal (chi-squared) approximation
mcnemar.test(mat, correct = FALSE)
McNemar's Chi-squared test
data: mat
McNemar's chi-squared = 6.533, df = 1, p-value = 0.01059
## Exact McNemar's test
library(exact2x2)
mcnemar.exact(mat)
Exact McNemar test (with central confidence intervals)
data: mat
b = 22, c = 8, p-value = 0.01612
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
1.179 7.144
sample estimates:
odds ratio
2.75
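Both versions depend only on the discordant cells: the approximate statistic is (b - c)^2 / (b + c), and the exact test is a binomial test on the discordant pairs. A sketch:
## Sketch: McNemar from the discordant cells only
(22 - 8)^2 / (22 + 8)    # chi-squared = 6.533
binom.test(22, 22 + 8)   # exact two-sided p-value = 0.01612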
Kappa coefficient
## Page 234: Assess agreement cells (a, d)
## library(vcd)'s Kappa() appears to use the same method as SAS
library(vcd)
res.kappa <- vcd::Kappa(mat)
res.kappa
value ASE
Unweighted 0.8941 0.01886
Weighted 0.8941 0.04850
## Confidence interval
confint(res.kappa)
Kappa lwr upr
Unweighted 0.8572 0.9311
Weighted 0.7991 0.9892
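Unweighted kappa is (observed agreement - chance agreement) / (1 - chance agreement); a sketch reproducing the value above:
## Sketch: unweighted kappa by hand
p.obs <- sum(diag(mat)) / sum(mat)                      # observed agreement
p.exp <- sum(rowSums(mat) * colSums(mat)) / sum(mat)^2  # chance agreement
(p.obs - p.exp) / (1 - p.exp)                           # ~0.8941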
Test and Gold standard
mat.test <-
matrix(c(47,21,15,130), ncol = 2,
dimnames = list(
"Test" = c("Positive", "Negative"),
"Gold standard" = c("Positive", "Negative")
)
)
library(epiR)
epi.tests(mat.test)
Disease + Disease - Total
Test + 47 15 62
Test - 21 130 151
Total 68 145 213
Point estimates and 95 % CIs:
---------------------------------------------------------
Apparent prevalence 0.29 (0.23, 0.36)
True prevalence 0.32 (0.26, 0.39)
Sensitivity 0.69 (0.57, 0.8)
Specificity 0.9 (0.84, 0.94)
Positive predictive value 0.76 (0.63, 0.86)
Negative predictive value 0.86 (0.8, 0.91)
---------------------------------------------------------
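The headline quantities are simple row and column proportions of the 2x2 table; a sketch:
## Sketch: sensitivity, specificity, and predictive values by hand
mat.test[1, 1] / sum(mat.test[, 1])   # sensitivity 47/68   = 0.69
mat.test[2, 2] / sum(mat.test[, 2])   # specificity 130/145 = 0.90
mat.test[1, 1] / sum(mat.test[1, ])   # PPV 47/62   = 0.76
mat.test[2, 2] / sum(mat.test[2, ])   # NPV 130/151 = 0.86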
One-sample test for continuous outcomes
## Read 2011 data
library(XLConnect)
wb <- loadWorkbook("~/statistics/bio206/poms11.xls")
poms11 <- readWorksheet(wb, sheet = 1)
## Create the depression change score. Somehow there are only 88 cases, not the same as in the notes.
poms11 <- within(poms11, {
ddepress <- depress2 - depress
})
## one-sample t-test: Requires normality
t.test(poms11$ddepress)
One Sample t-test
data: poms11$ddepress
t = 0.992, df = 87, p-value = 0.324
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
-0.9353 2.7989
sample estimates:
mean of x
0.9318
## Paired test is the same thing.
with(poms11, t.test(depress2, depress, paired = TRUE))
Paired t-test
data: depress2 and depress
t = 0.992, df = 87, p-value = 0.324
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.9353 2.7989
sample estimates:
mean of the differences
0.9318
## one-sample Wilcoxon test: Requires symmetry
wilcox.test(poms11$ddepress)
Wilcoxon signed rank test with continuity correction
data: poms11$ddepress
V = 1669, p-value = 0.523
alternative hypothesis: true location is not equal to 0
## Paired test is the same thing.
with(poms11, wilcox.test(depress2, depress, paired = TRUE))
Wilcoxon signed rank test with continuity correction
data: depress2 and depress
V = 1669, p-value = 0.523
alternative hypothesis: true location shift is not equal to 0
## one-sample sign test: No distributional assumption
library(BSDA)
SIGN.test(poms11$ddepress)
One-sample Sign-Test
data: poms11$ddepress
s = 37, p-value = 0.7343
alternative hypothesis: true median is not equal to 0
95 percent confidence interval:
-1 1
sample estimates:
median of x
0
Conf.Level L.E.pt U.E.pt
Lower Achieved CI 0.9307 -1 1
Interpolated CI 0.9500 -1 1
Upper Achieved CI 0.9578 -1 1
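The sign test is just a binomial test on the nonzero differences; a sketch (assuming ddepress as created above):
## Sketch: sign test as a binomial test on the nonzero differences
d <- na.omit(poms11$ddepress)
binom.test(sum(d > 0), sum(d != 0))   # s = 37 positive differences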
ANOVA
## Page 265: ANOVA
## Mean in each group
library(doBy)
summaryBy(ips ~ income, data = survses, na.rm = TRUE)
income ips.mean
1 1 1.3577
2 2 1.2167
3 3 1.0550
4 4 0.8851
5 NA 1.2757
## ANOVA
anova(lm(data = survses, ips ~ factor(income)))
Analysis of Variance Table
Response: ips
Df Sum Sq Mean Sq F value Pr(>F)
factor(income) 3 19 6.31 6.18 0.00037 ***
Residuals 912 931 1.02
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
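If the equal-variance assumption is in doubt, Welch's one-way test is an alternative (a sketch, not in the notes):
## Sketch: Welch's ANOVA (no equal-variance assumption)
oneway.test(ips ~ factor(income), data = survses)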
Kruskal-Wallis test
kruskal.test(data = survses, ips ~ factor(income))
Kruskal-Wallis rank sum test
data: ips by factor(income)
Kruskal-Wallis chi-squared = 22.97, df = 3, p-value = 0.00004102
Pairwise comparison
## Use anova(lm()) for ANOVA
anova(lm(data = survses, depress ~ factor(ips)))
Analysis of Variance Table
Response: depress
Df Sum Sq Mean Sq F value Pr(>F)
factor(ips) 4 1265 316 3.19 0.013 *
Residuals 827 81845 99
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## pairwise t-test. Use with(data, pairwise.t.test(outcome, group, "bon"))
## Bonferroni correction
with(survses,
pairwise.t.test(x = depress, g = factor(ips), p.adjust.method = "bon")
)
Pairwise comparisons using t tests with pooled SD
data: depress and factor(ips)
0 1 2 3
1 1.000 - - -
2 0.177 0.678 - -
3 0.033 0.121 1.000 -
4 1.000 1.000 1.000 1.000
P value adjustment method: bonferroni
## Student-Newman-Keuls procedure
library(mutoss)
res.snk <- snk(data = survses, depress ~ factor(ips), alpha = 0.05, silent = TRUE)
res.snk$SNK
Comparison Diff Statistic Adj.P Rejected Layer
1 3-0 3.7577 4.1623 0.0275 TRUE 1
2 4-0 2.9057 1.8845 0.5424 FALSE 2
3 3-1 3.0506 3.5571 0.0583 FALSE 2
4 2-0 2.4243 3.3604 > 0.05 FALSE 3
5 4-1 2.1986 1.4504 0.5609 FALSE 3
6 3-2 1.3333 1.4184 0.5752 FALSE 3
7 1-0 0.7071 1.1588 0.4128 FALSE 4
8 2-1 1.7172 2.5861 0.0678 FALSE 4
9 4-2 0.4814 0.3078 0.8278 FALSE 4
10 3-4 0.8520 0.5146 0.716 FALSE 4
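Tukey's HSD is another common post-hoc option, available in base R via aov() (a sketch, not in the notes):
## Sketch: Tukey's HSD for the same comparisons
TukeyHSD(aov(depress ~ factor(ips), data = survses))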
## ANOVA for vigor ~ ips
anova(lm(data = survses, vigor ~ factor(ips)))
Analysis of Variance Table
Response: vigor
Df Sum Sq Mean Sq F value Pr(>F)
factor(ips) 4 3119 780 20 8.6e-16 ***
Residuals 918 35829 39
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## pairwise t-test for vigor ~ ips
with(survses,
pairwise.t.test(x = vigor, g = factor(ips), p.adjust.method = "bon")
)
Pairwise comparisons using t tests with pooled SD
data: vigor and factor(ips)
0 1 2 3
1 0.00312 - - -
2 0.0000000000982 0.00023 - -
3 0.0000000000025 0.0000013813992 0.69638 -
4 0.19465 1.00000 1.00000 0.69435
P value adjustment method: bonferroni
## Student-Newman-Keuls procedure
res.snk <- snk(data = survses, vigor ~ factor(ips), alpha = 0.05, silent = TRUE)
res.snk$SNK
Comparison Diff Statistic Adj.P Rejected Layer
1 0-3 5.739 10.502 0 TRUE 1
2 1-3 3.897 7.508 0 TRUE 2
3 0-2 4.262 9.755 0 TRUE 2
4 4-3 2.612 2.571 0.1643 FALSE 3
5 1-2 2.420 6.019 0.0001 TRUE 3
6 0-4 3.127 3.310 0.0509 FALSE 3
7 2-3 1.477 2.569 0.0696 FALSE 4
8 4-2 1.135 1.180 0.4043 FALSE 4
9 1-4 1.286 1.383 0.3282 FALSE 4
10 0-1 1.842 5.118 > 0.05 FALSE 4
## Scatter plot
plot(data = survses, tension ~ depress)
## ggplot2 can give you a fancier plot
library(ggplot2)
qplot(data = survses, y = tension, x = depress)
## Provide data for correlation matrix
cor.dat <- survses[,c("depress","tension","anger","vigor","fatigue")]
## Pairs graph
pairs(cor.dat)
## This gives you a fancier graph with coefficients and significance (red stars)
library(PerformanceAnalytics)
PerformanceAnalytics::chart.Correlation(cor.dat)
## cor(use = "pair") gives you the correlation matrix (pairwise complete observations)
cor(x = cor.dat, use = "pair")
depress tension anger vigor fatigue
depress 1.0000 0.7301 0.6483 -0.3673 0.5415
tension 0.7301 1.0000 0.5161 -0.3764 0.5429
anger 0.6483 0.5161 1.0000 -0.1682 0.3425
vigor -0.3673 -0.3764 -0.1682 1.0000 -0.4651
fatigue 0.5415 0.5429 0.3425 -0.4651 1.0000
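A single pair can be tested directly with cor.test(), which also gives a confidence interval (a sketch):
## Sketch: test one correlation directly, with a confidence interval
cor.test(~ depress + tension, data = survses)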
## This gives coefficients, sample sizes, and p-values (so small they display as zeros)
library(psych)
corr.test(cor.dat, adjust = "none")
Call:corr.test(x = cor.dat, adjust = "none")
Correlation matrix
depress tension anger vigor fatigue
depress 1.00 0.73 0.65 -0.37 0.54
tension 0.73 1.00 0.52 -0.38 0.54
anger 0.65 0.52 1.00 -0.17 0.34
vigor -0.37 -0.38 -0.17 1.00 -0.47
fatigue 0.54 0.54 0.34 -0.47 1.00
Sample Size
depress tension anger vigor fatigue
depress 846 844 696 832 806
tension 844 944 734 924 872
anger 696 734 739 728 700
vigor 832 924 728 937 865
fatigue 806 872 700 865 880
Probability values (Entries above the diagonal are adjusted for multiple tests.)
depress tension anger vigor fatigue
depress 0 0 0 0 0
tension 0 0 0 0 0
anger 0 0 0 0 0
vigor 0 0 0 0 0
fatigue 0 0 0 0 0
No reproducible examples were given (the Excel data files are not distributed).