BIO 208 SAS codes translated to R

Format RMarkdown, read data from Excel file.

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, echo = TRUE, 
    tidy = FALSE, fig.width = 8, fig.height = 7)
options(width = 116, scipen = 10)

setwd("~/statistics/bio208/")

## Read xls file
library(XLConnect)
wb <- loadWorkbook("~/statistics/bio208/survses.xls")
survses <- readWorksheet(wb, sheet = 1)
names(survses) <- tolower(names(survses))

Chi-Square Based Test

RxC Contingency Tables

## xtabs() function creates a simple cross table
tab.income.educat <- xtabs(data = survses, ~ income + educat)
tab.income.educat

      educat
income   1   2   3   4
     1 128 104  12   4
     2 129 216  34   5
     3  24 112  48  14
     4   4  27  39  17


## Page 190: Example: CrossTable() function turns a simple table to SAS-like one
## chisq = TRUE option adds chi-squared test
library(gmodels)
CrossTable(tab.income.educat, chisq = TRUE)



   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|


Total Observations in Table:  917 


             | educat 
      income |         1 |         2 |         3 |         4 | Row Total | 
-------------|-----------|-----------|-----------|-----------|-----------|
           1 |       128 |       104 |        12 |         4 |       248 | 
             |    33.643 |     3.266 |    15.973 |     4.297 |           | 
             |     0.516 |     0.419 |     0.048 |     0.016 |     0.270 | 
             |     0.449 |     0.227 |     0.090 |     0.100 |           | 
             |     0.140 |     0.113 |     0.013 |     0.004 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
           2 |       129 |       216 |        34 |         5 |       384 | 
             |     0.781 |     2.945 |     8.451 |     8.243 |           | 
             |     0.336 |     0.562 |     0.089 |     0.013 |     0.419 | 
             |     0.453 |     0.471 |     0.256 |     0.125 |           | 
             |     0.141 |     0.236 |     0.037 |     0.005 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
           3 |        24 |       112 |        48 |        14 |       198 | 
             |    22.898 |     1.677 |    12.947 |     3.330 |           | 
             |     0.121 |     0.566 |     0.242 |     0.071 |     0.216 | 
             |     0.084 |     0.244 |     0.361 |     0.350 |           | 
             |     0.026 |     0.122 |     0.052 |     0.015 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
           4 |         4 |        27 |        39 |        17 |        87 | 
             |    19.631 |     6.288 |    55.157 |    45.948 |           | 
             |     0.046 |     0.310 |     0.448 |     0.195 |     0.095 | 
             |     0.014 |     0.059 |     0.293 |     0.425 |           | 
             |     0.004 |     0.029 |     0.043 |     0.019 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
Column Total |       285 |       459 |       133 |        40 |       917 | 
             |     0.311 |     0.501 |     0.145 |     0.044 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|


Statistics for All Table Factors


Pearson's Chi-squared test 
------------------------------------------------------------
Chi^2 =  245.5     d.f. =  9     p =  8.998e-48

Trend Tests

## Page 195: Example: Create mother's smoking and malformation data (fake data?)
malf <-
    matrix(c(889,1988,
             182, 426,
             203, 420,
             55,  86,
             40,  48),
           ncol = 2, byrow = TRUE,
           dimname = list(cig.daily = c("0","1-10","11-20","21-30","above31"),
               outcome = c("Malf","No Malf.")))

## Show as table
malf

         outcome
cig.daily Malf No Malf.
  0        889     1988
  1-10     182      426
  11-20    203      420
  21-30     55       86
  above31   40       48


## addmargins() functions does what it sounds like
addmargins(malf)

         outcome
cig.daily Malf No Malf.  Sum
  0        889     1988 2877
  1-10     182      426  608
  11-20    203      420  623
  21-30     55       86  141
  above31   40       48   88
  Sum     1369     2968 4337


## Usual chi-squared test without continuity correction
result.chisq <- chisq.test(malf, correct = FALSE)
result.chisq


    Pearson's Chi-squared test

data:  malf 
X-squared = 13.11, df = 4, p-value = 0.01075


## Expected values can be obtaine by $expected operator
result.chisq$expected

         outcome
cig.daily   Malf No Malf.
  0       908.14  1968.86
  1-10    191.92   416.08
  11-20   196.65   426.35
  21-30    44.51    96.49
  above31  27.78    60.22


## Trend test
num.total <- rowSums(malf)    # Total number in each bin
num.malf <- malf[,1]          # Malformation number in each bin

## Page 196: weights: 0 1 2 3 4
prop.trend.test(x = num.malf, n = num.total, score = c(0:4))


    Chi-squared Test for Trend in Proportions

data:  num.malf out of num.total ,
 using scores: 0 1 2 3 4 
X-squared = 7.649, df = 1, p-value = 0.00568


## Page 196: weights: 0  5 15 25 35
prop.trend.test(x = num.malf, n = num.total, score = c(0,5,15,25,35))


    Chi-squared Test for Trend in Proportions

data:  num.malf out of num.total ,
 using scores: 0 5 15 25 35 
X-squared = 9.344, df = 1, p-value = 0.002237


## Illness (IPS 2,3,4) x income example
survses.dead <- survses[survses$dead != 0,]     # Delete rows where dead is 0.

## Recoding by library(car)'s recode() function
## Create new variable sick 1 (ips is 2, 3, or 4) sick 0 (ips is 0 or 1)
## Create new variable newedu by collapsing 3, 4 categories to 3
survses.dead <- within(survses.dead, {
                       sick   <- car::recode(ips, "c(0,1) = 0; c(2,3,4) = 1")    # 0 and 1 changed to 0
                       newedu <- car::recode(educat, "4 = 3; 3 = 4")
                       })

## Create a table and show with marging
tab.sick.income <- xtabs(data = survses.dead, ~ sick + income)
addmargins(tab.sick.income)

     income
sick    1   2   3   4 Sum
  0   131 204 123  62 520
  1    90 106  45  13 254
  Sum 221 310 168  75 774


## Extract 2nd row and column totals
num.sick  <- tab.sick.income[2,]
num.total <- colSums(tab.sick.income)

## Page 199: Example 1: sick vs income linear trend
prop.trend.test(x = num.sick, n = num.total, score = 1:4)


    Chi-squared Test for Trend in Proportions

data:  num.sick out of num.total ,
 using scores: 1 2 3 4 
X-squared = 17.32, df = 1, p-value = 0.00003166


## Page 200: Example 2: Linear trend
tab.sick.educat <- xtabs(data = survses.dead, ~ sick + educat)
prop.trend.test(x = tab.sick.educat[2,], n = colSums(tab.sick.educat), score = 1:4)


    Chi-squared Test for Trend in Proportions

data:  tab.sick.educat[2, ] out of colSums(tab.sick.educat) ,
 using scores: 1 2 3 4 
X-squared = 7.678, df = 1, p-value = 0.005591


## Page 201: Example 3: Non-linear trend
tab.sick.newedu <- xtabs(data = survses.dead, ~ sick + newedu)
prop.trend.test(x = tab.sick.newedu[2,], n = colSums(tab.sick.newedu), score = 1:4)


    Chi-squared Test for Trend in Proportions

data:  tab.sick.newedu[2, ] out of colSums(tab.sick.newedu) ,
 using scores: 1 2 3 4 
X-squared = 12.15, df = 1, p-value = 0.0004913

Stratified Tests

smoking.stratified <-
    array(c( 8, 22, 16, 44,
            63,  7, 36,  4),
          dim = c(2,2,2),
          dimnames = list(
              alcohol = c("Yes","No"),
              MI      = c("Yes","No"),
              Smoking = c("Non-Smokers", "Smokers")
              ))

smoking.crude <- addmargins(smoking.stratified, margin = 3)[,,3]

## Page 202: Crude table: OR 2.26
library(epiR)
epi.2by2(smoking.crude, units = 1)

             Disease +    Disease -      Total        Inc risk *        Odds
Exposed +           71           52        123             0.577       1.365
Exposed -           29           48         77             0.377       0.604
Total              100          100        200             0.500       1.000

Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio                         1.53 (1.11, 2.12)
Odds ratio                             2.26 (1.26, 4.05)
Attrib risk *                          0.2 (0.06, 0.34)
Attrib risk in population *            0.12 (-0.01, 0.25)
Attrib fraction in exposed (%)         34.75 (9.72, 52.85)
Attrib fraction in population (%)      24.68 (4.99, 40.28)
---------------------------------------------------------
 * Cases per population unit

## Chi-squared test is significant
chisq.test(smoking.crude, correct = F)


    Pearson's Chi-squared test

data:  smoking.crude 
X-squared = 7.623, df = 1, p-value = 0.005762



## Page 203: Stratified table: OR 1 in each. Array to List apPLY (alply) method
library(plyr)
res.stratified <-
    alply(.data = smoking.stratified,
          .margins = 3,
          epi.2by2,
          units = 1)

             Disease +    Disease -      Total        Inc risk *        Odds
Exposed +            8           16         24             0.333         0.5
Exposed -           22           44         66             0.333         0.5
Total               30           60         90             0.333         0.5

Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio                         1 (0.52, 1.94)
Odds ratio                             1 (0.37, 2.69)
Attrib risk *                          0 (-0.22, 0.22)
Attrib risk in population *            0 (-0.15, 0.15)
Attrib fraction in exposed (%)         0 (-93.62, 48.35)
Attrib fraction in population (%)      0 (-19.27, 16.15)
---------------------------------------------------------
 * Cases per population unit 
             Disease +    Disease -      Total        Inc risk *        Odds
Exposed +           63           36         99             0.636        1.75
Exposed -            7            4         11             0.636        1.75
Total               70           40        110             0.636        1.75

Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio                         1 (0.62, 1.6)
Odds ratio                             1 (0.27, 3.65)
Attrib risk *                          0 (-0.3, 0.3)
Attrib risk in population *            0 (-0.3, 0.3)
Attrib fraction in exposed (%)         0 (-60.14, 37.55)
Attrib fraction in population (%)      0 (-52.77, 34.54)
---------------------------------------------------------
 * Cases per population unit

## Chi-squared test is not significant in each strata
res.chisq <-
    alply(.data = smoking.stratified,
          .margins = 3,
          chisq.test,
          correct = F)

lapply(res.chisq, invisible)

$`1`

    Pearson's Chi-squared test

data:  piece 
X-squared = 0, df = 1, p-value = 1


$`2`

    Pearson's Chi-squared test

data:  piece 
X-squared = 0, df = 1, p-value = 1


## Page 209: Example 1: Dichotomized IPS by dichotomized education stratified by sex
## lowedu created. 1 if educat < 3, 0 if not
survses.dead$lowedu <- as.numeric(survses.dead$educat  < 3)

## Create stratified data
tab.lowedu.sick.sex <- xtabs(data = survses.dead, ~ lowedu +sick +sex)
tab.lowedu.sick.sex

, , sex = 1

      sick
lowedu   0   1
     0  84  19
     1 292 156

, , sex = 2

      sick
lowedu   0   1
     0  39  15
     1 161  97


## Page 212: Mantel-Haenszel test
mantelhaen.test(tab.lowedu.sick.sex, correct = FALSE)


    Mantel-Haenszel chi-squared test without continuity correction

data:  tab.lowedu.sick.sex 
Mantel-Haenszel X-squared = 11.5, df = 1, p-value = 0.0006976
alternative hypothesis: true common odds ratio is not equal to 1 
95 percent confidence interval:
 1.338 3.039 
sample estimates:
common odds ratio 
            2.016


## Crude and adjusted at a glance
epi.2by2(tab.lowedu.sick.sex, units = 1)

             Disease +    Disease -      Total        Inc risk *        Odds
Exposed +          123           34        157             0.783        3.62
Exposed -          453          253        706             0.642        1.79
Total              576          287        863             0.667        2.01


Point estimates and 95 % CIs:
---------------------------------------------------------
Inc risk ratio (crude)                    1.22 (1.11, 1.35)
Inc risk ratio (M-H)                      1.22 (1.1, 1.35)
Inc risk ratio (crude:M-H)                1
Odds ratio (crude)                        2.02 (1.34, 3.04)
Odds ratio (M-H)                          2.02 (1.23, 3.31)
Odds ratio (crude:M-H)                    1
Attrib risk (crude) *                     0.14 (0.07, 0.22)
Attrib risk (M-H) *                       0.14 (-0.04, 0.32)
Attrib risk (crude:M-H)                   1.01
---------------------------------------------------------
 * Cases per population unit



## Page 213: Example 2: IPS by education, stratified by sex
tab.educat.ips.sex <- xtabs(data = survses.dead, ~ educat +ips +sex)
tab.educat.ips.sex

, , sex = 1

      ips
educat   0   1   2   3   4
     1  50  75  37  22   8
     2  61 106  57  30   2
     3  23  43   7   3   2
     4   8  10   4   1   2

, , sex = 2

      ips
educat   0   1   2   3   4
     1  18  38  30   8   2
     2  36  69  32  22   3
     3   7  25   9   1   1
     4   4   3   3   0   1


## More decorative table
junk <-
    alply(.data      = tab.educat.ips.sex,
          .margin    = 3,
          .fun       = CrossTable,
          prop.chisq = FALSE,
          prop.t     = FALSE)



   Cell Contents
|-------------------------|
|                       N |
|           N / Row Total |
|           N / Col Total |
|-------------------------|


Total Observations in Table:  551 


             | ips 
      educat |         0 |         1 |         2 |         3 |         4 | Row Total | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           1 |        50 |        75 |        37 |        22 |         8 |       192 | 
             |     0.260 |     0.391 |     0.193 |     0.115 |     0.042 |     0.348 | 
             |     0.352 |     0.321 |     0.352 |     0.393 |     0.571 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           2 |        61 |       106 |        57 |        30 |         2 |       256 | 
             |     0.238 |     0.414 |     0.223 |     0.117 |     0.008 |     0.465 | 
             |     0.430 |     0.453 |     0.543 |     0.536 |     0.143 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           3 |        23 |        43 |         7 |         3 |         2 |        78 | 
             |     0.295 |     0.551 |     0.090 |     0.038 |     0.026 |     0.142 | 
             |     0.162 |     0.184 |     0.067 |     0.054 |     0.143 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           4 |         8 |        10 |         4 |         1 |         2 |        25 | 
             |     0.320 |     0.400 |     0.160 |     0.040 |     0.080 |     0.045 | 
             |     0.056 |     0.043 |     0.038 |     0.018 |     0.143 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
Column Total |       142 |       234 |       105 |        56 |        14 |       551 | 
             |     0.258 |     0.425 |     0.191 |     0.102 |     0.025 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|




   Cell Contents
|-------------------------|
|                       N |
|           N / Row Total |
|           N / Col Total |
|-------------------------|


Total Observations in Table:  312 


             | ips 
      educat |         0 |         1 |         2 |         3 |         4 | Row Total | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           1 |        18 |        38 |        30 |         8 |         2 |        96 | 
             |     0.188 |     0.396 |     0.312 |     0.083 |     0.021 |     0.308 | 
             |     0.277 |     0.281 |     0.405 |     0.258 |     0.286 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           2 |        36 |        69 |        32 |        22 |         3 |       162 | 
             |     0.222 |     0.426 |     0.198 |     0.136 |     0.019 |     0.519 | 
             |     0.554 |     0.511 |     0.432 |     0.710 |     0.429 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           3 |         7 |        25 |         9 |         1 |         1 |        43 | 
             |     0.163 |     0.581 |     0.209 |     0.023 |     0.023 |     0.138 | 
             |     0.108 |     0.185 |     0.122 |     0.032 |     0.143 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
           4 |         4 |         3 |         3 |         0 |         1 |        11 | 
             |     0.364 |     0.273 |     0.273 |     0.000 |     0.091 |     0.035 | 
             |     0.062 |     0.022 |     0.041 |     0.000 |     0.143 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|
Column Total |        65 |       135 |        74 |        31 |         7 |       312 | 
             |     0.208 |     0.433 |     0.237 |     0.099 |     0.022 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|-----------|


## General association
mantelhaen.test(tab.educat.ips.sex)


    Cochran-Mantel-Haenszel test

data:  tab.educat.ips.sex 
Cochran-Mantel-Haenszel M^2 = 30.76, df = 12, p-value = 0.002145

Log-Rank Tests

## IPS = 0 only
survses.ips0 <- subset(survses, ips == 0 & !is.na(ips))

library(survival)
survses.fit <- survfit(Surv(time, dead) ~ sex, data = survses.ips0)
## use summary(survses.fit) for more detailed output
survses.fit

Call: survfit(formula = Surv(time, dead) ~ sex, data = survses.ips0)

      records n.max n.start events median 0.95LCL 0.95UCL
sex=1     355   355     355    261    451     392     507
sex=2     192   192     192    127    590     468     695


## Log-rank test
survdiff(Surv(time, dead) ~ sex, data = survses.ips0)

Call:
survdiff(formula = Surv(time, dead) ~ sex, data = survses.ips0)

        N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 355      261      236      2.71      6.95
sex=2 192      127      152      4.20      6.95

 Chisq= 7  on 1 degrees of freedom, p= 0.00838


## Peto & Peto modification of the Gehan-Wilcoxon test. More weight in the left side of curves.
survdiff(Surv(time, dead) ~ sex, data = survses.ips0, rho = 1)

Call:
survdiff(formula = Surv(time, dead) ~ sex, data = survses.ips0, 
    rho = 1)

        N Observed Expected (O-E)^2/E (O-E)^2/V
sex=1 355    166.3    148.6      2.09      7.82
sex=2 192     74.2     91.9      3.38      7.82

 Chisq= 7.8  on 1 degrees of freedom, p= 0.00517


## Kaplan-Meier plot. Use library(rms)'s survplot
library(rms)
survplot(survses.fit)

plot of chunk unnamed-chunk-5


## Some tweaking is necessary to make it work.
survplot(survses.fit,
         time.inc = 200,        # Intervals for number at risk
         n.risk = TRUE,         # Show number at risk
         label.curves = FALSE,  # No labeling of curves
         conf = "none"          # No confidence intervals
         )
## Add legend
legend("topright", bty = "n", lty = 1:2, legend = c("Sex = 1","Sex = 2"))

plot of chunk unnamed-chunk-5



## Page 227: Stratification by study
survses.fit2 <- survfit(Surv(time, dead) ~ study, data = survses)
survses.fit2

Call: survfit.formula(formula = Surv(time, dead) ~ study, data = survses)

           records n.max n.start events median 0.95LCL 0.95UCL
study=7751      64    64      64     12     NA      NA      NA
study=7761     593   593     593    419    813     694     907
study=7781     304   304     304    273    357     332     397
study=7782     310   310     310    303    216     193     246
study=7981     253   253     253    232    199     179     227
study=7982     180   180     180    177    148     128     188
study=8083     390   390     390    265    413     364     455


## Log-rank test
survdiff(Surv(time, dead) ~ study, data = survses)

Call:
survdiff(formula = Surv(time, dead) ~ study, data = survses)

             N Observed Expected (O-E)^2/E (O-E)^2/V
study=7751  64       12    109.3    86.579    94.574
study=7761 593      419    723.0   127.839   242.697
study=7781 304      273    241.9     3.994     4.703
study=7782 310      303    150.1   155.720   174.752
study=7981 253      232    114.4   120.823   132.066
study=7982 180      177     69.8   164.736   174.864
study=8083 390      265    272.5     0.206     0.251

 Chisq= 735  on 6 degrees of freedom, p= 0


## Peto & Peto modification of the Gehan-Wilcoxon test. More weight in the left side of curves.
survdiff(Surv(time, dead) ~ study, data = survses, rho = 1)

Call:
survdiff(formula = Surv(time, dead) ~ study, data = survses, 
    rho = 1)

             N Observed Expected (O-E)^2/E (O-E)^2/V
study=7751  64     4.03     50.4   42.7029   75.8003
study=7761 593   184.79    365.2   89.1034  221.8289
study=7781 304   146.58    149.5    0.0584    0.0985
study=7782 310   204.90    103.9   98.1621  147.8074
study=7981 253   160.76     80.0   81.5943  118.3381
study=7982 180   127.82     50.7  117.3068  160.6767
study=8083 390   147.78    176.9    4.8016    8.2737

 Chisq= 632  on 6 degrees of freedom, p= 0


## Graphic
par(mar = c(10, 4.1, 4.1, 4.1))
survplot(survses.fit2,
         time.inc = 200,        # Intervals for number at risk
         n.risk = TRUE,         # Show number at risk
         y.n.risk = -0.5,       # modify n.risk position
         conf = "none",         # No confidence intervals
         xlab = ""              # No X label
         )

plot of chunk unnamed-chunk-5

One-Sample or Paired Tests

McNemar test

## Page 232: Assess discordant cells (b = 22, c = 8)
mat <-
    matrix(c(200, 8, 22, 400), ncol = 2,
           dimnames = list(
               "Treatment B" = c("Success", "Failure"),
               "Treatment A" = c("Success", "Failure")
               )
           )

## Normal approximation McNemer's test
mcnemar.test(mat, correct = FALSE)


    McNemar's Chi-squared test

data:  mat 
McNemar's chi-squared = 6.533, df = 1, p-value = 0.01059


## Exact McNemer's test
library(exact2x2)
mcnemar.exact(mat)


    Exact McNemar test (with central confidence intervals)

data:  mat 
b = 22, c = 8, p-value = 0.01612
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
 1.179 7.144 
sample estimates:
odds ratio 
      2.75

Kappa coefficient

## Page 234: Assess agreement cells (a, d)
## library(vcd)'s Kappa() appears to use the same method as SAS
library(vcd)
res.kappa <- vcd::Kappa(mat)
res.kappa

            value     ASE
Unweighted 0.8941 0.01886
Weighted   0.8941 0.04850


## Confidence interval
confint(res.kappa)


Kappa           lwr    upr
  Unweighted 0.8572 0.9311
  Weighted   0.7991 0.9892

Test and Gold standard

mat.test <-
    matrix(c(47,21,15,130), ncol = 2,
           dimnames = list(
               "Test"           = c("Positive", "Negative"),
               "Gold standard"  = c("Positive", "Negative")
               )
           )

library(epiR)
epi.tests(mat.test)

          Disease +    Disease -      Total
Test +           47           15         62
Test -           21          130        151
Total            68          145        213

Point estimates and 95 % CIs:
---------------------------------------------------------
Apparent prevalence                    0.29 (0.23, 0.36)
True prevalence                        0.32 (0.26, 0.39)
Sensitivity                            0.69 (0.57, 0.8)
Specificity                            0.9 (0.84, 0.94)
Positive predictive value              0.76 (0.63, 0.86)
Negative predictive value              0.86 (0.8, 0.91)
---------------------------------------------------------

One-sample test for continuous outcomes

## Read 2011 data
library(XLConnect)
wb <- loadWorkbook("~/statistics/bio206/poms11.xls")
poms11 <- readWorksheet(wb, sheet = 1)

## Create delta depress. Somehow only 88 cases, not the same as notes.
poms11 <- within(poms11, {
    ddepress <- depress2 - depress
})

## one-sample t-test: Requires normality
t.test(poms11$ddepress)


    One Sample t-test

data:  poms11$ddepress 
t = 0.992, df = 87, p-value = 0.324
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 -0.9353  2.7989 
sample estimates:
mean of x 
   0.9318


## Paired test is the same thing.
with(poms11, t.test(depress2, depress, paired = TRUE))


    Paired t-test

data:  depress2 and depress 
t = 0.992, df = 87, p-value = 0.324
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.9353  2.7989 
sample estimates:
mean of the differences 
                 0.9318


## one-sample Wilcoxon test: Requires symmetry
wilcox.test(poms11$ddepress)


    Wilcoxon signed rank test with continuity correction

data:  poms11$ddepress 
V = 1669, p-value = 0.523
alternative hypothesis: true location is not equal to 0


## Paired test is the same thing.
with(poms11, wilcox.test(depress2, depress, paired = TRUE))


    Wilcoxon signed rank test with continuity correction

data:  depress2 and depress 
V = 1669, p-value = 0.523
alternative hypothesis: true location shift is not equal to 0


## one-sample sign test: No assumption made
library(BSDA)
SIGN.test(poms11$ddepress)


    One-sample Sign-Test

data:  poms11$ddepress 
s = 37, p-value = 0.7343
alternative hypothesis: true median is not equal to 0 
95 percent confidence interval:
 -1  1 
sample estimates:
median of x 
          0

                  Conf.Level L.E.pt U.E.pt
Lower Achieved CI     0.9307     -1      1
Interpolated CI       0.9500     -1      1
Upper Achieved CI     0.9578     -1      1

Comparing 3 or More Samples

ANOVA

## Page 265: ANOVA
## Mean in each group
library(doBy)
summaryBy(ips ~ income, data = survses, na.rm = TRUE)

  income ips.mean
1      1   1.3577
2      2   1.2167
3      3   1.0550
4      4   0.8851
5     NA   1.2757

## ANOVA
anova(lm(data = survses, ips ~ factor(income)))

Analysis of Variance Table

Response: ips
                Df Sum Sq Mean Sq F value  Pr(>F)    
factor(income)   3     19    6.31    6.18 0.00037 ***
Residuals      912    931    1.02                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Kruskal-Wallis test

kruskal.test(data = survses, ips ~ factor(income))


    Kruskal-Wallis rank sum test

data:  ips by factor(income) 
Kruskal-Wallis chi-squared = 22.97, df = 3, p-value = 0.00004102

Pairwise comparison

## Use anova(lm()) for ANOVA
anova(lm(data = survses, depress ~ factor(ips)))

Analysis of Variance Table

Response: depress
             Df Sum Sq Mean Sq F value Pr(>F)  
factor(ips)   4   1265     316    3.19  0.013 *
Residuals   827  81845      99                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


## pairwise t-test. Use with(data, pairwise.t.test(outcome, group, "bon"))
## Bonferroni correction
with(survses,
     pairwise.t.test(x = depress, g = factor(ips), p.adjust.method = "bon")
     )


    Pairwise comparisons using t tests with pooled SD 

data:  depress and factor(ips) 

  0     1     2     3    
1 1.000 -     -     -    
2 0.177 0.678 -     -    
3 0.033 0.121 1.000 -    
4 1.000 1.000 1.000 1.000

P value adjustment method: bonferroni


## Student-Nuwman-Keuls procedure
library(mutoss)
res.snk <- snk(data = survses, depress ~ factor(ips), alpha = 0.05, silent = TRUE)
res.snk$SNK

   Comparison   Diff Statistic  Adj.P Rejected Layer
1         3-0 3.7577    4.1623 0.0275     TRUE     1
2         4-0 2.9057    1.8845 0.5424    FALSE     2
3         3-1 3.0506    3.5571 0.0583    FALSE     2
4         2-0 2.4243    3.3604 > 0.05    FALSE     3
5         4-1 2.1986    1.4504 0.5609    FALSE     3
6         3-2 1.3333    1.4184 0.5752    FALSE     3
7         1-0 0.7071    1.1588 0.4128    FALSE     4
8         2-1 1.7172    2.5861 0.0678    FALSE     4
9         4-2 0.4814    0.3078 0.8278    FALSE     4
10        3-4 0.8520    0.5146  0.716    FALSE     4


## ANOVA for vigor ~ ips
anova(lm(data = survses, vigor ~ factor(ips)))

Analysis of Variance Table

Response: vigor
             Df Sum Sq Mean Sq F value  Pr(>F)    
factor(ips)   4   3119     780      20 8.6e-16 ***
Residuals   918  35829      39                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


## pairwise t-test for vigor ~ ips
with(survses,
     pairwise.t.test(x = vigor, g = factor(ips), p.adjust.method = "bon")
     )


    Pairwise comparisons using t tests with pooled SD 

data:  vigor and factor(ips) 

  0               1               2       3      
1 0.00312         -               -       -      
2 0.0000000000982 0.00023         -       -      
3 0.0000000000025 0.0000013813992 0.69638 -      
4 0.19465         1.00000         1.00000 0.69435

P value adjustment method: bonferroni


## Student-Nuwman-Keuls procedure
res.snk <- snk(data = survses, vigor ~ factor(ips), alpha = 0.05, silent = TRUE)
res.snk$SNK

   Comparison  Diff Statistic  Adj.P Rejected Layer
1         0-3 5.739    10.502      0     TRUE     1
2         1-3 3.897     7.508      0     TRUE     2
3         0-2 4.262     9.755      0     TRUE     2
4         4-3 2.612     2.571 0.1643    FALSE     3
5         1-2 2.420     6.019 0.0001     TRUE     3
6         0-4 3.127     3.310 0.0509    FALSE     3
7         2-3 1.477     2.569 0.0696    FALSE     4
8         4-2 1.135     1.180 0.4043    FALSE     4
9         1-4 1.286     1.383 0.3282    FALSE     4
10        0-1 1.842     5.118 > 0.05    FALSE     4

Continous Outcome and Continuous Predictor

## Scatter plot
plot(data = survses, tension ~ depress)

plot of chunk unnamed-chunk-13


## ggplot2 can give you a fancier plot
library(ggplot2)
qplot(data = survses, y = tension, x = depress)

plot of chunk unnamed-chunk-13



## Provide data for correlation matrix
cor.dat <- survses[,c("depress","tension","anger","vigor","fatigue")]

## Pairs graph
pairs(cor.dat)

plot of chunk unnamed-chunk-13


## This gives you a fancier graph with coefficients and significance (red stars)
library(PerformanceAnalytics)
PerformanceAnalytics::chart.Correlation(cor.dat)

plot of chunk unnamed-chunk-13


## cor(use = "pair") gives you correlation matrix
cor(x = cor.dat, use = "pair")

        depress tension   anger   vigor fatigue
depress  1.0000  0.7301  0.6483 -0.3673  0.5415
tension  0.7301  1.0000  0.5161 -0.3764  0.5429
anger    0.6483  0.5161  1.0000 -0.1682  0.3425
vigor   -0.3673 -0.3764 -0.1682  1.0000 -0.4651
fatigue  0.5415  0.5429  0.3425 -0.4651  1.0000


## This gives coefficients, sample size, and p-values (too small, and all zeros)
library(psych)
corr.test(cor.dat, adjust = "none")

Call:corr.test(x = cor.dat, adjust = "none")
Correlation matrix 
        depress tension anger vigor fatigue
depress    1.00    0.73  0.65 -0.37    0.54
tension    0.73    1.00  0.52 -0.38    0.54
anger      0.65    0.52  1.00 -0.17    0.34
vigor     -0.37   -0.38 -0.17  1.00   -0.47
fatigue    0.54    0.54  0.34 -0.47    1.00
Sample Size 
        depress tension anger vigor fatigue
depress     846     844   696   832     806
tension     844     944   734   924     872
anger       696     734   739   728     700
vigor       832     924   728   937     865
fatigue     806     872   700   865     880
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
        depress tension anger vigor fatigue
depress       0       0     0     0       0
tension       0       0     0     0       0
anger         0       0     0     0       0
vigor         0       0     0     0       0
fatigue       0       0     0     0       0

Regression

No reproducible examples were given.