Biostatistics 201: Lab 1

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, tidy = FALSE, 
    echo = TRUE, fig.width = 7, fig.height = 7)
options(width = 116, scipen = 10)

setwd("~/statistics/bio201/")

References

Load dataset

library(foreign)
lead <- read.dta("./LEAD.DAT.dta")
lead <- read.dta("./lead.dta")

str(lead)
'data.frame':   124 obs. of  12 variables:
 $ id      : num  101 102 103 104 105 106 107 108 109 110 ...
 $ age     : num  11.08 9.42 11.08 6.92 11.25 ...
 $ sex     : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 1 1 2 ...
 $ status  : num  77 77 30 77 62 72 54 73 22 77 ...
 $ verbiq  : num  61 82 70 72 72 95 89 57 116 95 ...
 $ perfiq  : num  85 90 107 85 100 97 101 64 111 100 ...
 $ fulliq  : num  70 85 86 76 84 96 94 56 115 97 ...
 $ iqtype  : Factor w/ 2 levels "WISC","WPPSI": 1 1 1 1 1 1 1 1 1 1 ...
 $ totyrs  : num  11 6 5 5 11 6 6 15 7 7 ...
 $ hyperact: num  NA 0 NA 2 NA 0 0 NA 0 0 ...
 $ tapping : num  72 61 49 48 51 49 50 58 50 51 ...
 $ group   : Factor w/ 2 levels "lead < 40","lead >= 40": 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "datalabel")= chr "Texas Lead Study"
 - attr(*, "time.stamp")= chr " 3 Sep 2007 14:32"
 - attr(*, "formats")= chr  "%12.0g" "%9.2f" "%12.0g" "%12.0g" ...
 - attr(*, "types")= int  254 254 254 254 254 254 254 254 254 254 ...
 - attr(*, "val.labels")= chr  "" "" "sexlbl" "" ...
 - attr(*, "var.labels")= chr  "ID number" "Age (years)" "Gender" "Social status index" ...
 - attr(*, "version")= int 8
 - attr(*, "label.table")=List of 3
  ..$ iqlbl : Named num  1 2
  .. ..- attr(*, "names")= chr  "WISC" "WPPSI"
  ..$ sexlbl: Named num  0 1
  .. ..- attr(*, "names")= chr  "F" "M"
  ..$ grplbl: Named num  0 1
  .. ..- attr(*, "names")= chr  "lead < 40" "lead >= 40"
head(lead)
   id    age sex status verbiq perfiq fulliq iqtype totyrs hyperact tapping     group
1 101 11.083   M     77     61     85     70   WISC     11       NA      72 lead < 40
2 102  9.417   M     77     82     90     85   WISC      6        0      61 lead < 40
3 103 11.083   M     30     70    107     86   WISC      5       NA      49 lead < 40
4 104  6.917   M     77     72     85     76   WISC      5        2      48 lead < 40
5 105 11.250   M     62     72    100     84   WISC     11       NA      51 lead < 40
6 106  6.500   M     72     95     97     96   WISC      6        0      49 lead < 40

Example 1

addmargins(table(lead$sex))

  F   M Sum 
 48  76 124 
prop.table(table(lead$sex))

     F      M 
0.3871 0.6129 

Example 2

addmargins(table(lead$group))

 lead < 40 lead >= 40        Sum 
        78         46        124 
prop.table(table(lead$group))

 lead < 40 lead >= 40 
     0.629      0.371 

Example 3

summary(lead$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   3.75    6.17    8.38    8.94   12.00   15.90 
sd(lead$age)
[1] 3.537
range(lead$age)
[1]  3.75 15.92

library(plyr)
summarise(.data = lead, Obs = length(age), Mean = mean(age), StdDev = sd(age), Min = min(age), Max = max(age))
  Obs  Mean StdDev  Min   Max
1 124 8.935  3.537 3.75 15.92

Example 4

summary(lead$fulliq)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   46.0    81.5    91.0    91.1    99.0   141.0 
sd(lead$fulliq)
[1] 14.4
range(lead$fulliq)
[1]  46 141

summarise(.data = lead, Obs = length(fulliq), Mean = mean(fulliq), StdDev = sd(fulliq), Min = min(fulliq), Max = max(fulliq))
  Obs  Mean StdDev Min Max
1 124 91.08   14.4  46 141

Example 5

hist(lead$age)

plot of chunk unnamed-chunk-7


summary(lead$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   3.75    6.17    8.38    8.94   12.00   15.90 
head(sort(lead$age))
[1] 3.750 3.750 3.750 3.750 3.833 3.917
tail(sort(lead$age))
[1] 15.00 15.25 15.25 15.42 15.42 15.92
quantile(lead$age, probs = c(1, 5, 10, 25, 50, 75, 90, 95, 99) / 100)
    1%     5%    10%    25%    50%    75%    90%    95%    99% 
 3.750  3.929  4.333  6.167  8.375 12.021 14.000 15.000 15.417 

Example 6

boxplot(fulliq ~ group, lead)

plot of chunk unnamed-chunk-8

Example 7

library(doBy)
summaryBy(fulliq ~ group, lead, FUN = summary)
       group fulliq.Min. fulliq.1st Qu. fulliq.Median fulliq.Mean fulliq.3rd Qu. fulliq.Max.
1  lead < 40          50             85            94        92.9          101.0         141
2 lead >= 40          46             80            88        88.0           93.8         114
summaryBy(fulliq ~ group, lead, FUN = sd)
       group fulliq.sd
1  lead < 40     15.34
2 lead >= 40     12.21

with(lead, tapply(fulliq, group, summary))
$`lead < 40`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   50.0    85.0    94.0    92.9   101.0   141.0 

$`lead >= 40`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   46.0    80.0    88.0    88.0    93.8   114.0 

with(lead, tapply(fulliq, group, sd))
 lead < 40 lead >= 40 
     15.34      12.21 

library(e1071)
with(lead, tapply(fulliq, group, skewness, type = 1)) # type = 1 for Stata value, type = 2 for SAS value
 lead < 40 lead >= 40 
    0.2202    -0.4309 
with(lead, tapply(fulliq, group, kurtosis, type = 1)) # same
 lead < 40 lead >= 40 
    0.9375     1.8065 

Example 8

tab.8 <- xtabs(~ sex +group, lead)
tab.8
   group
sex lead < 40 lead >= 40
  F        32         16
  M        46         30

library(gmodels)
CrossTable(tab.8, prop.c = F, prop.t = F, prop.chisq = F)


   Cell Contents
|-------------------------|
|                       N |
|           N / Row Total |
|-------------------------|


Total Observations in Table:  124 


             | group 
         sex |  lead < 40 | lead >= 40 |  Row Total | 
-------------|------------|------------|------------|
           F |         32 |         16 |         48 | 
             |      0.667 |      0.333 |      0.387 | 
-------------|------------|------------|------------|
           M |         46 |         30 |         76 | 
             |      0.605 |      0.395 |      0.613 | 
-------------|------------|------------|------------|
Column Total |         78 |         46 |        124 | 
-------------|------------|------------|------------|


Example 9

plot(fulliq ~ age, lead)

plot of chunk unnamed-chunk-11


library(ggplot2)
ggplot(lead, aes(y = fulliq, x = age)) + geom_point() + theme_bw()

plot of chunk unnamed-chunk-11