2.2 The 6 types of objects in R are the following:
-vector: a collection of like elements without dimension
-matrix: a 2-D representation of a vector
-array: 3-D or more representation of a vector
-list: a collection of bins each containing any kind of R object
-dataframe: a list in tabular form where each bin contains a data vector of the same length
-functions
2.3 There are 3 main ways to index objects: by position, by name, and by logical vector. Vectors can also be indexed by unique values and duplicated values.
2.4 Any R component(s) that can be indexed, can be replaced.
2.5 Code
# Vital status counts for smokers and non-smokers, stored as named vectors
# so that cbind() can use the element names as row labels.
status_labels <- c("dead", "alive")
smoking_y <- setNames(c(139, 443), status_labels)
smoking_n <- setNames(c(230, 502), status_labels)
rm(status_labels)

# Bind the two vectors side by side; column labels come from the object names.
cbind(smoking_y, smoking_n)
##       smoking_y smoking_n
## dead        139       230
## alive       443       502
2.6 Code
# 2 x 2 table of vital status (rows) by smoking status (columns).
# matrix() fills column by column by default, so the data vector lists the
# "Yes" column (139, 443) first and the "No" column (230, 502) second.
dat <- matrix(
  c(139, 443, 230, 502),
  nrow = 2, ncol = 2,
  dimnames = list(c("Dead", "Alive"), c("Yes", "No"))
)
dat
##       Yes  No
## Dead  139 230
## Alive 443 502

# Append the row totals as an extra column, then the column totals (including
# the grand total) as an extra row.
rowtot <- rowSums(dat)
dat2 <- cbind(dat, rowtot)
coltot <- colSums(dat2)
rbind(dat2, coltot)
##        Yes  No rowtot
## Dead   139 230    369
## Alive  443 502    945
## coltot 582 732   1314
2.7 Code
# Row distribution: divide every cell by its row total, so each row sums to 1.
sweep(dat, MARGIN = 1, STATS = rowSums(dat), FUN = "/")
##          Yes     No
## Dead  0.3767 0.6233
## Alive 0.4688 0.5312

# Column distribution: divide every cell by its column total, so each column
# sums to 1.
sweep(dat, MARGIN = 2, STATS = colSums(dat), FUN = "/")
##          Yes     No
## Dead  0.2388 0.3142
## Alive 0.7612 0.6858
2.8 Code
# Risk of death in each smoking group = deaths / group total.
coltot2 <- colSums(dat)
risks <- dat["Dead", ] / coltot2

# Ratios use the second column as the reference group.
risk.ratio <- risks / risks[2]

# Odds of death and the corresponding odds ratio (same reference group).
odds <- risks / (1 - risks)
odds.ratio <- odds / odds[2]

dat # display results
##       Yes  No
## Dead  139 230
## Alive 443 502
rbind(risks, risk.ratio, odds, odds.ratio)
##               Yes     No
## risks      0.2388 0.3142
## risk.ratio 0.7601 1.0000
## odds       0.3138 0.4582
## odds.ratio 0.6848 1.0000
2.9 Risk calculations
Risk of death among non-smokers
Age
Vital.Status 18-24 25-34 35-44 45-54 55-64 65-74 75+
Alive 0.98 0.97 0.94 0.85 0.67 0.22 0.00
Dead 0.02 0.03 0.06 0.15 0.33 0.78 1.00
Risk of death among smokers
Age
Vital.Status 18-24 25-34 35-44 45-54 55-64 65-74 75+
Alive 0.96 0.98 0.87 0.79 0.56 0.19 0.00
Dead 0.04 0.02 0.13 0.21 0.44 0.81 1.00
Interpretation: Within each age group, the risk of death is about the same for smokers and non-smokers (the two smoking-status strata). In both strata, the risk of death increases with age.
2.10 Code
# Read the individual-level records (comma-separated, with a header row).
dat <- read.table(
  file = "http://www.medepi.net/data/syphilis89c.txt",
  header = TRUE,
  sep = ","
)

# Version 1: columns passed as dat$... expressions. table() cannot derive
# dimension names from these, so the printed array has unlabeled dimensions.
table(dat$Race, dat$Age, dat$Sex)
## , , = Female
##
##
##         <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black  165   92  2257  4503  3590  2628  1505   392
##   Other   11   15   158   307   283   167   149    40
##   White   14   24   253   475   433   316   243    55
##
## , , = Male
##
##
##         <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black   31  823  1412  4059  4121  4453  3858  1619
##   Other    7  108   210   654   633   520   492   202
##   White    2  216    88   407   550   564   654   323

# Version 2: put the columns of dat on the search path so they can be used by
# bare name; the dimensions are then labeled Race, Age and Sex.
attach(dat)
table(Race, Age, Sex)
## , , Sex = Female
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black  165   92  2257  4503  3590  2628  1505   392
##   Other   11   15   158   307   283   167   149    40
##   White   14   24   253   475   433   316   243    55
##
## , , Sex = Male
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black   31  823  1412  4059  4121  4453  3858  1619
##   Other    7  108   210   654   633   520   492   202
##   White    2  216    88   407   550   564   654   323

# Version 3: formula interface; same labeled result as version 2.
xtabs(~ Race + Age + Sex)
## , , Sex = Female
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black  165   92  2257  4503  3590  2628  1505   392
##   Other   11   15   158   307   283   167   149    40
##   White   14   24   253   475   433   316   243    55
##
## , , Sex = Male
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black   31  823  1412  4059  4121  4453  3858  1619
##   Other    7  108   210   654   633   520   492   202
##   White    2  216    88   407   550   564   654   323
2.11 Marginal totals code
# Three-way table of counts. The data frame is passed explicitly so the call
# does not depend on the columns of dat being on the search path.
# Dimension order: Race is dim 1 (rows), Age is dim 2 (columns),
# Sex is dim 3 (strata).
sdat <- xtabs(~ Race + Age + Sex, data = dat)

# 3-D "margin": keeping all three dimensions leaves nothing to sum over, so
# this returns the original cell counts.
apply(sdat, c(1, 2, 3), sum) #race is dim1(rows), age is dim2(columns), sex is dim3(strata)
## , , Sex = Female
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black  165   92  2257  4503  3590  2628  1505   392
##   Other   11   15   158   307   283   167   149    40
##   White   14   24   253   475   433   316   243    55
##
## , , Sex = Male
##
##        Age
## Race    <=14  >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black   31  823  1412  4059  4121  4453  3858  1619
##   Other    7  108   210   654   633   520   492   202
##   White    2  216    88   407   550   564   654   323

# NOTE(review): the object names below do not follow an a = Age, r = Race,
# s = Sex letter convention. Names are kept unchanged; the comment on each
# assignment states which dimensions the object actually holds.

#2-d
# Race x Age (summed over Sex)
tab.ar <- apply(sdat, c(1, 2), sum)
tab.ar
##        Age
## Race    <=14 >55 15-19 20-24 25-29 30-34 35-44 45-54
##   Black  196 915  3669  8562  7711  7081  5363  2011
##   Other   18 123   368   961   916   687   641   242
##   White   16 240   341   882   983   880   897   378
# Race x Sex (summed over Age)
tab.as <- apply(sdat, c(1, 3), sum)
tab.as
##        Sex
## Race    Female  Male
##   Black  15132 20376
##   Other   1130  2826
##   White   1813  2804
# Age x Sex (summed over Race)
tab.rs <- apply(sdat, c(2, 3), sum)
tab.rs
##        Sex
## Age     Female Male
##   <=14     190   40
##   >55      131 1147
##   15-19   2668 1710
##   20-24   5285 5120
##   25-29   4306 5304
##   30-34   3111 5537
##   35-44   1897 5004
##   45-54    487 2144

#1-d
# Race totals
tab.a <- apply(sdat, 1, sum)
tab.a
## Black Other White
## 35508  3956  4617
# Age totals
tab.r <- apply(sdat, 2, sum)
tab.r
##  <=14   >55 15-19 20-24 25-29 30-34 35-44 45-54
##   230  1278  4378 10405  9610  8648  6901  2631
# Sex totals
tab.s <- apply(sdat, 3, sum)
tab.s
## Female   Male
##  18075  26006
2.12 Code
# Re-tabulate the counts with Age as dim 1, Race as dim 2 and Sex as dim 3.
# sdat is already a table of counts, so it is first flattened to long format
# (one row per Race/Age/Sex cell, with the cell count in Freq). Freq must be
# on the left-hand side of the formula: with a one-sided formula xtabs()
# counts rows, and because the long format has exactly one row per cell every
# count would be 1 and all distributions below would come out uniform
# (1/3, 1/8 and 1/24) instead of reflecting the data.
tab.ars <- xtabs(Freq ~ Age + Race + Sex, data = as.data.frame(sdat))

# Row distribution: within each Age x Sex combination, the share of each Race.
# Sanity check: females aged <=14 -> Black = 165 / (165 + 11 + 14) ~ 0.868.
rowt <- apply(tab.ars, c(1, 3), sum) # row distrib
rowd <- sweep(tab.ars, c(1, 3), rowt, "/")
rowd
apply(rowd, c(1, 3), sum) # confirm: every Age x Sex entry equals 1

# Column distribution: within each Race x Sex combination, the share of each
# Age group.
# Sanity check: Black females aged <=14 -> 165 / 15132 ~ 0.0109.
colt <- apply(tab.ars, c(2, 3), sum) # col distrib
cold <- sweep(tab.ars, c(2, 3), colt, "/")
cold
apply(cold, c(2, 3), sum) # confirm: every Race x Sex entry equals 1

# Joint distribution: within each Sex stratum, the share of each Age x Race
# cell.
# Sanity check: Black females aged <=14 -> 165 / 18075 ~ 0.0091.
jtt <- apply(tab.ars, 3, sum) # joint distrib
jtd <- sweep(tab.ars, 3, jtt, "/")
jtd
apply(jtd, 3, sum) # confirm: each Sex stratum sums to 1

# Collect the three distributions (row, column, joint) in a single list.
distr <- list(rowd, cold, jtd)
distr
## Printed output is not reproduced here; run the code above to display the
## row, column and joint distribution tables.
2.13 Code
# Read the grouped data: one row per Sex/Race/Age combination, with the number
# of cases in Freq.
std <- read.table(
  file = "http://www.medepi.net/data/syphilis89b.txt",
  header = TRUE,
  sep = ","
)

# Expand to individual-level records by repeating each row's value Freq times.
sex <- rep(std$Sex, times = std$Freq)
race <- rep(std$Race, times = std$Freq)
age <- rep(std$Age, times = std$Freq)

# One row per case.
std.df <- data.frame(sex = sex, race = race, age = age)
2.14
Read county names: scan indicates how many unique values are within the field.
Read county pop estimates: read.csv reads the text file into a data frame without displaying output.
Replace county number with name: the for-loop will repeat for the number of unique county numbers within the dataset