2.5.1 From the RStudio main menu, select ‘File’ > ‘New Project’ > ‘New Directory’ > ‘Empty Project’. Name the new directory ph251d-homework. Use R to display the file path to the working directory.
# Print the absolute path of the current working directory.
getwd()
## [1] "/Users/Michelle/Desktop/Berkeley MPH/Fall 2016/R for Epi's/R Program Files"
2.5.2 Recreate Table 2.26 using any combination of the matrix, cbind, rbind, dimnames, or names functions.
# Build the 2x2 table (outcome in rows, exposure in columns); the counts are
# filled column by column and the labels are supplied at construction time.
dat <- matrix(
  c(139, 443, 230, 50),
  nrow = 2,
  ncol = 2,
  dimnames = list(c("Dead", "Alive"), c("Smoker", "Non-Smoker"))
)

# Number of subjects in each exposure group.
coltot <- colSums(dat)

# Risk of death per exposure group, then measures relative to the second
# column (Non-Smoker), which serves as the reference group.
risks <- dat["Dead", ] / coltot
risk.ratio <- risks / risks[2]
odds <- risks / (1 - risks)
odds.ratio <- odds / odds[2]

# Show the table.
dat
## Smoker Non-Smoker
## Dead 139 230
## Alive 443 50
2.5.3 Starting with the 2x2 matrix object we created for Table 2.26, use any combination of the apply, cbind, rbind, names, and dimnames functions to recreate Table 2.27.
# Approach 1: append the margins by hand.
rtots <- rowSums(dat) # total for each outcome row
dat_tots <- cbind(dat, Total = rtots)
ctots <- colSums(dat_tots) # total for each column, including "Total"
dat_tots <- rbind(dat_tots, Total = ctots)
dat_tots
## Smoker Non-Smoker Total
## Dead 139 230 369
## Alive 443 50 493
## Total 582 280 862
# Approach 2: let addmargins() append the row and column sums.
addmargins(dat)
## Smoker Non-Smoker Sum
## Dead 139 230 369
## Alive 443 50 493
## Sum 582 280 862
2.5.4 Using the 2×2 data from Table 2.26, use the sweep and apply functions to calculate row marginal, column marginal, and joint distributions (i.e., three tables).
# Row marginal distribution: every cell divided by its row total, so each
# row sums to 1.
tab.rowdist <- sweep(dat, 1, apply(dat, 1, sum), "/"); tab.rowdist
## Smoker Non-Smoker
## Dead 0.3766938 0.6233062
## Alive 0.8985801 0.1014199
# Column marginal distribution: every cell divided by its column total, so
# each column sums to 1. The totals are computed from `dat` itself; `ctots`
# has three entries (it includes the "Total" column of dat_tots), which makes
# sweep() warn and recycle the wrong divisors over the 2x2 table.
tab.coldist <- sweep(dat, 2, apply(dat, 2, sum), "/"); tab.coldist
## Smoker Non-Smoker
## Dead 0.2388316 0.8214286
## Alive 0.7611684 0.1785714
# Joint distribution: every cell divided by the grand total, so the whole
# table sums to 1.
tab.jointdist <- dat / sum(dat); tab.jointdist
## Smoker Non-Smoker
## Dead 0.1612529 0.26682135
## Alive 0.5139211 0.05800464
2.5.5 Using the data from the previous problems, recreate Table 2.28 and interpret the results.
#dat <- matrix(c(30, 174, 21, 184), 2, 2)
#rownames(dat) <- c('Deaths', 'Survivors')
#colnames(dat) <- c('Tolbutamide', 'Placebo')
#coltot <- apply(dat, 2, sum) #column totals
dat
## Smoker Non-Smoker
## Dead 139 230
## Alive 443 50
# Number of subjects in each exposure group (column totals).
c_tots <- apply(dat, 2, sum)
c_tots
## Smoker Non-Smoker
## 582 280
# Risk of death per exposure group. The comma is required: dat["Dead", ]
# selects the whole "Dead" row, whereas dat["Dead"] indexes the matrix as a
# plain vector by element name and yields NA.
risks <- dat["Dead", ] / c_tots
risks
## Smoker Non-Smoker
## 0.2388316 0.8214286
# Relative measures use the second column (Non-Smoker) as the reference.
risk.ratio <- risks / risks[2]
odds <- risks / (1 - risks)
odds.ratio <- odds / odds[2]
rbind(risks, risk.ratio, odds, odds.ratio) # display results
## Smoker Non-Smoker
## risks 0.23883162 0.8214286
## risk.ratio 0.29075153 1.0000000
## odds 0.31376975 4.6000000
## odds.ratio 0.06821082 1.0000000
2.5.6 Read in the Whickham, England data using the R code below. Stratified by age category, calculate the risk of death comparing smokers to nonsmokers. Show your results. What is your interpretation?
# Load the Whickham data: a comma-separated file with a header row.
whickdat <- read.table(
  "http://www.medepi.net/data/whickham.txt",
  header = TRUE,
  sep = ","
)
str(whickdat)
## 'data.frame': 1314 obs. of 3 variables:
## $ Vital.Status: Factor w/ 2 levels "Alive","Dead": 2 2 1 1 1 1 1 1 1 1 ...
## $ Smoking : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : Factor w/ 7 levels "18-24","25-34",..: 1 1 1 1 1 1 1 1 1 1 ...
# Three-way contingency table: vital status by age group, with one layer per
# smoking status.
xtab <- xtabs(~ Vital.Status + Age + Smoking, data = whickdat)
xtab
## , , Smoking = No
##
## Age
## Vital.Status 18-24 25-34 35-44 45-54 55-64 65-74 75+
## Alive 61 152 114 66 81 28 0
## Dead 1 5 7 12 40 101 64
##
## , , Smoking = Yes
##
## Age
## Vital.Status 18-24 25-34 35-44 45-54 55-64 65-74 75+
## Alive 53 121 95 103 64 7 0
## Dead 2 3 14 27 51 29 13
# Totals by vital status, summed over every age group and both smoking
# strata of the Whickham table (xtab), not the 2x2 `dat` from the earlier
# exercises.
rowtotwhick <- apply(xtab, 1, sum)
rowtotwhick
## Alive Dead
## 945 369
#rtotswhickx <- apply(xtab, 1, sum) #y axis
#whick_totsx <- cbind(xtab, Total = rtotswhickx)
#ctotswhickx <- apply(whick_totsx, 2, sum); #x axis
#whick_totsx <- rbind(whick_totsx, Total = ctotswhickx); whick_totsx
# Cross-tabulate vital status (rows) against age group (columns).
whickdat.table <- table(whickdat[["Vital.Status"]], whickdat[["Age"]])
whickdat.table
##
## 18-24 25-34 35-44 45-54 55-64 65-74 75+
## Alive 114 273 209 169 145 35 0
## Dead 3 8 21 39 91 130 77
# Append a "Total" column holding the row sums, then a "Total" row holding
# the column sums of the widened table.
rtotswhick <- apply(whickdat.table, MARGIN = 1, FUN = sum)
whick_tots <- cbind(whickdat.table, Total = rtotswhick)
ctotswhick <- apply(whick_tots, MARGIN = 2, FUN = sum)
whick_tots <- rbind(whick_tots, Total = ctotswhick)
whick_tots
## 18-24 25-34 35-44 45-54 55-64 65-74 75+ Total
## Alive 114 273 209 169 145 35 0 945
## Dead 3 8 21 39 91 130 77 369
## Total 117 281 230 208 236 165 77 1314
#whickdat.array <- array (whick_tots, c(2,7,2))
#dimnames(whickdat.array) <- list(Outcome = c('Dead', 'ALive'),
# Age = c(),
# 'Smoker' = c('Yes', 'No')); whickdat.array
3.5.1
Use the read.table function to read in the syphilis data. Evaluate the structure of the data frame. Do not attach the data frame (yet). Create a 3-dimensional array using both the table or xtabs function. Now attach the data frame using the attach function. Create the same 3-dimensional array using both the table or xtabs function.
# Load the individual-level syphilis data: comma-separated with a header row.
std89c <- read.table(
  "http://www.medepi.net/data/syphilis89c.txt",
  header = TRUE,
  sep = ","
)
# Three-way contingency table: race by age group, with one layer per sex.
xtabstd <- xtabs(~ Race + Age + Sex, data = std89c)
xtabstd
## , , Sex = Female
##
## Age
## Race <=14 >55 15-19 20-24 25-29 30-34 35-44 45-54
## Black 165 92 2257 4503 3590 2628 1505 392
## Other 11 15 158 307 283 167 149 40
## White 14 24 253 475 433 316 243 55
##
## , , Sex = Male
##
## Age
## Race <=14 >55 15-19 20-24 25-29 30-34 35-44 45-54
## Black 31 823 1412 4059 4121 4453 3858 1619
## Other 7 108 210 654 633 520 492 202
## White 2 216 88 407 550 564 654 323
# Show the structure of the data frame: dimensions, column names, column
# types, and the first few values of each column.
str(std89c)
## 'data.frame': 44081 obs. of 3 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Race: Factor w/ 3 levels "Black","Other",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Age : Factor w/ 8 levels "<=14",">55","15-19",..: 1 1 3 3 3 3 3 3 3 3 ...
# Put std89c on the search path so its columns (Sex, Race, Age) can be
# referenced by bare name, as the exercise requests.
attach(std89c)
3.5.2 Use the apply function to get marginal totals for the syphilis 3-dimensional array.
#std.array <- array(std89c, c(3, 7, 2))
#dimnames(std.array) <- list (Race = c('Black', 'White', 'Other'),
# 'Age group' = c('18-24', '25-34', '35-44', '45-54', '55-64', '65-74', '75+'),
# Sex = c('Male', 'Female')); std.array
#std.array
#rtotstd <- apply(std.xtab, 1, sum) #y axis
#std_tots <- cbind(std.xtab, Total = rtotstd)
#ctotstd <- apply(std_tots, 2, sum); #x axis
#std_tots <- rbind(std_tots, Total = ctotstd); std_tots
3.5.3 Use the sweep and apply functions to get marginal and joint distributions for a 3-D array.
3.5.4 Review and read in the group-level, tabular data set of primary and secondary syphilis cases in the United States in 1989. Use the rep function on the data frame fields to recreate the individual-level data frame with over 40,000 observations.
# Load the group-level (tabular) syphilis data: comma-separated with a header
# row.
std89b <- read.table(
  "http://www.medepi.net/data/syphilis89b.txt",
  header = TRUE,
  sep = ","
)
3.5.5 Working with population estimates can be challenging because of the amount of data manipulation. Study the 2000 population estimates for California Counties: [http://www.medepi.net/data/calpop/CalCounties2000.txt]. Now, study and implement this R code. For each expression or group of expressions, explain in words what the R code is doing. Be sure to display intermediate objects to understand each step.