PH 251D Homework 3

wvincent — Oct 21, 2013, 7:50 PM

#Wilson Vincent
#PH 251D: Applied Epidemiology Using R
#Homework 3

#3.1 - Creating a data frame 
Status <- rep(c("Dead", "Survived"), 4)
Treatment <- rep(c("Tolbutamide", "Placebo"), c(2, 2), 2)
Agegrp <- rep(c("<55", "55+"), c(4, 4))
Freq <- c(8, 98, 5, 115, 22, 76,  16, 69)
dat <- data.frame(Status, Treatment, Agegrp, Freq); dat
    Status   Treatment Agegrp Freq
1     Dead Tolbutamide    <55    8
2 Survived     Placebo    <55   98
3     Dead Tolbutamide    <55    5
4 Survived     Placebo    <55  115
5     Dead Tolbutamide    55+   22
6 Survived     Placebo    55+   76
7     Dead Tolbutamide    55+   16
8 Survived     Placebo    55+   69

#3.2 - Collecting classmate data and using these to create a data frame
#Sanjiv Baxi UCSF sanjiv.baxi@bekeley.edu sanjiv.baxi@ucsf.edu
#Anobel Odisho UCSF anobel.odisho@berkeley.edu OdishoA@urology.ucsf.edu
#Brett Smith UCB brett.james.smith@gmail.com brett.smith@berkeley.edu
First.nm <- c("Sanjiv", "Anobel", "Brett")
Last.nm <- c("Baxi", "Odisho", "Smith")
Affil <- rep(c("UCSF", "Cal"), c(2, 1))
Email1 <- c("private1@bekeley.edu", "private1@berkeley.edu", "private1@gmail.com")
Email2 <- c("private2@ucsf.edu", "private2@ucsf.edu", "private2@berkeley.edu")
Class <- data.frame(First.nm, Last.nm, Affil, Email1, Email2); Class
  First.nm Last.nm Affil                Email1                Email2
1   Sanjiv    Baxi  UCSF  private1@bekeley.edu     private2@ucsf.edu
2   Anobel  Odisho  UCSF private1@berkeley.edu     private2@ucsf.edu
3    Brett   Smith   Cal    private1@gmail.com private2@berkeley.edu
First.nm; Last.nm; Affil; Email1; Email2; Class
  First.nm Last.nm Affil                Email1                Email2
1   Sanjiv    Baxi  UCSF  private1@bekeley.edu     private2@ucsf.edu
2   Anobel  Odisho  UCSF private1@berkeley.edu     private2@ucsf.edu
3    Brett   Smith   Cal    private1@gmail.com private2@berkeley.edu

#3.3 - Reading data into a data frame and graphing
aids <- read.table("http://www.medepi.net/data/aids.txt", header=TRUE, 
                   sep="", na.strings=".")
head(aids)
  cases year
1    NA 1980
2    NA 1981
3    NA 1982
4    NA 1983
5  4445 1984
6  8249 1985
plot(aids$year, aids$cases, type = "l", xlab = "Year", lwd = 2,
     ylab = "Cases", main = "Reported AIDS Cases in United States, 1980--2003")

plot of chunk unnamed-chunk-1



#3.4 - Reading data into a data frame and graphing
meas <- read.table("http://www.medepi.net/data/measles.txt",
                   header=TRUE, sep="")
head(meas)
  year  cases
1 1950 319000
2 1951 530000
3 1952 683000
4 1953 449000
5 1954 683000
6 1955 555000
plot(meas$year, meas$cases, type = "l", xlab = "Year", lwd = 2,
     ylab = "Cases",
     main = "Reported Measles Cases in United States, 1980--2003")

plot of chunk unnamed-chunk-1

plot(meas$year, meas$cases, type = "l", xlab = "Year", lwd = 2,
     log = "y", ylab = "Cases",
     main = "Reported Measles Cases in United States, 1980--2003")

plot of chunk unnamed-chunk-1




#3.5 - Reading data into a data frame and plotting
aids <- read.table("http://www.medepi.net/data/aids.txt", sep="",
                   header = TRUE, na.strings=".")
hepb <- read.table("http://www.medepi.net/data/hepb.txt", sep="",
                   header = TRUE)
matplot(hepb$year, cbind(hepb$cases,aids$cases),
        type = "l", lwd = 2, xlab = "Year", ylab = "Cases",
        main = "Reported cases of Hepatitis B and AIDS,
        United States, 1980-2003")
legend(1980, 100000, legend = c("Hepatitis B", "AIDS"),
       lwd = 2, lty = 1:2, col = 1:2)

plot of chunk unnamed-chunk-1




#3.6 - Manipulating data 
#3.6a
edat <- read.table("http://www.medepi.net/data/evans.txt", header = TRUE, sep="")
str(edat)
'data.frame':   609 obs. of  12 variables:
 $ id : int  21 31 51 71 74 91 111 131 141 191 ...
 $ chd: int  0 0 1 0 0 0 1 0 0 0 ...
 $ cat: int  0 0 1 1 0 0 0 0 0 0 ...
 $ age: int  56 43 56 64 49 46 52 63 42 55 ...
 $ chl: int  270 159 201 179 243 252 179 217 176 250 ...
 $ smk: int  0 1 1 1 1 1 1 0 1 0 ...
 $ ecg: int  0 0 1 0 0 0 1 0 0 1 ...
 $ dbp: int  80 74 112 100 82 88 80 92 76 114 ...
 $ sbp: int  138 128 164 200 145 142 128 135 114 182 ...
 $ hpt: int  0 0 1 1 0 0 0 0 0 1 ...
 $ ch : int  0 0 1 1 0 0 0 0 0 0 ...
 $ cc : int  0 0 201 179 0 0 0 0 0 0 ...
table(edat$chd)

  0   1 
538  71 
edat$chd2 <- factor(edat$chd, levels = 0:1, labels = c("No", "Yes"))
table(edat$chd2)

 No Yes 
538  71 
table(edat$cat)

  0   1 
487 122 
edat$cat2 <- factor(edat$cat, levels = 0:1, labels = c("Normal", "High"))
table(edat$cat2)

Normal   High 
   487    122 
table(edat$smk)

  0   1 
222 387 
edat$smk2 <- factor(edat$smk, levels = 0:1, labels = c("Never", "Ever"))
table(edat$smk2)

Never  Ever 
  222   387 
table(edat$ecg)

  0   1 
443 166 
edat$ecg2 <- factor(edat$ecg, levels = 0:1, labels = c("Normal", "Abnormal"))
table(edat$ecg2)

  Normal Abnormal 
     443      166 
table(edat$hpt)

  0   1 
354 255 
edat$hpt2 <- factor(edat$hpt, levels = 0:1, labels = c("No", "Yes"))
table(edat$hpt2)

 No Yes 
354 255 
head(edat)
  id chd cat age chl smk ecg dbp sbp hpt ch  cc chd2   cat2  smk2     ecg2
1 21   0   0  56 270   0   0  80 138   0  0   0   No Normal Never   Normal
2 31   0   0  43 159   1   0  74 128   0  0   0   No Normal  Ever   Normal
3 51   1   1  56 201   1   1 112 164   1  1 201  Yes   High  Ever Abnormal
4 71   0   1  64 179   1   0 100 200   1  1 179   No   High  Ever   Normal
5 74   0   0  49 243   1   0  82 145   0  0   0   No Normal  Ever   Normal
6 91   0   0  46 252   1   0  88 142   0  0   0   No Normal  Ever   Normal
  hpt2
1   No
2   No
3  Yes
4  Yes
5   No
6   No

#3.6b
quantile(edat$age)
  0%  25%  50%  75% 100% 
  40   46   52   60   76 

edat$age4 <- cut(edat$age, quantile(edat$age), right = FALSE, include.lowest = TRUE)
table(edat$age4)

[40,46) [46,52) [52,60) [60,76] 
    134     158     158     159 

#3.6c
hptnew <- rep(NA, nrow(edat))
normal <- edat$sbp<120 & edat$dbp<80
hptnew[normal] <- 1
prehyp <- (edat$sbp>=120 & edat$sbp<140) | (edat$dbp>=80 & edat$dbp<90)
hptnew[prehyp] <- 2
stage1 <- (edat$sbp>=140 & edat$sbp<160) | (edat$dbp>=90 & edat$dbp<100)
hptnew[stage1] <- 3
stage2 <- edat$sbp>=160 | edat$dbp>=100
hptnew[stage2] <- 4
edat$hpt4 <- factor(hptnew, levels=1:4, labels=c("Normal", "PreHTN", "HTN.Stage1", "HTN.Stage2"))
table(edat$hpt4)

    Normal     PreHTN HTN.Stage1 HTN.Stage2 
        56        165        177        211 

#3.6d
table("Old HTN"=edat$hpt2, "New HTN"=edat$hpt4)
       New HTN
Old HTN Normal PreHTN HTN.Stage1 HTN.Stage2
    No      56    165        133          0
    Yes      0      0         44        211


#3.7 - Exporting data as an ASCII file
wdat <- read.table("http://www.medepi.net/data/wnv/wnv2004raw.txt", header=TRUE, 
                   sep=",", as.is=TRUE, na.strings=c(".","Unknown"))
str(wdat)
'data.frame':   779 obs. of  8 variables:
 $ id         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ county     : chr  "San Bernardino" "San Bernardino" "San Bernardino" "San Bernardino" ...
 $ age        : int  40 64 19 12 12 17 61 74 71 26 ...
 $ sex        : chr  "F" "F" "M" "M" ...
 $ syndrome   : chr  "WNF" "WNF" "WNF" "WNF" ...
 $ date.onset : chr  "05/19/2004" "05/22/2004" "05/22/2004" "05/16/2004" ...
 $ date.tested: chr  "06/02/2004" "06/16/2004" "06/16/2004" "06/16/2004" ...
 $ death      : chr  "No" "No" "No" "No" ...
wdat$date.onset2 <- as.Date(wdat$date.onset, format="%m/%d/%Y")
wdat$date.tested2 <- as.Date(wdat$date.tested, format="%m/%d/%Y")
write.table(wdat, "C:/Users/wvincent/Desktop", sep=",", row.names=FALSE)
Warning: cannot open file 'C:/Users/wvincent/Desktop': Permission denied
Error: cannot open the connection
#I was on my office computer. I do not have administrative permission to complete this function on the computer than I am using.
#I used an older version of R Studio on my Macbook, which cannot download the new version, and it worked!

#3.8 - Graphing, manipulating, and analyzing data

#3.8a
odat <- read.table("http://www.medepi.net/data/oswego.txt", sep = "", header = TRUE, na.strings = ".")
str(odat)
'data.frame':   75 obs. of  21 variables:
 $ id                 : int  2 3 4 6 7 8 9 10 14 16 ...
 $ age                : int  52 65 59 63 70 40 15 33 10 32 ...
 $ sex                : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
 $ meal.time          : Factor w/ 6 levels "10:00 PM","11:00 AM",..: 6 3 3 5 5 5 1 4 5 NA ...
 $ ill                : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
 $ onset.date         : Factor w/ 2 levels "4/18","4/19": 2 2 2 1 1 2 2 1 2 2 ...
 $ onset.time         : Factor w/ 17 levels "1:00 AM","10:00 PM",..: 9 9 9 5 5 10 1 6 10 4 ...
 $ baked.ham          : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
 $ spinach            : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
 $ mashed.potato      : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
 $ cabbage.salad      : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
 $ jello              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
 $ rolls              : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
 $ brown.bread        : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
 $ milk               : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
 $ coffee             : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
 $ water              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
 $ cakes              : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
 $ vanilla.ice.cream  : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
 $ chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
 $ fruit.salad        : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
head(odat)
  id age sex meal.time ill onset.date onset.time baked.ham spinach
1  2  52   F   8:00 PM   Y       4/19   12:30 AM         Y       Y
2  3  65   M   6:30 PM   Y       4/19   12:30 AM         Y       Y
3  4  59   F   6:30 PM   Y       4/19   12:30 AM         Y       Y
4  6  63   F   7:30 PM   Y       4/18   10:30 PM         Y       Y
5  7  70   M   7:30 PM   Y       4/18   10:30 PM         Y       Y
6  8  40   F   7:30 PM   Y       4/19    2:00 AM         N       N
  mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
1             Y             N     N     Y           N    N      Y     N
2             Y             Y     N     N           N    N      Y     N
3             N             N     N     N           N    N      Y     N
4             N             Y     Y     N           N    N      N     Y
5             Y             N     Y     Y           Y    N      Y     Y
6             N             N     N     N           N    N      N     N
  cakes vanilla.ice.cream chocolate.ice.cream fruit.salad
1     N                 Y                   N           N
2     N                 Y                   Y           N
3     Y                 Y                   Y           N
4     N                 Y                   N           N
5     N                 Y                   N           N
6     N                 Y                   Y           N
mdt <- paste("4/18/1940", odat$meal.time)
meal.dt <- strptime(mdt, "%m/%d/%Y %I:%M %p")
odt <- paste(paste(odat$onset.date,"/1940",sep = ""), odat$onset.time)
onset.dt <- strptime(odt, "%m/%d/%Y %I:%M %p")
hist(onset.dt, breaks = 30, freq = TRUE)

plot of chunk unnamed-chunk-1




#3.8b
min.obs.pos <- which(onset.dt==min(onset.dt,na.rm=T))
min.obs.pos
[1] 33
max.obs.pos <- which(onset.dt==max(onset.dt,na.rm=T))
max.obs.pos
[1] 10
odat[min.obs.pos,]
   id age sex meal.time ill onset.date onset.time baked.ham spinach
33 52   8   M  11:00 AM   Y       4/18    3:00 PM         N       N
   mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
33             N             N     N     N           N    N      N     N
   cakes vanilla.ice.cream chocolate.ice.cream fruit.salad
33     N                 Y                   Y           N
odat[max.obs.pos,]
   id age sex meal.time ill onset.date onset.time baked.ham spinach
10 16  32   F      <NA>   Y       4/19   10:30 AM         Y       Y
   mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
10             N             N     N     Y           N    N      Y     N
   cakes vanilla.ice.cream chocolate.ice.cream fruit.salad
10     Y                 Y                   Y           N

#3.8c
onset.ct <- as.POSIXct(onset.dt)
odat2 <- odat[order(odat$ill, onset.ct), ]
head(odat2)
   id age sex meal.time ill onset.date onset.time baked.ham spinach
47  1  11   M      <NA>   N       <NA>       <NA>         N       N
48  5  13   F      <NA>   N       <NA>       <NA>         N       N
49 11  65   M      <NA>   N       <NA>       <NA>         Y       Y
50 12  38   F      <NA>   N       <NA>       <NA>         Y       Y
51 13  62   F      <NA>   N       <NA>       <NA>         Y       Y
52 15  25   M      <NA>   N       <NA>       <NA>         Y       Y
   mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
47             N             N     N     N           N    N      N     N
48             N             N     N     N           N    N      N     N
49             Y             N     Y     Y           N    N      N     N
50             Y             N     N     Y           N    N      Y     N
51             N             Y     Y     Y           Y    N      N     Y
52             Y             Y     Y     Y           Y    Y      Y     Y
   cakes vanilla.ice.cream chocolate.ice.cream fruit.salad
47     N                 N                   Y           N
48     N                 N                   Y           N
49     N                 Y                   N           N
50     N                 Y                   Y           Y
51     N                 N                   N           N
52     Y                 Y                   N           N

#3.8d
incub.dt <- onset.dt - meal.dt
library(MASS) #load MASS package
truehist(as.numeric(incub.dt), nbins = 7, prob = FALSE,
         col = "skyblue", xlab = "Incubation Period (hours)")

plot of chunk unnamed-chunk-1

mean(incub.dt, na.rm = TRUE)
Time difference of 4.295 hours
median(incub.dt, na.rm = TRUE)
Time difference of 4 hours
range(incub.dt, na.rm = TRUE)
Time differences in hours
[1] 3 7