Chapter 3 Exercise 3.1

Status<-rep(c("Dead", "Survived"), 4)
Treatment<-rep(c("Tolbutamide","Tolbutamide","Placebo","Placebo"),2)
Agegrp<-c(rep(("<55"),4),rep(("55+"),4))
Freq<-c(8,98,5,115,22,76,16,69)
dat<-data.frame(Status,Treatment,Agegrp,Freq)

Exercise 3.2

dat1 <- read.csv("C:/Users/Toshiba L505-ES5018/Desktop/PH251D/dat1.csv", header=FALSE)
## Warning: incomplete final line found by readTableHeader on
## 'C:/Users/Toshiba L505-ES5018/Desktop/PH251D/dat1.csv'
colnames(dat1)<-c("Last Name","First Name","Email","Today's Date")
dat1
##   Last Name First Name              Email Today's Date
## 1    Potter      Harry  hp@griffindor.edu    10/6/2014
## 2     Draco     Malfoy   dm@slytherin.edu    10/6/2014
## 3  Lovegood       Luna   ll@ravenclaw.edu    10/6/2014

Exercise 3.3

aids<- read.table("http://www.medepi.net/data/aids.txt",header=TRUE,sep="",na.strings=".")
plot(aids$year, aids$cases, type = "l", xlab = "Year", lwd = 2, ylab = "Number of AIDS Cases", main = "AIDS Cases from 1980 to 2003")

plot of chunk unnamed-chunk-3

Exercise 3.4

measles<-read.table("http://www.medepi.net/data/measles.txt",header=TRUE,sep="",na.strings=".")
#Arithmetic Scale Plot
plot(measles$year,measles$cases,type = "l", xlab = "Year", lwd = 2, ylab = "Number of Measles Cases", main = "Measles Cases from 1950 to 2001")

plot of chunk unnamed-chunk-4

#Semi-log Scale Plot
plot(measles$year,measles$cases,type = "l", lwd = 2, ylab = "Measles Cases (Log Scale)",log = "y", xlab = "Year", main = "Log Scale of Measles Cases from 1950 to 2001")

plot of chunk unnamed-chunk-4

Exercise 3.5

hbv<-read.table("http://www.medepi.net/data/hepb.txt",header=TRUE,sep="",na.strings=".")
matplot(hbv$year, cbind(hbv$cases,aids$cases), type = "l", lwd = 2, xlab = "Year", ylab = "Cases", main = "Reported cases of Hepatitis B and AIDS, United States, 1980-2003")
legend(1980, 100000, legend = c("Hepatitis B", "AIDS"),lwd = 2, lty = 1:2, col = 1:2)

plot of chunk unnamed-chunk-5

Exercise 3.6

#Part A
evans<-read.table("http://www.medepi.net/data/evans.txt",header=TRUE,sep="",na.strings=".")
evans$chd<-as.factor(evans$chd)
evans$cat<-as.factor(evans$cat)
evans$smk<-as.factor(evans$smk)
evans$ecg<-as.factor(evans$ecg)
evans$hpt<-as.factor(evans$hpt)
evans$ch<-as.factor(evans$ch)
#Part B
age.factor<-cut(evans$age,c(breaks=5*(8:16),right=FALSE))
age.factor<-as.factor(age.factor)
evans$age.factor<-age.factor
#Part C
hpt.factor<-rep(NA,nrow(evans))
hpt.normal<-evans$sbp<120&evans$dbp<80
hpt.factor[hpt.normal]<-1
hpt.pre<-(evans$sbp>=120&evans$sbp<140)|(evans$dbp>=80&evans$dbp<90)
hpt.factor[hpt.pre]<-2
hpt.stg1<-(evans$sbp>=140&evans$sbp<160)|(evans$dbp>=90&evans$dbp<100)
hpt.factor[hpt.stg1]<-3
hpt.stg2<-(evans$sbp>=160)|(evans$dbp>=100)
hpt.factor[hpt.stg2]<-4
hpt.factor<-factor(hpt.factor,levels=c(1,2,3,4),labels=c("Normal","Prehypertension","Hypertension Stage 1","Hypertension Stage 2"))
table(hpt.factor)
## hpt.factor
##               Normal      Prehypertension Hypertension Stage 1 
##                   56                  165                  177 
## Hypertension Stage 2 
##                  211
#Part D
table("Old Hypertension Factor"=evans$hpt,"New Hypertension Factor"=hpt.factor)
##                        New Hypertension Factor
## Old Hypertension Factor Normal Prehypertension Hypertension Stage 1
##                       0     56             165                  133
##                       1      0               0                   44
##                        New Hypertension Factor
## Old Hypertension Factor Hypertension Stage 2
##                       0                    0
##                       1                  211

Exercise 3.7

wnile<-read.table("http://www.medepi.net/data/wnv/wnv2004raw.txt",header=TRUE,sep=",",na.strings=c(".", "Unkown"))
str(wnile)
## 'data.frame':    779 obs. of  8 variables:
##  $ id         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ county     : Factor w/ 23 levels "Butte","Fresno",..: 14 14 14 14 14 14 14 14 8 12 ...
##  $ age        : int  40 64 19 12 12 17 61 74 71 26 ...
##  $ sex        : Factor w/ 2 levels "F","M": 1 1 2 2 2 2 2 1 2 2 ...
##  $ syndrome   : Factor w/ 3 levels "Unknown","WNF",..: 2 2 2 2 2 2 3 3 2 3 ...
##  $ date.onset : Factor w/ 130 levels "02/02/2005","05/14/2004",..: 4 5 5 3 2 6 8 11 8 10 ...
##  $ date.tested: Factor w/ 104 levels "01/21/2005","02/04/2005",..: 4 5 5 5 5 6 7 8 9 9 ...
##  $ death      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
wnile$date.onset<-as.Date(wnile$date.onset,"%m/%d/%Y")
wnile$date.tested<-as.Date(wnile$date.tested,"%m/%d/%Y")
write.table(wnile,file="West Nile Data.csv",quote=FALSE,sep=",",na="Unknown")

Exercise 3.8

#Part A
#Reading in the data, also going to need to clean up the time and date variables.
oswego<-read.table("http://www.medepi.net/data/oswego.txt",header=TRUE,sep=" ",na.strings=".")
temp<-paste("4/18/1940",oswego$meal.time)
exposure.time<-strptime(temp,format="%m/%d/%Y %I:%M %p")
temp1<-paste(paste(oswego$onset.date,"/1940",sep=""),oswego$onset.time)
outcome.time<-strptime(temp1,format="%m/%d/%Y %I:%M %p")
hist(outcome.time,breaks=15,main="Histogram of Outcome",freq=TRUE)

plot of chunk unnamed-chunk-8

#Part B
range(outcome.time,na.rm=TRUE)
## [1] "1940-04-18 15:00:00 PST" "1940-04-19 10:30:00 PST"
min<-which(outcome.time==min(outcome.time,na.rm=TRUE))
oswego[min,]
##    id age sex meal.time ill onset.date onset.time baked.ham spinach
## 33 52   8   M  11:00 AM   Y       4/18    3:00 PM         N       N
##    mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 33             N             N     N     N           N    N      N     N
##    cakes van.ice.cream choc.ice.cream fruit.salad
## 33     N             Y              Y           N
max<-which(outcome.time==max(outcome.time,na.rm=TRUE))
oswego[max,]
##    id age sex meal.time ill onset.date onset.time baked.ham spinach
## 10 16  32   F      <NA>   Y       4/19   10:30 AM         Y       Y
##    mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 10             N             N     N     Y           N    N      Y     N
##    cakes van.ice.cream choc.ice.cream fruit.salad
## 10     Y             Y              Y           N
#from this we see the first person to report symptoms did on 4-18-1940 at 3PM. This not only is before the food was served from 6 to 11pm, but they reported their meal time that day as 11am. Clearly this person misunderstood the question and reported an unrelated GI episode and should be removed. The second person who is an outlier in the opposite direction reported symptoms at 4-19-1940 at 10:30pm, almost 24 hours after the last possible time they could have eaten contaminated food. It is theoretically plausible that their episode was caused by the church dinner the previous night, but it is also plausible that it was caused by what they ate the following day; there is simply no way to know.
#Part C
temp2<-as.POSIXct(outcome.time)
oswego.1<-oswego[order(oswego$ill,temp2),]
head(oswego.1)
##    id age sex meal.time ill onset.date onset.time baked.ham spinach
## 47  1  11   M      <NA>   N       <NA>       <NA>         N       N
## 48  5  13   F      <NA>   N       <NA>       <NA>         N       N
## 49 11  65   M      <NA>   N       <NA>       <NA>         Y       Y
## 50 12  38   F      <NA>   N       <NA>       <NA>         Y       Y
## 51 13  62   F      <NA>   N       <NA>       <NA>         Y       Y
## 52 15  25   M      <NA>   N       <NA>       <NA>         Y       Y
##    mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 47             N             N     N     N           N    N      N     N
## 48             N             N     N     N           N    N      N     N
## 49             Y             N     Y     Y           N    N      N     N
## 50             Y             N     N     Y           N    N      Y     N
## 51             N             Y     Y     Y           Y    N      N     Y
## 52             Y             Y     Y     Y           Y    Y      Y     Y
##    cakes van.ice.cream choc.ice.cream fruit.salad
## 47     N             N              Y           N
## 48     N             N              Y           N
## 49     N             Y              N           N
## 50     N             Y              Y           Y
## 51     N             N              N           N
## 52     Y             Y              N           N
#Part D
incubation<-(outcome.time-exposure.time)
library(MASS)
truehist(as.numeric(incubation),nbins=10,prob=FALSE,col="cyan",xlab="Incubation Time",ylab="Number of Cases")

plot of chunk unnamed-chunk-8

mean(incubation,na.rm=TRUE)
## Time difference of 4.295 hours
median(incubation,na.rm=TRUE)
## Time difference of 4 hours
range(incubation,na.rm=TRUE)
## Time differences in hours
## [1] 3 7