Chapter 3 Exercise 3.1
Status<-rep(c("Dead", "Survived"), 4)
Treatment<-rep(c("Tolbutamide","Tolbutamide","Placebo","Placebo"),2)
Agegrp<-c(rep(("<55"),4),rep(("55+"),4))
Freq<-c(8,98,5,115,22,76,16,69)
dat<-data.frame(Status,Treatment,Agegrp,Freq)
Exercise 3.2
dat1 <- read.csv("C:/Users/Toshiba L505-ES5018/Desktop/PH251D/dat1.csv", header=FALSE)
## Warning: incomplete final line found by readTableHeader on
## 'C:/Users/Toshiba L505-ES5018/Desktop/PH251D/dat1.csv'
colnames(dat1)<-c("Last Name","First Name","Email","Today's Date")
dat1
## Last Name First Name Email Today's Date
## 1 Potter Harry hp@griffindor.edu 10/6/2014
## 2 Draco Malfoy dm@slytherin.edu 10/6/2014
## 3 Lovegood Luna ll@ravenclaw.edu 10/6/2014
Exercise 3.3
aids<- read.table("http://www.medepi.net/data/aids.txt",header=TRUE,sep="",na.strings=".")
plot(aids$year, aids$cases, type = "l", xlab = "Year", lwd = 2, ylab = "Number of AIDS Cases", main = "AIDS Cases from 1980 to 2003")
Exercise 3.4
measles<-read.table("http://www.medepi.net/data/measles.txt",header=TRUE,sep="",na.strings=".")
#Arithmetic Scale Plot
plot(measles$year,measles$cases,type = "l", xlab = "Year", lwd = 2, ylab = "Number of Measles Cases", main = "Measles Cases from 1950 to 2001")
#Semi-log Scale Plot
plot(measles$year,measles$cases,type = "l", lwd = 2, ylab = "Measles Cases (Log Scale)",log = "y", xlab = "Year", main = "Log Scale of Measles Cases from 1950 to 2001")
Exercise 3.5
hbv<-read.table("http://www.medepi.net/data/hepb.txt",header=TRUE,sep="",na.strings=".")
matplot(hbv$year, cbind(hbv$cases,aids$cases), type = "l", lwd = 2, xlab = "Year", ylab = "Cases", main = "Reported cases of Hepatitis B and AIDS, United States, 1980-2003")
legend(1980, 100000, legend = c("Hepatitis B", "AIDS"),lwd = 2, lty = 1:2, col = 1:2)
Exercise 3.6
#Part A
evans<-read.table("http://www.medepi.net/data/evans.txt",header=TRUE,sep="",na.strings=".")
evans$chd<-as.factor(evans$chd)
evans$cat<-as.factor(evans$cat)
evans$smk<-as.factor(evans$smk)
evans$ecg<-as.factor(evans$ecg)
evans$hpt<-as.factor(evans$hpt)
evans$ch<-as.factor(evans$ch)
#Part B
age.factor<-cut(evans$age,c(breaks=5*(8:16),right=FALSE))
age.factor<-as.factor(age.factor)
evans$age.factor<-age.factor
#Part C
hpt.factor<-rep(NA,nrow(evans))
hpt.normal<-evans$sbp<120&evans$dbp<80
hpt.factor[hpt.normal]<-1
hpt.pre<-(evans$sbp>=120&evans$sbp<140)|(evans$dbp>=80&evans$dbp<90)
hpt.factor[hpt.pre]<-2
hpt.stg1<-(evans$sbp>=140&evans$sbp<160)|(evans$dbp>=90&evans$dbp<100)
hpt.factor[hpt.stg1]<-3
hpt.stg2<-(evans$sbp>=160)|(evans$dbp>=100)
hpt.factor[hpt.stg2]<-4
hpt.factor<-factor(hpt.factor,levels=c(1,2,3,4),labels=c("Normal","Prehypertension","Hypertension Stage 1","Hypertension Stage 2"))
table(hpt.factor)
## hpt.factor
## Normal Prehypertension Hypertension Stage 1
## 56 165 177
## Hypertension Stage 2
## 211
#Part D
table("Old Hypertension Factor"=evans$hpt,"New Hypertension Factor"=hpt.factor)
## New Hypertension Factor
## Old Hypertension Factor Normal Prehypertension Hypertension Stage 1
## 0 56 165 133
## 1 0 0 44
## New Hypertension Factor
## Old Hypertension Factor Hypertension Stage 2
## 0 0
## 1 211
Exercise 3.7
wnile<-read.table("http://www.medepi.net/data/wnv/wnv2004raw.txt",header=TRUE,sep=",",na.strings=c(".", "Unkown"))
str(wnile)
## 'data.frame': 779 obs. of 8 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ county : Factor w/ 23 levels "Butte","Fresno",..: 14 14 14 14 14 14 14 14 8 12 ...
## $ age : int 40 64 19 12 12 17 61 74 71 26 ...
## $ sex : Factor w/ 2 levels "F","M": 1 1 2 2 2 2 2 1 2 2 ...
## $ syndrome : Factor w/ 3 levels "Unknown","WNF",..: 2 2 2 2 2 2 3 3 2 3 ...
## $ date.onset : Factor w/ 130 levels "02/02/2005","05/14/2004",..: 4 5 5 3 2 6 8 11 8 10 ...
## $ date.tested: Factor w/ 104 levels "01/21/2005","02/04/2005",..: 4 5 5 5 5 6 7 8 9 9 ...
## $ death : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
wnile$date.onset<-as.Date(wnile$date.onset,"%m/%d/%Y")
wnile$date.tested<-as.Date(wnile$date.tested,"%m/%d/%Y")
write.table(wnile,file="West Nile Data.csv",quote=FALSE,sep=",",na="Unknown")
Exercise 3.8
#Part A
#Reading in the data, also going to need to clean up the time and date variables.
oswego<-read.table("http://www.medepi.net/data/oswego.txt",header=TRUE,sep=" ",na.strings=".")
temp<-paste("4/18/1940",oswego$meal.time)
exposure.time<-strptime(temp,format="%m/%d/%Y %I:%M %p")
temp1<-paste(paste(oswego$onset.date,"/1940",sep=""),oswego$onset.time)
outcome.time<-strptime(temp1,format="%m/%d/%Y %I:%M %p")
hist(outcome.time,breaks=15,main="Histogram of Outcome",freq=TRUE)
#Part B
range(outcome.time,na.rm=TRUE)
## [1] "1940-04-18 15:00:00 PST" "1940-04-19 10:30:00 PST"
min<-which(outcome.time==min(outcome.time,na.rm=TRUE))
oswego[min,]
## id age sex meal.time ill onset.date onset.time baked.ham spinach
## 33 52 8 M 11:00 AM Y 4/18 3:00 PM N N
## mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 33 N N N N N N N N
## cakes van.ice.cream choc.ice.cream fruit.salad
## 33 N Y Y N
max<-which(outcome.time==max(outcome.time,na.rm=TRUE))
oswego[max,]
## id age sex meal.time ill onset.date onset.time baked.ham spinach
## 10 16 32 F <NA> Y 4/19 10:30 AM Y Y
## mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 10 N N N Y N N Y N
## cakes van.ice.cream choc.ice.cream fruit.salad
## 10 Y Y Y N
#from this we see the first person to report symptoms did on 4-18-1940 at 3PM. This not only is before the food was served from 6 to 11pm, but they reported their meal time that day as 11am. Clearly this person misunderstood the question and reported an unrelated GI episode and should be removed. The second person who is an outlier in the opposite direction reported symptoms at 4-19-1940 at 10:30pm, almost 24 hours after the last possible time they could have eaten contaminated food. It is theoretically plausible that their episode was caused by the church dinner the previous night, but it is also plausible that it was caused by what they ate the following day; there is simply no way to know.
#Part C
temp2<-as.POSIXct(outcome.time)
oswego.1<-oswego[order(oswego$ill,temp2),]
head(oswego.1)
## id age sex meal.time ill onset.date onset.time baked.ham spinach
## 47 1 11 M <NA> N <NA> <NA> N N
## 48 5 13 F <NA> N <NA> <NA> N N
## 49 11 65 M <NA> N <NA> <NA> Y Y
## 50 12 38 F <NA> N <NA> <NA> Y Y
## 51 13 62 F <NA> N <NA> <NA> Y Y
## 52 15 25 M <NA> N <NA> <NA> Y Y
## mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 47 N N N N N N N N
## 48 N N N N N N N N
## 49 Y N Y Y N N N N
## 50 Y N N Y N N Y N
## 51 N Y Y Y Y N N Y
## 52 Y Y Y Y Y Y Y Y
## cakes van.ice.cream choc.ice.cream fruit.salad
## 47 N N Y N
## 48 N N Y N
## 49 N Y N N
## 50 N Y Y Y
## 51 N N N N
## 52 Y Y N N
#Part D
incubation<-(outcome.time-exposure.time)
library(MASS)
truehist(as.numeric(incubation),nbins=10,prob=FALSE,col="cyan",xlab="Incubation Time",ylab="Number of Cases")
mean(incubation,na.rm=TRUE)
## Time difference of 4.295 hours
median(incubation,na.rm=TRUE)
## Time difference of 4 hours
range(incubation,na.rm=TRUE)
## Time differences in hours
## [1] 3 7