tolbutamide <- read.csv('http://www.medepi.net/data/ugdp.txt', header = TRUE)
tolbutamide[1:5,]
## Status Treatment Agegrp
## 1 Death Tolbutamide <55
## 2 Death Tolbutamide <55
## 3 Death Tolbutamide <55
## 4 Death Tolbutamide <55
## 5 Death Tolbutamide <55
mode(tolbutamide)
## [1] "list"
str(tolbutamide)
## 'data.frame': 409 obs. of 3 variables:
## $ Status : Factor w/ 2 levels "Death","Survivor": 1 1 1 1 1 1 1 1 1 1 ...
## $ Treatment: Factor w/ 2 levels "Placebo","Tolbutamide": 2 2 2 2 2 2 2 2 2 2 ...
## $ Agegrp : Factor w/ 2 levels "<55","55+": 1 1 1 1 1 1 1 1 2 2 ...
tolbframe <-data.frame()
head(tolbutamide)
## Status Treatment Agegrp
## 1 Death Tolbutamide <55
## 2 Death Tolbutamide <55
## 3 Death Tolbutamide <55
## 4 Death Tolbutamide <55
## 5 Death Tolbutamide <55
## 6 Death Tolbutamide <55
tolxtab <- xtabs(~Status+Treatment, data = tolbutamide)[,2:1]; tolxtab
## Treatment
## Status Tolbutamide Placebo
## Death 30 21
## Survivor 174 184
xtab3way <- xtabs(~Status + Treatment + Agegrp, data=tolbutamide); xtab3way
## , , Agegrp = <55
##
## Treatment
## Status Placebo Tolbutamide
## Death 5 8
## Survivor 115 98
##
## , , Agegrp = 55+
##
## Treatment
## Status Placebo Tolbutamide
## Death 16 22
## Survivor 69 76
toltable <- table(tolbutamide$Status, tolbutamide$Treatment, tolbutamide$Agegrp); toltable
## , , = <55
##
##
## Placebo Tolbutamide
## Death 5 8
## Survivor 115 98
##
## , , = 55+
##
##
## Placebo Tolbutamide
## Death 16 22
## Survivor 69 76
#toltest <- aggregate(tolbutamide[,"Treatment"], list(tolbutamide$Agegrp))
#toltest2 <-list (Status, Treatment)
#FName LName Affiliation Email1 Email2 Date
#first.names <- c("Corey", "Eileen", "Tommy", "Nicki")
#last.names <- c("Mike", "Shelton", "Kenning", "Kenning")
#affiliation <- c ("Firestorm", "Wonderwoman", "Batman", "Supergirl")
#email1 <- c("corey.microphone@gmail.com", "eileen.shelton@gmail.com", "tommytequila@gmail.com", "nicole@gmail.com")
#email2 <- c("corey.mike@hotmail.com", "dragonl8y@yahoo.com", "tom_kenning@comcast.com", "nickickenning@gmail.com")
#date.added <- c(10/15/16, 10/15/16, 10/15/16, 10/15/16)
#Table1 <- data.frame(first.names, last.names, affiliation, email1, email2, date.added)
#sink(file = Table1, "/Users/Michelle/Desktop/Berkeley MPH/Fall 2016/R for Epi's/HW3Table1.rtf")
mydata <- read.table ("/Users/Michelle/Desktop/Berkeley MPH/Fall 2016/R for Epi's/HW3TextEdit.rtf", header=TRUE, sep=",")
mydata
## X..rtf1.ansi.ansicpg1252.cocoartf1348.cocoasubrtf170
## 1 {\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}
## 2 {\\colortbl;\\red255\\green255\\blue255;}
## 3 \\margl1440\\margr1440\\vieww10800\\viewh8400\\viewkind0
## 4 \\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\pardirnatural
## 5 \\f0\\fs24 \\cf0 FName
## 6 LName
## 7 Affiliation
## 8 Email1
## 9 Email2
## 10 Date\\
## 11 1
## 12 Corey
## 13 Mike
## 14 Firestorm
## 15 corey.microphone@gmail.com
## 16 corey.mike@hotmail.com
## 17 10/15/16\\
## 18 2
## 19 Eileen
## 20 Shelton
## 21 Wonderwoman
## 22 eileen.shelton@gmail.com
## 23 dragonl8y@yahoo.com
## 24 10/15/16\\
## 25 3
## 26 Tommy
## 27 Kenning
## 28 Batman
## 29 tommytequila@gmail.com
## 30 tom_kenning@comcast.com
## 31 10/15/16\\
## 32 4
## 33 Nicki
## 34 Kenning
## 35 Supergirl
## 36 nicole@gmail.com
## 37 nickickenning@gmail.com
## 38 10/15/16}
mydataframe <-data.frame(mydata)
mydataframe
## X..rtf1.ansi.ansicpg1252.cocoartf1348.cocoasubrtf170
## 1 {\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}
## 2 {\\colortbl;\\red255\\green255\\blue255;}
## 3 \\margl1440\\margr1440\\vieww10800\\viewh8400\\viewkind0
## 4 \\pard\\tx720\\tx1440\\tx2160\\tx2880\\tx3600\\tx4320\\tx5040\\tx5760\\tx6480\\tx7200\\tx7920\\tx8640\\pardirnatural
## 5 \\f0\\fs24 \\cf0 FName
## 6 LName
## 7 Affiliation
## 8 Email1
## 9 Email2
## 10 Date\\
## 11 1
## 12 Corey
## 13 Mike
## 14 Firestorm
## 15 corey.microphone@gmail.com
## 16 corey.mike@hotmail.com
## 17 10/15/16\\
## 18 2
## 19 Eileen
## 20 Shelton
## 21 Wonderwoman
## 22 eileen.shelton@gmail.com
## 23 dragonl8y@yahoo.com
## 24 10/15/16\\
## 25 3
## 26 Tommy
## 27 Kenning
## 28 Batman
## 29 tommytequila@gmail.com
## 30 tom_kenning@comcast.com
## 31 10/15/16\\
## 32 4
## 33 Nicki
## 34 Kenning
## 35 Supergirl
## 36 nicole@gmail.com
## 37 nickickenning@gmail.com
## 38 10/15/16}
aidsdata <- read.table ("http://www.medepi.net/data/aids.txt", header = TRUE, sep="", na.strings = ".")
aidsdata
## cases year
## 1 NA 1980
## 2 NA 1981
## 3 NA 1982
## 4 NA 1983
## 5 4445 1984
## 6 8249 1985
## 7 12932 1986
## 8 21070 1987
## 9 31001 1988
## 10 33722 1989
## 11 41595 1990
## 12 43672 1991
## 13 45472 1992
## 14 103691 1993
## 15 78279 1994
## 16 71547 1995
## 17 66885 1996
## 18 58492 1997
## 19 46521 1998
## 20 45104 1999
## 21 40758 2000
## 22 41868 2001
## 23 42745 2002
## 24 44232 2003
plot(aidsdata$year, aidsdata$cases, type = "l", xlab = "Year", lwd = 2, ylab = "Number of Cases", main = "AIDS Cases Over Time")
(Measles <- read.table ("http://www.medepi.net/data/measles.txt", header = TRUE, na.strings = "."))
## year cases
## 1 1950 319000
## 2 1951 530000
## 3 1952 683000
## 4 1953 449000
## 5 1954 683000
## 6 1955 555000
## 7 1956 612000
## 8 1957 487000
## 9 1958 763000
## 10 1959 406000
## 11 1960 442000
## 12 1961 424000
## 13 1962 482000
## 14 1963 385000
## 15 1964 458000
## 16 1965 262000
## 17 1966 204000
## 18 1967 63000
## 19 1968 22000
## 20 1969 26000
## 21 1970 47351
## 22 1971 75290
## 23 1972 32275
## 24 1973 26690
## 25 1974 22094
## 26 1975 24374
## 27 1976 41126
## 28 1977 57345
## 29 1978 26871
## 30 1979 13597
## 31 1980 13506
## 32 1981 3124
## 33 1982 1714
## 34 1983 1497
## 35 1984 2587
## 36 1985 2822
## 37 1986 6282
## 38 1987 3655
## 39 1988 3396
## 40 1989 18193
## 41 1990 27786
## 42 1991 9643
## 43 1992 2237
## 44 1993 312
## 45 1994 963
## 46 1995 309
## 47 1996 508
## 48 1997 138
## 49 1998 100
## 50 1999 100
## 51 2000 86
## 52 2001 116
plot(Measles$year, Measles$cases, type = "l", lwd = 2, xlab = "Year", ylab="Number of Cases", main = "Number of Measles Cases Over Time")
plot(Measles$year, Measles$cases, type = "l", lwd = 2, xlab = "Year", log="y", ylab = "Number of Cases", main = "Logarithmic: Number of Measles Cases Over Time")
HepB <- read.table ("http://www.medepi.net/data/hepb.txt", header=TRUE, na.strings = ".")
HepB
## cases year
## 1 19015 1980
## 2 21152 1981
## 3 22177 1982
## 4 24318 1983
## 5 26115 1984
## 6 26611 1985
## 7 26107 1986
## 8 25916 1987
## 9 23177 1988
## 10 23419 1989
## 11 21102 1990
## 12 18003 1991
## 13 16126 1992
## 14 13361 1993
## 15 12517 1994
## 16 10805 1995
## 17 10637 1996
## 18 10416 1997
## 19 10258 1998
## 20 7694 1999
## 21 8036 2000
## 22 7843 2001
## 23 7996 2002
## 24 7526 2003
matplot(HepB$year, cbind(HepB$cases,aidsdata$cases), type = "l", lwd = 2, xlab = "Year", ylab = "Cases", main = "Reported cases of Hepatitis B and AIDS, United States, 1980--2003")
legend(1980, 100000, legend = c("Hepatitis B", "AIDS"), lwd = 2, lty = 1:2, col = 1:2)
Evans <- read.table ("http://www.medepi.net/data/evans.txt", header=TRUE)
Evans[1:4,]
## id chd cat age chl smk ecg dbp sbp hpt ch cc
## 1 21 0 0 56 270 0 0 80 138 0 0 0
## 2 31 0 0 43 159 1 0 74 128 0 0 0
## 3 51 1 1 56 201 1 1 112 164 1 1 201
## 4 71 0 1 64 179 1 0 100 200 1 1 179
Evans$chd2 <- factor(Evans$chd, levels = 0:1, labels = c("No", "Yes"))
Evans$cat2 <- factor(Evans$cat, levels = 0:1, labels = c("Normal", "High"))
Evans$smk2 <- factor(Evans$smk, levels = 0:1, labels = c("No", "Yes"))
Evans$ecg2 <- factor(Evans$ecg, levels = 0:1, labels = c("No Abnormality", "Abnormality"))
Evans$hpt2 <- factor(Evans$hpt, levels = 0:1, labels = c("No", "Yes"))
Evans[1:4,]
## id chd cat age chl smk ecg dbp sbp hpt ch cc chd2 cat2 smk2
## 1 21 0 0 56 270 0 0 80 138 0 0 0 No Normal No
## 2 31 0 0 43 159 1 0 74 128 0 0 0 No Normal Yes
## 3 51 1 1 56 201 1 1 112 164 1 1 201 Yes High Yes
## 4 71 0 1 64 179 1 0 100 200 1 1 179 No High Yes
## ecg2 hpt2
## 1 No Abnormality No
## 2 No Abnormality No
## 3 Abnormality Yes
## 4 No Abnormality Yes
range(Evans$age)
## [1] 40 76
Evans$agecat <- cut (Evans$age, breaks = c(40, 50, 60, 70, 80))
xtabs(~agecat, data=Evans)
## agecat
## (40,50] (50,60] (60,70] (70,80]
## 257 188 113 39
Evans[1:4,]
## id chd cat age chl smk ecg dbp sbp hpt ch cc chd2 cat2 smk2
## 1 21 0 0 56 270 0 0 80 138 0 0 0 No Normal No
## 2 31 0 0 43 159 1 0 74 128 0 0 0 No Normal Yes
## 3 51 1 1 56 201 1 1 112 164 1 1 201 Yes High Yes
## 4 71 0 1 64 179 1 0 100 200 1 1 179 No High Yes
## ecg2 hpt2 agecat
## 1 No Abnormality No (50,60]
## 2 No Abnormality No (40,50]
## 3 Abnormality Yes (50,60]
## 4 No Abnormality Yes (60,70]
hptnew <- rep(NA, nrow(Evans))
normal <- Evans$sbp < 120 & Evans$dbp < 80
hptnew[normal] <- 1
prehyp <- (Evans$sbp >= 120 & Evans$sbp < 140) | (Evans$dbp >= 80 & Evans$dbp < 90)
hptnew[prehyp] <- 2
stage1 <- (Evans$sbp >= 140 & Evans$sbp < 160) | (Evans$dbp >= 90 & Evans$dbp < 100)
hptnew[stage1] <- 3
stage2 <- Evans$sbp >= 160 | Evans$dbp >= 100
hptnew[stage2] <- 4
Evans$hpt4 <- factor(hptnew, levels = 1:4, labels=c("Normal", "PreHTN", "HTN.Stage1", "HTN.Stage2"))
xtabs(~hpt4, data = Evans) # test
## hpt4
## Normal PreHTN HTN.Stage1 HTN.Stage2
## 56 165 177 211
(compare <- xtabs(~hpt2 + hpt4, data = Evans))
## hpt4
## hpt2 Normal PreHTN HTN.Stage1 HTN.Stage2
## No 56 165 133 0
## Yes 0 0 44 211
WNV <- read.csv ("http://www.medepi.net/data/wnv/wnv2004raw.txt", header=TRUE, sep=",",as.is=TRUE, na.strings = ".")
WNV[1:10,]
## id county age sex syndrome date.onset date.tested death
## 1 1 San Bernardino 40 F WNF 05/19/2004 06/02/2004 No
## 2 2 San Bernardino 64 F WNF 05/22/2004 06/16/2004 No
## 3 3 San Bernardino 19 M WNF 05/22/2004 06/16/2004 No
## 4 4 San Bernardino 12 M WNF 05/16/2004 06/16/2004 No
## 5 5 San Bernardino 12 M WNF 05/14/2004 06/16/2004 No
## 6 6 San Bernardino 17 M WNF 06/07/2004 06/17/2004 No
## 7 7 San Bernardino 61 M WNND 06/09/2004 06/18/2004 No
## 8 8 San Bernardino 74 F WNND 06/14/2004 06/22/2004 No
## 9 9 Los Angeles 71 M WNF 06/09/2004 06/24/2004 No
## 10 10 Riverside 26 M WNND 06/13/2004 06/24/2004 No
onset.standard <- as.Date(WNV$date.onset, format = "%m/%d/%Y")
WNV2 <- cbind(WNV, onset.standard)
test.standard <- as.Date(WNV$date.tested, format = "%m/%d/%Y")
WNV2 <- cbind(WNV2, test.standard)
WNV2[1:10,]
## id county age sex syndrome date.onset date.tested death
## 1 1 San Bernardino 40 F WNF 05/19/2004 06/02/2004 No
## 2 2 San Bernardino 64 F WNF 05/22/2004 06/16/2004 No
## 3 3 San Bernardino 19 M WNF 05/22/2004 06/16/2004 No
## 4 4 San Bernardino 12 M WNF 05/16/2004 06/16/2004 No
## 5 5 San Bernardino 12 M WNF 05/14/2004 06/16/2004 No
## 6 6 San Bernardino 17 M WNF 06/07/2004 06/17/2004 No
## 7 7 San Bernardino 61 M WNND 06/09/2004 06/18/2004 No
## 8 8 San Bernardino 74 F WNND 06/14/2004 06/22/2004 No
## 9 9 Los Angeles 71 M WNF 06/09/2004 06/24/2004 No
## 10 10 Riverside 26 M WNND 06/13/2004 06/24/2004 No
## onset.standard test.standard
## 1 2004-05-19 2004-06-02
## 2 2004-05-22 2004-06-16
## 3 2004-05-22 2004-06-16
## 4 2004-05-16 2004-06-16
## 5 2004-05-14 2004-06-16
## 6 2004-06-07 2004-06-17
## 7 2004-06-09 2004-06-18
## 8 2004-06-14 2004-06-22
## 9 2004-06-09 2004-06-24
## 10 2004-06-13 2004-06-24
write.table (WNV2, file = "/Users/Michelle/Desktop/Berkeley MPH/Fall 2016/R for Epi's/WNVExport.dat")
oswego <- read.table("http://www.medepi.net/data/oswego/oswego.txt", sep = "", header = TRUE, na.strings = ".")
#str(oswego)
#head(oswego)
mdt <- paste("4/18/1940", oswego$meal.time)
meal.dt <- strptime(mdt, "%m/%d/%Y %I:%M %p")
odt <- paste(paste(oswego$onset.date,"/1940",sep = ""), oswego$onset.time)
onset.dt <- strptime(odt, "%m/%d/%Y %I:%M %p")
odt[1:10]
## [1] "4/19/1940 12:30 AM" "4/19/1940 12:30 AM" "4/19/1940 12:30 AM"
## [4] "4/18/1940 10:30 PM" "4/18/1940 10:30 PM" "4/19/1940 2:00 AM"
## [7] "4/19/1940 1:00 AM" "4/18/1940 11:00 PM" "4/19/1940 2:00 AM"
## [10] "4/19/1940 10:30 AM"
hist(onset.dt, breaks = 30, freq = TRUE)
One case several days before - maybe the source? And one case several days after - maybe a secondary case?
oswego[1:10, 1:8]
## id age sex meal.time ill onset.date onset.time baked.ham
## 1 2 52 F 8:00 PM Y 4/19 12:30 AM Y
## 2 3 65 M 6:30 PM Y 4/19 12:30 AM Y
## 3 4 59 F 6:30 PM Y 4/19 12:30 AM Y
## 4 6 63 F 7:30 PM Y 4/18 10:30 PM Y
## 5 7 70 M 7:30 PM Y 4/18 10:30 PM Y
## 6 8 40 F 7:30 PM Y 4/19 2:00 AM N
## 7 9 15 F 10:00 PM Y 4/19 1:00 AM N
## 8 10 33 F 7:00 PM Y 4/18 11:00 PM Y
## 9 14 10 M 7:30 PM Y 4/19 2:00 AM N
## 10 16 32 F NA Y 4/19 10:30 AM Y
onset.dt <- strptime(odt, "%m/%d/%Y %I:%M %p")
onset.dt[1:4]
## [1] "1940-04-19 00:30:00 PST" "1940-04-19 00:30:00 PST"
## [3] "1940-04-19 00:30:00 PST" "1940-04-18 22:30:00 PST"
oswego.order <- cbind(oswego, onset.dt)
oswego.order <- oswego.order[order(oswego.order$onset.dt),]
oswego.order[1:10, ]
## id age sex meal.time ill onset.date onset.time baked.ham spinach
## 33 52 8 M 11:00 AM Y 4/18 3:00 PM N N
## 20 31 35 M NA Y 4/18 9:00 PM Y Y
## 23 36 35 F NA Y 4/18 9:15 PM Y Y
## 26 40 68 M NA Y 4/18 9:30 PM Y N
## 29 44 58 M NA Y 4/18 9:30 PM Y Y
## 16 24 3 M NA Y 4/18 9:45 PM N Y
## 17 26 59 F NA Y 4/18 9:45 PM N Y
## 13 20 33 F NA Y 4/18 10:00 PM Y Y
## 12 18 36 M NA Y 4/18 10:15 PM Y Y
## 4 6 63 F 7:30 PM Y 4/18 10:30 PM Y Y
## mashed.potato cabbage.salad jello rolls brown.bread milk coffee water
## 33 N N N N N N N N
## 20 Y N Y Y Y N Y N
## 23 Y Y N Y Y N Y N
## 26 Y Y N N Y N Y N
## 29 Y N N N Y Y Y N
## 16 Y N N Y N N N Y
## 17 Y Y N Y Y N N Y
## 13 Y Y Y Y N N Y Y
## 12 N Y N Y Y N N N
## 4 N Y Y N N N N Y
## cakes vanilla.ice.cream chocolate.ice.cream fruit.salad
## 33 N Y Y N
## 20 Y Y N Y
## 23 N Y N N
## 26 N Y N N
## 29 N Y NA Y
## 16 Y Y N N
## 17 Y Y N N
## 13 Y Y Y N
## 12 N Y N N
## 4 N Y N N
## onset.dt
## 33 1940-04-18 15:00:00
## 20 1940-04-18 21:00:00
## 23 1940-04-18 21:15:00
## 26 1940-04-18 21:30:00
## 29 1940-04-18 21:30:00
## 16 1940-04-18 21:45:00
## 17 1940-04-18 21:45:00
## 13 1940-04-18 22:00:00
## 12 1940-04-18 22:15:00
## 4 1940-04-18 22:30:00
onset.julian <- paste(oswego$onset.date,"/1940", sep="")
onset.julian <- as.Date(onset.julian, format = "%m/%d/%Y")
as.numeric(onset.julian)
## [1] -10849 -10849 -10849 -10850 -10850 -10849 -10849 -10850 -10849 -10849
## [11] -10849 -10850 -10850 -10849 -10850 -10850 -10850 -10849 -10850 -10850
## [21] -10849 -10849 -10850 -10850 -10849 -10850 -10849 -10849 -10850 -10849
## [31] -10849 -10850 -10850 -10849 -10850 -10850 -10849 -10849 -10850 -10849
## [41] -10849 -10849 -10849 -10849 -10849 -10850 NA NA NA NA
## [51] NA NA NA NA NA NA NA NA NA NA
## [61] NA NA NA NA NA NA NA NA NA NA
## [71] NA NA NA NA NA
meal.date <- "4/18/1940"
meal.julian <- as.Date(meal.date, format = "%m/%d/%Y")
incubation <- onset.julian - meal.julian
incubation
## Time differences in days
## [1] 1 1 1 0 0 1 1 0 1 1 1 0 0 1 0 0 0 1 0 0 1 1 0
## [24] 0 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 0
## [47] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [70] NA NA NA NA NA NA
library(MASS) #load MASS package
truehist(as.numeric(incubation), nbins = 7, prob = FALSE, ylab = 'Frequency', xlab = "Incubation Period (days)")
mean(incubation, na.rm = TRUE)
## Time difference of 0.5652174 days
median(incubation, na.rm = TRUE)
## Time difference of 1 days
range(incubation, na.rm = TRUE)
## Time differences in days
## [1] 0 1