## Load the required libraries and register the system fonts with R.
library(tidyverse)
library(extrafont)
loadfonts(device = "win")
## Read the outbreak line list from the CSV file into R.
data <- read_csv(file = "Tutorial_epicurve_dataset.csv")
# The dataset describes a food poisoning outbreak; print a compact overview
# of its columns and their types.
data %>% glimpse()
## Rows: 465
## Columns: 22
## $ No <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ INSTITUTE <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ HOSPITAL <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ SEX <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ AGE <dbl> 13, 16, 17, 14, 31, 9, 15, 15, 12, 13, 12, 15, 13, 13, 11, 1…
## $ OCC <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ EXPTIME <chr> "25/8/1990 18:00:00", "25/8/1990 18:00:00", "25/8/1990 18:00…
## $ EXDATE <chr> "25/08/1990", "25/08/1990", "25/08/1990", "25/08/1990", "25/…
## $ EXTIME <time> 18:00:00, 18:00:00, 18:00:00, 18:00:00, 18:00:00, 18:00:00,…
## $ BEEFCURRY <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, …
## $ SALTEGG <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ECLAIR <dbl> 1, 0, 2, 2, 1, 1, 1, 1, 1, 50, 1, 2, 2, 2, 4, 2, 1, 2, 2, 2,…
## $ WATER <dbl> 1, 6, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, …
## $ OTHERS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ONSET <chr> "25/8/1990 22:00:00", "25/8/1990 22:00:00", "25/8/1990 21:00…
## $ ONSETDATE <chr> "25/08/1990", "25/08/1990", "25/08/1990", "25/08/1990", "25/…
## $ ONSETTIME <time> 22:00:00, 22:00:00, 21:00:00, 22:00:00, 23:00:00, 23:00:00,…
## $ NAUSEA <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ VOMITING <dbl> 1, 0, 1, 1, 1, 1, 6, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ABDPAIN <dbl> 1, 0, 1, 1, 1, 1, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ DIARRHEA <dbl> 1, 7, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, …
## $ OTHSYMP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
Exposure time (EXPTIME) and onset time
(ONSET) are stored as <chr>, which is not a proper format
for analysis yet.
Use as.POSIXct() to convert these columns to a date-time format.
# Convert the day/month/year character timestamps to POSIXct (GMT) so that
# date-time arithmetic can be performed on them.
data <- data %>%
  mutate(
    across(
      c(EXPTIME, ONSET),
      ~ as.POSIXct(.x, format = "%d/%m/%Y %H:%M:%S", tz = "GMT")
    )
  )
Now we can find the incubation period by subtracting the exposure time from the onset time.
# Incubation period = onset time - exposure time, computed once and reused.
# The unit is fixed to hours explicitly: plain subtraction of POSIXct values
# lets R pick the unit automatically from the data, so the reported unit
# could change with a different dataset.
incubation <- difftime(data$ONSET, data$EXPTIME, units = "hours")
mean(incubation)
## Time difference of 3.652652 hours
range(incubation)
## Time differences in hours
## [1] 1.0 14.5
From this dataset, the mean incubation period is 3.65 hours
(range 1.0-14.5 hours).
We can also plot an epidemic curve with geom_histogram() from
ggplot2, like this.
## Create a vector defining the x-axis range, which should cover 1-2 times the
## incubation period. The limits are built in GMT, the same time zone used
## when ONSET was parsed, so the plotting window does not shift with the
## local time zone of the machine running the script.
datebreaks <- as.POSIXct(
  c("1990-08-25 14:00", "1990-08-26 20:00"),
  format = "%Y-%m-%d %H:%M",
  tz = "GMT"
)

## Epidemic curve: histogram of onset times in one-hour bins.
ggplot(data = data) +
  geom_histogram(
    aes(ONSET),
    # Bin width of about one-third of the mean incubation period, expressed
    # in seconds because ONSET is POSIXct (1 * 60 * 60 = one hour).
    binwidth = 3600,
    fill = "#6B8891"
  ) +
  # Title and axis labels.
  labs(
    title = "Number of food poisoning cases between 25-26 Aug 1990 \nin province X, (n= 465)",
    x = "Onset time",
    y = "Number of cases"
  ) +
  # Hourly tick marks, restricted to the window defined above.
  scale_x_datetime(
    breaks = scales::date_breaks("hour"),
    labels = scales::date_format("%b %d %H:%M"),
    limits = datebreaks
  ) +
  # Start the y axis exactly at zero so the bars sit on the axis line.
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  # Cosmetic adjustments. The x and y variants of the axis titles and axis
  # lines inherit from the shared `axis.title` / `axis.line` elements.
  # NOTE(review): `size` in element_line() is deprecated since ggplot2 3.4.0
  # in favour of `linewidth`; kept here so older ggplot2 versions still work.
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 60, hjust = 1, size = 8),
    axis.title = element_text(size = 16, family = "serif", color = "black"),
    axis.line = element_line(color = "black", size = 0.5),
    plot.title = element_text(size = 18, color = "black", family = "serif")
  )