# Install any required packages that are not already installed.
# The installed-package table is queried once for the whole list instead of
# once per package, and CRAN is contacted over HTTPS rather than plain HTTP.
required_packages <- c("tidyverse", "stringr", "lubridate", "plotly")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cran.us.r-project.org")
}
# Load necessary packages
library(tidyverse)
library(stringr)
library(lubridate)
library(plotly)
# Fetch the raw syndromic data CSV from the GitHub repository into the
# current working directory, then read it into `syn_raw`.
download.file(
  url = "https://github.com/haroldgil/SyS-tools/raw/master/data/syndromicData_raw.csv",
  destfile = "syndromicData_raw.csv"
)
syn_raw <- read_csv(file = "syndromicData_raw.csv")
# For background on regular expressions, see the short tutorial video from Stanford's Natural Language Processing course.
# Flag each visit as influenza-like illness (ILI) when the chief complaint
# mentions "FLU", or mentions "FEVER" together with "COUGH" or "SORE THROAT".
# The same rule is written twice to show two ways of matching patterns.

# Using grepl(): base R, pattern comes first
syn_g <- syn_raw %>%
  mutate(
    ILI = grepl("FLU", Chief.Complaint) |
      (grepl("FEVER", Chief.Complaint) &
        grepl("COUGH|SORE THROAT", Chief.Complaint))
  )

# Using str_detect(): stringr, string comes first
syn_s <- syn_raw %>%
  mutate(
    ILI = str_detect(Chief.Complaint, "FLU") |
      (str_detect(Chief.Complaint, "FEVER") &
        str_detect(Chief.Complaint, "COUGH|SORE THROAT"))
  )
# Get daily counts from line-level data:
# Keep only the visits flagged as ILI and convert the Date field from
# month-day-year text to a Date value.
# `TRUE` is used rather than `T`: `T` is an ordinary variable that can be
# reassigned, which would silently change what the filter keeps.
syn_s <- syn_s %>%
  filter(ILI == TRUE) %>%
  mutate(Date = mdy(Date))

# Get aggregate counts by date: one row per Date, with the number of ILI
# visits on that date stored in `Count`.
syn_s_agg <- syn_s %>%
  group_by(Date) %>%
  summarize(Count = n())
# See the linked tutorial to learn about basic plotting syntax.
# Static plot of daily counts: a ggplot2 line chart of Count against Date.
ggplot(syn_s_agg, aes(x = Date, y = Count)) +
  geom_line()

# Dynamic plot of daily counts: a plotly line chart of the same series.
# The x axis gets range-selector buttons (3 mo, 6 mo, 1 yr, YTD, all)
# and a range slider.
plot_ly(data = syn_s_agg, x = ~Date) %>%
  add_lines(y = ~Count, name = "ILI") %>%
  layout(
    title = "Syndrome Trendline",
    xaxis = list(
      rangeselector = list(
        buttons = list(
          list(count = 3, label = "3 mo", step = "month", stepmode = "backward"),
          list(count = 6, label = "6 mo", step = "month", stepmode = "backward"),
          list(count = 1, label = "1 yr", step = "year", stepmode = "backward"),
          list(count = 1, label = "YTD", step = "year", stepmode = "todate"),
          list(step = "all")
        )
      ),
      rangeslider = list(type = "date")
    ),
    yaxis = list(title = "Count")
  )