# install.packages(c("tidyverse", "dplyr", "lubridate", "MMWRweek","zoo"))
# install.packages(c("DBI", "odbc"))
# 2. Load libraries
library(tidyverse)
library(dplyr)
library(lubridate)
library(MMWRweek)
library(readr)
library(ggplot2)
library(zoo)
library(DBI)
library(odbc)
library(plotly)
TRUE QC 1: ELR source data has been updated today: 2024-09-09
TRUE QC 2: ELR table has a 7 days lag from today: 2024-09-09
TRUE Analysis will proceed.
TRUE QC 3: Start and end dates are confirmed:
# Rename columns & create MMWR week and ISO month columns
class(elr_data$SPECIMEN_DATE)
TRUE [1] "Date"
elr_data <- cbind(elr_data, MMWRweek(elr_data$SPECIMEN_DATE)) # Create MMWR D/W/Y
elr_data2 <- elr_data %>%
select(
SPECIMEN_DATE,
MMWRweek,
MMWRyear,
POSITIVE_TESTS,
TOTAL_TESTS,
TEST_POSITIVITY_RATE
) %>%
mutate(
Month = format(SPECIMEN_DATE, "%b")
)
tail(elr_data2)
# Rearrange the variables
elr_data2 <- elr_data2 %>%
select(
SPECIMEN_DATE,
MMWRyear,
Month,
MMWRweek,
POSITIVE_TESTS,
TOTAL_TESTS,
TEST_POSITIVITY_RATE
)
elr_data2 <- as_tibble(elr_data2)
head(elr_data2)
# QC 4 Check for errors in MMWR week formatting
# Check mmwr wk 1 2024 date and day of week
this_mmwr_year <- year(Sys.Date())
mmwr_week <- 1
this_mmwr_yr_wk1 <- floor_date(as.Date(paste(this_mmwr_year, 1, 1, sep = "-")), unit = "week", week_start = 7)
# print(this_mmwr_yr_wk1)
if (weekdays(this_mmwr_yr_wk1) == "Sunday") {
cat("QC 4: MMWR Week 1 of", this_mmwr_year, "Started on", weekdays(this_mmwr_yr_wk1), as.character(this_mmwr_yr_wk1), "\n")
cat("MMWR Week start is correct.", "\n")
} else {
print("QC 4: MMWR week format is not correct, please review!!!")
}
TRUE QC 4: MMWR Week 1 of 2024 Started on Sunday 2023-12-31
TRUE MMWR Week start is correct.
# QC 5: Check for error in ELR data MMWR week
# current_mmwr_week <- as.integer(format(Sys.Date(), "%U")) + 1
current_mmwr_week <- MMWRweek(Sys.Date())
latest_elr_mmwr_week <- max(elr_data2$SPECIMEN_DATE) %>%
MMWRweek()
# print(latest_elr_mmwr_week)
if (latest_elr_mmwr_week$MMWRweek == (current_mmwr_week$MMWRweek - 1)) {
cat("QC 5: Lastest Elr Data MMWR week is just a week behind Current MMWR week.", "\n")
} else {
print("Lastest Elr Data MMWR week needs review!!!")
}
TRUE QC 5: Lastest Elr Data MMWR week is just a week behind Current MMWR week.
# Select records from 2022/2023 seasons to current season (MMWR week 27 of 2022 to date)
elr_data3 <- elr_data2 %>%
filter((MMWRyear >= 2022 & MMWRweek >= 27) | (MMWRyear > 2022 & MMWRyear <= year(ymd(Sys.Date()))))
# Create Season_period from Flu Season MMWR week 27 to MMWR week 26 of following year
elr_data4 <- elr_data3 %>%
mutate(season_period = case_when(
((MMWRyear == 2022 & MMWRweek >= 27) | (MMWRyear == 2023 & MMWRweek < 27)) ~ "2022/2023",
((MMWRyear == 2023 & MMWRweek >= 27) | (MMWRyear == 2024 & MMWRweek < 27)) ~ "2023/2024",
((MMWRyear == 2024 & MMWRweek >= 27) | (MMWRyear == 2025 & MMWRweek < 27)) ~ "2024/2025"
))
head(elr_data4)
# rename and rearrange output columns to match expected format
colnames(elr_data4)
TRUE [1] "SPECIMEN_DATE" "MMWRyear" "Month"
TRUE [4] "MMWRweek" "POSITIVE_TESTS" "TOTAL_TESTS"
TRUE [7] "TEST_POSITIVITY_RATE" "season_period"
daily_covid_positivity_rate <- elr_data4 %>%
rename(
MMWR_WK = MMWRweek,
YEAR = MMWRyear,
MONTH = Month,
SEASON_PERIOD = season_period,
No_Of_TESTS = TOTAL_TESTS,
TEST_POSITIVITY = TEST_POSITIVITY_RATE
) %>%
select(
SEASON_PERIOD,
YEAR,
MONTH,
MMWR_WK,
SPECIMEN_DATE,
POSITIVE_TESTS,
No_Of_TESTS,
TEST_POSITIVITY
)
tail(daily_covid_positivity_rate)
# QC 6: Check errors in season_periods of 1st & last dates in ELR data
season_qc <- daily_covid_positivity_rate %>%
summarize(
first_season = SEASON_PERIOD[which.min(SPECIMEN_DATE)],
current_season = SEASON_PERIOD[which.max(SPECIMEN_DATE)]
)
print(season_qc)
TRUE # A tibble: 1 × 2
TRUE first_season current_season
TRUE <chr> <chr>
TRUE 1 2022/2023 2024/2025
# Check current season-period of ELR data
elr_curr_season <- tail(daily_covid_positivity_rate$SEASON_PERIOD, 1)
# Check season-period of current date
current_mmwr <- MMWRweek((ymd(Sys.Date())))
todays_date_season <- current_mmwr %>%
mutate(season_period = case_when(
((MMWRyear == 2023 & MMWRweek >= 27) | (MMWRyear == 2024 & MMWRweek < 27)) ~ "2023/2024",
((MMWRyear == 2024 & MMWRweek >= 27) | (MMWRyear == 2025 & MMWRweek < 27)) ~ "2024/2025",
((MMWRyear == 2025 & MMWRweek >= 27) | (MMWRyear == 2026 & MMWRweek < 27)) ~ "2025/2026"
))
todays_date_season
if (todays_date_season$season_period == elr_curr_season) {
cat("QC 6: ELR data season-period is CONSISTENT with current season-period!")
} else {
print("QC 6: ELR data season-period is INCONSISTENT with CURRENT season-period.
Please review code snippets for ELR data & current season-periods.")
}
TRUE QC 6: ELR data season-period is CONSISTENT with current season-period!
# write.csv(daily_covid_positivity_rate,
# paste0("Outputs/COVID_ELR_POSITIVITY_", Sys.Date(), ".csv"),
# row.names = FALSE)
summarized_covid_positivity <- daily_covid_positivity_rate %>%
group_by(SEASON_PERIOD, YEAR, MMWR_WK) %>%
summarize(No_Of_TESTS = sum(No_Of_TESTS),
TEST_POSITIVITY = (sum(POSITIVE_TESTS)/sum(No_Of_TESTS) * 100))
TRUE `summarise()` has grouped output by 'SEASON_PERIOD', 'YEAR'. You can override
TRUE using the `.groups` argument.
summarized_covid_positivity$TEST_POSITIVITY <- round(summarized_covid_positivity$TEST_POSITIVITY, 1)
# summarized_covid_positivity$MONTH <- match(summarized_covid_positivity$MONTH, month.abb)
summarized_covid_positivity <- summarized_covid_positivity[
order(summarized_covid_positivity$SEASON_PERIOD,
summarized_covid_positivity$YEAR,
summarized_covid_positivity$MMWR_WK
),]
write.csv(summarized_covid_positivity,
paste0("V:/FluLikeSurveillanceForCOVID19/Respiratory Virus Report/COVID ELR/COVID_ELR.csv"),
row.names = FALSE)
write.csv(summarized_covid_positivity, "H:/COVID_ELR_TEST_POSITIVITY/Outputs/COVID_POSITIVITY.csv",
row.names = FALSE)
write.csv(daily_covid_positivity_rate,
paste0("H:/COVID_ELR_TEST_POSITIVITY/Outputs/COVID_ELR_POSITIVITY_", Sys.Date(), ".csv"),
row.names = FALSE)

rolling_data <- elr_data4 %>%
arrange(SPECIMEN_DATE) %>%
mutate(
roll_pos_cnt = zoo::rollsum(POSITIVE_TESTS, k = 7, align = "right", fill = NA),
roll_7_day_total = zoo::rollsum(TOTAL_TESTS, k = 7, align = "right",fill = NA)
) %>%
select(
season_period,
Month, MMWRweek,
SPECIMEN_DATE,
POSITIVE_TESTS,
TOTAL_TESTS,
TEST_POSITIVITY_RATE,
roll_pos_cnt,
roll_7_day_total
)
rolling_data2 <- rolling_data %>%
mutate(rolling_7d_rate = roll_pos_cnt/roll_7_day_total * 100)
rolling_data2$rolling_7d_rate <- round(rolling_data2$rolling_7d_rate, 1)
tail(rolling_data2,10)
write.csv(rolling_data2,
paste0("H:/COVID_ELR_TEST_POSITIVITY/Outputs/COVID_ELR 7D rolling rate ", Sys.Date(), ".csv"),
row.names = FALSE)
summarized_cov_elr <- rolling_data2 %>%
group_by(season_period, Month, MMWRweek) %>%
summarize(No_Of_TESTS = sum(TOTAL_TESTS),
TEST_POSITIVITY = (sum(POSITIVE_TESTS)/sum(TOTAL_TESTS) * 100))
TRUE `summarise()` has grouped output by 'season_period', 'Month'. You can override
TRUE using the `.groups` argument.
summarized_cov_elr$TEST_POSITIVITY <- round(summarized_cov_elr$TEST_POSITIVITY, 1)
summarized_cov_elr <- summarized_cov_elr[
order(summarized_cov_elr$season_period,
summarized_cov_elr$MMWRweek,
summarized_cov_elr$Month
),]
## [1] "season_period" "Month" "MMWRweek"
## [4] "SPECIMEN_DATE" "POSITIVE_TESTS" "TOTAL_TESTS"
## [7] "TEST_POSITIVITY_RATE" "roll_pos_cnt" "roll_7_day_total"
## [10] "rolling_7d_rate"
TRUE [1] "End of Script!"
TRUE [1] "Review chart and compare with current RV Dashboard chart."