rm(list = ls(all.names = TRUE))
library(readxl)
library(janitor)
library(dplyr)
library(tidyr)
library(lubridate)
library(forcats)
library(ggplot2)
master <- readxl::read_excel("master-mmwr-3.xlsx", sheet = "MASTER")
[1] "Region" "Week" "Report_Date" "Data_Date" "County" "State"
[7] "Pop" "FIPS" "New_Status" "Tribal" "Driver_1" "Driver_2"
[13] "Driver_3" "Driver_4" "Driver_5" "Drivers" "Demographics" "Cases"
[19] "Testing" "Location" "Prev_Report" "Narrative" "Outreach_Date" "Outreach_Team"
[25] "Outreach_Type"
long <- dplyr::select(master, Region:Driver_5) %>%
tidyr::pivot_longer(Driver_1:Driver_5) %>%
dplyr::mutate(week_num = lubridate::week(Data_Date))
table(master$Driver_1, useNA = "always")
C F L M n/a P T TR U W <NA>
215 37 48 2 29 39 49 1 128 6 0
# count of each driver by FIPS
t1 <- long %>%
dplyr::group_by(Region, County, State, FIPS) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
tidyr::pivot_wider(names_from = value, values_from = n)
t1
t2 <- long %>%
dplyr::group_by(Data_Date, Region, County, State, FIPS) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
tidyr::pivot_wider(names_from = value, values_from = n)
t2
day_counts_fits <- long %>%
dplyr::group_by(Region, County, State, FIPS) %>%
dplyr::summarize(ndays = n())
`summarise()` regrouping output by 'Region', 'County', 'State' (override with `.groups` argument)
# there are rows without any driver information
# there are FIPS code where no driver is listed
nrow(t1)
[1] 553
[1] 553
driver_and_day <- dplyr::full_join(t1, day_counts_fits, by = c("Region", "County", "State", "FIPS")) %>%
dplyr::select(Region:FIPS, ndays, everything()) # reorder columns
driver_and_day
group_by_week <- long %>%
dplyr::group_by(week_num) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
dplyr::filter(value != "n/a")
g <- ggplot(group_by_week,
aes(x = as.factor(week_num),
y = n,
fill = forcats::fct_reorder(value, n))) +
theme_minimal()
# by count
g + geom_bar(stat = "identity", position = "stack")

g + geom_bar(stat = "identity", position = "dodge")

# filled to 100
g + geom_bar(stat = "identity", position = "fill")

ggplot(group_by_week, aes(x = week_num, y = n, color = value)) +
geom_line() +
theme_minimal()

group_by_region_week <- long %>%
dplyr::group_by(Region, week_num) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
dplyr::filter(value != "n/a")
ggplot(group_by_region_week,
aes(x = week_num,
y = n,
color = value)) +
geom_line() +
facet_wrap(~Region) + # scales="free"
theme_minimal() +
ggtitle("Frequency of drivers over time by region")

ggplot(group_by_region_week,
aes(x = week_num,
y = n,
color = as.factor(Region))) +
geom_line() +
facet_wrap(~value) +
theme_minimal() +
ggtitle("Driver trend by region")

# convert the above code into readable table
group_by_region_week %>%
tidyr::pivot_wider(names_from = value, values_from = n)
LS0tDQp0aXRsZTogIkhvdHNwb3QgZHJpdmVyczogTmV3IGNvdW50aWVzIG9ubHkiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6DQogICAgdG9jOiB5ZXMNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAgdGhlbWU6IGNvc21vDQogIGh0bWxfZG9jdW1lbnQ6IA0KICAgIHRvYzogeWVzDQogICAgaGlnaGxpZ2h0OiB0YW5nbw0KICAgIHRoZW1lOiBjb3Ntbw0KICAgIGtlZXBfbWQ6IHllcw0KZWRpdG9yX29wdGlvbnM6DQogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUNCi0tLQ0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBjYWNoZT1UUlVFfQ0Kcm0obGlzdCA9IGxzKGFsbC5uYW1lcyA9IFRSVUUpKQ0KDQpsaWJyYXJ5KHJlYWR4bCkNCmxpYnJhcnkoamFuaXRvcikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGZvcmNhdHMpDQpsaWJyYXJ5KGdncGxvdDIpDQpgYGANCg0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbWFzdGVyIDwtIHJlYWR4bDo6cmVhZF9leGNlbCgibWFzdGVyLW1td3ItMy54bHN4Iiwgc2hlZXQgPSAiTUFTVEVSIikNCmBgYA0KDQpgYGB7cn0NCm5hbWVzKG1hc3RlcikNCg0KbG9uZyA8LSBkcGx5cjo6c2VsZWN0KG1hc3RlciwgUmVnaW9uOkRyaXZlcl81KSAlPiUNCiAgdGlkeXI6OnBpdm90X2xvbmdlcihEcml2ZXJfMTpEcml2ZXJfNSkgJT4lDQogIGRwbHlyOjptdXRhdGUod2Vla19udW0gPSBsdWJyaWRhdGU6OndlZWsoRGF0YV9EYXRlKSkNCg0KdGFibGUobWFzdGVyJERyaXZlcl8xLCB1c2VOQSA9ICJhbHdheXMiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBjb3VudCBvZiBlYWNoIGRyaXZlciBieSBGSVBTDQp0MSA8LSAgbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KFJlZ2lvbiwgQ291bnR5LCBTdGF0ZSwgRklQUykgJT4lDQogIGRwbHlyOjpjb3VudCh2YWx1ZSkgJT4lDQogIHRpZHlyOjpkcm9wX25hKCkgJT4lDQogIHRpZHlyOjpwaXZvdF93aWRlcihuYW1lc19mcm9tID0gdmFsdWUsIHZhbHVlc19mcm9tID0gbikNCnQxDQpgYGANCg0KYGBge3J9DQp0MiA8LSAgbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KERhdGFfRGF0ZSwgUmVnaW9uLCBDb3VudHksIFN0YXRlLCBGSVBTKSAlPiUNCiAgZHBseXI6OmNvdW50KHZhbHVlKSAlPiUNCiAgdGlkeXI6OmRyb3BfbmEoKSAlPiUNCiAgdGlkeXI6OnBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSB2YWx1ZSwgdmFsdWVzX2Zyb20gPSBuKQ0KdDINCg0KI3dyaXRlLmNzdih0MiwgInQyLmNzdiIpDQpgYGANCg0KYGBge3J9DQpkYXlfY291bnRzX2ZpdHMgPC0gbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KFJlZ2lvbiwgQ291bnR5LCBTdGF0ZSwgRklQUykgJT4lDQogIGRwbHlyOjpzdW1tYXJpemUobmRheXMgPSBuKCkpDQpkYXlfY291bnRzX2ZpdHMNCmBgYA0KDQpgYGB7cn0NCiMgdGhlcmUgYXJlIHJvd3Mgd2l0aG91dCBhbnkgZHJpdmVyIGluZm9ybWF0aW9uDQojIHRoZXJlIGFyZSBGSVBTIGNvZGUgd2hlcmUgbm8gZHJpdmVyIGlzIGxpc3RlZA0KbnJvdyh0MSkNCm5yb3coZGF5X2NvdW50c19maXRzKQ0KDQpkcml2ZXJfYW5kX2RheSA8LSBkcGx5cjo6ZnVsbF9qb2luKHQxLCBkYXlfY291bnRzX2ZpdHMsIGJ5ID0gYygiUmVnaW9uIiwgIkNvdW50eSIsICJTdGF0ZSIsICJGSVBTIikpICU+JQ0KICBkcGx5cjo6c2VsZWN0KFJlZ2lvbjpGSVBTLCBuZGF5cywgZXZlcnl0aGluZygpKSAjIHJlb3JkZXIgY29sdW1ucw0KZHJpdmVyX2FuZF9kYXkNCmBgYA0KDQpgYGB7cn0NCmdyb3VwX2J5X3dlZWsgPC0gbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KHdlZWtfbnVtKSAlPiUNCiAgZHBseXI6OmNvdW50KHZhbHVlKSAlPiUNCiAgdGlkeXI6OmRyb3BfbmEoKSAlPiUNCiAgZHBseXI6OmZpbHRlcih2YWx1ZSAhPSAibi9hIikNCmBgYA0KDQoNCmBgYHtyfQ0KZyA8LSBnZ3Bsb3QoZ3JvdXBfYnlfd2VlaywNCiAgICAgICAgICAgIGFlcyh4ID0gYXMuZmFjdG9yKHdlZWtfbnVtKSwNCiAgICAgICAgICAgICAgICB5ID0gbiwNCiAgICAgICAgICAgICAgICBmaWxsID0gZm9yY2F0czo6ZmN0X3Jlb3JkZXIodmFsdWUsIG4pKSkgKw0KICB0aGVtZV9taW5pbWFsKCkNCg0KIyBieSBjb3VudA0KZyArIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJzdGFjayIpDQoNCiNnZ3NhdmUoImZpZzEtMS5wZGYiKQ0KYGBgDQoNCg0KYGBge3J9DQpnICsgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIHBvc2l0aW9uID0gImRvZGdlIikNCg0KI2dnc2F2ZSgiZmlnMi0xLnBkZiIpDQpgYGANCg0KDQpgYGB7cn0NCiMgZmlsbGVkIHRvIDEwMCANCmcgKyBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZmlsbCIpDQoNCiNnZ3NhdmUoImZpZzMtMS5wZGYiKQ0KYGBgDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZ3JvdXBfYnlfd2VlaywgYWVzKHggPSB3ZWVrX251bSwgeSA9IG4sIGNvbG9yID0gdmFsdWUpKSArIA0KICBnZW9tX2xpbmUoKSArDQogIHRoZW1lX21pbmltYWwoKQ0KDQojZ2dzYXZlKCJmaWc0LTEucGRmIikNCmBgYA0KDQoNCmBgYHtyfQ0KZ3JvdXBfYnlfcmVnaW9uX3dlZWsgPC0gbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KFJlZ2lvbiwgd2Vla19udW0pICU+JQ0KICBkcGx5cjo6Y291bnQodmFsdWUpICU+JQ0KICB0aWR5cjo6ZHJvcF9uYSgpICU+JQ0KICBkcGx5cjo6ZmlsdGVyKHZhbHVlICE9ICJuL2EiKQ0KDQpnZ3Bsb3QoZ3JvdXBfYnlfcmVnaW9uX3dlZWssDQogICAgICAgYWVzKHggPSB3ZWVrX251bSwNCiAgICAgICAgICAgeSA9IG4sDQogICAgICAgICAgIGNvbG9yID0gdmFsdWUpKSArIA0KICBnZW9tX2xpbmUoKSArDQogIGZhY2V0X3dyYXAoflJlZ2lvbikgKyAjIHNjYWxlcz0iZnJlZSINCiAgdGhlbWVfbWluaW1hbCgpICsNCiAgZ2d0aXRsZSgiRnJlcXVlbmN5IG9mIGRyaXZlcnMgb3ZlciB0aW1lIGJ5IHJlZ2lvbiIpDQoNCiNnZ3NhdmUoImZpZzUtMS5wZGYiKQ0KYGBgDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZ3JvdXBfYnlfcmVnaW9uX3dlZWssDQogICAgICAgYWVzKHggPSB3ZWVrX251bSwNCiAgICAgICAgICAgeSA9IG4sDQogICAgICAgICAgIGNvbG9yID0gYXMuZmFjdG9yKFJlZ2lvbikpKSArDQogIGdlb21fbGluZSgpICsNCiAgZmFjZXRfd3JhcCh+dmFsdWUpICsNCiAgdGhlbWVfbWluaW1hbCgpICsNCiAgZ2d0aXRsZSgiRHJpdmVyIHRyZW5kIGJ5IHJlZ2lvbiIpDQoNCiNnZ3NhdmUoImZpZzYtMS5wZGYiKQ0KYGBgDQoNCg0KYGBge3J9DQojIGNvbnZlcnQgdGhlIGFib3ZlIGNvZGUgaW50byByZWFkYWJsZSB0YWJsZQ0KZ3JvdXBfYnlfcmVnaW9uX3dlZWsgJT4lDQogIHRpZHlyOjpwaXZvdF93aWRlcihuYW1lc19mcm9tID0gdmFsdWUsIHZhbHVlc19mcm9tID0gbikNCmBgYA0K