rm(list = ls(all.names = TRUE))

library(readxl)
library(janitor)
library(dplyr)
library(tidyr)
library(lubridate)
library(forcats)
library(ggplot2)
master <- readxl::read_excel("master-mmwr-2.xlsx", sheet = "MASTER")
names(master)
 [1] "Region"        "Week"          "Report_Date"   "Data_Date"     "County"        "State"        
 [7] "Pop"           "FIPS"          "New_Status"    "Tribal"        "Driver_1"      "Driver_2"     
[13] "Driver_3"      "Driver_4"      "Driver_5"      "Drivers"       "Demographics"  "Cases"        
[19] "Testing"       "Location"      "Prev_Report"   "Narrative"     "Outreach_Date" "Outreach_Team"
[25] "Outreach_Type"
# combine all driver categories and count them
long <- dplyr::select(master, Region:Driver_5) %>%
  tidyr::pivot_longer(Driver_1:Driver_5) %>%
  dplyr::mutate(week_num = lubridate::week(Data_Date))

table(master$Driver_1, useNA = "always")

   C    D    F    L    M  n/a    P    S    T   TR    U    W <NA> 
2638   49  247  451   43  256  275    4  340    9 1123   88    0 
# count of each driver by FIPS
t1 <-  long %>%
  dplyr::group_by(Region, County, State, FIPS) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  tidyr::pivot_wider(names_from = value, values_from = n)
t1
# count number of days listed per county
day_counts_fips <- long %>%
  dplyr::group_by(Region, County, State, FIPS) %>%
  dplyr::summarise(ndays = n())
`summarise()` regrouping output by 'Region', 'County', 'State' (override with `.groups` argument)
day_counts_fips
# these two values should equal; if they don't then:
# there are rows without any driver information, and/or
# there are FIPS code where no driver is listed
nrow(t1)
[1] 656
nrow(day_counts_fips)
[1] 656
# add column
driver_and_day <- dplyr::full_join(t1, day_counts_fips, by = c("Region", "County", "State", "FIPS")) %>%
  dplyr::select(Region:FIPS, ndays, everything()) # reorder columns
driver_and_day
# plot county histogram
p_hist <- ggplot(driver_and_day) +
  geom_bar(mapping = aes(x = ndays))
p_hist


# facet histogram by region
p_hist_region <- p_hist + facet_wrap(~Region)
p_hist_region

# count drivers by week
group_by_week <- long %>%
  dplyr::group_by(week_num) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  dplyr::filter(value != "n/a")
group_by_week
# set up plot
g <- ggplot(group_by_week,
            aes(x = as.factor(week_num),
                y = n,
                fill = forcats::fct_reorder(value, n))) +
  theme_minimal()

# stacked bar chart by count
g + geom_bar(stat = "identity", position = "stack")


#ggsave("fig1.pdf")
# bar chart
g + geom_bar(stat = "identity", position = "dodge")


#ggsave("fig2.pdf")
# stacked bar chart by proportion 
g + geom_bar(stat = "identity", position = "fill")


#ggsave("fig3.pdf")
# line graph of number of each driver over time
ggplot(group_by_week, aes(x = week_num, y = n, color = value)) + 
  geom_line() +
  theme_minimal()


#ggsave("fig4.pdf")
# group by region and week
group_by_region_week <- long %>%
  dplyr::group_by(Region, week_num) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  dplyr::filter(value != "n/a")
group_by_region_week

# line graph of each driver over time facetted by region
ggplot(group_by_region_week,
       aes(x = week_num,
           y = n,
           color = value)) + 
  geom_line() +
  facet_wrap(~Region) +
  theme_minimal() +
  ggtitle("Frequency of drivers over time by region")


#ggsave("fig5.pdf")
# plot regions over time facetted by driver
ggplot(group_by_region_week,
       aes(x = week_num,
           y = n,
           color = as.factor(Region))) +
  geom_line() +
  facet_wrap(~value) +
  theme_minimal() +
  ggtitle("Driver trend by region")


#ggsave("fig6.pdf")
# convert the above code into readable table
group_by_region_week %>%
  tidyr::pivot_wider(names_from = value, values_from = n)
LS0tDQp0aXRsZTogIkhvdHNwb3QgZHJpdmVyczogQWxsIGNvdW50aWVzIg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiANCiAgICB0b2M6IHllcw0KICAgIGhpZ2hsaWdodDogdGFuZ28NCiAgICB0aGVtZTogY29zbW8NCiAgaHRtbF9kb2N1bWVudDogDQogICAgdG9jOiB5ZXMNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAgdGhlbWU6IGNvc21vDQogICAga2VlcF9tZDogeWVzDQplZGl0b3Jfb3B0aW9uczoNCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQ0KLS0tDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGNhY2hlPVRSVUV9DQpybShsaXN0ID0gbHMoYWxsLm5hbWVzID0gVFJVRSkpDQoNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXIpDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmxpYnJhcnkoZm9yY2F0cykNCmxpYnJhcnkoZ2dwbG90MikNCmBgYA0KDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQptYXN0ZXIgPC0gcmVhZHhsOjpyZWFkX2V4Y2VsKCJtYXN0ZXItbW13ci0yLnhsc3giLCBzaGVldCA9ICJNQVNURVIiKQ0KYGBgDQoNCg0KYGBge3J9DQpuYW1lcyhtYXN0ZXIpDQoNCiMgY29tYmluZSBhbGwgZHJpdmVyIGNhdGVnb3JpZXMgYW5kIGNvdW50IHRoZW0NCmxvbmcgPC0gZHBseXI6OnNlbGVjdChtYXN0ZXIsIFJlZ2lvbjpEcml2ZXJfNSkgJT4lDQogIHRpZHlyOjpwaXZvdF9sb25nZXIoRHJpdmVyXzE6RHJpdmVyXzUpICU+JQ0KICBkcGx5cjo6bXV0YXRlKHdlZWtfbnVtID0gbHVicmlkYXRlOjp3ZWVrKERhdGFfRGF0ZSkpDQoNCnRhYmxlKG1hc3RlciREcml2ZXJfMSwgdXNlTkEgPSAiYWx3YXlzIikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBjb3VudCBvZiBlYWNoIGRyaXZlciBieSBGSVBTDQp0MSA8LSAgbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KFJlZ2lvbiwgQ291bnR5LCBTdGF0ZSwgRklQUykgJT4lDQogIGRwbHlyOjpjb3VudCh2YWx1ZSkgJT4lDQogIHRpZHlyOjpkcm9wX25hKCkgJT4lDQogIHRpZHlyOjpwaXZvdF93aWRlcihuYW1lc19mcm9tID0gdmFsdWUsIHZhbHVlc19mcm9tID0gbikNCnQxDQpgYGANCg0KDQpgYGB7cn0NCiMgY291bnQgbnVtYmVyIG9mIGRheXMgbGlzdGVkIHBlciBjb3VudHkNCmRheV9jb3VudHNfZmlwcyA8LSBsb25nICU+JQ0KICBkcGx5cjo6Z3JvdXBfYnkoUmVnaW9uLCBDb3VudHksIFN0YXRlLCBGSVBTKSAlPiUNCiAgZHBseXI6OnN1bW1hcmlzZShuZGF5cyA9IG4oKSkNCmRheV9jb3VudHNfZmlwcw0KYGBgDQoNCg0KYGBge3J9DQojIHRoZXNlIHR3byB2YWx1ZXMgc2hvdWxkIGVxdWFsOyBpZiB0aGV5IGRvbid0IHRoZW46DQojIHRoZXJlIGFyZSByb3dzIHdpdGhvdXQgYW55IGRyaXZlciBpbmZvcm1hdGlvbiwgYW5kL29yDQojIHRoZXJlIGFyZSBGSVBTIGNvZGUgd2hlcmUgbm8gZHJpdmVyIGlzIGxpc3RlZA0KbnJvdyh0MSkNCm5yb3coZGF5X2NvdW50c19maXBzKQ0KYGBgDQoNCg0KYGBge3J9DQojIGFkZCBjb2x1bW4NCmRyaXZlcl9hbmRfZGF5IDwtIGRwbHlyOjpmdWxsX2pvaW4odDEsIGRheV9jb3VudHNfZmlwcywgYnkgPSBjKCJSZWdpb24iLCAiQ291bnR5IiwgIlN0YXRlIiwgIkZJUFMiKSkgJT4lDQogIGRwbHlyOjpzZWxlY3QoUmVnaW9uOkZJUFMsIG5kYXlzLCBldmVyeXRoaW5nKCkpICMgcmVvcmRlciBjb2x1bW5zDQpkcml2ZXJfYW5kX2RheQ0KYGBgDQoNCg0KYGBge3J9DQojIHBsb3QgY291bnR5IGhpc3RvZ3JhbQ0KcF9oaXN0IDwtIGdncGxvdChkcml2ZXJfYW5kX2RheSkgKw0KICBnZW9tX2JhcihtYXBwaW5nID0gYWVzKHggPSBuZGF5cykpDQpwX2hpc3QNCg0KIyBmYWNldCBoaXN0b2dyYW0gYnkgcmVnaW9uDQpwX2hpc3RfcmVnaW9uIDwtIHBfaGlzdCArIGZhY2V0X3dyYXAoflJlZ2lvbikNCnBfaGlzdF9yZWdpb24NCmBgYA0KDQoNCmBgYHtyfQ0KIyBjb3VudCBkcml2ZXJzIGJ5IHdlZWsNCmdyb3VwX2J5X3dlZWsgPC0gbG9uZyAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KHdlZWtfbnVtKSAlPiUNCiAgZHBseXI6OmNvdW50KHZhbHVlKSAlPiUNCiAgdGlkeXI6OmRyb3BfbmEoKSAlPiUNCiAgZHBseXI6OmZpbHRlcih2YWx1ZSAhPSAibi9hIikNCmdyb3VwX2J5X3dlZWsNCmBgYA0KDQoNCmBgYHtyfQ0KIyBzZXQgdXAgcGxvdA0KZyA8LSBnZ3Bsb3QoZ3JvdXBfYnlfd2VlaywNCiAgICAgICAgICAgIGFlcyh4ID0gYXMuZmFjdG9yKHdlZWtfbnVtKSwNCiAgICAgICAgICAgICAgICB5ID0gbiwNCiAgICAgICAgICAgICAgICBmaWxsID0gZm9yY2F0czo6ZmN0X3Jlb3JkZXIodmFsdWUsIG4pKSkgKw0KICB0aGVtZV9taW5pbWFsKCkNCg0KIyBzdGFja2VkIGJhciBjaGFydCBieSBjb3VudA0KZyArIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJzdGFjayIpDQoNCiNnZ3NhdmUoImZpZzEucGRmIikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBiYXIgY2hhcnQNCmcgKyBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiKQ0KDQojZ2dzYXZlKCJmaWcyLnBkZiIpDQpgYGANCg0KDQpgYGB7cn0NCiMgc3RhY2tlZCBiYXIgY2hhcnQgYnkgcHJvcG9ydGlvbiANCmcgKyBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZmlsbCIpDQoNCiNnZ3NhdmUoImZpZzMucGRmIikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBsaW5lIGdyYXBoIG9mIG51bWJlciBvZiBlYWNoIGRyaXZlciBvdmVyIHRpbWUNCmdncGxvdChncm91cF9ieV93ZWVrLCBhZXMoeCA9IHdlZWtfbnVtLCB5ID0gbiwgY29sb3IgPSB2YWx1ZSkpICsgDQogIGdlb21fbGluZSgpICsNCiAgdGhlbWVfbWluaW1hbCgpDQoNCiNnZ3NhdmUoImZpZzQucGRmIikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBncm91cCBieSByZWdpb24gYW5kIHdlZWsNCmdyb3VwX2J5X3JlZ2lvbl93ZWVrIDwtIGxvbmcgJT4lDQogIGRwbHlyOjpncm91cF9ieShSZWdpb24sIHdlZWtfbnVtKSAlPiUNCiAgZHBseXI6OmNvdW50KHZhbHVlKSAlPiUNCiAgdGlkeXI6OmRyb3BfbmEoKSAlPiUNCiAgZHBseXI6OmZpbHRlcih2YWx1ZSAhPSAibi9hIikNCmdyb3VwX2J5X3JlZ2lvbl93ZWVrDQoNCiMgbGluZSBncmFwaCBvZiBlYWNoIGRyaXZlciBvdmVyIHRpbWUgZmFjZXR0ZWQgYnkgcmVnaW9uDQpnZ3Bsb3QoZ3JvdXBfYnlfcmVnaW9uX3dlZWssDQogICAgICAgYWVzKHggPSB3ZWVrX251bSwNCiAgICAgICAgICAgeSA9IG4sDQogICAgICAgICAgIGNvbG9yID0gdmFsdWUpKSArIA0KICBnZW9tX2xpbmUoKSArDQogIGZhY2V0X3dyYXAoflJlZ2lvbikgKw0KICB0aGVtZV9taW5pbWFsKCkgKw0KICBnZ3RpdGxlKCJGcmVxdWVuY3kgb2YgZHJpdmVycyBvdmVyIHRpbWUgYnkgcmVnaW9uIikNCg0KI2dnc2F2ZSgiZmlnNS5wZGYiKQ0KYGBgDQoNCg0KYGBge3J9DQojIHBsb3QgcmVnaW9ucyBvdmVyIHRpbWUgZmFjZXR0ZWQgYnkgZHJpdmVyDQpnZ3Bsb3QoZ3JvdXBfYnlfcmVnaW9uX3dlZWssDQogICAgICAgYWVzKHggPSB3ZWVrX251bSwNCiAgICAgICAgICAgeSA9IG4sDQogICAgICAgICAgIGNvbG9yID0gYXMuZmFjdG9yKFJlZ2lvbikpKSArDQogIGdlb21fbGluZSgpICsNCiAgZmFjZXRfd3JhcCh+dmFsdWUpICsNCiAgdGhlbWVfbWluaW1hbCgpICsNCiAgZ2d0aXRsZSgiRHJpdmVyIHRyZW5kIGJ5IHJlZ2lvbiIpDQoNCiNnZ3NhdmUoImZpZzYucGRmIikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBjb252ZXJ0IHRoZSBhYm92ZSBjb2RlIGludG8gcmVhZGFibGUgdGFibGUNCmdyb3VwX2J5X3JlZ2lvbl93ZWVrICU+JQ0KICB0aWR5cjo6cGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IHZhbHVlLCB2YWx1ZXNfZnJvbSA9IG4pDQpgYGANCg==