knitr::opts_chunk$set(echo = TRUE)
rm(list = ls(all.names = TRUE))
library(readxl)
library(janitor)
library(dplyr)
library(tidyr)
library(lubridate)
library(forcats)
library(ggplot2)
# all counties
master_all <- readxl::read_excel("master_data.xlsx", sheet = "MASTER")
# view master columns
names(master_all)
[1] "Region" "Week" "Report_Date" "Data_Date" "County"
[6] "State" "Pop" "FIPS" "Census_4" "Urban_3"
[11] "Urban_6" "New_Status" "Tribal" "Driver_1" "Driver_2"
[16] "Driver_3" "Driver_4" "Driver_5" "Drivers" "Demographics"
[21] "Cases" "Testing" "Location" "Prev_Report" "Narrative"
[26] "Outreach_Date" "Outreach_Team" "Outreach_Type"
table(master_all$Driver_1, useNA = "always")
C D F L M P S T TR U UU W <NA>
3584 60 253 512 83 334 5 363 9 2256 218 117 0
table(master_all$Driver_2, useNA = "always")
C D F L M P S T TR U W <NA>
506 153 188 748 112 257 17 479 40 83 279 4932
table(master_all$Driver_3, useNA = "always")
C D F L M P S T TR U W <NA>
70 14 45 171 39 89 7 36 22 20 60 7221
table(master_all$Driver_4, useNA = "always")
C D F L M P S T W <NA>
18 1 5 51 12 9 10 11 5 7672
table(master_all$Driver_5, useNA = "always")
<NA>
7794
# 882 = total counties
summarise(master_all, n_distinct(FIPS))
# 735 = new counties
filter(master_all, New_Status == 1) %>%
summarise(n_distinct(FIPS))
# set dataframe to analyze drivers
master <- master_all
# subset long
long <- dplyr::select(master, c(Region:Driver_5)) %>%
tidyr::pivot_longer(Driver_1:Driver_5) %>% # group driver cols together
#tidyr::drop_na() %>%
dplyr::mutate(Month = lubridate::month(Data_Date)) %>%
dplyr::mutate(Week = lubridate::epiweek(Data_Date)) # add in month
# 38,970 rows
long
# 882 unique counties with drivers
summarise(long, n_distinct(FIPS))
# count of each driver by FIPS
drivers_fips <- long %>%
dplyr::group_by(FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
tidyr::pivot_wider(names_from = value, values_from = n)
# 882 rows
drivers_fips
# 882 unique counties
summarise(long, n_distinct(FIPS))
# count number of days per county
ndays_fips <- long %>%
dplyr::group_by(FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
dplyr::summarise(ndays = n_distinct(Data_Date)) %>%
arrange(desc(ndays)) # sort by ndays
`summarise()` regrouping output by 'FIPS', 'Region', 'State', 'County', 'Tribal', 'Census_4', 'Urban_3' (override with `.groups` argument)
# join tables
ndays_drivers_fips <- dplyr::full_join(drivers_fips, ndays_fips,
by = c("FIPS", "Region", "State", "County", "Tribal",
"Census_4", "Urban_3", "Urban_6")) %>%
dplyr::select(Census_4, Urban_3, Urban_6, FIPS, ndays, everything()) # reorder columns
# 822 rows
ndays_drivers_fips
#write.csv(ndays_drivers_fips, "by_county.csv")
# create table of drivers with time periods
week_drivers_regions <- long %>%
dplyr::group_by(Week, Month, FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
dplyr::count(value) %>%
tidyr::drop_na() %>%
tidyr::pivot_wider(names_from = value, values_from = n)
# 2,912 rows
week_drivers_regions
#write.csv(week_drivers_regions, "by_week.csv")
## plot ndays histogram
# number of unique counties by ndays
ndays_drivers_fips # 822 rows
p_hist <- ggplot(ndays_drivers_fips) +
geom_bar(mapping = aes(x = ndays))
p_hist

# facet ndays histogram by region
p_hist_region <- p_hist + facet_wrap(~Region)
p_hist_region

# more histograms
p_hist + facet_wrap(~Census_4)

p_hist + facet_wrap(~Urban_3)

p_hist + facet_wrap(~Urban_6)

p_hist + facet_grid(Census_4 ~ Urban_3)

p_hist + facet_grid(Census_4 ~ Urban_6)

---
title: "drivers-1"
output:
  html_notebook: 
    toc: yes
    highlight: tango
    theme: cosmo
  html_document: 
    toc: yes
    highlight: tango
    theme: cosmo
    keep_md: yes
editor_options:
  chunk_output_type: inline
---

```{r setup, message=FALSE, warning=FALSE, cache=TRUE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
rm(list = ls(all.names = TRUE))

library(readxl)
library(janitor)
library(dplyr)
library(tidyr)
library(lubridate)
library(forcats)
library(ggplot2)
```


```{r message=FALSE, warning=FALSE}
# all counties
master_all <- readxl::read_excel("master_data.xlsx", sheet = "MASTER")
```


```{r}
# view master columns
names(master_all)

# 7,794 rows
master_all

table(master_all$Driver_1, useNA = "always")
table(master_all$Driver_2, useNA = "always")
table(master_all$Driver_3, useNA = "always")
table(master_all$Driver_4, useNA = "always")
table(master_all$Driver_5, useNA = "always")

# 882 = total counties
summarise(master_all, n_distinct(FIPS))

# 735 = new counties
filter(master_all, New_Status == 1) %>%
  summarise(n_distinct(FIPS))
```


```{r}
# set dataframe to analyze drivers
master <- master_all
```


```{r}
# subset long
long <- dplyr::select(master, c(Region:Driver_5)) %>%
  tidyr::pivot_longer(Driver_1:Driver_5) %>%  # group driver cols together
  #tidyr::drop_na() %>%
  dplyr::mutate(Month = lubridate::month(Data_Date)) %>%
  dplyr::mutate(Week = lubridate::epiweek(Data_Date)) # add in month

# 38,970 rows
long

# 882 unique counties with drivers
summarise(long, n_distinct(FIPS))
```


```{r}
# count of each driver by FIPS
drivers_fips <- long %>%
  dplyr::group_by(FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  tidyr::pivot_wider(names_from = value, values_from = n)

# 882 rows
drivers_fips

# 882 unique counties
summarise(long, n_distinct(FIPS))
```


```{r}
# count number of days per county
ndays_fips <- long %>%
  dplyr::group_by(FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
  dplyr::summarise(ndays = n_distinct(Data_Date)) %>%
  arrange(desc(ndays))  # sort by ndays

# 882 rows
ndays_fips
```


```{r}
# join tables
ndays_drivers_fips <- dplyr::full_join(drivers_fips, ndays_fips,
                                           by = c("FIPS", "Region", "State", "County", "Tribal",
                                                  "Census_4", "Urban_3", "Urban_6")) %>%
  dplyr::select(Census_4, Urban_3, Urban_6, FIPS, ndays, everything()) # reorder columns

# 822 rows
ndays_drivers_fips

#write.csv(ndays_drivers_fips, "by_county.csv")
```

```{r}
# create table of drivers with time periods
week_drivers_regions <- long %>%
  dplyr::group_by(Week, Month, FIPS, Region, State, County, Tribal, Census_4, Urban_3, Urban_6) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  tidyr::pivot_wider(names_from = value, values_from = n)

# 2,912 rows
week_drivers_regions

#write.csv(week_drivers_regions, "by_week.csv")
```


```{r}
## plot ndays histogram

# number of unique counties by ndays
ndays_drivers_fips  # 822 rows

p_hist <- ggplot(ndays_drivers_fips) +
  geom_bar(mapping = aes(x = ndays))
p_hist

# facet ndays histogram by region
p_hist_region <- p_hist + facet_wrap(~Region)
p_hist_region
```


```{r}
# more histograms
p_hist + facet_wrap(~Census_4)

p_hist + facet_wrap(~Urban_3)

p_hist + facet_wrap(~Urban_6)

p_hist + facet_grid(Census_4 ~ Urban_3)

p_hist + facet_grid(Census_4 ~ Urban_6)
```

```{r eval=FALSE, include=FALSE}
## plot driver counts over time period and by region

# driver counts by time and region
ndrivers_by_time_region <- long %>%
  select(Census_4, Urban_3, Urban_6, FIPS, Month, Week, value) %>%
  distinct() %>%
  dplyr::group_by(Census_4, Urban_3, Urban_6, Month, Week) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  dplyr::filter(value != "n/a")

# 1,319 rows
ndrivers_by_time_region

# set up weekly plot
g <- ggplot(ndrivers_by_time_region,
            aes(x = as.factor(Week),
                y = n,
                fill = forcats::fct_reorder(value, n))) +
  theme_minimal()


# stacked bar chart of driver counts by week
g + geom_bar(stat = "identity", position = "stack")
#ggsave("fig1.pdf")

# bar chart
g + geom_bar(stat = "identity", position = "dodge")
#ggsave("fig2.pdf")

# stacked bar chart by proportion 
g + geom_bar(stat = "identity", position = "fill")
#ggsave("fig3.pdf")

```

```{r eval=FALSE, include=FALSE}
## facet by regions over time

# bar chart
g + geom_bar(stat = "identity", position = "dodge") +
    facet_wrap(~Census_4)

g + geom_bar(stat = "identity", position = "dodge") +
    facet_wrap(~Urban_3)

g + geom_bar(stat = "identity", position = "dodge") +
    facet_wrap(~Urban_6, nrow = 2)



# stacked bar chart of driver counts by week
g + geom_bar(stat = "identity", position = "stack") +
  facet_grid(Census_4 ~ Urban_3)

g + geom_bar(stat = "identity", position = "stack") +
  facet_grid(Census_4 ~ Urban_6)



# stacked bar chart by proportion 
g + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~Census_4)

g + geom_bar(stat = "identity", position = "fill") +
    facet_grid(Census_4 ~ Urban_3)

g + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~Urban_3, nrow = 3)

g + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~Urban_6)

```

```{r eval=FALSE, include=FALSE}
## plot by driver and census region over time
# group by census region and week
ndrivers_by_region_week <- long %>%
  dplyr::group_by(Census_4, Week) %>%
  dplyr::count(value) %>%
  tidyr::drop_na() %>%
  dplyr::filter(value != "n/a")


# 266 rows
ndrivers_by_region_week


# fill by census region
g2 <- ggplot(ndrivers_by_time_region,
            aes(x = as.factor(Week),
                y = n,
                fill = Census_4)) +
  theme_minimal()


# grouped by census region
g2 + geom_bar(stat = "identity", position = "dodge")
g2 + geom_bar(stat = "identity", position = "stack")
g2 + geom_bar(stat = "identity", position = "fill")


# bar plots
g2 + geom_bar(stat = "identity", position = "dodge") +
    facet_wrap(~value)

g2 + geom_bar(stat = "identity", position = "dodge") +
    facet_grid(value ~ Urban_3)



# stacked bar chart of driver counts by week
g + geom_bar(stat = "identity", position = "stack") +
  facet_grid(value ~ Urban_3)

g + geom_bar(stat = "identity", position = "stack") +
  facet_grid(value ~ Urban_6)



# stacked bar chart by proportion 
g2 + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~value)

g2 + geom_bar(stat = "identity", position = "fill") +
    facet_grid(value ~ Urban_3)

g2 + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~Urban_3)

g2 + geom_bar(stat = "identity", position = "fill") +
    facet_wrap(~Urban_6)

```

```{r eval=FALSE, include=FALSE}
# plot regions over time facetted by driver
ggplot(ndrivers_by_region_week,
       aes(x = Week,
           y = n,
           color = as.factor(Census_4))) +
  geom_line() +
  facet_wrap(~value) +
  theme_minimal() +
  ggtitle("Driver trend by region")

#ggsave("fig6.pdf")


# convert the above code into readable table
ndrivers_by_region_week %>%
  tidyr::pivot_wider(names_from = value, values_from = n)

```
