COVID-19 County Data

Data Input and Parsing

#Tell me where to find the latest and greatest COVID DATA
jhu_url <- paste("https://raw.githubusercontent.com/CSSEGISandData/",
                 "COVID-19/master/csse_covid_19_data/", "csse_covid_19_time_series/",
                 "time_series_covid19_confirmed_US.csv", sep = "")

votes_url <- paste("https://raw.githubusercontent.com/tonmcg/",
                   "US_County_Level_Election_Results_08-16/master/",
                   "2016_US_County_Level_Presidential_Results.csv",sep = "")

census_url <- paste("https://www2.census.gov/programs-surveys/popest/",
                    "datasets/2010-2019/counties/totals/co-est2019-alldata.csv",sep = "")

abbrev_url <- paste("https://raw.githubusercontent.com/jasonong/",
                    "List-of-US-States/master/states.csv",sep = "")

Begin by reading in the voting and covid data.

#Read in Voting Data 
votes <- read_csv(votes_url) %>%
  rename(FIPS="combined_fips")

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   votes_dem = col_double(),
##   votes_gop = col_double(),
##   total_votes = col_double(),
##   per_dem = col_double(),
##   per_gop = col_double(),
##   diff = col_number(),
##   per_point_diff = col_character(),
##   state_abbr = col_character(),
##   county_name = col_character(),
##   combined_fips = col_double()
## )

#Combine COVID + Voting Data
covidData <-
  read_csv(jhu_url) %>%
  rename(province = "Province_State",
         country_region = "Country_Region",
         county="Admin2")  %>%
  select(-c(UID,iso2,iso3,code3,country_region,Lat,Long_,Combined_Key)) %>% 
  pivot_longer(-c(province,county,FIPS), names_to = "d", 
               values_to = "cumulative_cases") %>%
  separate(d,c("Month","Day","Year"),sep="/") %>%
  mutate(dstring=sprintf("%02i/%02i/%02i",   #some parsing to make dates work correctly
                         as.numeric(Month), 
                         as.numeric(Day), 
                         as.numeric(Year)),
         d=as.Date(dstring,"%m/%d/%y")) %>%
  select(d,county,province,FIPS,cumulative_cases) %>%
  arrange(d) %>%
  mutate(FIPS=ifelse(FIPS==46102,46113,FIPS)) %>%
  group_by(FIPS) %>%  #for each FIPS ID, calculate new cases per day
  mutate(new_cases = cumulative_cases-dplyr::lag(cumulative_cases,1)) %>%
  ungroup() %>%
  left_join(select(votes,FIPS,votes_dem,votes_gop),by="FIPS") %>%  #add voting data for each FIPS ID
  mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )

## See spec(...) for full column specifications.

Now read in the state abbreviation table, and figure out state voting data.

#Read in State Abbreviation Data
states <- read_csv(abbrev_url) %>%
  rename(province="State",
         state_abbr="Abbreviation")

## Parsed with column specification:
## cols(
##   State = col_character(),
##   Abbreviation = col_character()
## )

#Aggreate voting data by state vice county
votes_state = votes %>%
  group_by(state_abbr) %>%
  summarize(votes_dem=sum(votes_dem),votes_gop=sum(votes_gop)) %>%
  mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump")) %>%
  left_join(states,by="state_abbr")

## `summarise()` ungrouping output (override with `.groups` argument)

Read in population data and add the 2019 estimate for population into the COVID data.

#Read in Population Data
co_est2019_alldata =
  read_csv(census_url) %>%
  select(STATE,COUNTY,POPESTIMATE2019) %>%
  rename(population="POPESTIMATE2019") %>%
  mutate(FIPS=as.numeric(str_c(as.character(STATE),as.character(COUNTY)))) %>%
    mutate(FIPS=ifelse(FIPS==46102,46113,FIPS))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   SUMLEV = col_character(),
##   STATE = col_character(),
##   COUNTY = col_character(),
##   STNAME = col_character(),
##   CTYNAME = col_character()
## )

## See spec(...) for full column specifications.

#Combine Covid and population Data
covidData =
  covidData %>%
  left_join(select(co_est2019_alldata,FIPS,population),by="FIPS") %>%
  filter(!is.na(population))

County Density Plots

countyCovid=covidData %>%
  ungroup() %>%
  filter(!is.na(elect)) %>%
  mutate(ncap=new_cases/population) %>%
  group_by(FIPS) %>%
  arrange(d) %>%
  mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
  mutate(rnew=rollmean(new_cases,7,fill=NA,align="right"))

And a plot! NOTE: To show off some detail in the plot, any county where the number of new cases per 100,000 people is > \(cmax\) (set to 50), we set the value of the new case rate to \(cmax\).

library(mapdata)

## Loading required package: maps

library(viridis)

## Loading required package: viridisLite

library(mapproj)
library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:viridis':
## 
##     viridis_pal

## The following object is masked from 'package:readr':
## 
##     col_factor

cmax=50

votes=votes %>%
  mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))


data(county.fips)
cfips=county.fips %>%
  mutate(polyname=str_remove_all(polyname,pattern=":.*")) 
  

#lets look at the map of the US on a specific date.  
dCovid = covidData %>%
  ungroup() %>%
  mutate(ncap=new_cases/population*1e5) %>%
  group_by(FIPS) %>%
  mutate(rnew=rollmean(new_cases,7,fill=NA,align="right")) %>%
  mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
  ungroup()




ddCovid=dCovid %>%
  filter(d=="2020-07-04")

states <- map_data("state")


counties=map_data("county") %>%
  mutate(polyname=str_c(region,',',subregion)) %>%
  left_join(cfips,by="polyname") %>%
  rename(FIPS="fips") %>%
  left_join(ddCovid,by="FIPS")


p1=counties %>%
  # mutate(rncap =ifelse(rncap>cmax,cmax,rncap)) %>%
  ggplot() +
  aes(x = long, y = lat,group=group) +
  geom_polygon(aes(fill=rncap,subgroup=subregion),size=0.1) +
  theme_bw()+
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
          axis.text.y=element_blank(),axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
        legend.position = "bottom")+
  scale_fill_viridis(limits=c(0,50),
                     breaks=seq(0,50,10),
                     labels=c("0","10","20","30","40",">50"),
                     oob=squish) +
  geom_path(data=states,color="black",size=0.4) +
  labs(fill = "New Cases \n per 100,000")  +
  ggtitle("New COVID-19 Cases Per Capita \n (7-Day Rolling Average) | 4JUL2020")

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

ggp1=ggplotly(p1)

## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

ggp1$x$data[[3022]]$hoverinfo = "none"
ggp1

COVID-19 County Data

2020-07-08

Data Input and Parsing

County Density Plots