#Tell me where to find the latest and greatest COVID DATA
jhu_url <- paste("https://raw.githubusercontent.com/CSSEGISandData/",
"COVID-19/master/csse_covid_19_data/", "csse_covid_19_time_series/",
"time_series_covid19_confirmed_US.csv", sep = "")
votes_url <- paste("https://raw.githubusercontent.com/tonmcg/",
"US_County_Level_Election_Results_08-16/master/",
"2016_US_County_Level_Presidential_Results.csv",sep = "")
census_url <- paste("https://www2.census.gov/programs-surveys/popest/",
"datasets/2010-2019/counties/totals/co-est2019-alldata.csv",sep = "")
abbrev_url <- paste("https://raw.githubusercontent.com/jasonong/",
"List-of-US-States/master/states.csv",sep = "")
Begin by reading in the voting and covid data.
#Read in Voting Data
votes <- read_csv(votes_url) %>%
rename(FIPS="combined_fips")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## votes_dem = col_double(),
## votes_gop = col_double(),
## total_votes = col_double(),
## per_dem = col_double(),
## per_gop = col_double(),
## diff = col_number(),
## per_point_diff = col_character(),
## state_abbr = col_character(),
## county_name = col_character(),
## combined_fips = col_double()
## )
#Combine COVID + Voting Data
covidData <-
read_csv(jhu_url) %>%
rename(province = "Province_State",
country_region = "Country_Region",
county="Admin2") %>%
select(-c(UID,iso2,iso3,code3,country_region,Lat,Long_,Combined_Key)) %>%
pivot_longer(-c(province,county,FIPS), names_to = "d",
values_to = "cumulative_cases") %>%
separate(d,c("Month","Day","Year"),sep="/") %>%
mutate(dstring=sprintf("%02i/%02i/%02i", #some parsing to make dates work correctly
as.numeric(Month),
as.numeric(Day),
as.numeric(Year)),
d=as.Date(dstring,"%m/%d/%y")) %>%
select(d,county,province,FIPS,cumulative_cases) %>%
arrange(d) %>%
mutate(FIPS=ifelse(FIPS==46102,46113,FIPS)) %>%
group_by(FIPS) %>% #for each FIPS ID, calculate new cases per day
mutate(new_cases = cumulative_cases-dplyr::lag(cumulative_cases,1)) %>%
ungroup() %>%
left_join(select(votes,FIPS,votes_dem,votes_gop),by="FIPS") %>% #add voting data for each FIPS ID
mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
Now read in the state abbreviation table, and figure out state voting data.
#Read in State Abbreviation Data
states <- read_csv(abbrev_url) %>%
rename(province="State",
state_abbr="Abbreviation")
## Parsed with column specification:
## cols(
## State = col_character(),
## Abbreviation = col_character()
## )
#Aggreate voting data by state vice county
votes_state = votes %>%
group_by(state_abbr) %>%
summarize(votes_dem=sum(votes_dem),votes_gop=sum(votes_gop)) %>%
mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump")) %>%
left_join(states,by="state_abbr")
## `summarise()` ungrouping output (override with `.groups` argument)
Read in population data and add the 2019 estimate for population into the COVID data.
#Read in Population Data
co_est2019_alldata =
read_csv(census_url) %>%
select(STATE,COUNTY,POPESTIMATE2019) %>%
rename(population="POPESTIMATE2019") %>%
mutate(FIPS=as.numeric(str_c(as.character(STATE),as.character(COUNTY)))) %>%
mutate(FIPS=ifelse(FIPS==46102,46113,FIPS))
## Parsed with column specification:
## cols(
## .default = col_double(),
## SUMLEV = col_character(),
## STATE = col_character(),
## COUNTY = col_character(),
## STNAME = col_character(),
## CTYNAME = col_character()
## )
## See spec(...) for full column specifications.
#Combine Covid and population Data
covidData =
covidData %>%
left_join(select(co_est2019_alldata,FIPS,population),by="FIPS") %>%
filter(!is.na(population))
countyCovid=covidData %>%
ungroup() %>%
filter(!is.na(elect)) %>%
mutate(ncap=new_cases/population) %>%
group_by(FIPS) %>%
arrange(d) %>%
mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
mutate(rnew=rollmean(new_cases,7,fill=NA,align="right"))
And a plot! NOTE: To show off some detail in the plot, any county where the number of new cases per 100,000 people is > \(cmax\) (set to 50), we set the value of the new case rate to \(cmax\).
library(mapdata)
## Loading required package: maps
library(viridis)
## Loading required package: viridisLite
library(mapproj)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:readr':
##
## col_factor
cmax=50
votes=votes %>%
mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))
data(county.fips)
cfips=county.fips %>%
mutate(polyname=str_remove_all(polyname,pattern=":.*"))
#lets look at the map of the US on a specific date.
dCovid = covidData %>%
ungroup() %>%
mutate(ncap=new_cases/population*1e5) %>%
group_by(FIPS) %>%
mutate(rnew=rollmean(new_cases,7,fill=NA,align="right")) %>%
mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
ungroup()
ddCovid=dCovid %>%
filter(d=="2020-07-04")
states <- map_data("state")
counties=map_data("county") %>%
mutate(polyname=str_c(region,',',subregion)) %>%
left_join(cfips,by="polyname") %>%
rename(FIPS="fips") %>%
left_join(ddCovid,by="FIPS")
p1=counties %>%
# mutate(rncap =ifelse(rncap>cmax,cmax,rncap)) %>%
ggplot() +
aes(x = long, y = lat,group=group) +
geom_polygon(aes(fill=rncap,subgroup=subregion),size=0.1) +
theme_bw()+
theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position = "bottom")+
scale_fill_viridis(limits=c(0,50),
breaks=seq(0,50,10),
labels=c("0","10","20","30","40",">50"),
oob=squish) +
geom_path(data=states,color="black",size=0.4) +
labs(fill = "New Cases \n per 100,000") +
ggtitle("New COVID-19 Cases Per Capita \n (7-Day Rolling Average) | 4JUL2020")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggp1=ggplotly(p1)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ggp1$x$data[[3022]]$hoverinfo = "none"
ggp1