#Tell me where to find the latest and greatest COVID DATA
jhu_url <- paste("https://raw.githubusercontent.com/CSSEGISandData/",
"COVID-19/master/csse_covid_19_data/", "csse_covid_19_time_series/",
"time_series_covid19_confirmed_US.csv", sep = "")
votes_url <- paste("https://raw.githubusercontent.com/tonmcg/",
"US_County_Level_Election_Results_08-16/master/",
"2016_US_County_Level_Presidential_Results.csv",sep = "")
census_url <- paste("https://www2.census.gov/programs-surveys/popest/",
"datasets/2010-2019/counties/totals/co-est2019-alldata.csv",sep = "")
abbrev_url <- paste("https://raw.githubusercontent.com/jasonong/",
"List-of-US-States/master/states.csv",sep = "")
Begin by reading in the voting and covid data.
#Read in Voting Data
votes <- read_csv(votes_url) %>%
rename(FIPS="combined_fips")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## votes_dem = col_double(),
## votes_gop = col_double(),
## total_votes = col_double(),
## per_dem = col_double(),
## per_gop = col_double(),
## diff = col_number(),
## per_point_diff = col_character(),
## state_abbr = col_character(),
## county_name = col_character(),
## combined_fips = col_double()
## )
#Combine COVID + Voting Data
covidData <-
read_csv(jhu_url) %>%
rename(province = "Province_State",
country_region = "Country_Region",
county="Admin2") %>%
select(-c(UID,iso2,iso3,code3,country_region,Lat,Long_,Combined_Key)) %>%
pivot_longer(-c(province,county,FIPS), names_to = "d",
values_to = "cumulative_cases") %>%
separate(d,c("Month","Day","Year"),sep="/") %>%
mutate(dstring=sprintf("%02i/%02i/%02i", #some parsing to make dates work correctly
as.numeric(Month),
as.numeric(Day),
as.numeric(Year)),
d=as.Date(dstring,"%m/%d/%y")) %>%
select(d,county,province,FIPS,cumulative_cases) %>%
arrange(d) %>%
group_by(FIPS) %>% #for each FIPS ID, calculate new cases per day
mutate(new_cases = cumulative_cases-dplyr::lag(cumulative_cases,1)) %>%
ungroup() %>%
left_join(select(votes,FIPS,votes_dem,votes_gop),by="FIPS") %>% #add voting data for each FIPS ID
mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
Now read in the state abbreviation table, and figure out state voting data.
#Read in State Abbreviation Data
states <- read_csv(abbrev_url) %>%
rename(province="State",
state_abbr="Abbreviation")
## Parsed with column specification:
## cols(
## State = col_character(),
## Abbreviation = col_character()
## )
#Aggreate voting data by state vice county
votes_state = votes %>%
group_by(state_abbr) %>%
summarize(votes_dem=sum(votes_dem),votes_gop=sum(votes_gop)) %>%
mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump")) %>%
left_join(states,by="state_abbr")
Read in population data and add the 2019 estimate for population into the COVID data.
#Read in Population Data
co_est2019_alldata =
read_csv(census_url) %>%
select(STATE,COUNTY,POPESTIMATE2019) %>%
rename(population="POPESTIMATE2019") %>%
mutate(FIPS=as.numeric(str_c(as.character(STATE),as.character(COUNTY))))
## Parsed with column specification:
## cols(
## .default = col_double(),
## SUMLEV = col_character(),
## STATE = col_character(),
## COUNTY = col_character(),
## STNAME = col_character(),
## CTYNAME = col_character()
## )
## See spec(...) for full column specifications.
#Combine Covid and population Data
covidData =
covidData %>%
left_join(select(co_est2019_alldata,FIPS,population),by="FIPS") %>%
filter(!is.na(population))
covidData %>%
filter(!is.na(elect)) %>%
group_by(d,elect) %>%
summarize(new=sum(new_cases)) %>% #add up all the county counts, binned by date and election outcome
group_by(elect) %>%
mutate(rnew=rollmean(new,7,fill=NA,align="right")) %>% #calculate 7 day running average
ungroup() %>%
ggplot()+
aes(x=d,y=new,color=elect) +
geom_line(linetype=2,alpha=0.4) +
geom_line(aes(y=rnew)) +
theme_bw() +
xlab("Date") +
ylab("New Cases") +
ggtitle("COVID-19 Cases", subtitle="Binned by County Election Results") +
scale_color_manual(values=c("blue","red")) +
scale_x_date(date_labels = "%b %d",date_breaks = "1 month")
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 14 rows containing missing values (geom_path).
covidData %>%
group_by(d,province) %>%
summarize(new=sum(new_cases)) %>%
left_join(select(votes_state,province,elect),by="province") %>%
group_by(elect) %>%
filter(!is.na(elect)) %>%
group_by(d,elect) %>%
summarize(new=sum(new)) %>%
group_by(elect) %>%
mutate(rnew=rollmean(new,7,fill=NA,align="right")) %>%
ungroup() %>%
ggplot()+
aes(x=d,color=elect) +
geom_line(aes(y=new),linetype=2,alpha=0.4) +
geom_line(aes(y=rnew)) +
theme_bw() +
xlab("Date") +
ylab("Number of New Cases") +
ggtitle("COVID-19 Cases" ,subtitle="Binned by State Election Results") +
scale_color_manual(values=c("blue","red")) +
scale_x_date(date_labels = "%b %d",date_breaks = "1 month")
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 14 rows containing missing values (geom_path).
covidData %>%
filter(!is.na(elect)) %>%
group_by(d,elect) %>%
summarize(new=sum(new_cases), population=sum(population),ncap=new/population*1e6) %>%
group_by(elect) %>%
mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
ungroup() %>%
ggplot()+
aes(x=d,y=ncap,color=elect) +
geom_line(alpha=0.4,linetype=2) +
geom_line(aes(y=rncap)) +
theme_bw() +
xlab("Date") +
ylab("New Cases per 1 Million") +
ggtitle("COVID-19 Cases Per Capita " ,subtitle="Binned by County Election Results") +
scale_color_manual(values=c("blue","red")) +
scale_x_date(date_labels = "%b %d",date_breaks = "1 month")
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 14 rows containing missing values (geom_path).
covidData %>%
group_by(d,province) %>%
summarize(new=sum(new_cases),population=sum(population)) %>%
left_join(select(votes_state,province,elect),by="province") %>%
filter(!is.na(elect)) %>%
group_by(d,elect) %>%
summarize(new=sum(new),ncap=new/sum(population)*1e6) %>%
group_by(elect) %>%
mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
ungroup() %>%
ggplot()+
aes(x=d,y=ncap,color=elect) +
geom_line(alpha=0.4,linetype=2) +
geom_line(aes(y=rncap)) +
xlab("Date") +
ylab("New Cases per 1 Million") +
ggtitle("COVID-19 Cases Per Capita " ,subtitle="Binned by State Election Results") +
scale_color_manual(values=c("blue","red")) +
scale_x_date(date_labels = "%b %d",date_breaks = "1 month") +
theme_bw()
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 14 rows containing missing values (geom_path).