COVID-19 and Election Data

Data Input and Parsing

#Tell me where to find the latest and greatest COVID DATA
jhu_url <- paste("https://raw.githubusercontent.com/CSSEGISandData/",
                 "COVID-19/master/csse_covid_19_data/", "csse_covid_19_time_series/",
                 "time_series_covid19_confirmed_US.csv", sep = "")

votes_url <- paste("https://raw.githubusercontent.com/tonmcg/",
                   "US_County_Level_Election_Results_08-16/master/",
                   "2016_US_County_Level_Presidential_Results.csv",sep = "")

census_url <- paste("https://www2.census.gov/programs-surveys/popest/",
                    "datasets/2010-2019/counties/totals/co-est2019-alldata.csv",sep = "")

abbrev_url <- paste("https://raw.githubusercontent.com/jasonong/",
                    "List-of-US-States/master/states.csv",sep = "")

Begin by reading in the voting and covid data.

#Read in Voting Data 
votes <- read_csv(votes_url) %>%
  rename(FIPS="combined_fips")

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   votes_dem = col_double(),
##   votes_gop = col_double(),
##   total_votes = col_double(),
##   per_dem = col_double(),
##   per_gop = col_double(),
##   diff = col_number(),
##   per_point_diff = col_character(),
##   state_abbr = col_character(),
##   county_name = col_character(),
##   combined_fips = col_double()
## )

#Combine COVID + Voting Data
covidData <-
  read_csv(jhu_url) %>%
  rename(province = "Province_State",
         country_region = "Country_Region",
         county="Admin2")  %>%
  select(-c(UID,iso2,iso3,code3,country_region,Lat,Long_,Combined_Key)) %>% 
  pivot_longer(-c(province,county,FIPS), names_to = "d", 
               values_to = "cumulative_cases") %>%
  separate(d,c("Month","Day","Year"),sep="/") %>%
  mutate(dstring=sprintf("%02i/%02i/%02i",   #some parsing to make dates work correctly
                         as.numeric(Month), 
                         as.numeric(Day), 
                         as.numeric(Year)),
         d=as.Date(dstring,"%m/%d/%y")) %>%
  select(d,county,province,FIPS,cumulative_cases) %>%
  arrange(d) %>%
  group_by(FIPS) %>%  #for each FIPS ID, calculate new cases per day
  mutate(new_cases = cumulative_cases-dplyr::lag(cumulative_cases,1)) %>%
  ungroup() %>%
  left_join(select(votes,FIPS,votes_dem,votes_gop),by="FIPS") %>%  #add voting data for each FIPS ID
  mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump"))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )

## See spec(...) for full column specifications.

Now read in the state abbreviation table, and figure out state voting data.

#Read in State Abbreviation Data
states <- read_csv(abbrev_url) %>%
  rename(province="State",
         state_abbr="Abbreviation")

## Parsed with column specification:
## cols(
##   State = col_character(),
##   Abbreviation = col_character()
## )

#Aggreate voting data by state vice county
votes_state = votes %>%
  group_by(state_abbr) %>%
  summarize(votes_dem=sum(votes_dem),votes_gop=sum(votes_gop)) %>%
  mutate(elect = ifelse(votes_dem > votes_gop,"Clinton","Trump")) %>%
  left_join(states,by="state_abbr")

Read in population data and add the 2019 estimate for population into the COVID data.

#Read in Population Data
co_est2019_alldata =
  read_csv(census_url) %>%
  select(STATE,COUNTY,POPESTIMATE2019) %>%
  rename(population="POPESTIMATE2019") %>%
  mutate(FIPS=as.numeric(str_c(as.character(STATE),as.character(COUNTY))))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   SUMLEV = col_character(),
##   STATE = col_character(),
##   COUNTY = col_character(),
##   STNAME = col_character(),
##   CTYNAME = col_character()
## )

## See spec(...) for full column specifications.

#Combine Covid and population Data
covidData =
  covidData %>%
  left_join(select(co_est2019_alldata,FIPS,population),by="FIPS") %>%
  filter(!is.na(population))

Plots for Raw Counts

covidData %>%
  filter(!is.na(elect)) %>%
  group_by(d,elect) %>%
  summarize(new=sum(new_cases))  %>%  #add up all the county counts, binned by date and election outcome
  group_by(elect) %>%
  mutate(rnew=rollmean(new,7,fill=NA,align="right")) %>% #calculate 7 day running average
  ungroup() %>%
  ggplot()+
  aes(x=d,y=new,color=elect) +
  geom_line(linetype=2,alpha=0.4) +
  geom_line(aes(y=rnew)) +
  theme_bw() +
  xlab("Date") +
  ylab("New Cases") +
  ggtitle("COVID-19 Cases", subtitle="Binned by County Election Results") +
  scale_color_manual(values=c("blue","red")) +
  scale_x_date(date_labels = "%b %d",date_breaks = "1 month")

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 14 rows containing missing values (geom_path).

covidData %>%
  group_by(d,province) %>%
  summarize(new=sum(new_cases)) %>%
  left_join(select(votes_state,province,elect),by="province") %>%
  group_by(elect) %>%
  filter(!is.na(elect)) %>%
  group_by(d,elect) %>%
  summarize(new=sum(new)) %>%
  group_by(elect) %>%
  mutate(rnew=rollmean(new,7,fill=NA,align="right")) %>%
  ungroup() %>%
  ggplot()+
  aes(x=d,color=elect) +
  geom_line(aes(y=new),linetype=2,alpha=0.4) +
  geom_line(aes(y=rnew)) +
  theme_bw() +
  xlab("Date") +
  ylab("Number of New Cases") +
  ggtitle("COVID-19 Cases" ,subtitle="Binned by State Election Results") +
  scale_color_manual(values=c("blue","red")) +
  scale_x_date(date_labels = "%b %d",date_breaks = "1 month")

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 14 rows containing missing values (geom_path).

Plots for Per Capita

covidData %>%
  filter(!is.na(elect)) %>%
  group_by(d,elect) %>%
  summarize(new=sum(new_cases), population=sum(population),ncap=new/population*1e6) %>%
  group_by(elect) %>%
  mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
  ungroup() %>%
  ggplot()+
  aes(x=d,y=ncap,color=elect) +
  geom_line(alpha=0.4,linetype=2) +
  geom_line(aes(y=rncap)) +
  theme_bw() +
  xlab("Date") +
  ylab("New Cases per 1 Million") +
  ggtitle("COVID-19 Cases Per Capita " ,subtitle="Binned by County Election Results") +
  scale_color_manual(values=c("blue","red")) +
  scale_x_date(date_labels = "%b %d",date_breaks = "1 month")

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 14 rows containing missing values (geom_path).

covidData %>%
  group_by(d,province) %>%
  summarize(new=sum(new_cases),population=sum(population)) %>%
  left_join(select(votes_state,province,elect),by="province") %>%
  filter(!is.na(elect)) %>%
  group_by(d,elect) %>%
  summarize(new=sum(new),ncap=new/sum(population)*1e6) %>%
  group_by(elect) %>%
  mutate(rncap=rollmean(ncap,7,fill=NA,align="right")) %>%
  ungroup() %>%
  ggplot()+
  aes(x=d,y=ncap,color=elect) +
  geom_line(alpha=0.4,linetype=2) +
  geom_line(aes(y=rncap)) +
  xlab("Date") +
  ylab("New Cases per 1 Million") +
  ggtitle("COVID-19 Cases Per Capita " ,subtitle="Binned by State Election Results") +
  scale_color_manual(values=c("blue","red")) +
  scale_x_date(date_labels = "%b %d",date_breaks = "1 month") +
  theme_bw()

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 14 rows containing missing values (geom_path).

COVID-19 and Election Data

Mark Herrera

2020-07-03

Data Input and Parsing

Plots for Raw Counts

Plots for Per Capita