On this occassion we will try to replicate the first interactive plot from the FiveThirtyEight article titled How Americans View The Coronavirus Crisis And Trump’s Response.

Library and Setup

Below is the required package that we will use during data wrangling and chart creation.

library(tidyverse)
library(lubridate)
library(highcharter)
library(scales)

Import Data

All data is available directly from the article.

Concern Top Line

We load the data that correspond to the line for each survey responses. People can choose between four different responses: Not At All, Not Very, Somewhat, and Very.

concern_topline <- read.csv("covid-19-polls-master/covid_concern_toplines.csv")

head(concern_topline, 10)

In this part we will convert do the following process:

  • Convert modeldate into date format
  • Filter data to only concern about infection
  • Create long format table from the four responses
  • Tidy the responses category
  • Prepare the output for the tooltip
  • Prepare the column modeldate into proper timestamp format for highcharter
df_concern <- concern_topline %>% 
  mutate(
    modeldate= mdy(modeldate)
  ) %>% 
  arrange(modeldate) %>% 
  filter(subject %>% str_detect("infect")) %>% 
  select(-c(subject, timestamp, party)) %>% 
  pivot_longer(-modeldate) %>% 
  mutate(name = name %>% 
           str_remove_all("_estimate") %>% 
           str_replace_all("_", " ") %>% 
           str_to_title(),
         
         tooltip = paste0(name, ": ", number(value, accuracy = 0.01, suffix = "%")),
         date = datetime_to_timestamp(modeldate)
           )

head(df_concern, 10)

Survey Polls

We will load the Survey Polls regarding poeple concern about COVID-19.

covid_concern <- read.csv("covid-19-polls-master/covid_concern_polls.csv")

glimpse(covid_concern)
## Rows: 595
## Columns: 15
## $ start_date  <chr> "2020-01-27", "2020-01-31", "2020-02-02", "2020-02-07", "…
## $ end_date    <chr> "2020-01-29", "2020-02-02", "2020-02-04", "2020-02-09", "…
## $ pollster    <chr> "Morning Consult", "Morning Consult", "YouGov", "Morning …
## $ sponsor     <chr> "", "", "Economist", "", "Huffington Post", "Economist", …
## $ sample_size <int> 2202, 2202, 1500, 2200, 1000, 1500, 1074, 1207, 1207, 150…
## $ population  <chr> "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a…
## $ party       <chr> "all", "all", "all", "all", "all", "all", "all", "all", "…
## $ subject     <chr> "concern-economy", "concern-economy", "concern-infected",…
## $ tracking    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ text        <chr> "How concerned are you that the coronavirus will impact t…
## $ very        <dbl> 19, 26, 13, 23, 11, 11, 22, 22, 22, 10, 10, 32, 32, 41, 1…
## $ somewhat    <dbl> 33, 32, 26, 32, 24, 28, 23, 35, 21, 28, 30, 37, 39, 37, 3…
## $ not_very    <dbl> 23, 25, 43, 24, 33, 39, 37, 28, 33, 42, 40, 18, 17, 14, 2…
## $ not_at_all  <dbl> 11, 7, 18, 9, 20, 22, 19, 15, 23, 19, 20, 6, 5, 5, 16, 5,…
## $ url         <chr> "https://morningconsult.com/wp-content/uploads/2020/02/20…

There are two type of polls, concern related to economy and concern related to health risk and safety. We will use the later one.

concern_infection <- covid_concern %>% 
  filter(subject == "concern-infected")

We will do similar step to prepare the long format version of the data. For the date we will use the end_date of each survey. If the sponsor the survey is missing/NA, we will italic the data.

df_infect <- concern_infection %>% 
  mutate_at(vars(contains("date")), ymd) %>% 
  select(end_date, very, somewhat, not_very, not_at_all, pollster, sponsor) %>% 
  pivot_longer(-c(end_date, pollster, sponsor)) %>% 
  mutate(name = name %>% 
           str_remove_all("_estimate") %>% 
           str_replace_all("_", " ") %>% 
           str_to_title(),
         sponsor = ifelse(sponsor == "", "<i>NA</i>", sponsor)
           )

df_infect <- df_infect %>% 
  filter(end_date >= min(df_concern$modeldate)) %>% 
  mutate(date = datetime_to_timestamp(end_date))

df_infect

Event

As the vertical line that notify important event, we will manually create the data.frame with information gained from the original plot.

tribble(~event, ~date,
                      "First U.S. death reported", "2020-02-29",
                      "U.S. deaths surpass 10,000", "2020-04-06",
                      "U.S. deaths surpass 100,000", "2020-05-28",
                      "Trump diagnosed with COVID-19", "2020-10-2"
                      )

Here, I directly transform the event data into HTML format to suite the text shown in the original plot. The date are also transformed into timestamp so they will properly positioned in the highcharter x-axis.

  • <i>: italic
  • <br>: line break
event_date <- tribble(~event, ~date,
                      "<i>First U.S.<br><i>death<br><i>reported</i>", "2020-02-29",
                      "<i>U.S. deaths<br><i>surpass<br><i>10,000</i>", "2020-04-06",
                      "<i>U.S. deaths<br><i>surpass<br><i>100,000</i>", "2020-05-28",
                      "<i>Trump<br><i>diagnosed<br><i>with<br><i>COVID-19</i>", "2020-10-2"
                      ) %>% 
  mutate(date = ymd(date) %>% 
           datetime_to_timestamp()
         )

event_date

Prepare Color

Here we will prepare the color of each responses category. For the scatter plot, we will use transparent color by transforming the color using hex_to_rgba() and set the transparency into 0.2.

df_color <- data.frame(name = unique(df_concern$name),
                       color = c("#F56B38",  "#F9AA87", "#DAAFD6", "#BF7CCC")
                       ) %>% 
  mutate(color_opaque = hex_to_rgba(color, 0.2))

df_concern <- df_concern %>% 
  left_join(df_color) 

df_infect <- df_infect %>% 
  left_join(df_color) 

# Final Color Scheme
scatter_color <- df_color$color_opaque[4:1]
line_color <- df_color$color[4:1]
color_group <- c(line_color, scatter_color)

The color_group will be used to indicate the color both for the line chart and the scatter plot.

Tooltip

Finally, we create the tooltip for each data. For the line chart, we set the font size to 14 pixels using font type of Roboto Slab (you can see all available font from Google Font API). For the scatter plot, we will show the responses and the percentage, the pollster and the sponsor. This part is different from the original plot that completely remove all tooltip from the scatter plot.

df_concern <- df_concern %>% 
   mutate(
    tooltip = paste0("<span style='font-size: 14px; font-family: Roboto Slab; color: ",color,"'>", name, ": ", number(value, accuracy = 0.01, suffix = "%"), "</span>")
    )

df_infect <- df_infect %>% 
   mutate(
    tooltip = paste0("<span style='color: ",color,"'>", name, ": ", number(value, accuracy = 0.01, suffix = "%"), "</span><br>",
                     "Pollster: ", pollster, "<br>", "Sponsor: ", sponsor)
    )

head(df_concern, 10)

Visualization

Finally, we create the interactive chart using highcharter. First, we will create the basic plot, including the scatter plot and the line chart. To remove the legend form the chart, we use the hc_legend() and set the color using the hc_color. The hcaes is where you will include the information regarding the x-Axis, y-Axis, and the color grouping based on the responses category.

hchart(df_concern, "line", hcaes(x = modeldate, y = value, group = name), lineWidth = 4)  %>% 
  hc_add_series(data = df_infect, 
                type = "scatter", hcaes(x = end_date, y = value, group = name)) %>% 
  hc_colors(colors = color_group) %>% # Adjust Color
  hc_legend(enabled = F) # Remove Legend

Next, we add the setting for the chart title, subtitle, x-Axis and y-Axis. During the creation of the x-axis, we also include the vertical line that correspond to important events related to COVID-19 using the plotLines.

hchart(df_concern, "line", hcaes(x = modeldate, y = value, group = name), lineWidth = 4)  %>% 
  hc_add_series(data = df_infect, 
                type = "scatter", hcaes(x = end_date, y = value, group = name)) %>% 
  hc_colors(colors = color_group) %>% 
  hc_legend(enabled = F) %>% 
  hc_title(text = "How worried are Americans about infection?", 
           style = list(fontWeight = "bold", fontSize = "19px")
           ) %>% 
  hc_subtitle(text = "How concerned Americans say they are that they, someone in their family or someone else they know will<br>become infected with the coronavirus",
              style = list(fontSize = "12px", color = "black", fontFamily = "Roboto Slab")) %>% 
  hc_yAxis(title = list(text = ""),
           labels = list(style = list(fontFamily = "Roboto Slab", fontWeight = "bold", color = "#A2A2BF"),
                         formatter = JS("function(){return(this.value + '%')}")), # Create % format for y-axis
           tickInterval= 25 # Interval of x-Axis (0, 25, 50, 75)
           ) %>% 
  hc_xAxis(title = list(text = ""),
           labels = list(style = list(fontFamily = "Roboto Slab", color = "#A2A2BF", fontWeight = "bold")),
           dateTimeLabelFormats = list(month = "%m/%d", week = "%m/%d"), # Format of the date label
           
           plotLines = list(
             list(value = event_date$date[1], color = "gray", 
                  label = list(text = event_date$event[1], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[2], color = "gray", 
                  label = list(text = event_date$event[2], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[3], color = "gray", 
                  label = list(text = event_date$event[3], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[4], color = "gray", 
                  label = list(text = event_date$event[4], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash")
           )
           
           ) 

Finally, we add the setting for the tooltip. The tooltip is very different since I don’t find a feature to replicate this part to be exactly the same with the original.

hchart(df_concern, "line", hcaes(x = modeldate, y = value, group = name), lineWidth = 4)  %>% 
  hc_add_series(data = df_infect, 
                type = "scatter", hcaes(x = end_date, y = value, group = name)) %>% 
  hc_colors(colors = color_group) %>% 
  hc_legend(enabled = F) %>% 
  hc_title(text = "How worried are Americans about infection?", 
           style = list(fontWeight = "bold", fontSize = "19px")
           ) %>% 
  hc_subtitle(text = "How concerned Americans say they are that they, someone in their family or someone else they know will<br>become infected with the coronavirus",
              style = list(fontSize = "12px", color = "black", fontFamily = "Roboto Slab")) %>% 
  hc_yAxis(title = list(text = ""),
           labels = list(style = list(fontFamily = "Roboto Slab", fontWeight = "bold", color = "#A2A2BF"),
                         formatter = JS("function(){return(this.value + '%')}")), 
           tickInterval= 25 ) %>% 
  hc_xAxis(title = list(text = ""),
           labels = list(style = list(fontFamily = "Roboto Slab", color = "#A2A2BF", fontWeight = "bold")),
           dateTimeLabelFormats = list(month = "%m/%d", week = "%m/%d"),
           
           plotLines = list(
             list(value = event_date$date[1], color = "gray", 
                  label = list(text = event_date$event[1], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[2], color = "gray", 
                  label = list(text = event_date$event[2], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[3], color = "gray", 
                  label = list(text = event_date$event[3], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash"),
             list(value = event_date$date[4], color = "gray", 
                  label = list(text = event_date$event[4], rotation = 0, align="center"),
                  zIndex = 1000, dashStyle = "longdash")
           )
           
           ) %>% 
  hc_tooltip(crosshairs = TRUE,
             backgroundColor = "white",
             fillOpacity = 0.5,
             shared = TRUE, 
             borderWidth = 0,
             useHTML = T,
             headerFormat = "<b><span style='font-size:16px; font-family: Roboto Slab; color: gray;'>{point.x: %b %d, %Y}</span></b><br>",
             pointFormat = "<b>{point.tooltip}</b><br>"
             )