Hudson Code

Load COVID-19_Cases Dataset

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

# Read the COVID-19 case extract.
# `Difference` holds numeric values (e.g. 59, 10, 29), but the rows readr
# samples for type guessing apparently contain only blanks, 0 and 1, so the
# column is guessed as logical and the larger numbers fail to parse (they
# become NA). Declare it as a double; every other column keeps its guessed
# type, and `Date` stays character so it can be parsed explicitly later.
covid3 <- read_csv(
  "covid_cases_hudson.csv",
  col_types = cols(Difference = col_double())
)

## Parsed with column specification:
## cols(
##   Case_Type = col_character(),
##   Cases = col_double(),
##   Difference = col_logical(),
##   Date = col_character(),
##   Country_Region = col_character(),
##   Province_State = col_character(),
##   Admin2 = col_character(),
##   FIPS = col_double(),
##   Combined_Key = col_character(),
##   Long = col_double(),
##   Lat = col_double(),
##   Table_Names = col_character(),
##   Prep_Flow_Runtime = col_character(),
##   Latest_Date = col_character()
## )

## Warning: 5299 parsing failures.
##   row        col           expected actual                     file
## 38073 Difference 1/0/T/F/TRUE/FALSE     59 'covid_cases_hudson.csv'
## 38074 Difference 1/0/T/F/TRUE/FALSE     10 'covid_cases_hudson.csv'
## 38076 Difference 1/0/T/F/TRUE/FALSE     29 'covid_cases_hudson.csv'
## 38078 Difference 1/0/T/F/TRUE/FALSE     3  'covid_cases_hudson.csv'
## 38079 Difference 1/0/T/F/TRUE/FALSE     13 'covid_cases_hudson.csv'
## ..... .......... .................. ...... ........................
## See problems(...) for more details.

# Per-column overview of the raw import (types, ranges, NA counts).
covid3 %>% summary()

##   Case_Type             Cases         Difference          Date          
##  Length:78674       Min.   :    0.0   Mode :logical   Length:78674      
##  Class :character   1st Qu.:    0.0   FALSE:33945     Class :character  
##  Mode  :character   Median :    0.0   TRUE :1358      Mode  :character  
##                     Mean   :  109.7   NA's :43371                       
##                     3rd Qu.:    1.0                                     
##                     Max.   :92472.0                                     
##                     NA's   :648                                         
##  Country_Region     Province_State        Admin2               FIPS      
##  Length:78674       Length:78674       Length:78674       Min.   : 1001  
##  Class :character   Class :character   Class :character   1st Qu.:18179  
##  Mode  :character   Mode  :character   Mode  :character   Median :29179  
##                                                           Mean   :30413  
##                                                           3rd Qu.:45083  
##                                                           Max.   :78000  
##                                                           NA's   :40928  
##  Combined_Key            Long              Lat         Table_Names       
##  Length:78674       Min.   :-164.04   Min.   :-41.45   Length:78674      
##  Class :character   1st Qu.: -93.02   1st Qu.: 27.93   Class :character  
##  Mode  :character   Median : -80.45   Median : 36.72   Mode  :character  
##                     Mean   : -42.15   Mean   : 31.46                     
##                     3rd Qu.:  12.00   3rd Qu.: 41.78                     
##                     Max.   : 178.06   Max.   : 71.71                     
##                                                                          
##  Prep_Flow_Runtime  Latest_Date       
##  Length:78674       Length:78674      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
##

Notice that Date is read in as a character column. Parse it with mdy() into a new column, date, of class Date.

# Add a `date` column of class Date by parsing the month/day/year strings
# in `Date`; the original character column is left untouched.
covid4 <- mutate(covid3, date = mdy(Date))
summary(covid4)

##   Case_Type             Cases         Difference          Date          
##  Length:78674       Min.   :    0.0   Mode :logical   Length:78674      
##  Class :character   1st Qu.:    0.0   FALSE:33945     Class :character  
##  Mode  :character   Median :    0.0   TRUE :1358      Mode  :character  
##                     Mean   :  109.7   NA's :43371                       
##                     3rd Qu.:    1.0                                     
##                     Max.   :92472.0                                     
##                     NA's   :648                                         
##  Country_Region     Province_State        Admin2               FIPS      
##  Length:78674       Length:78674       Length:78674       Min.   : 1001  
##  Class :character   Class :character   Class :character   1st Qu.:18179  
##  Mode  :character   Mode  :character   Mode  :character   Median :29179  
##                                                           Mean   :30413  
##                                                           3rd Qu.:45083  
##                                                           Max.   :78000  
##                                                           NA's   :40928  
##  Combined_Key            Long              Lat         Table_Names       
##  Length:78674       Min.   :-164.04   Min.   :-41.45   Length:78674      
##  Class :character   1st Qu.: -93.02   1st Qu.: 27.93   Class :character  
##  Mode  :character   Median : -80.45   Median : 36.72   Mode  :character  
##                     Mean   : -42.15   Mean   : 31.46                     
##                     3rd Qu.:  12.00   3rd Qu.: 41.78                     
##                     Max.   : 178.06   Max.   : 71.71                     
##                                                                          
##  Prep_Flow_Runtime  Latest_Date             date           
##  Length:78674       Length:78674       Min.   :2020-01-22  
##  Class :character   Class :character   1st Qu.:2020-02-23  
##  Mode  :character   Mode  :character   Median :2020-03-23  
##                                        Mean   :2020-03-09  
##                                        3rd Qu.:2020-03-26  
##                                        Max.   :2020-03-28  
##

# Compact structure dump to confirm `date` was added with class Date.
covid4 %>% str()

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 78674 obs. of  15 variables:
##  $ Case_Type        : chr  "Deaths" "Deaths" "Confirmed" "Confirmed" ...
##  $ Cases            : num  0 0 0 0 13 0 11 0 0 2 ...
##  $ Difference       : logi  NA NA NA NA NA NA ...
##  $ Date             : chr  "3/23/2020" "3/25/2020" "3/26/2020" "3/25/2020" ...
##  $ Country_Region   : chr  "US" "US" "US" "US" ...
##  $ Province_State   : chr  "Iowa" "Texas" "Ohio" "Kansas" ...
##  $ Admin2           : chr  "Clinton" "Edwards" "Auglaize" "Elk" ...
##  $ FIPS             : num  19045 48137 39011 20049 50001 ...
##  $ Combined_Key     : chr  "Clinton, Iowa, US" "Edwards, Texas, US" "Auglaize, Ohio, US" "Elk, Kansas, US" ...
##  $ Long             : num  -90.5 -100.3 -84.2 -96.2 -73.1 ...
##  $ Lat              : num  41.9 30 40.6 37.5 44 ...
##  $ Table_Names      : chr  "Daily Summary" "Daily Summary" "Daily Summary" "Daily Summary" ...
##  $ Prep_Flow_Runtime: chr  "3/28/2020" "3/28/2020" "3/28/2020" "3/28/2020" ...
##  $ Latest_Date      : chr  "3/28/2020" "3/28/2020" "3/28/2020" "3/28/2020" ...
##  $ date             : Date, format: "2020-03-23" "2020-03-25" ...

# Total reported cases per calendar day (one row per `date`).
# `Cases` contains missing values, so drop them while summing; otherwise a
# single NA row would turn that whole day's total into NA.
# NOTE(review): this adds "Confirmed" and "Deaths" rows together across all
# regions -- confirm that a combined total is what is intended.
covid5 <- covid4 %>%
  group_by(date) %>%
  summarize(counts = sum(Cases, na.rm = TRUE))
covid5

## # A tibble: 67 x 2
##    date       counts
##    <date>      <dbl>
##  1 2020-01-22    572
##  2 2020-01-23    672
##  3 2020-01-24    967
##  4 2020-01-25   1476
##  5 2020-01-26   2174
##  6 2020-01-27   3009
##  7 2020-01-28   5709
##  8 2020-01-29   6299
##  9 2020-01-30   8405
## 10 2020-01-31  10140
## # ... with 57 more rows

# Cases over time for four countries, one panel per case type.
# The x-axis uses the parsed `date` column, not the character `Date`: a
# character axis is discrete and ordered as text (e.g. "2/10/2020" sorts
# before "2/2/2020"), which scrambles the timeline.
p1 <- covid4 %>%
  filter(Country_Region %in% c("US", "Italy", "France", "Spain")) %>%
  ggplot(aes(date, Cases, color = Country_Region)) +
  geom_point() +
  facet_wrap(~ Case_Type)
p1

## Warning: Removed 648 rows containing missing values (geom_point).

Hudson Code

Rachel Saidi

3/30/2020

Load COVID-19_Cases Dataset

Notice that Date is read in as a character column. Convert it to a Date.