Load COVID-19_Cases Dataset
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Read the case data from CSV; column types are guessed by read_csv().
# NOTE(review): the parse log below shows `Difference` guessed as logical, and
# 5299 rows holding numeric values (e.g. 59, 10, 29) are reported as parsing
# failures. Consider passing col_types = cols(Difference = col_double()) and
# re-rendering this document -- TODO confirm.
covid3 <- read_csv("covid_cases_hudson.csv")
## Parsed with column specification:
## cols(
## Case_Type = col_character(),
## Cases = col_double(),
## Difference = col_logical(),
## Date = col_character(),
## Country_Region = col_character(),
## Province_State = col_character(),
## Admin2 = col_character(),
## FIPS = col_double(),
## Combined_Key = col_character(),
## Long = col_double(),
## Lat = col_double(),
## Table_Names = col_character(),
## Prep_Flow_Runtime = col_character(),
## Latest_Date = col_character()
## )
## Warning: 5299 parsing failures.
## row col expected actual file
## 38073 Difference 1/0/T/F/TRUE/FALSE 59 'covid_cases_hudson.csv'
## 38074 Difference 1/0/T/F/TRUE/FALSE 10 'covid_cases_hudson.csv'
## 38076 Difference 1/0/T/F/TRUE/FALSE 29 'covid_cases_hudson.csv'
## 38078 Difference 1/0/T/F/TRUE/FALSE 3 'covid_cases_hudson.csv'
## 38079 Difference 1/0/T/F/TRUE/FALSE 13 'covid_cases_hudson.csv'
## ..... .......... .................. ...... ........................
## See problems(...) for more details.
# Per-column summary of the raw import, to inspect column classes and NA counts.
summary(covid3)
## Case_Type Cases Difference Date
## Length:78674 Min. : 0.0 Mode :logical Length:78674
## Class :character 1st Qu.: 0.0 FALSE:33945 Class :character
## Mode :character Median : 0.0 TRUE :1358 Mode :character
## Mean : 109.7 NA's :43371
## 3rd Qu.: 1.0
## Max. :92472.0
## NA's :648
## Country_Region Province_State Admin2 FIPS
## Length:78674 Length:78674 Length:78674 Min. : 1001
## Class :character Class :character Class :character 1st Qu.:18179
## Mode :character Mode :character Mode :character Median :29179
## Mean :30413
## 3rd Qu.:45083
## Max. :78000
## NA's :40928
## Combined_Key Long Lat Table_Names
## Length:78674 Min. :-164.04 Min. :-41.45 Length:78674
## Class :character 1st Qu.: -93.02 1st Qu.: 27.93 Class :character
## Mode :character Median : -80.45 Median : 36.72 Mode :character
## Mean : -42.15 Mean : 31.46
## 3rd Qu.: 12.00 3rd Qu.: 41.78
## Max. : 178.06 Max. : 71.71
##
## Prep_Flow_Runtime Latest_Date
## Length:78674 Length:78674
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
Notice that `Date` was read in as a character column. Parse it with `mdy()` from lubridate and store the result in a new `date` column of class Date.
# Add `date`: the month/day/year strings in `Date` parsed into Date values.
# The original character column `Date` is kept alongside it.
covid4 <- mutate(covid3, date = mdy(Date))
summary(covid4)
## Case_Type Cases Difference Date
## Length:78674 Min. : 0.0 Mode :logical Length:78674
## Class :character 1st Qu.: 0.0 FALSE:33945 Class :character
## Mode :character Median : 0.0 TRUE :1358 Mode :character
## Mean : 109.7 NA's :43371
## 3rd Qu.: 1.0
## Max. :92472.0
## NA's :648
## Country_Region Province_State Admin2 FIPS
## Length:78674 Length:78674 Length:78674 Min. : 1001
## Class :character Class :character Class :character 1st Qu.:18179
## Mode :character Mode :character Mode :character Median :29179
## Mean :30413
## 3rd Qu.:45083
## Max. :78000
## NA's :40928
## Combined_Key Long Lat Table_Names
## Length:78674 Min. :-164.04 Min. :-41.45 Length:78674
## Class :character 1st Qu.: -93.02 1st Qu.: 27.93 Class :character
## Mode :character Median : -80.45 Median : 36.72 Mode :character
## Mean : -42.15 Mean : 31.46
## 3rd Qu.: 12.00 3rd Qu.: 41.78
## Max. : 178.06 Max. : 71.71
##
## Prep_Flow_Runtime Latest_Date date
## Length:78674 Length:78674 Min. :2020-01-22
## Class :character Class :character 1st Qu.:2020-02-23
## Mode :character Mode :character Median :2020-03-23
## Mean :2020-03-09
## 3rd Qu.:2020-03-26
## Max. :2020-03-28
##
# Show the structure of covid4 to check the class of the new `date` column.
str(covid4)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 78674 obs. of 15 variables:
## $ Case_Type : chr "Deaths" "Deaths" "Confirmed" "Confirmed" ...
## $ Cases : num 0 0 0 0 13 0 11 0 0 2 ...
## $ Difference : logi NA NA NA NA NA NA ...
## $ Date : chr "3/23/2020" "3/25/2020" "3/26/2020" "3/25/2020" ...
## $ Country_Region : chr "US" "US" "US" "US" ...
## $ Province_State : chr "Iowa" "Texas" "Ohio" "Kansas" ...
## $ Admin2 : chr "Clinton" "Edwards" "Auglaize" "Elk" ...
## $ FIPS : num 19045 48137 39011 20049 50001 ...
## $ Combined_Key : chr "Clinton, Iowa, US" "Edwards, Texas, US" "Auglaize, Ohio, US" "Elk, Kansas, US" ...
## $ Long : num -90.5 -100.3 -84.2 -96.2 -73.1 ...
## $ Lat : num 41.9 30 40.6 37.5 44 ...
## $ Table_Names : chr "Daily Summary" "Daily Summary" "Daily Summary" "Daily Summary" ...
## $ Prep_Flow_Runtime: chr "3/28/2020" "3/28/2020" "3/28/2020" "3/28/2020" ...
## $ Latest_Date : chr "3/28/2020" "3/28/2020" "3/28/2020" "3/28/2020" ...
## $ date : Date, format: "2020-03-23" "2020-03-25" ...
# Total of `Cases` per calendar day, summed over every row for that day
# (all regions and all case types together).
# `Cases` contains missing values (648 NAs in the summary above), so they are
# dropped when summing; without na.rm = TRUE a single NA turns that day's
# total into NA.
# The earlier select() was redundant: summarize() keeps only the grouping
# column and the new `counts` column.
covid5 <- covid4 %>%
  group_by(date) %>%
  summarize(counts = sum(Cases, na.rm = TRUE))
covid5
## # A tibble: 67 x 2
## date counts
## <date> <dbl>
## 1 2020-01-22 572
## 2 2020-01-23 672
## 3 2020-01-24 967
## 4 2020-01-25 1476
## 5 2020-01-26 2174
## 6 2020-01-27 3009
## 7 2020-01-28 5709
## 8 2020-01-29 6299
## 9 2020-01-30 8405
## 10 2020-01-31 10140
## # ... with 57 more rows
# Scatter plot of `Cases` over time for four countries, coloured by country,
# with one panel per `Case_Type`.
# The x-axis uses the parsed `date` column. The raw `Date` strings would be
# treated as a discrete variable and ordered alphabetically (for example
# "2/10/2020" sorts before "2/2/2020") instead of chronologically.
p1 <- covid4 %>%
  filter(Country_Region %in% c("US", "Italy", "France", "Spain")) %>%
  ggplot(aes(date, Cases, color = Country_Region)) +
  geom_point() +
  facet_wrap(~ Case_Type)
p1
## Warning: Removed 648 rows containing missing values (geom_point).
