library(readr)
# Load the daily report CSV for 2020-07-21 from the local covid19_repo folder.
data_20200721 <- read_csv(file = "~/covid19_repo/07-21-2020.csv")
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
# View(data_20200721)
# Total of the Confirmed column over every row of the 2020-07-21 report.
sum(data_20200721[["Confirmed"]])
## [1] 14947428
# Same total restricted to rows whose Country_Region is "US".
sum(data_20200721[["Confirmed"]][data_20200721[["Country_Region"]] == "US"])
## [1] 3899211
# Print the tibble to inspect its leading rows and column types.
data_20200721
## # A tibble: 3,924 x 14
## FIPS Admin2 Province_State Country_Region Last_Update Lat Long_
## <dbl> <chr> <chr> <chr> <dttm> <dbl> <dbl>
## 1 45001 Abbev… South Carolina US 2020-07-22 04:34:42 34.2 -82.5
## 2 22001 Acadia Louisiana US 2020-07-22 04:34:42 30.3 -92.4
## 3 51001 Accom… Virginia US 2020-07-22 04:34:42 37.8 -75.6
## 4 16001 Ada Idaho US 2020-07-22 04:34:42 43.5 -116.
## 5 19001 Adair Iowa US 2020-07-22 04:34:42 41.3 -94.5
## 6 21001 Adair Kentucky US 2020-07-22 04:34:42 37.1 -85.3
## 7 29001 Adair Missouri US 2020-07-22 04:34:42 40.2 -92.6
## 8 40001 Adair Oklahoma US 2020-07-22 04:34:42 35.9 -94.7
## 9 8001 Adams Colorado US 2020-07-22 04:34:42 39.9 -104.
## 10 16003 Adams Idaho US 2020-07-22 04:34:42 44.9 -116.
## # … with 3,914 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## # Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## # `Case-Fatality_Ratio` <dbl>
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Reporting window: the 11 calendar days ending today in the Asia/Taipei
# time zone. The argument is spelled `tzone` in full; `tz =` only reached it
# through partial argument matching.
end_date <- today(tzone = "Asia/Taipei")
start_date <- end_date - days(10)
#end_date
# Print every date in the window, one day apart.
seq(start_date, end_date, by = 'days')
## [1] "2020-07-13" "2020-07-14" "2020-07-15" "2020-07-16" "2020-07-17"
## [6] "2020-07-18" "2020-07-19" "2020-07-20" "2020-07-21" "2020-07-22"
## [11] "2020-07-23"
# Print each date of the window as the "MM-DD-YYYY" string used to build the
# daily report file names. Formatting the Date vector up front avoids the
# Date -> number -> Date round trip a `for` loop over Dates would need.
for (dt_str in format(seq(start_date, end_date, by = 'days'), format = '%m-%d-%Y')) {
  print(dt_str)
}
## [1] "07-13-2020"
## [1] "07-14-2020"
## [1] "07-15-2020"
## [1] "07-16-2020"
## [1] "07-17-2020"
## [1] "07-18-2020"
## [1] "07-19-2020"
## [1] "07-20-2020"
## [1] "07-21-2020"
## [1] "07-22-2020"
## [1] "07-23-2020"
# Read every daily report inside [start_date, end_date] into a list keyed by
# its "MM-DD-YYYY" file stem. Dates without a CSV on disk are reported and
# skipped.
data_list <- list()
for (dt_str in format(seq(start_date, end_date, by = 'days'), format = '%m-%d-%Y')) {
  csv_path <- sprintf("~/covid19_repo/%s.csv", dt_str)
  # Test for the file explicitly so that only a genuinely missing report is
  # labelled "not found".
  if (!file.exists(path.expand(csv_path))) {
    print(paste0(dt_str, '.csv ', 'not found'))
    next
  }
  # Any other failure (unreadable or malformed file) is reported with its
  # real message instead of being mislabelled; that date is then skipped.
  data <- tryCatch(
    read_csv(csv_path),
    error = function(err) {
      warning(dt_str, '.csv could not be read: ', conditionMessage(err),
              call. = FALSE)
      NULL
    }
  )
  if (!is.null(data)) {
    data_list[[dt_str]] <- data
  }
}
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## [1] "07-22-2020.csv not found"
## [1] "07-23-2020.csv not found"
# Confirm the container type before combining.
class(x = data_list)
## [1] "list"
# Stack all loaded daily reports row-wise into one table, then print it.
data_all <- do.call(what = rbind, args = data_list)
data_all
## # A tibble: 34,801 x 14
## FIPS Admin2 Province_State Country_Region Last_Update Lat Long_
## * <dbl> <chr> <chr> <chr> <dttm> <dbl> <dbl>
## 1 45001 Abbev… South Carolina US 2020-07-14 04:34:46 34.2 -82.5
## 2 22001 Acadia Louisiana US 2020-07-14 04:34:46 30.3 -92.4
## 3 51001 Accom… Virginia US 2020-07-14 04:34:46 37.8 -75.6
## 4 16001 Ada Idaho US 2020-07-14 04:34:46 43.5 -116.
## 5 19001 Adair Iowa US 2020-07-14 04:34:46 41.3 -94.5
## 6 21001 Adair Kentucky US 2020-07-14 04:34:46 37.1 -85.3
## 7 29001 Adair Missouri US 2020-07-14 04:34:46 40.2 -92.6
## 8 40001 Adair Oklahoma US 2020-07-14 04:34:46 35.9 -94.7
## 9 8001 Adams Colorado US 2020-07-14 04:34:46 39.9 -104.
## 10 16003 Adams Idaho US 2020-07-14 04:34:46 44.9 -116.
## # … with 34,791 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## # Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## # `Case-Fatality_Ratio` <dbl>
# Small demo of list indexing: elements can be reached by position or by name.
li <- list(x = 1, y = 2)
li[[1L]]
## [1] 1
li[[2L]]
## [1] 2
# Assigning to a new name appends the element at the end of the list ...
li[["qoo"]] <- c(1, 2, 3)
li[["qoo"]]
## [1] 1 2 3
# ... so it is also reachable as the third element.
li[[3L]]
## [1] 1 2 3
# Keep the update timestamp and confirmed count of the US / California rows.
stat_all <- data_all[
  with(data_all, (Country_Region == "US") & (Province_State == "California")),
  c("Last_Update", "Confirmed")
]
# Sum the confirmed counts per distinct Last_Update value and plot the totals
# in that order.
california <- tapply(X = stat_all$Confirmed, INDEX = stat_all$Last_Update, FUN = sum)
plot(california)
# Remote locations of the three global time-series CSVs.
confirmed_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
deaths_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
recovered_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
# File names the downloads are saved under (relative to the working directory).
confirmed_file <- 'time_series_covid19_confirmed_global.csv'
deaths_file <- 'time_series_covid19_deaths_global.csv'
recovered_file <- 'time_series_covid19_recovered_global.csv'
# Fetch the files in the order confirmed, deaths, recovered.
download.file(url = confirmed_url, destfile = confirmed_file)
download.file(url = deaths_url, destfile = deaths_file)
download.file(url = recovered_url, destfile = recovered_file)
# Load each downloaded series and tag every row with the case type it holds,
# so the three tables can later be stacked and still told apart.
recovered_df <- read_csv(file = 'time_series_covid19_recovered_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
recovered_df[['Case']] <- 'recovered'
confirmed_df <- read_csv(file = 'time_series_covid19_confirmed_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
confirmed_df[['Case']] <- 'confirmed'
deaths_df <- read_csv(file = 'time_series_covid19_deaths_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
deaths_df[['Case']] <- 'deaths'
# Show the confirmed-series rows whose Country/Region is "US".
confirmed_df[confirmed_df[['Country/Region']] == 'US', ]
## # A tibble: 1 x 187
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> US 40 -100 1 1 2
## # … with 180 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## # `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## # `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## # `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## # `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## # `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## # `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## # `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## # `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## # `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## # `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## # `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## # `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## # `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## # `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## # `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## # `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## # `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## # `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## # `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## # `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## # `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## # `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## # `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## # `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## # `5/3/20` <dbl>, …
# Collect the three tagged series and stack them row-wise into one table.
merged_df <- list(
  confirmed = confirmed_df,
  deaths = deaths_df,
  recovered = recovered_df
)
covid19_ts_df <- do.call(what = rbind, args = merged_df)
# US figures of each case type for the 7/21/20 column.
stat_0721 <- covid19_ts_df[
  covid19_ts_df[['Country/Region']] == 'US',
  c('7/21/20', 'Case')
]
stat_0721
## # A tibble: 3 x 2
## `7/21/20` Case
## <dbl> <chr>
## 1 3899211 confirmed
## 2 141995 deaths
## 3 1182018 recovered
#?barplot
# One bar per case type for the US on 7/21/20.
barplot(
  height = stat_0721[['7/21/20']],
  names.arg = stat_0721[['Case']],
  col = c('red', 'blue', 'green')
)
#covid19_ts_df[covid19_ts_df$`Country/Region` == 'Diamond Princess',]
# Number of missing values in each column of the combined daily reports.
vapply(data_all, function(column) sum(is.na(column)), numeric(1))
## FIPS Admin2 Province_State Country_Region
## 6202 6162 1516 0
## Last_Update Lat Long_ Confirmed
## 0 681 681 0
## Deaths Recovered Active Combined_Key
## 0 0 25 0
## Incidence_Rate Case-Fatality_Ratio
## 681 456
#data_all[is.na(data_all$Province_State), ]
# Where Province_State is missing, fall back to the row's Country_Region.
data_all$Province_State <- ifelse(
  is.na(data_all$Province_State),
  data_all$Country_Region,
  data_all$Province_State
)
# Re-count missing values per column to confirm Province_State is now complete.
vapply(data_all, function(column) sum(is.na(column)), numeric(1))
## FIPS Admin2 Province_State Country_Region
## 6202 6162 0 0
## Last_Update Lat Long_ Confirmed
## 0 681 681 0
## Deaths Recovered Active Combined_Key
## 0 0 25 0
## Incidence_Rate Case-Fatality_Ratio
## 681 456
# Rows of the combined daily reports that have no latitude.
data_all[is.na(data_all[["Lat"]]), ]
## # A tibble: 681 x 14
## FIPS Admin2 Province_State Country_Region Last_Update Lat Long_
## <dbl> <chr> <chr> <chr> <dttm> <dbl> <dbl>
## 1 NA Feder… Michigan US 2020-07-14 04:34:46 NA NA
## 2 NA Michi… Michigan US 2020-07-14 04:34:46 NA NA
## 3 80001 Out o… Alabama US 2020-07-14 04:34:46 NA NA
## 4 80013 Out o… Georgia US 2020-07-14 04:34:46 NA NA
## 5 80015 Out o… Hawaii US 2020-07-14 04:34:46 NA NA
## 6 80017 Out o… Illinois US 2020-07-14 04:34:46 NA NA
## 7 80026 Out o… Michigan US 2020-07-14 04:34:46 NA NA
## 8 80040 Out o… Oklahoma US 2020-07-14 04:34:46 NA NA
## 9 80047 Out o… Tennessee US 2020-07-14 04:34:46 NA NA
## 10 90001 Unass… Alabama US 2020-07-14 04:34:46 NA NA
## # … with 671 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## # Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## # `Case-Fatality_Ratio` <dbl>
# All rows whose Province_State is "Mississippi".
data_all[with(data_all, Province_State == "Mississippi"), ]
## # A tibble: 747 x 14
## FIPS Admin2 Province_State Country_Region Last_Update Lat Long_
## <dbl> <chr> <chr> <chr> <dttm> <dbl> <dbl>
## 1 28001 Adams Mississippi US 2020-07-14 04:34:46 31.5 -91.4
## 2 28003 Alcorn Mississippi US 2020-07-14 04:34:46 34.9 -88.6
## 3 28005 Amite Mississippi US 2020-07-14 04:34:46 31.2 -90.8
## 4 28007 Attala Mississippi US 2020-07-14 04:34:46 33.1 -89.6
## 5 28009 Benton Mississippi US 2020-07-14 04:34:46 34.8 -89.2
## 6 28011 Boliv… Mississippi US 2020-07-14 04:34:46 33.8 -90.9
## 7 28013 Calho… Mississippi US 2020-07-14 04:34:46 33.9 -89.3
## 8 28015 Carro… Mississippi US 2020-07-14 04:34:46 33.4 -89.9
## 9 28017 Chick… Mississippi US 2020-07-14 04:34:46 33.9 -88.9
## 10 28019 Choct… Mississippi US 2020-07-14 04:34:46 33.3 -89.2
## # … with 737 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## # Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## # `Case-Fatality_Ratio` <dbl>
# Show the rows without a latitude once more.
data_all[is.na(data_all[["Lat"]]), ]
## # A tibble: 681 x 14
## FIPS Admin2 Province_State Country_Region Last_Update Lat Long_
## <dbl> <chr> <chr> <chr> <dttm> <dbl> <dbl>
## 1 NA Feder… Michigan US 2020-07-14 04:34:46 NA NA
## 2 NA Michi… Michigan US 2020-07-14 04:34:46 NA NA
## 3 80001 Out o… Alabama US 2020-07-14 04:34:46 NA NA
## 4 80013 Out o… Georgia US 2020-07-14 04:34:46 NA NA
## 5 80015 Out o… Hawaii US 2020-07-14 04:34:46 NA NA
## 6 80017 Out o… Illinois US 2020-07-14 04:34:46 NA NA
## 7 80026 Out o… Michigan US 2020-07-14 04:34:46 NA NA
## 8 80040 Out o… Oklahoma US 2020-07-14 04:34:46 NA NA
## 9 80047 Out o… Tennessee US 2020-07-14 04:34:46 NA NA
## 10 90001 Unass… Alabama US 2020-07-14 04:34:46 NA NA
## # … with 671 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## # Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## # `Case-Fatality_Ratio` <dbl>
# Number of missing values in each column of the stacked time series.
vapply(covid19_ts_df, function(column) sum(is.na(column)), numeric(1))
## Province/State Country/Region Lat Long 1/22/20
## 556 0 0 0 0
## 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20
## 0 0 0 0 0
## 1/28/20 1/29/20 1/30/20 1/31/20 2/1/20
## 0 0 0 0 0
## 2/2/20 2/3/20 2/4/20 2/5/20 2/6/20
## 0 0 0 0 0
## 2/7/20 2/8/20 2/9/20 2/10/20 2/11/20
## 0 0 0 0 0
## 2/12/20 2/13/20 2/14/20 2/15/20 2/16/20
## 0 0 0 0 0
## 2/17/20 2/18/20 2/19/20 2/20/20 2/21/20
## 0 0 0 0 0
## 2/22/20 2/23/20 2/24/20 2/25/20 2/26/20
## 0 0 0 0 0
## 2/27/20 2/28/20 2/29/20 3/1/20 3/2/20
## 0 0 0 0 0
## 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20
## 0 0 0 0 0
## 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20
## 0 0 0 0 0
## 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20
## 0 0 0 0 0
## 3/18/20 3/19/20 3/20/20 3/21/20 3/22/20
## 0 0 0 0 0
## 3/23/20 3/24/20 3/25/20 3/26/20 3/27/20
## 0 0 0 0 0
## 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20
## 0 0 0 0 0
## 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20
## 0 0 0 0 0
## 4/7/20 4/8/20 4/9/20 4/10/20 4/11/20
## 0 0 0 0 0
## 4/12/20 4/13/20 4/14/20 4/15/20 4/16/20
## 0 0 0 0 0
## 4/17/20 4/18/20 4/19/20 4/20/20 4/21/20
## 0 0 0 0 0
## 4/22/20 4/23/20 4/24/20 4/25/20 4/26/20
## 0 0 0 0 0
## 4/27/20 4/28/20 4/29/20 4/30/20 5/1/20
## 0 0 0 0 0
## 5/2/20 5/3/20 5/4/20 5/5/20 5/6/20
## 0 0 0 0 0
## 5/7/20 5/8/20 5/9/20 5/10/20 5/11/20
## 0 0 0 0 0
## 5/12/20 5/13/20 5/14/20 5/15/20 5/16/20
## 0 0 0 0 0
## 5/17/20 5/18/20 5/19/20 5/20/20 5/21/20
## 0 0 0 0 0
## 5/22/20 5/23/20 5/24/20 5/25/20 5/26/20
## 0 0 0 0 0
## 5/27/20 5/28/20 5/29/20 5/30/20 5/31/20
## 0 0 0 0 0
## 6/1/20 6/2/20 6/3/20 6/4/20 6/5/20
## 0 0 0 0 0
## 6/6/20 6/7/20 6/8/20 6/9/20 6/10/20
## 0 0 0 0 0
## 6/11/20 6/12/20 6/13/20 6/14/20 6/15/20
## 0 0 0 0 0
## 6/16/20 6/17/20 6/18/20 6/19/20 6/20/20
## 0 0 0 0 0
## 6/21/20 6/22/20 6/23/20 6/24/20 6/25/20
## 0 0 0 0 0
## 6/26/20 6/27/20 6/28/20 6/29/20 6/30/20
## 0 0 0 0 0
## 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20
## 0 0 0 0 0
## 7/6/20 7/7/20 7/8/20 7/9/20 7/10/20
## 0 0 0 0 0
## 7/11/20 7/12/20 7/13/20 7/14/20 7/15/20
## 0 0 0 0 0
## 7/16/20 7/17/20 7/18/20 7/19/20 7/20/20
## 0 0 0 0 0
## 7/21/20 Case
## 0 0
# First six time-series rows that carry no Province/State value.
head(covid19_ts_df[is.na(covid19_ts_df[["Province/State"]]), ], n = 6L)
## # A tibble: 6 x 187
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 0 0 0
## 2 <NA> Albania 41.2 20.2 0 0 0
## 3 <NA> Algeria 28.0 1.66 0 0 0
## 4 <NA> Andorra 42.5 1.52 0 0 0
## 5 <NA> Angola -11.2 17.9 0 0 0
## 6 <NA> Antigua and Bar… 17.1 -61.8 0 0 0
## # … with 180 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## # `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## # `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## # `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## # `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## # `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## # `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## # `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## # `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## # `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## # `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## # `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## # `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## # `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## # `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## # `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## # `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## # `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## # `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## # `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## # `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## # `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## # `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## # `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## # `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## # `5/3/20` <dbl>, …
#covid19_ts_df[is.na(covid19_ts_df$`Province/State`),'Province/State'] <- covid19_ts_df[is.na(covid19_ts_df$`Province/State`),'Country/Region']
# The fill above is disabled, so the per-column missing counts are unchanged.
vapply(covid19_ts_df, function(column) sum(is.na(column)), numeric(1))
## Province/State Country/Region Lat Long 1/22/20
## 556 0 0 0 0
## 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20
## 0 0 0 0 0
## 1/28/20 1/29/20 1/30/20 1/31/20 2/1/20
## 0 0 0 0 0
## 2/2/20 2/3/20 2/4/20 2/5/20 2/6/20
## 0 0 0 0 0
## 2/7/20 2/8/20 2/9/20 2/10/20 2/11/20
## 0 0 0 0 0
## 2/12/20 2/13/20 2/14/20 2/15/20 2/16/20
## 0 0 0 0 0
## 2/17/20 2/18/20 2/19/20 2/20/20 2/21/20
## 0 0 0 0 0
## 2/22/20 2/23/20 2/24/20 2/25/20 2/26/20
## 0 0 0 0 0
## 2/27/20 2/28/20 2/29/20 3/1/20 3/2/20
## 0 0 0 0 0
## 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20
## 0 0 0 0 0
## 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20
## 0 0 0 0 0
## 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20
## 0 0 0 0 0
## 3/18/20 3/19/20 3/20/20 3/21/20 3/22/20
## 0 0 0 0 0
## 3/23/20 3/24/20 3/25/20 3/26/20 3/27/20
## 0 0 0 0 0
## 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20
## 0 0 0 0 0
## 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20
## 0 0 0 0 0
## 4/7/20 4/8/20 4/9/20 4/10/20 4/11/20
## 0 0 0 0 0
## 4/12/20 4/13/20 4/14/20 4/15/20 4/16/20
## 0 0 0 0 0
## 4/17/20 4/18/20 4/19/20 4/20/20 4/21/20
## 0 0 0 0 0
## 4/22/20 4/23/20 4/24/20 4/25/20 4/26/20
## 0 0 0 0 0
## 4/27/20 4/28/20 4/29/20 4/30/20 5/1/20
## 0 0 0 0 0
## 5/2/20 5/3/20 5/4/20 5/5/20 5/6/20
## 0 0 0 0 0
## 5/7/20 5/8/20 5/9/20 5/10/20 5/11/20
## 0 0 0 0 0
## 5/12/20 5/13/20 5/14/20 5/15/20 5/16/20
## 0 0 0 0 0
## 5/17/20 5/18/20 5/19/20 5/20/20 5/21/20
## 0 0 0 0 0
## 5/22/20 5/23/20 5/24/20 5/25/20 5/26/20
## 0 0 0 0 0
## 5/27/20 5/28/20 5/29/20 5/30/20 5/31/20
## 0 0 0 0 0
## 6/1/20 6/2/20 6/3/20 6/4/20 6/5/20
## 0 0 0 0 0
## 6/6/20 6/7/20 6/8/20 6/9/20 6/10/20
## 0 0 0 0 0
## 6/11/20 6/12/20 6/13/20 6/14/20 6/15/20
## 0 0 0 0 0
## 6/16/20 6/17/20 6/18/20 6/19/20 6/20/20
## 0 0 0 0 0
## 6/21/20 6/22/20 6/23/20 6/24/20 6/25/20
## 0 0 0 0 0
## 6/26/20 6/27/20 6/28/20 6/29/20 6/30/20
## 0 0 0 0 0
## 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20
## 0 0 0 0 0
## 7/6/20 7/7/20 7/8/20 7/9/20 7/10/20
## 0 0 0 0 0
## 7/11/20 7/12/20 7/13/20 7/14/20 7/15/20
## 0 0 0 0 0
## 7/16/20 7/17/20 7/18/20 7/19/20 7/20/20
## 0 0 0 0 0
## 7/21/20 Case
## 0 0
# US values of the 7/21/20 column, one row per case type.
covid19_ts_df[
  covid19_ts_df[["Country/Region"]] == "US",
  c("7/21/20", "Case")
]
## # A tibble: 3 x 2
## `7/21/20` Case
## <dbl> <chr>
## 1 3899211 confirmed
## 2 141995 deaths
## 3 1182018 recovered
library(tidyr)
#?tidyr
# Reshape the wide time series (one column per day) into long form: one row
# per original row and date, with the column name in `Date` and its value in
# `Number`.
col_names <- colnames(covid19_ts_df)
# Every column other than the location identifiers and the `Case` tag is a
# daily count; selecting by name does not depend on column positions.
date_cols <- setdiff(
  col_names,
  c('Province/State', 'Country/Region', 'Lat', 'Long', 'Case')
)
# all_of() marks `date_cols` as an external character vector of column names,
# which removes the "ambiguous external vector" selection note.
ts_longer <- pivot_longer(covid19_ts_df,
                          cols = all_of(date_cols),
                          names_to = "Date",
                          values_to = "Number")
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
#head(ts_longer)
# The former column names (e.g. "1/22/20") are month/day/year strings; parse
# them into Date values.
ts_longer$Date <- mdy(ts_longer$Date)
# Confirmed series for the US rows, plotted as a connected point line.
us_stat <- ts_longer[ (ts_longer$`Country/Region` =='US') & (ts_longer$`Case` =='confirmed'), c('Date', 'Number')]
plot(Number ~ Date, us_stat, type = 'o', main = 'United States', xlab = 'Date', ylab = 'Cases')
# DPLYR
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#?dplyr
#ts_longer[ts_longer$Case == 'confirmed',]
#filter(ts_longer, Case == 'confirmed')
#ts_longer[(ts_longer$Case == 'confirmed') & (ts_longer$Number > 1000),]
#ts_longer[(ts_longer$Case == 'confirmed') | (ts_longer$Number > 1000),]
#ts_longer[(ts_longer$Case %in% c('confirmed', 'deaths') ),]
#filter(ts_longer, (Case == 'confirmed') & (Number > 1000) )
#filter(ts_longer, (Case == 'confirmed') | (Number > 1000) )
#filter(ts_longer, (Case %in% c('confirmed', 'deaths')) & (Number > 1000) )
#ts_longer[,c('Case', 'Number')]
#select(ts_longer, Case, Number)
#select(filter(ts_longer, `Country/Region` == 'Taiwan*'), Case, Number)
# Case type and count for every Taiwan* row, written as nested calls.
select(filter(ts_longer, `Country/Region` == 'Taiwan*'), Case, Number)
## # A tibble: 546 x 2
## Case Number
## <chr> <dbl>
## 1 confirmed 1
## 2 confirmed 1
## 3 confirmed 3
## 4 confirmed 3
## 5 confirmed 4
## 6 confirmed 5
## 7 confirmed 8
## 8 confirmed 8
## 9 confirmed 9
## 10 confirmed 10
## # … with 536 more rows
# Same Taiwan* selection, limited to its first six rows.
head(
  select(
    filter(ts_longer, `Country/Region` == 'Taiwan*'),
    Case, Number
  ),
  n = 6L
)
## # A tibble: 6 x 2
## Case Number
## <chr> <dbl>
## 1 confirmed 1
## 2 confirmed 1
## 3 confirmed 3
## 4 confirmed 3
## 5 confirmed 4
## 6 confirmed 5
# Ten countries with the most confirmed cases on 2020-07-21.
# Some countries are stored as one row per province/state, so the counts are
# summed per country before ranking; without that step individual provinces
# are ranked (and can repeat) under their country's name.
stat_longer <- ts_longer %>%
  filter(Date == as.Date('2020-07-21'), Case == 'confirmed') %>%
  group_by(`Country/Region`) %>%
  summarise(Number = sum(Number), .groups = 'drop') %>%
  arrange(desc(Number)) %>%
  head(10)
# The trailing comma (an empty extra argument) of the original call is removed.
barplot(stat_longer$Number, names.arg = stat_longer$`Country/Region`, col = 'blue', xlab = 'Country', ylab = 'Confirmed Case')
# Ten countries with the most deaths on 2020-07-21.
# Counts are summed per country first because some countries are stored as
# one row per province/state.
stat_longer2 <- ts_longer %>%
  filter(Date == as.Date('2020-07-21'), Case == 'deaths') %>%
  group_by(`Country/Region`) %>%
  summarise(Number = sum(Number), .groups = 'drop') %>%
  arrange(desc(Number)) %>%
  head(10)
# The y axis is labelled as deaths (it previously said 'Confirmed Case' on the
# deaths chart), and the trailing empty argument is removed.
barplot(stat_longer2$Number, names.arg = stat_longer2$`Country/Region`, col = 'blue', xlab = 'Country', ylab = 'Deaths')
#?pivot_wider()
# Every series restricted to 2020-07-21.
ts_longer_0721 <- ts_longer %>%
  filter(Date == as.Date('2020-07-21'))
# Spread the case types into their own columns (confirmed / deaths / recovered).
ts_wider_0721 <- pivot_wider(ts_longer_0721, names_from = 'Case', values_from = 'Number')
# Per-country totals of each case type. na.rm = TRUE skips cells left empty by
# the widening step (presumably locations that are not present in all three
# series -- verify against the data).
# .groups = 'drop' returns an ungrouped table without the regrouping message.
ts_stat_all <- ts_wider_0721 %>%
  select(`Country/Region`, confirmed, deaths, recovered) %>%
  group_by(`Country/Region`) %>%
  summarise(
    confirmed_all = sum(confirmed, na.rm = TRUE),
    deaths_all = sum(deaths, na.rm = TRUE),
    recovered_all = sum(recovered, na.rm = TRUE),
    .groups = 'drop'
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Deaths divided by confirmed cases for each country.
ts_stat_all <- mutate(ts_stat_all, fatalities = deaths_all / confirmed_all)
# Ten countries with the highest ratio.
head(
  select(
    arrange(ts_stat_all, desc(fatalities)),
    `Country/Region`, fatalities
  ),
  10
)
## # A tibble: 10 x 2
## `Country/Region` fatalities
## <chr> <dbl>
## 1 Yemen 0.280
## 2 MS Zaandam 0.222
## 3 United Kingdom 0.153
## 4 Belgium 0.153
## 5 Italy 0.143
## 6 France 0.141
## 7 Hungary 0.137
## 8 Netherlands 0.118
## 9 Mexico 0.113
## 10 Spain 0.107