讀取單天資料

library(readr)
data_20200721 <- read_csv("~/covid19_repo/07-21-2020.csv")
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# View(data_20200721)
# Total confirmed cases over every row of the 2020-07-21 report.
sum(data_20200721[["Confirmed"]])
## [1] 14947428
# The same total restricted to rows whose Country_Region is "US".
sum(data_20200721[["Confirmed"]][data_20200721[["Country_Region"]] == "US"])
## [1] 3899211
data_20200721
## # A tibble: 3,924 x 14
##     FIPS Admin2 Province_State Country_Region Last_Update           Lat  Long_
##    <dbl> <chr>  <chr>          <chr>          <dttm>              <dbl>  <dbl>
##  1 45001 Abbev… South Carolina US             2020-07-22 04:34:42  34.2  -82.5
##  2 22001 Acadia Louisiana      US             2020-07-22 04:34:42  30.3  -92.4
##  3 51001 Accom… Virginia       US             2020-07-22 04:34:42  37.8  -75.6
##  4 16001 Ada    Idaho          US             2020-07-22 04:34:42  43.5 -116. 
##  5 19001 Adair  Iowa           US             2020-07-22 04:34:42  41.3  -94.5
##  6 21001 Adair  Kentucky       US             2020-07-22 04:34:42  37.1  -85.3
##  7 29001 Adair  Missouri       US             2020-07-22 04:34:42  40.2  -92.6
##  8 40001 Adair  Oklahoma       US             2020-07-22 04:34:42  35.9  -94.7
##  9  8001 Adams  Colorado       US             2020-07-22 04:34:42  39.9 -104. 
## 10 16003 Adams  Idaho          US             2020-07-22 04:34:42  44.9 -116. 
## # … with 3,914 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## #   Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## #   `Case-Fatality_Ratio` <dbl>

合併多天資料

library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Reporting window: today's date in the Asia/Taipei time zone and the ten
# days leading up to it.
end_date <- today(tz = "Asia/Taipei")
start_date <- end_date - days(10)
# end_date


# List every calendar day in the window, both endpoints included.
seq(from = start_date, to = end_date, by = "days")
##  [1] "2020-07-13" "2020-07-14" "2020-07-15" "2020-07-16" "2020-07-17"
##  [6] "2020-07-18" "2020-07-19" "2020-07-20" "2020-07-21" "2020-07-22"
## [11] "2020-07-23"
# Print each day of the window as "mm-dd-YYYY", the pattern used in the
# daily report file names.
for (i in seq(from = start_date, to = end_date, by = "days")) {
  # print(i)
  # The loop variable arrives as a bare number, so rebuild the Date first.
  dt <- as_date(i)
  dt_str <- format(dt, "%m-%d-%Y")
  print(dt_str)
}
## [1] "07-13-2020"
## [1] "07-14-2020"
## [1] "07-15-2020"
## [1] "07-16-2020"
## [1] "07-17-2020"
## [1] "07-18-2020"
## [1] "07-19-2020"
## [1] "07-20-2020"
## [1] "07-21-2020"
## [1] "07-22-2020"
## [1] "07-23-2020"
# Load one daily report per day of the window into a list keyed by the
# "mm-dd-YYYY" date string. Days whose file does not exist are reported and
# skipped, so the list only holds the days that were actually read.
data_list <- list()
for (i in seq(start_date, end_date, by = "days")) {
  # The loop variable arrives as a bare number, so rebuild the Date first.
  dt <- as_date(i)
  dt_str <- format(dt, format = "%m-%d-%Y")
  csv_path <- path.expand(sprintf("~/covid19_repo/%s.csv", dt_str))

  # Only a genuinely missing file is reported as "not found".
  if (!file.exists(csv_path)) {
    print(paste0(dt_str, ".csv ", "not found"))
    next
  }

  # A file that exists but cannot be read is a different problem; surface the
  # underlying message instead of mislabelling it as a missing file.
  data <- tryCatch(
    read_csv(csv_path),
    error = function(err) {
      warning(
        "Failed to read ", csv_path, ": ", conditionMessage(err),
        call. = FALSE
      )
      NULL
    }
  )
  if (!is.null(data)) {
    data_list[[dt_str]] <- data
  }
}
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## [1] "07-22-2020.csv not found"
## [1] "07-23-2020.csv not found"
class(data_list)
## [1] "list"
# Stack the per-day tables row-wise into a single table, then show it.
data_all <- do.call(what = rbind, args = data_list)
data_all
## # A tibble: 34,801 x 14
##     FIPS Admin2 Province_State Country_Region Last_Update           Lat  Long_
##  * <dbl> <chr>  <chr>          <chr>          <dttm>              <dbl>  <dbl>
##  1 45001 Abbev… South Carolina US             2020-07-14 04:34:46  34.2  -82.5
##  2 22001 Acadia Louisiana      US             2020-07-14 04:34:46  30.3  -92.4
##  3 51001 Accom… Virginia       US             2020-07-14 04:34:46  37.8  -75.6
##  4 16001 Ada    Idaho          US             2020-07-14 04:34:46  43.5 -116. 
##  5 19001 Adair  Iowa           US             2020-07-14 04:34:46  41.3  -94.5
##  6 21001 Adair  Kentucky       US             2020-07-14 04:34:46  37.1  -85.3
##  7 29001 Adair  Missouri       US             2020-07-14 04:34:46  40.2  -92.6
##  8 40001 Adair  Oklahoma       US             2020-07-14 04:34:46  35.9  -94.7
##  9  8001 Adams  Colorado       US             2020-07-14 04:34:46  39.9 -104. 
## 10 16003 Adams  Idaho          US             2020-07-14 04:34:46  44.9 -116. 
## # … with 34,791 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## #   Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## #   `Case-Fatality_Ratio` <dbl>
# List demo: elements can be fetched by position or by name.
li <- list(x = 1, y = 2)
li[[1]]
## [1] 1
li[[2]]
## [1] 2
# Assigning to a name that is not in the list yet appends a new element ...
li[["qoo"]] <- c(1, 2, 3)
li[["qoo"]]
## [1] 1 2 3
# ... which is then also reachable by its position (third).
li[[3]]
## [1] 1 2 3
# Keep the update timestamp and confirmed count of every California (US) row.
stat_all <- data_all[
  (data_all$Province_State == "California") & (data_all$Country_Region == "US"),
  c("Last_Update", "Confirmed")
]


# Sum the confirmed counts per Last_Update value and plot those totals.
california <- tapply(
  X = stat_all$Confirmed,
  INDEX = stat_all$Last_Update,
  FUN = sum
)
plot(california)

爬取時間序列資料

# Remote locations of the three global time-series tables.
confirmed_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

# Relative file names the downloaded tables are saved under.
confirmed_file <- "time_series_covid19_confirmed_global.csv"
deaths_file <- "time_series_covid19_deaths_global.csv"
recovered_file <- "time_series_covid19_recovered_global.csv"

# Fetch the tables in the same order as before: confirmed, deaths, recovered.
download.file(url = confirmed_url, destfile = confirmed_file)
download.file(url = deaths_url, destfile = deaths_file)
download.file(url = recovered_url, destfile = recovered_file)

# Load the recovered table and tag every row with its case type.
recovered_df <- read_csv(recovered_file)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
recovered_df[["Case"]] <- "recovered"

# Load the confirmed table and tag every row with its case type.
confirmed_df <- read_csv(confirmed_file)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
confirmed_df[["Case"]] <- "confirmed"

# Load the deaths table and tag every row with its case type.
deaths_df <- read_csv(deaths_file)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
deaths_df[["Case"]] <- "deaths"

confirmed_df[confirmed_df$`Country/Region` == 'US',]
## # A tibble: 1 x 187
##   `Province/State` `Country/Region`   Lat  Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl> <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             US                  40  -100         1         1         2
## # … with 180 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
# Collect the three case-type tables and stack them row-wise into one table.
merged_df <- list(
  confirmed = confirmed_df,
  deaths = deaths_df,
  recovered = recovered_df
)
covid19_ts_df <- do.call(what = rbind, args = merged_df)
# US rows only, keeping the 7/21/20 count and the case type.
stat_0721 <- covid19_ts_df[
  covid19_ts_df[["Country/Region"]] == "US",
  c("7/21/20", "Case")
]
stat_0721
## # A tibble: 3 x 2
##   `7/21/20` Case     
##       <dbl> <chr>    
## 1   3899211 confirmed
## 2    141995 deaths   
## 3   1182018 recovered
# ?barplot
# One bar per case type for the US counts in the 7/21/20 column.
barplot(
  height = stat_0721[["7/21/20"]],
  names.arg = stat_0721[["Case"]],
  col = c("red", "blue", "green")
)

#covid19_ts_df[covid19_ts_df$`Country/Region` == 'Diamond Princess',]

檢視缺失值

colSums(is.na(data_all))
##                FIPS              Admin2      Province_State      Country_Region 
##                6202                6162                1516                   0 
##         Last_Update                 Lat               Long_           Confirmed 
##                   0                 681                 681                   0 
##              Deaths           Recovered              Active        Combined_Key 
##                   0                   0                  25                   0 
##      Incidence_Rate Case-Fatality_Ratio 
##                 681                 456
# data_all[is.na(data_all$Province_State), ]
# Rows without a Province_State fall back to their Country_Region value.
data_all$Province_State <- ifelse(
  is.na(data_all$Province_State),
  data_all$Country_Region,
  data_all$Province_State
)

colSums(is.na(data_all))
##                FIPS              Admin2      Province_State      Country_Region 
##                6202                6162                   0                   0 
##         Last_Update                 Lat               Long_           Confirmed 
##                   0                 681                 681                   0 
##              Deaths           Recovered              Active        Combined_Key 
##                   0                   0                  25                   0 
##      Incidence_Rate Case-Fatality_Ratio 
##                 681                 456
data_all[is.na(data_all$Lat), ]
## # A tibble: 681 x 14
##     FIPS Admin2 Province_State Country_Region Last_Update           Lat Long_
##    <dbl> <chr>  <chr>          <chr>          <dttm>              <dbl> <dbl>
##  1    NA Feder… Michigan       US             2020-07-14 04:34:46    NA    NA
##  2    NA Michi… Michigan       US             2020-07-14 04:34:46    NA    NA
##  3 80001 Out o… Alabama        US             2020-07-14 04:34:46    NA    NA
##  4 80013 Out o… Georgia        US             2020-07-14 04:34:46    NA    NA
##  5 80015 Out o… Hawaii         US             2020-07-14 04:34:46    NA    NA
##  6 80017 Out o… Illinois       US             2020-07-14 04:34:46    NA    NA
##  7 80026 Out o… Michigan       US             2020-07-14 04:34:46    NA    NA
##  8 80040 Out o… Oklahoma       US             2020-07-14 04:34:46    NA    NA
##  9 80047 Out o… Tennessee      US             2020-07-14 04:34:46    NA    NA
## 10 90001 Unass… Alabama        US             2020-07-14 04:34:46    NA    NA
## # … with 671 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## #   Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## #   `Case-Fatality_Ratio` <dbl>
data_all[data_all$Province_State =='Mississippi',]
## # A tibble: 747 x 14
##     FIPS Admin2 Province_State Country_Region Last_Update           Lat Long_
##    <dbl> <chr>  <chr>          <chr>          <dttm>              <dbl> <dbl>
##  1 28001 Adams  Mississippi    US             2020-07-14 04:34:46  31.5 -91.4
##  2 28003 Alcorn Mississippi    US             2020-07-14 04:34:46  34.9 -88.6
##  3 28005 Amite  Mississippi    US             2020-07-14 04:34:46  31.2 -90.8
##  4 28007 Attala Mississippi    US             2020-07-14 04:34:46  33.1 -89.6
##  5 28009 Benton Mississippi    US             2020-07-14 04:34:46  34.8 -89.2
##  6 28011 Boliv… Mississippi    US             2020-07-14 04:34:46  33.8 -90.9
##  7 28013 Calho… Mississippi    US             2020-07-14 04:34:46  33.9 -89.3
##  8 28015 Carro… Mississippi    US             2020-07-14 04:34:46  33.4 -89.9
##  9 28017 Chick… Mississippi    US             2020-07-14 04:34:46  33.9 -88.9
## 10 28019 Choct… Mississippi    US             2020-07-14 04:34:46  33.3 -89.2
## # … with 737 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## #   Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## #   `Case-Fatality_Ratio` <dbl>
data_all[is.na(data_all$Lat), ]
## # A tibble: 681 x 14
##     FIPS Admin2 Province_State Country_Region Last_Update           Lat Long_
##    <dbl> <chr>  <chr>          <chr>          <dttm>              <dbl> <dbl>
##  1    NA Feder… Michigan       US             2020-07-14 04:34:46    NA    NA
##  2    NA Michi… Michigan       US             2020-07-14 04:34:46    NA    NA
##  3 80001 Out o… Alabama        US             2020-07-14 04:34:46    NA    NA
##  4 80013 Out o… Georgia        US             2020-07-14 04:34:46    NA    NA
##  5 80015 Out o… Hawaii         US             2020-07-14 04:34:46    NA    NA
##  6 80017 Out o… Illinois       US             2020-07-14 04:34:46    NA    NA
##  7 80026 Out o… Michigan       US             2020-07-14 04:34:46    NA    NA
##  8 80040 Out o… Oklahoma       US             2020-07-14 04:34:46    NA    NA
##  9 80047 Out o… Tennessee      US             2020-07-14 04:34:46    NA    NA
## 10 90001 Unass… Alabama        US             2020-07-14 04:34:46    NA    NA
## # … with 671 more rows, and 7 more variables: Confirmed <dbl>, Deaths <dbl>,
## #   Recovered <dbl>, Active <dbl>, Combined_Key <chr>, Incidence_Rate <dbl>,
## #   `Case-Fatality_Ratio` <dbl>
colSums(is.na(covid19_ts_df))
## Province/State Country/Region            Lat           Long        1/22/20 
##            556              0              0              0              0 
##        1/23/20        1/24/20        1/25/20        1/26/20        1/27/20 
##              0              0              0              0              0 
##        1/28/20        1/29/20        1/30/20        1/31/20         2/1/20 
##              0              0              0              0              0 
##         2/2/20         2/3/20         2/4/20         2/5/20         2/6/20 
##              0              0              0              0              0 
##         2/7/20         2/8/20         2/9/20        2/10/20        2/11/20 
##              0              0              0              0              0 
##        2/12/20        2/13/20        2/14/20        2/15/20        2/16/20 
##              0              0              0              0              0 
##        2/17/20        2/18/20        2/19/20        2/20/20        2/21/20 
##              0              0              0              0              0 
##        2/22/20        2/23/20        2/24/20        2/25/20        2/26/20 
##              0              0              0              0              0 
##        2/27/20        2/28/20        2/29/20         3/1/20         3/2/20 
##              0              0              0              0              0 
##         3/3/20         3/4/20         3/5/20         3/6/20         3/7/20 
##              0              0              0              0              0 
##         3/8/20         3/9/20        3/10/20        3/11/20        3/12/20 
##              0              0              0              0              0 
##        3/13/20        3/14/20        3/15/20        3/16/20        3/17/20 
##              0              0              0              0              0 
##        3/18/20        3/19/20        3/20/20        3/21/20        3/22/20 
##              0              0              0              0              0 
##        3/23/20        3/24/20        3/25/20        3/26/20        3/27/20 
##              0              0              0              0              0 
##        3/28/20        3/29/20        3/30/20        3/31/20         4/1/20 
##              0              0              0              0              0 
##         4/2/20         4/3/20         4/4/20         4/5/20         4/6/20 
##              0              0              0              0              0 
##         4/7/20         4/8/20         4/9/20        4/10/20        4/11/20 
##              0              0              0              0              0 
##        4/12/20        4/13/20        4/14/20        4/15/20        4/16/20 
##              0              0              0              0              0 
##        4/17/20        4/18/20        4/19/20        4/20/20        4/21/20 
##              0              0              0              0              0 
##        4/22/20        4/23/20        4/24/20        4/25/20        4/26/20 
##              0              0              0              0              0 
##        4/27/20        4/28/20        4/29/20        4/30/20         5/1/20 
##              0              0              0              0              0 
##         5/2/20         5/3/20         5/4/20         5/5/20         5/6/20 
##              0              0              0              0              0 
##         5/7/20         5/8/20         5/9/20        5/10/20        5/11/20 
##              0              0              0              0              0 
##        5/12/20        5/13/20        5/14/20        5/15/20        5/16/20 
##              0              0              0              0              0 
##        5/17/20        5/18/20        5/19/20        5/20/20        5/21/20 
##              0              0              0              0              0 
##        5/22/20        5/23/20        5/24/20        5/25/20        5/26/20 
##              0              0              0              0              0 
##        5/27/20        5/28/20        5/29/20        5/30/20        5/31/20 
##              0              0              0              0              0 
##         6/1/20         6/2/20         6/3/20         6/4/20         6/5/20 
##              0              0              0              0              0 
##         6/6/20         6/7/20         6/8/20         6/9/20        6/10/20 
##              0              0              0              0              0 
##        6/11/20        6/12/20        6/13/20        6/14/20        6/15/20 
##              0              0              0              0              0 
##        6/16/20        6/17/20        6/18/20        6/19/20        6/20/20 
##              0              0              0              0              0 
##        6/21/20        6/22/20        6/23/20        6/24/20        6/25/20 
##              0              0              0              0              0 
##        6/26/20        6/27/20        6/28/20        6/29/20        6/30/20 
##              0              0              0              0              0 
##         7/1/20         7/2/20         7/3/20         7/4/20         7/5/20 
##              0              0              0              0              0 
##         7/6/20         7/7/20         7/8/20         7/9/20        7/10/20 
##              0              0              0              0              0 
##        7/11/20        7/12/20        7/13/20        7/14/20        7/15/20 
##              0              0              0              0              0 
##        7/16/20        7/17/20        7/18/20        7/19/20        7/20/20 
##              0              0              0              0              0 
##        7/21/20           Case 
##              0              0
head(covid19_ts_df[is.na(covid19_ts_df$`Province/State`),])
## # A tibble: 6 x 187
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 180 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
#covid19_ts_df[is.na(covid19_ts_df$`Province/State`),'Province/State'] <- covid19_ts_df[is.na(covid19_ts_df$`Province/State`),'Country/Region']  


colSums(is.na(covid19_ts_df))
## Province/State Country/Region            Lat           Long        1/22/20 
##            556              0              0              0              0 
##        1/23/20        1/24/20        1/25/20        1/26/20        1/27/20 
##              0              0              0              0              0 
##        1/28/20        1/29/20        1/30/20        1/31/20         2/1/20 
##              0              0              0              0              0 
##         2/2/20         2/3/20         2/4/20         2/5/20         2/6/20 
##              0              0              0              0              0 
##         2/7/20         2/8/20         2/9/20        2/10/20        2/11/20 
##              0              0              0              0              0 
##        2/12/20        2/13/20        2/14/20        2/15/20        2/16/20 
##              0              0              0              0              0 
##        2/17/20        2/18/20        2/19/20        2/20/20        2/21/20 
##              0              0              0              0              0 
##        2/22/20        2/23/20        2/24/20        2/25/20        2/26/20 
##              0              0              0              0              0 
##        2/27/20        2/28/20        2/29/20         3/1/20         3/2/20 
##              0              0              0              0              0 
##         3/3/20         3/4/20         3/5/20         3/6/20         3/7/20 
##              0              0              0              0              0 
##         3/8/20         3/9/20        3/10/20        3/11/20        3/12/20 
##              0              0              0              0              0 
##        3/13/20        3/14/20        3/15/20        3/16/20        3/17/20 
##              0              0              0              0              0 
##        3/18/20        3/19/20        3/20/20        3/21/20        3/22/20 
##              0              0              0              0              0 
##        3/23/20        3/24/20        3/25/20        3/26/20        3/27/20 
##              0              0              0              0              0 
##        3/28/20        3/29/20        3/30/20        3/31/20         4/1/20 
##              0              0              0              0              0 
##         4/2/20         4/3/20         4/4/20         4/5/20         4/6/20 
##              0              0              0              0              0 
##         4/7/20         4/8/20         4/9/20        4/10/20        4/11/20 
##              0              0              0              0              0 
##        4/12/20        4/13/20        4/14/20        4/15/20        4/16/20 
##              0              0              0              0              0 
##        4/17/20        4/18/20        4/19/20        4/20/20        4/21/20 
##              0              0              0              0              0 
##        4/22/20        4/23/20        4/24/20        4/25/20        4/26/20 
##              0              0              0              0              0 
##        4/27/20        4/28/20        4/29/20        4/30/20         5/1/20 
##              0              0              0              0              0 
##         5/2/20         5/3/20         5/4/20         5/5/20         5/6/20 
##              0              0              0              0              0 
##         5/7/20         5/8/20         5/9/20        5/10/20        5/11/20 
##              0              0              0              0              0 
##        5/12/20        5/13/20        5/14/20        5/15/20        5/16/20 
##              0              0              0              0              0 
##        5/17/20        5/18/20        5/19/20        5/20/20        5/21/20 
##              0              0              0              0              0 
##        5/22/20        5/23/20        5/24/20        5/25/20        5/26/20 
##              0              0              0              0              0 
##        5/27/20        5/28/20        5/29/20        5/30/20        5/31/20 
##              0              0              0              0              0 
##         6/1/20         6/2/20         6/3/20         6/4/20         6/5/20 
##              0              0              0              0              0 
##         6/6/20         6/7/20         6/8/20         6/9/20        6/10/20 
##              0              0              0              0              0 
##        6/11/20        6/12/20        6/13/20        6/14/20        6/15/20 
##              0              0              0              0              0 
##        6/16/20        6/17/20        6/18/20        6/19/20        6/20/20 
##              0              0              0              0              0 
##        6/21/20        6/22/20        6/23/20        6/24/20        6/25/20 
##              0              0              0              0              0 
##        6/26/20        6/27/20        6/28/20        6/29/20        6/30/20 
##              0              0              0              0              0 
##         7/1/20         7/2/20         7/3/20         7/4/20         7/5/20 
##              0              0              0              0              0 
##         7/6/20         7/7/20         7/8/20         7/9/20        7/10/20 
##              0              0              0              0              0 
##        7/11/20        7/12/20        7/13/20        7/14/20        7/15/20 
##              0              0              0              0              0 
##        7/16/20        7/17/20        7/18/20        7/19/20        7/20/20 
##              0              0              0              0              0 
##        7/21/20           Case 
##              0              0
covid19_ts_df[covid19_ts_df$`Country/Region` == 'US', c('7/21/20', 'Case') ]
## # A tibble: 3 x 2
##   `7/21/20` Case     
##       <dbl> <chr>    
## 1   3899211 confirmed
## 2    141995 deaths   
## 3   1182018 recovered

資料轉置

library(tidyr)
?tidyr

# The day columns sit between the four leading identifier columns
# (Province/State, Country/Region, Lat, Long) and the trailing Case column.
col_names <- colnames(covid19_ts_df)
date_cols <- col_names[5:(length(col_names) - 1)]

# Reshape wide -> long: each day column becomes rows, with the day label in
# `Date` and the count in `Number`. all_of() marks `date_cols` as an external
# character vector of column names, which removes the ambiguity tidyselect
# reports when a bare vector is passed to `cols`.
ts_longer <- pivot_longer(
  covid19_ts_df,
  cols = all_of(date_cols),
  names_to = "Date",
  values_to = "Number"
)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
#head(ts_longer)
ts_longer$Date <- mdy(ts_longer$Date)

us_stat <- ts_longer[ (ts_longer$`Country/Region` =='US') &  (ts_longer$`Case` =='confirmed'), c('Date', 'Number')]

plot(Number ~ Date, us_stat, type = 'o', main = 'United State', xlab = 'Date', ylab = 'Cases')

# DPLYR

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#?dplyr

#ts_longer[ts_longer$Case == 'confirmed',]
#filter(ts_longer, Case == 'confirmed')

#ts_longer[(ts_longer$Case == 'confirmed') & (ts_longer$Number > 1000),]

#ts_longer[(ts_longer$Case == 'confirmed') | (ts_longer$Number > 1000),]
#ts_longer[(ts_longer$Case %in% c('confirmed', 'deaths') ),]

#filter(ts_longer, (Case == 'confirmed') & (Number > 1000) )
#filter(ts_longer, (Case == 'confirmed') | (Number > 1000) )
#filter(ts_longer, (Case %in% c('confirmed', 'deaths')) & (Number > 1000) )

#ts_longer[,c('Case', 'Number')]
#select(ts_longer, Case, Number)

#select(filter(ts_longer, `Country/Region` == 'Taiwan*'), Case, Number)

# Pipe form: keep Taiwan's rows, then only the Case and Number columns.
ts_longer %>%
  filter(`Country/Region` == "Taiwan*") %>%
  select(Case, Number)
## # A tibble: 546 x 2
##    Case      Number
##    <chr>      <dbl>
##  1 confirmed      1
##  2 confirmed      1
##  3 confirmed      3
##  4 confirmed      3
##  5 confirmed      4
##  6 confirmed      5
##  7 confirmed      8
##  8 confirmed      8
##  9 confirmed      9
## 10 confirmed     10
## # … with 536 more rows
# Same Taiwan selection, limited to the first six rows.
ts_longer %>%
  filter(`Country/Region` == "Taiwan*") %>%
  select(Case, Number) %>%
  head(n = 6)
## # A tibble: 6 x 2
##   Case      Number
##   <chr>      <dbl>
## 1 confirmed      1
## 2 confirmed      1
## 3 confirmed      3
## 4 confirmed      3
## 5 confirmed      4
## 6 confirmed      5
# Top 10 countries by confirmed cases on 2020-07-21.
# ts_longer can hold several rows per country (the later per-country summary
# sums them by `Country/Region`), so the counts are summed per country before
# ranking instead of ranking the individual rows.
stat_longer <- ts_longer %>%
  filter(Date == as.Date("2020-07-21"), Case == "confirmed") %>%
  group_by(`Country/Region`) %>%
  summarise(Number = sum(Number, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Number)) %>%
  head(10)

# Bar chart of the ten largest totals, one bar per country.
barplot(
  stat_longer$Number,
  names.arg = stat_longer$`Country/Region`,
  col = "blue",
  xlab = "Country",
  ylab = "Confirmed Case"
)

# Top 10 countries by deaths on 2020-07-21.
# ts_longer can hold several rows per country (the later per-country summary
# sums them by `Country/Region`), so the counts are summed per country before
# ranking instead of ranking the individual rows.
stat_longer2 <- ts_longer %>%
  filter(Date == as.Date("2020-07-21"), Case == "deaths") %>%
  group_by(`Country/Region`) %>%
  summarise(Number = sum(Number, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Number)) %>%
  head(10)

# Bar chart of the ten largest death totals; the y axis is labelled as deaths
# because this chart plots the `deaths` series, not confirmed cases.
barplot(
  stat_longer2$Number,
  names.arg = stat_longer2$`Country/Region`,
  col = "blue",
  xlab = "Country",
  ylab = "Deaths"
)

# Help lookup kept for reference; commented out (like `#?dplyr` earlier) so
# the script does not open a help page when it is run.
#?pivot_wider

# Keep only the 2020-07-21 observations.
ts_longer_0721 <- ts_longer %>%
  filter(Date == as.Date("2020-07-21"))

# Spread the values of `Case` into their own columns, filled from `Number`.
ts_wider_0721 <- pivot_wider(
  ts_longer_0721,
  names_from  = "Case",
  values_from = "Number"
)

# Per-country totals for each case type; missing values are ignored in the sums.
ts_stat_all <- ts_wider_0721 %>%
  group_by(`Country/Region`) %>%
  summarise(
    confirmed_all = sum(confirmed, na.rm = TRUE),
    deaths_all    = sum(deaths, na.rm = TRUE),
    recovered_all = sum(recovered, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Fatality ratio: total deaths divided by total confirmed cases.
ts_stat_all <- mutate(ts_stat_all, fatalities = deaths_all / confirmed_all)


# Ten countries with the highest fatality ratio, largest first.
ts_stat_all %>%
  select(`Country/Region`, fatalities) %>%
  arrange(desc(fatalities)) %>%
  head(n = 10)
## # A tibble: 10 x 2
##    `Country/Region` fatalities
##    <chr>                 <dbl>
##  1 Yemen                 0.280
##  2 MS Zaandam            0.222
##  3 United Kingdom        0.153
##  4 Belgium               0.153
##  5 Italy                 0.143
##  6 France                0.141
##  7 Hungary               0.137
##  8 Netherlands           0.118
##  9 Mexico                0.113
## 10 Spain                 0.107