#install.packages('tidyverse')

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Open the package-level help index for tidyverse.
help(package = "tidyverse")

# Print the names of the packages bundled with tidyverse.
tidyverse_packages()
##  [1] "broom"      "cli"        "crayon"     "dbplyr"     "dplyr"     
##  [6] "forcats"    "ggplot2"    "haven"      "hms"        "httr"      
## [11] "jsonlite"   "lubridate"  "magrittr"   "modelr"     "pillar"    
## [16] "purrr"      "readr"      "readxl"     "reprex"     "rlang"     
## [21] "rstudioapi" "rvest"      "stringr"    "tibble"     "tidyr"     
## [26] "xml2"       "tidyverse"

# Tibble ----

# Build a small tibble column by column and inspect its class vector.
t <- tibble(
  a = c("A", "B", "C"),
  b = c(1, 1, 2)
)
class(t)
## [1] "tbl_df"     "tbl"        "data.frame"

# Build a tibble row by row: the `~x, ~y` header names the columns.
tribble(
  ~x, ~y,
  1, "A",
  2, "B"
)
## # A tibble: 2 x 2
##       x y    
##   <dbl> <chr>
## 1     1 A    
## 2     2 B
# Round trip between a base data.frame and a tibble.
df <- data.frame(
  a = c("A", "B", "C"),
  b = c(1, 1, 2)
)
tf <- as_tibble(df)

class(df)
## [1] "data.frame"
class(tf)
## [1] "tbl_df"     "tbl"        "data.frame"

# Convert the tibble back into a plain data.frame.
df <- as.data.frame(tf)
# install.packages("tidyr")

library(tidyr)
library(readr)

help(package = "tidyr")

# Download the global confirmed-cases time series from the CSSEGISandData
# GitHub repository into the working directory, then read the local copy.
filename <- "time_series_covid19_confirmed_global.csv"
url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
download.file(url, filename)
df <- read_csv(filename)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the wide table.
head(df)
## # A tibble: 6 x 200
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 193 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
library(tidyr)

# Column 5 onward holds the per-day counts; the four columns before it are
# kept as identifiers.
col_names <- names(df)
date_cols <- col_names[seq(5, ncol(df))]

col_names[seq(5, length(col_names))]
##   [1] "1/22/20" "1/23/20" "1/24/20" "1/25/20" "1/26/20" "1/27/20" "1/28/20"
##   [8] "1/29/20" "1/30/20" "1/31/20" "2/1/20"  "2/2/20"  "2/3/20"  "2/4/20" 
##  [15] "2/5/20"  "2/6/20"  "2/7/20"  "2/8/20"  "2/9/20"  "2/10/20" "2/11/20"
##  [22] "2/12/20" "2/13/20" "2/14/20" "2/15/20" "2/16/20" "2/17/20" "2/18/20"
##  [29] "2/19/20" "2/20/20" "2/21/20" "2/22/20" "2/23/20" "2/24/20" "2/25/20"
##  [36] "2/26/20" "2/27/20" "2/28/20" "2/29/20" "3/1/20"  "3/2/20"  "3/3/20" 
##  [43] "3/4/20"  "3/5/20"  "3/6/20"  "3/7/20"  "3/8/20"  "3/9/20"  "3/10/20"
##  [50] "3/11/20" "3/12/20" "3/13/20" "3/14/20" "3/15/20" "3/16/20" "3/17/20"
##  [57] "3/18/20" "3/19/20" "3/20/20" "3/21/20" "3/22/20" "3/23/20" "3/24/20"
##  [64] "3/25/20" "3/26/20" "3/27/20" "3/28/20" "3/29/20" "3/30/20" "3/31/20"
##  [71] "4/1/20"  "4/2/20"  "4/3/20"  "4/4/20"  "4/5/20"  "4/6/20"  "4/7/20" 
##  [78] "4/8/20"  "4/9/20"  "4/10/20" "4/11/20" "4/12/20" "4/13/20" "4/14/20"
##  [85] "4/15/20" "4/16/20" "4/17/20" "4/18/20" "4/19/20" "4/20/20" "4/21/20"
##  [92] "4/22/20" "4/23/20" "4/24/20" "4/25/20" "4/26/20" "4/27/20" "4/28/20"
##  [99] "4/29/20" "4/30/20" "5/1/20"  "5/2/20"  "5/3/20"  "5/4/20"  "5/5/20" 
## [106] "5/6/20"  "5/7/20"  "5/8/20"  "5/9/20"  "5/10/20" "5/11/20" "5/12/20"
## [113] "5/13/20" "5/14/20" "5/15/20" "5/16/20" "5/17/20" "5/18/20" "5/19/20"
## [120] "5/20/20" "5/21/20" "5/22/20" "5/23/20" "5/24/20" "5/25/20" "5/26/20"
## [127] "5/27/20" "5/28/20" "5/29/20" "5/30/20" "5/31/20" "6/1/20"  "6/2/20" 
## [134] "6/3/20"  "6/4/20"  "6/5/20"  "6/6/20"  "6/7/20"  "6/8/20"  "6/9/20" 
## [141] "6/10/20" "6/11/20" "6/12/20" "6/13/20" "6/14/20" "6/15/20" "6/16/20"
## [148] "6/17/20" "6/18/20" "6/19/20" "6/20/20" "6/21/20" "6/22/20" "6/23/20"
## [155] "6/24/20" "6/25/20" "6/26/20" "6/27/20" "6/28/20" "6/29/20" "6/30/20"
## [162] "7/1/20"  "7/2/20"  "7/3/20"  "7/4/20"  "7/5/20"  "7/6/20"  "7/7/20" 
## [169] "7/8/20"  "7/9/20"  "7/10/20" "7/11/20" "7/12/20" "7/13/20" "7/14/20"
## [176] "7/15/20" "7/16/20" "7/17/20" "7/18/20" "7/19/20" "7/20/20" "7/21/20"
## [183] "7/22/20" "7/23/20" "7/24/20" "7/25/20" "7/26/20" "7/27/20" "7/28/20"
## [190] "7/29/20" "7/30/20" "7/31/20" "8/1/20"  "8/2/20"  "8/3/20"  "8/4/20"
# Reshape wide -> long: one row per (location, date), count in `Confirmed`.
# all_of() states explicitly that `date_cols` is an external character vector
# of column names rather than a column of `df`.
df_long <- df %>%
  gather(Date, Confirmed, all_of(date_cols))

# The column headers look like "1/22/20": the year has two digits, so it must
# be parsed with %y. %Y would read "20" as the year 20 instead of 2020.
df_long$Date <- as.Date(df_long$Date, format = "%m/%d/%y")


library(dplyr)

# Daily confirmed series for Taiwan, ordered by date for plotting.
taiwan_stat <- df_long %>%
  filter(`Country/Region` == "Taiwan*") %>%
  arrange(Date) %>%
  select(Date, Confirmed)

plot(taiwan_stat$Date, taiwan_stat$Confirmed, type = "o")

# 練習題 (Exercise) ----

library(readr)

# Download the global deaths time series into the working directory and read
# the local copy.
filename <- "time_series_covid19_deaths_global.csv"
url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
download.file(url, filename)
deaths <- read_csv(filename)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the wide deaths table.
deaths %>% head()
## # A tibble: 6 x 200
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 193 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
library(tidyr)

# Column 5 onward holds the per-day counts; the four columns before it are
# kept as identifiers.
col_names <- colnames(deaths)
date_col <- col_names[5:length(col_names)]

# Wide -> long, then parse the "m/d/yy" headers into Date values (two-digit
# year, hence %y). all_of() marks `date_col` as an external character vector
# of column names.
deaths_df <- deaths %>%
  gather(key = Date, value = Deaths, all_of(date_col)) %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y"))

# head(deaths_df)

# Daily deaths series for the US. type = "o" joins points in row order, so
# sort by date explicitly before plotting.
deaths_stat <- deaths_df %>%
  filter(`Country/Region` == "US") %>%
  arrange(Date) %>%
  select(Date, Deaths)

plot(Deaths ~ Date, deaths_stat, type = "o", col = "red", main = "US Deaths")

# Spread ----

# Long-format toy table: every row carries one value tagged with its type.
tf <- tibble(
  idx = c(1, 2, 3, 4),
  types = c("A", "B", "A", "B"),
  val = c(5, 6, 7, 8)
)

tf
## # A tibble: 4 x 3
##     idx types   val
##   <dbl> <chr> <dbl>
## 1     1 A         5
## 2     2 B         6
## 3     3 A         7
## 4     4 B         8
# Turn each distinct value of `types` into its own column filled from `val`.
spread(tf, key = types, value = val)
## # A tibble: 4 x 3
##     idx     A     B
##   <dbl> <dbl> <dbl>
## 1     1     5    NA
## 2     2    NA     6
## 3     3     7    NA
## 4     4    NA     8
# Download the confirmed, deaths and recovered global time series. The three
# files differ only in the series name, so build each URL from one base path
# instead of repeating the download code three times. `url` and `filename`
# end up holding the values for the last series, as before.
base_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
for (series in c("confirmed", "deaths", "recovered")) {
  filename <- paste0("time_series_covid19_", series, "_global.csv")
  url <- paste0(base_url, filename)
  download.file(url, filename)
}

library(readr)
deaths <- read_csv("time_series_covid19_deaths_global.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Read the recovered series from the downloaded local copy.
recovered <- read_csv("time_series_covid19_recovered_global.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Read the confirmed series from the downloaded local copy.
confirmed <- read_csv("time_series_covid19_confirmed_global.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Row and column counts of each table before tagging.
dim(deaths)
## [1] 266 200
dim(recovered)
## [1] 253 200
dim(confirmed)
## [1] 266 200

# Tag every table with the name of its series, then stack all three row-wise.
deaths$case_type <- "deaths"
recovered$case_type <- "recovered"
confirmed$case_type <- "confirmed"

m <- rbind(deaths, recovered, confirmed)

dim(m)
## [1] 785 201
# Preview the first rows of the stacked table.
m %>% head()
## # A tibble: 6 x 201
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 194 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
# Everything between the four leading identifier columns and the trailing
# `case_type` tag is a daily count.
col_names <- colnames(m)
date_cols <- col_names[5:(length(col_names) - 1)]

# Wide -> long. all_of() states explicitly that `date_cols` is an external
# character vector of column names, not a column of `m`.
m_long <- m %>%
  gather(key = Date, value = Case, all_of(date_cols))

head(m_long)
## # A tibble: 6 x 7
##   `Province/State` `Country/Region`      Lat   Long case_type Date     Case
##   <chr>            <chr>               <dbl>  <dbl> <chr>     <chr>   <dbl>
## 1 <NA>             Afghanistan          33.9  67.7  deaths    1/22/20     0
## 2 <NA>             Albania              41.2  20.2  deaths    1/22/20     0
## 3 <NA>             Algeria              28.0   1.66 deaths    1/22/20     0
## 4 <NA>             Andorra              42.5   1.52 deaths    1/22/20     0
## 5 <NA>             Angola              -11.2  17.9  deaths    1/22/20     0
## 6 <NA>             Antigua and Barbuda  17.1 -61.8  deaths    1/22/20     0
# Number of long-format rows per case type.
table(m_long[["case_type"]])
## 
## confirmed    deaths recovered 
##     52136     52136     49588
# Parse the "m/d/yy" strings BEFORE spreading. spread() orders its output
# rows by the remaining identifier columns; if Date is still character at
# that point the rows come out in text order ("2/1/20", "2/10/20", ...,
# "2/2/20") and line plots drawn from m_wide zig-zag instead of running
# chronologically.
m_wide <- m_long %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
  spread(key = case_type, value = Case)

# Taiwan rows, explicitly sorted by date so plots join points in time order.
taiwan_stat <- m_wide %>%
  filter(`Country/Region` == "Taiwan*") %>%
  arrange(Date)

head(taiwan_stat)
## # A tibble: 6 x 8
##   `Province/State` `Country/Region`   Lat  Long Date       confirmed deaths
##   <chr>            <chr>            <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>             Taiwan*           23.7   121 2020-01-22         1      0
## 2 <NA>             Taiwan*           23.7   121 2020-01-23         1      0
## 3 <NA>             Taiwan*           23.7   121 2020-01-24         3      0
## 4 <NA>             Taiwan*           23.7   121 2020-01-25         3      0
## 5 <NA>             Taiwan*           23.7   121 2020-01-26         4      0
## 6 <NA>             Taiwan*           23.7   121 2020-01-27         5      0
## # … with 1 more variable: recovered <dbl>
# All three Taiwan series on one set of axes; the axes are set up by the
# first call, which plots the confirmed series.
plot(confirmed ~ Date, taiwan_stat, col = "blue", type = "l")
lines(deaths ~ Date, taiwan_stat, col = "red")
lines(recovered ~ Date, taiwan_stat, col = "green")

# The same series as three stacked panels. par() returns the previous
# settings, so keep them and restore afterwards; otherwise every later plot
# in the session is still drawn into the 3 x 1 grid.
old_par <- par(mfrow = c(3, 1))
plot(confirmed ~ Date, taiwan_stat, col = "blue", type = "l")
plot(deaths ~ Date, taiwan_stat, col = "red", type = "l")
plot(recovered ~ Date, taiwan_stat, col = "green", type = "l")
par(old_par)

# US rows sorted by date: type = "o" joins the points in row order, so the
# rows must be chronological for the lines to be meaningful.
us_stat <- m_wide %>%
  filter(`Country/Region` == "US") %>%
  arrange(Date)

# head(us_stat)

plot(confirmed ~ Date, us_stat, type = "o", col = "blue")
lines(recovered ~ Date, us_stat, type = "o", col = "green")
lines(deaths ~ Date, us_stat, type = "o", col = "red")

# Collapse all China rows that share a date into one total per day.
# `.groups = "drop"` returns an ungrouped result and silences the regrouping
# message; arrange(Date) guarantees chronological order for the line plot.
china_stat <- m_wide %>%
  filter(`Country/Region` == "China") %>%
  select(`Country/Region`, Date, confirmed) %>%
  group_by(`Country/Region`, Date) %>%
  summarize(confirmed = sum(confirmed, na.rm = TRUE), .groups = "drop") %>%
  arrange(Date)

# US rows, sorted by date for the same reason.
us_stat <- m_wide %>%
  filter(`Country/Region` == "US") %>%
  arrange(Date)

# US confirmed cases in blue with China's daily total overlaid in red.
plot(confirmed ~ Date, us_stat, col = "blue", type = "l")
lines(confirmed ~ Date, china_stat, col = "red", type = "l")


library(ggplot2)
g <- ggplot()

# Split the "m/d/yy" string into three character columns: m, d and y.
m_long2 <- separate(m_long, Date, into = c("m", "d", "y"), sep = "/")

# m_long2


# separate_rows() instead places each "/"-delimited piece on its own row.
head(separate_rows(m_long, Date, sep = "/"))
## # A tibble: 6 x 7
##   `Province/State` `Country/Region`   Lat  Long case_type Date   Case
##   <chr>            <chr>            <dbl> <dbl> <chr>     <chr> <dbl>
## 1 <NA>             Afghanistan       33.9  67.7 deaths    1         0
## 2 <NA>             Afghanistan       33.9  67.7 deaths    22        0
## 3 <NA>             Afghanistan       33.9  67.7 deaths    20        0
## 4 <NA>             Albania           41.2  20.2 deaths    1         0
## 5 <NA>             Albania           41.2  20.2 deaths    22        0
## 6 <NA>             Albania           41.2  20.2 deaths    20        0
# Prefix the two-digit year with "20", then join year and month with "/"
# into a single `yearmonth` column.
m_long2 %>%
  mutate(year = paste0("20", y)) %>%
  unite(col = "yearmonth", year, m, sep = "/")
## # A tibble: 153,860 x 9
##    `Province/State` `Country/Region`   Lat   Long case_type yearmonth d    
##    <chr>            <chr>            <dbl>  <dbl> <chr>     <chr>     <chr>
##  1 <NA>             Afghanistan       33.9  67.7  deaths    2020/1    22   
##  2 <NA>             Albania           41.2  20.2  deaths    2020/1    22   
##  3 <NA>             Algeria           28.0   1.66 deaths    2020/1    22   
##  4 <NA>             Andorra           42.5   1.52 deaths    2020/1    22   
##  5 <NA>             Angola           -11.2  17.9  deaths    2020/1    22   
##  6 <NA>             Antigua and Bar…  17.1 -61.8  deaths    2020/1    22   
##  7 <NA>             Argentina        -38.4 -63.6  deaths    2020/1    22   
##  8 <NA>             Armenia           40.1  45.0  deaths    2020/1    22   
##  9 Australian Capi… Australia        -35.5 149.   deaths    2020/1    22   
## 10 New South Wales  Australia        -33.9 151.   deaths    2020/1    22   
## # … with 153,850 more rows, and 2 more variables: y <chr>, Case <dbl>
# For the US, keep the row recorded on the latest date of each month,
# separately for every case type. summarise() is meant to return one row per
# group, so the per-group maximum is attached with mutate() and the matching
# row is picked with filter(). The final arrange()/select() reproduce the row
# order and column layout of the previous summarise()-based version.
m_long %>%
  mutate(FDate = as.Date(Date, format = "%m/%d/%y")) %>%
  separate(Date, into = c("m", "d", "y"), sep = "/") %>%
  filter(`Country/Region` == "US") %>%
  select(FDate, case_type, m, Case) %>%
  group_by(case_type, m) %>%
  mutate(max_date = max(FDate)) %>%
  filter(FDate == max_date) %>%
  ungroup() %>%
  arrange(case_type, m) %>%
  select(case_type, m, max_date, Case, FDate) %>%
  head()
## `summarise()` regrouping output by 'case_type', 'm' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups:   case_type, m [6]
##   case_type m     max_date      Case FDate     
##   <chr>     <chr> <date>       <dbl> <date>    
## 1 confirmed 1     2020-01-31       7 2020-01-31
## 2 confirmed 2     2020-02-29      24 2020-02-29
## 3 confirmed 3     2020-03-31  188724 2020-03-31
## 4 confirmed 4     2020-04-30 1072667 2020-04-30
## 5 confirmed 5     2020-05-31 1799124 2020-05-31
## 6 confirmed 6     2020-06-30 2636414 2020-06-30

# Missing Values ----

# Table size, followed by the number of missing values in each column.
dim(confirmed)
## [1] 266 201
confirmed %>%
  is.na() %>%
  colSums()
## Province/State Country/Region            Lat           Long        1/22/20 
##            185              0              0              0              0 
##        1/23/20        1/24/20        1/25/20        1/26/20        1/27/20 
##              0              0              0              0              0 
##        1/28/20        1/29/20        1/30/20        1/31/20         2/1/20 
##              0              0              0              0              0 
##         2/2/20         2/3/20         2/4/20         2/5/20         2/6/20 
##              0              0              0              0              0 
##         2/7/20         2/8/20         2/9/20        2/10/20        2/11/20 
##              0              0              0              0              0 
##        2/12/20        2/13/20        2/14/20        2/15/20        2/16/20 
##              0              0              0              0              0 
##        2/17/20        2/18/20        2/19/20        2/20/20        2/21/20 
##              0              0              0              0              0 
##        2/22/20        2/23/20        2/24/20        2/25/20        2/26/20 
##              0              0              0              0              0 
##        2/27/20        2/28/20        2/29/20         3/1/20         3/2/20 
##              0              0              0              0              0 
##         3/3/20         3/4/20         3/5/20         3/6/20         3/7/20 
##              0              0              0              0              0 
##         3/8/20         3/9/20        3/10/20        3/11/20        3/12/20 
##              0              0              0              0              0 
##        3/13/20        3/14/20        3/15/20        3/16/20        3/17/20 
##              0              0              0              0              0 
##        3/18/20        3/19/20        3/20/20        3/21/20        3/22/20 
##              0              0              0              0              0 
##        3/23/20        3/24/20        3/25/20        3/26/20        3/27/20 
##              0              0              0              0              0 
##        3/28/20        3/29/20        3/30/20        3/31/20         4/1/20 
##              0              0              0              0              0 
##         4/2/20         4/3/20         4/4/20         4/5/20         4/6/20 
##              0              0              0              0              0 
##         4/7/20         4/8/20         4/9/20        4/10/20        4/11/20 
##              0              0              0              0              0 
##        4/12/20        4/13/20        4/14/20        4/15/20        4/16/20 
##              0              0              0              0              0 
##        4/17/20        4/18/20        4/19/20        4/20/20        4/21/20 
##              0              0              0              0              0 
##        4/22/20        4/23/20        4/24/20        4/25/20        4/26/20 
##              0              0              0              0              0 
##        4/27/20        4/28/20        4/29/20        4/30/20         5/1/20 
##              0              0              0              0              0 
##         5/2/20         5/3/20         5/4/20         5/5/20         5/6/20 
##              0              0              0              0              0 
##         5/7/20         5/8/20         5/9/20        5/10/20        5/11/20 
##              0              0              0              0              0 
##        5/12/20        5/13/20        5/14/20        5/15/20        5/16/20 
##              0              0              0              0              0 
##        5/17/20        5/18/20        5/19/20        5/20/20        5/21/20 
##              0              0              0              0              0 
##        5/22/20        5/23/20        5/24/20        5/25/20        5/26/20 
##              0              0              0              0              0 
##        5/27/20        5/28/20        5/29/20        5/30/20        5/31/20 
##              0              0              0              0              0 
##         6/1/20         6/2/20         6/3/20         6/4/20         6/5/20 
##              0              0              0              0              0 
##         6/6/20         6/7/20         6/8/20         6/9/20        6/10/20 
##              0              0              0              0              0 
##        6/11/20        6/12/20        6/13/20        6/14/20        6/15/20 
##              0              0              0              0              0 
##        6/16/20        6/17/20        6/18/20        6/19/20        6/20/20 
##              0              0              0              0              0 
##        6/21/20        6/22/20        6/23/20        6/24/20        6/25/20 
##              0              0              0              0              0 
##        6/26/20        6/27/20        6/28/20        6/29/20        6/30/20 
##              0              0              0              0              0 
##         7/1/20         7/2/20         7/3/20         7/4/20         7/5/20 
##              0              0              0              0              0 
##         7/6/20         7/7/20         7/8/20         7/9/20        7/10/20 
##              0              0              0              0              0 
##        7/11/20        7/12/20        7/13/20        7/14/20        7/15/20 
##              0              0              0              0              0 
##        7/16/20        7/17/20        7/18/20        7/19/20        7/20/20 
##              0              0              0              0              0 
##        7/21/20        7/22/20        7/23/20        7/24/20        7/25/20 
##              0              0              0              0              0 
##        7/26/20        7/27/20        7/28/20        7/29/20        7/30/20 
##              0              0              0              0              0 
##        7/31/20         8/1/20         8/2/20         8/3/20         8/4/20 
##              0              0              0              0              0 
##      case_type 
##              0
# Total number of missing cells in the table.
sum(is.na(confirmed))
## [1] 185

# Per-column NA counts after dropping every row that contains an NA.
colSums(is.na(drop_na(confirmed)))
## Province/State Country/Region            Lat           Long        1/22/20 
##              0              0              0              0              0 
##        1/23/20        1/24/20        1/25/20        1/26/20        1/27/20 
##              0              0              0              0              0 
##        1/28/20        1/29/20        1/30/20        1/31/20         2/1/20 
##              0              0              0              0              0 
##         2/2/20         2/3/20         2/4/20         2/5/20         2/6/20 
##              0              0              0              0              0 
##         2/7/20         2/8/20         2/9/20        2/10/20        2/11/20 
##              0              0              0              0              0 
##        2/12/20        2/13/20        2/14/20        2/15/20        2/16/20 
##              0              0              0              0              0 
##        2/17/20        2/18/20        2/19/20        2/20/20        2/21/20 
##              0              0              0              0              0 
##        2/22/20        2/23/20        2/24/20        2/25/20        2/26/20 
##              0              0              0              0              0 
##        2/27/20        2/28/20        2/29/20         3/1/20         3/2/20 
##              0              0              0              0              0 
##         3/3/20         3/4/20         3/5/20         3/6/20         3/7/20 
##              0              0              0              0              0 
##         3/8/20         3/9/20        3/10/20        3/11/20        3/12/20 
##              0              0              0              0              0 
##        3/13/20        3/14/20        3/15/20        3/16/20        3/17/20 
##              0              0              0              0              0 
##        3/18/20        3/19/20        3/20/20        3/21/20        3/22/20 
##              0              0              0              0              0 
##        3/23/20        3/24/20        3/25/20        3/26/20        3/27/20 
##              0              0              0              0              0 
##        3/28/20        3/29/20        3/30/20        3/31/20         4/1/20 
##              0              0              0              0              0 
##         4/2/20         4/3/20         4/4/20         4/5/20         4/6/20 
##              0              0              0              0              0 
##         4/7/20         4/8/20         4/9/20        4/10/20        4/11/20 
##              0              0              0              0              0 
##        4/12/20        4/13/20        4/14/20        4/15/20        4/16/20 
##              0              0              0              0              0 
##        4/17/20        4/18/20        4/19/20        4/20/20        4/21/20 
##              0              0              0              0              0 
##        4/22/20        4/23/20        4/24/20        4/25/20        4/26/20 
##              0              0              0              0              0 
##        4/27/20        4/28/20        4/29/20        4/30/20         5/1/20 
##              0              0              0              0              0 
##         5/2/20         5/3/20         5/4/20         5/5/20         5/6/20 
##              0              0              0              0              0 
##         5/7/20         5/8/20         5/9/20        5/10/20        5/11/20 
##              0              0              0              0              0 
##        5/12/20        5/13/20        5/14/20        5/15/20        5/16/20 
##              0              0              0              0              0 
##        5/17/20        5/18/20        5/19/20        5/20/20        5/21/20 
##              0              0              0              0              0 
##        5/22/20        5/23/20        5/24/20        5/25/20        5/26/20 
##              0              0              0              0              0 
##        5/27/20        5/28/20        5/29/20        5/30/20        5/31/20 
##              0              0              0              0              0 
##         6/1/20         6/2/20         6/3/20         6/4/20         6/5/20 
##              0              0              0              0              0 
##         6/6/20         6/7/20         6/8/20         6/9/20        6/10/20 
##              0              0              0              0              0 
##        6/11/20        6/12/20        6/13/20        6/14/20        6/15/20 
##              0              0              0              0              0 
##        6/16/20        6/17/20        6/18/20        6/19/20        6/20/20 
##              0              0              0              0              0 
##        6/21/20        6/22/20        6/23/20        6/24/20        6/25/20 
##              0              0              0              0              0 
##        6/26/20        6/27/20        6/28/20        6/29/20        6/30/20 
##              0              0              0              0              0 
##         7/1/20         7/2/20         7/3/20         7/4/20         7/5/20 
##              0              0              0              0              0 
##         7/6/20         7/7/20         7/8/20         7/9/20        7/10/20 
##              0              0              0              0              0 
##        7/11/20        7/12/20        7/13/20        7/14/20        7/15/20 
##              0              0              0              0              0 
##        7/16/20        7/17/20        7/18/20        7/19/20        7/20/20 
##              0              0              0              0              0 
##        7/21/20        7/22/20        7/23/20        7/24/20        7/25/20 
##              0              0              0              0              0 
##        7/26/20        7/27/20        7/28/20        7/29/20        7/30/20 
##              0              0              0              0              0 
##        7/31/20         8/1/20         8/2/20         8/3/20         8/4/20 
##              0              0              0              0              0 
##      case_type 
##              0
# Toy frame with a gap in `col`; filling "up" copies the next non-missing
# value into each NA that sits above it.
df <- data.frame(
  idx = c(1, 2, 3, 4, 5),
  col = c(1, NA, NA, 2, 3)
)
fill(df, col, .direction = "up")
##   idx col
## 1   1   1
## 2   2   2
## 3   3   2
## 4   4   2
## 5   5   3
# Substitute the placeholder "NoPROVINCE" wherever `Province/State` is missing.
replace_na(
  confirmed,
  list(`Province/State` = "NoPROVINCE")
)
## # A tibble: 266 x 201
##    `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##    <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
##  1 NoPROVINCE       Afghanistan       33.9  67.7          0         0         0
##  2 NoPROVINCE       Albania           41.2  20.2          0         0         0
##  3 NoPROVINCE       Algeria           28.0   1.66         0         0         0
##  4 NoPROVINCE       Andorra           42.5   1.52         0         0         0
##  5 NoPROVINCE       Angola           -11.2  17.9          0         0         0
##  6 NoPROVINCE       Antigua and Bar…  17.1 -61.8          0         0         0
##  7 NoPROVINCE       Argentina        -38.4 -63.6          0         0         0
##  8 NoPROVINCE       Armenia           40.1  45.0          0         0         0
##  9 Australian Capi… Australia        -35.5 149.           0         0         0
## 10 New South Wales  Australia        -33.9 151.           0         0         0
## # … with 256 more rows, and 194 more variables: `1/25/20` <dbl>,
## #   `1/26/20` <dbl>, `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>,
## #   `1/30/20` <dbl>, `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>,
## #   `2/3/20` <dbl>, `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>,
## #   `2/7/20` <dbl>, `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>,
## #   `2/11/20` <dbl>, `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>,
## #   `2/15/20` <dbl>, `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>,
## #   `2/19/20` <dbl>, `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>,
## #   `2/23/20` <dbl>, `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>,
## #   `2/27/20` <dbl>, `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>,
## #   `3/2/20` <dbl>, `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>,
## #   `3/6/20` <dbl>, `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>,
## #   `3/10/20` <dbl>, `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>,
## #   `3/14/20` <dbl>, `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>,
## #   `3/18/20` <dbl>, `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>,
## #   `3/22/20` <dbl>, `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>,
## #   `3/26/20` <dbl>, `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>,
## #   `3/30/20` <dbl>, `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>,
## #   `4/3/20` <dbl>, `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>,
## #   `4/7/20` <dbl>, `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>,
## #   `4/11/20` <dbl>, `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>,
## #   `4/15/20` <dbl>, `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>,
## #   `4/19/20` <dbl>, `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>,
## #   `4/23/20` <dbl>, `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>,
## #   `4/27/20` <dbl>, `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>,
## #   `5/1/20` <dbl>, `5/2/20` <dbl>, `5/3/20` <dbl>, …

展開表格

# complete() returns one row for every pairing of the values found in
# `a` and `b`, including pairings that were absent from the input.
df <- data.frame(
  a = c("A", "B", "C"),
  b = c(1, 1, 2)
)
df %>% complete(a, b)
## # A tibble: 6 x 2
##   a         b
##   <chr> <dbl>
## 1 A         1
## 2 A         2
## 3 B         1
## 4 B         2
## 5 C         1
## 6 C         2
# expand() lists every pairing of the values found in `a` and `b`.
df <- data.frame(
  a = c("A", "B", "C"),
  b = c(1, 1, 2)
)
df %>% expand(a, b)
## # A tibble: 6 x 2
##   a         b
##   <chr> <dbl>
## 1 A         1
## 2 A         2
## 3 B         1
## 4 B         2
## 5 C         1
## 6 C         2
# Keep only the country and case-type columns, add any country/case-type
# pairing that is absent, and preview the first six rows. Rows already
# present are retained, so repeated pairs still appear repeatedly.
head(
  complete(
    select(m_long, `Country/Region`, case_type),
    `Country/Region`, case_type
  )
)
## # A tibble: 6 x 2
##   `Country/Region` case_type
##   <chr>            <chr>    
## 1 Afghanistan      confirmed
## 2 Afghanistan      confirmed
## 3 Afghanistan      confirmed
## 4 Afghanistan      confirmed
## 5 Afghanistan      confirmed
## 6 Afghanistan      confirmed

ggplot2

# Line-and-point chart of `deaths` against `Date` from `us_stat`,
# titled "Deaths". Stored in `g` rather than printed.
g <- ggplot(data = us_stat, mapping = aes(x = Date, y = deaths)) +
  geom_line() +
  geom_point() +
  ggtitle("Deaths")

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Convert the ggplot object `g` into a plotly figure and display it.
ggplotly(p = g)