Libraries, read data, clean data
> library(readr)
> library(dplyr)
> library(tidyr)
> library(lubridate)
> library(ggplot2)
>
> # Load the Hokkaido COVID-19 table; the file is read as Shift-JIS so that
> # the Japanese column headers are decoded correctly.
> data <- read_csv(
+   "data/pref/01Hokkaido/covid19_data.csv",
+   locale = locale(encoding = "SHIFT-JIS")
+ )
> head(data)
# A tibble: 6 x 19
グラフ非表示 年 月 日 日検査数 検査累計 日陽性数 陽性累計 日患者数
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 <NA> 2020 1 28 NA NA 1 1 1
2 <NA> 2020 1 29 NA NA 0 1 0
3 x 2020 1 30 NA NA 0 1 0
4 x 2020 1 31 NA NA 0 1 0
5 x 2020 2 1 NA NA 0 1 0
6 x 2020 2 2 NA NA 0 1 0
# ... with 10 more variables: 患者累計 <dbl>, 日軽症中等症数 <dbl>,
# 軽症中等症累計 <dbl>, 日重症数 <dbl>, 重症累計 <dbl>, 日死亡数 <dbl>,
# 死亡累計 <dbl>, 日治療終了数 <dbl>, 治療終了累計 <dbl>, 備考 <chr>
> # Clean the raw table in a single pipeline:
> #   1. build a Date from the separate year (年), month (月) and day (日) columns;
> #   2. keep only the cumulative confirmed / recovered / death counts;
> #   3. treat missing counts as 0 -- restricted to the count columns so that
> #      the date column can never be overwritten by the NA replacement;
> #   4. derive active cases as confirmed - deaths - recovered.
> data <- data %>%
+   mutate(date = ymd(paste(年, 月, 日, sep = "-"))) %>%
+   select(date, confirmed = 陽性累計, recovered = 治療終了累計, deaths = 死亡累計) %>%
+   replace_na(list(confirmed = 0, recovered = 0, deaths = 0)) %>%
+   mutate(current.confirmed = confirmed - deaths - recovered)
> head(data)
# A tibble: 6 x 5
date confirmed recovered deaths current.confirmed
<date> <dbl> <dbl> <dbl> <dbl>
1 2020-01-28 1 0 0 1
2 2020-01-29 1 0 0 1
3 2020-01-30 1 0 0 1
4 2020-01-31 1 0 0 1
5 2020-02-01 1 0 0 1
6 2020-02-02 1 0 0 1
> # Reshape to long format: one row per (date, series) pair, with the series
> # key turned into a factor carrying human-readable labels. recode_factor()
> # orders the levels in the order the replacements are listed below.
> data.long <- data %>%
+   gather(key = "type", value = "count", -date) %>%
+   mutate(
+     type = recode_factor(
+       type,
+       confirmed = 'Total Confirmed',
+       current.confirmed = 'Current Confirmed',
+       recovered = 'Recovered',
+       deaths = 'Deaths'
+     )
+   )
> head(data.long)
# A tibble: 6 x 3
date type count
<date> <fct> <dbl>
1 2020-01-28 Total Confirmed 1
2 2020-01-29 Total Confirmed 1
3 2020-01-30 Total Confirmed 1
4 2020-01-31 Total Confirmed 1
5 2020-02-01 Total Confirmed 1
6 2020-02-02 Total Confirmed 1
> # First and last observation dates, plus display strings for plot titles
> # (day, abbreviated month name, year; the month name follows the locale).
> dates <- data$date
> min.date <- min(dates)
> max.date <- max(dates)
> min.date.txt <- format(min.date, '%d %b %Y')
> max.date.txt <- format(max.date, '%d %b %Y')
Cases - area plot
> # Stacked area chart of active, recovered and deceased cases over time.
> # 'Total Confirmed' is left out because it equals the sum of the three
> # plotted series. The fill colours are given as a named vector so each
> # series keeps its colour even if the factor level order changes.
> data.long %>%
+   filter(type != 'Total Confirmed') %>%
+   ggplot(aes(x = date, y = count)) +
+   geom_area(aes(fill = type), alpha = 0.5) +
+   labs(title = paste0('Numbers of Cases Hokkaido - ', max.date.txt)) +
+   scale_fill_manual(values = c('Current Confirmed' = 'red',
+                                'Recovered' = 'green',
+                                'Deaths' = 'black')) +
+   theme(legend.title = element_blank(), legend.position = 'bottom',
+         plot.title = element_text(size = 8),
+         axis.title.x = element_blank(),
+         axis.title.y = element_blank(),
+         legend.key.size = unit(0.2, 'cm'),
+         legend.text = element_text(size = 6),
+         axis.text = element_text(size = 7),
+         axis.text.x = element_text(angle = 45, hjust = 1))
