Data were gained from NHK homepage, https://www3.nhk.or.jp/news/special/coronavirus/data-widget/#mokuji0 .
library(RCurl,quietly=TRUE)
## Warning: package 'RCurl' was built under R version 3.6.3
library(readr,quietly=TRUE)
## Warning: package 'readr' was built under R version 3.6.3
## Registered S3 methods overwritten by 'tibble':
## method from
## format.tbl pillar
## print.tbl pillar
url = "https://www3.nhk.or.jp/n-data/opendata/coronavirus/nhk_news_covid19_domestic_daily_data.csv"
myfile <- getURL(url=url,.encoding='UTF-8', ssl.verifypeer=FALSE)
nhk_covid19 <- read_csv(myfile,col_types = cols('日付' = col_date(format = "%Y/%m/%d")))
colnames(nhk_covid19)<-c('date','infected_daily','infected_accum','death_daily','death_accum')
Seven days average will compensate the uneven working amount depending on weekday. Seven days average makes output values of the upper and lower 3 rows as NA.
library(RcppRoll,quietly=TRUE)
## Warning: package 'RcppRoll' was built under R version 3.6.3
nhk_covid19$infectedr7ds<-roll_mean(nhk_covid19$infected_daily,n=7,fill=NA)
nhk_covid19$deathr7ds<-roll_mean(nhk_covid19$death_daily,n=7,fill=NA)
library(dplyr,quietly=TRUE)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
ls <- length(nhk_covid19$date)
nhk_covid197ds<-slice(nhk_covid19,4:(ls-3))
head(nhk_covid19)
## Warning: `...` is not empty.
##
## We detected these problematic arguments:
## * `needs_dots`
##
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?
## # A tibble: 6 x 7
## date infected_daily infected_accum death_daily death_accum infectedr7ds
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-16 1 1 0 0 NA
## 2 2020-01-17 0 1 0 0 NA
## 3 2020-01-18 0 1 0 0 NA
## 4 2020-01-19 0 1 0 0 0.143
## 5 2020-01-20 0 1 0 0 0
## 6 2020-01-21 0 1 0 0 0.143
## # ... with 1 more variable: deathr7ds <dbl>
tail(nhk_covid19)
## Warning: `...` is not empty.
##
## We detected these problematic arguments:
## * `needs_dots`
##
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?
## # A tibble: 6 x 7
## date infected_daily infected_accum death_daily death_accum infectedr7ds
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-09-07 10603 1593287 62 16462 11345
## 2 2021-09-08 12388 1605675 89 16551 10316.
## 3 2021-09-09 10395 1616070 88 16639 9503.
## 4 2021-09-10 8888 1624958 69 16708 NA
## 5 2021-09-11 8806 1633764 56 16764 NA
## 6 2021-09-12 7212 1640976 41 16805 NA
## # ... with 1 more variable: deathr7ds <dbl>
We estimate death depend on rolled infected and rolled death from 2020-01-19 to 2021/07/12. We surpose that death depends on the infected of 20 days before
lastnum <- which(nhk_covid197ds$date =="2021-07-12")
delta = 20
DeadT <- nhk_covid197ds$deathr7ds[delta:lastnum]
InfectedT <-nhk_covid197ds$infectedr7ds[1:(lastnum-delta+1)]
covidT <- data.frame(Infected=InfectedT,Dead=DeadT)
covid19lm <- lm(Dead~Infected ,data = covidT)
covidcof <-summary(covid19lm)$coefficients
print(covidcof)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.27828695 0.5362233241 6.11366 1.913208e-09
## Infected 0.01679829 0.0002351545 71.43514 5.423595e-271
death_estimated <-nhk_covid19$infectedr7ds*covidcof[2]+covidcof[1]
death_est_date <- nhk_covid19$date+20
death_estimation = data.frame(date = death_est_date ,estimated= death_estimated)
library(dplyr, quietly = TRUE)
#7days
df =data.frame(date=nhk_covid19$date,value=nhk_covid19$infectedr7ds,item='infected_7dyas' )
df0 =data.frame(date=nhk_covid19$date,value=nhk_covid19$deathr7ds,item='death_7dyas' )
df = bind_rows(df,df0)
#estimated
df0 =data.frame(date=death_estimation$date,value=death_estimation$estimated,item='death_estimated' )
df = bind_rows(df,df0)
library(ggplot2, quietly = TRUE)
p <- ggplot(data=df,aes(x=date,y=value,color=item,shape=item))
p <- p + scale_y_log10()
p <- p + geom_point()
p
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 18 rows containing missing values (geom_point).