Covid 19 Data at NHK homepage

Data were gained from NHK homepage, https://www3.nhk.or.jp/news/special/coronavirus/data-widget/#mokuji0 .

library(RCurl,quietly=TRUE)
## Warning: package 'RCurl' was built under R version 3.6.3
library(readr,quietly=TRUE)
## Warning: package 'readr' was built under R version 3.6.3
## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar
url = "https://www3.nhk.or.jp/n-data/opendata/coronavirus/nhk_news_covid19_domestic_daily_data.csv"
myfile <- getURL(url=url,.encoding='UTF-8', ssl.verifypeer=FALSE)

nhk_covid19 <- read_csv(myfile,col_types = cols('日付' = col_date(format = "%Y/%m/%d")))
colnames(nhk_covid19)<-c('date','infected_daily','infected_accum','death_daily','death_accum')

Seven days rolling and Slice rows with NA

Seven days average will compensate the uneven working amount depending on weekday. Seven days average makes output values of the upper and lower 3 rows as NA.

library(RcppRoll,quietly=TRUE)
## Warning: package 'RcppRoll' was built under R version 3.6.3
nhk_covid19$infectedr7ds<-roll_mean(nhk_covid19$infected_daily,n=7,fill=NA)
nhk_covid19$deathr7ds<-roll_mean(nhk_covid19$death_daily,n=7,fill=NA)
library(dplyr,quietly=TRUE)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
ls <- length(nhk_covid19$date)
nhk_covid197ds<-slice(nhk_covid19,4:(ls-3))
head(nhk_covid19)
## Warning: `...` is not empty.
## 
## We detected these problematic arguments:
## * `needs_dots`
## 
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?
## # A tibble: 6 x 7
##   date       infected_daily infected_accum death_daily death_accum infectedr7ds
##   <date>              <dbl>          <dbl>       <dbl>       <dbl>        <dbl>
## 1 2020-01-16              1              1           0           0       NA    
## 2 2020-01-17              0              1           0           0       NA    
## 3 2020-01-18              0              1           0           0       NA    
## 4 2020-01-19              0              1           0           0        0.143
## 5 2020-01-20              0              1           0           0        0    
## 6 2020-01-21              0              1           0           0        0.143
## # ... with 1 more variable: deathr7ds <dbl>
tail(nhk_covid19)
## Warning: `...` is not empty.
## 
## We detected these problematic arguments:
## * `needs_dots`
## 
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?
## # A tibble: 6 x 7
##   date       infected_daily infected_accum death_daily death_accum infectedr7ds
##   <date>              <dbl>          <dbl>       <dbl>       <dbl>        <dbl>
## 1 2021-09-07          10603        1593287          62       16462       11345 
## 2 2021-09-08          12388        1605675          89       16551       10316.
## 3 2021-09-09          10395        1616070          88       16639        9503.
## 4 2021-09-10           8888        1624958          69       16708          NA 
## 5 2021-09-11           8806        1633764          56       16764          NA 
## 6 2021-09-12           7212        1640976          41       16805          NA 
## # ... with 1 more variable: deathr7ds <dbl>

Linear regressive estimation of the death

We estimate death depend on rolled infected and rolled death from 2020-01-19 to 2021/07/12. We surpose that death depends on the infected of 20 days before

lastnum <- which(nhk_covid197ds$date =="2021-07-12")
delta = 20
DeadT <- nhk_covid197ds$deathr7ds[delta:lastnum]
InfectedT <-nhk_covid197ds$infectedr7ds[1:(lastnum-delta+1)]
covidT <- data.frame(Infected=InfectedT,Dead=DeadT)
covid19lm <- lm(Dead~Infected ,data = covidT)
covidcof <-summary(covid19lm)$coefficients
print(covidcof)
##               Estimate   Std. Error  t value      Pr(>|t|)
## (Intercept) 3.27828695 0.5362233241  6.11366  1.913208e-09
## Infected    0.01679829 0.0002351545 71.43514 5.423595e-271
death_estimated <-nhk_covid19$infectedr7ds*covidcof[2]+covidcof[1]
death_est_date <- nhk_covid19$date+20
death_estimation = data.frame(date  = death_est_date ,estimated= death_estimated)

bind rows

library(dplyr, quietly = TRUE)

#7days
df =data.frame(date=nhk_covid19$date,value=nhk_covid19$infectedr7ds,item='infected_7dyas' )
df0 =data.frame(date=nhk_covid19$date,value=nhk_covid19$deathr7ds,item='death_7dyas' )
df = bind_rows(df,df0)
#estimated
df0 =data.frame(date=death_estimation$date,value=death_estimation$estimated,item='death_estimated' )
df = bind_rows(df,df0)

Plot

library(ggplot2, quietly = TRUE)

p <- ggplot(data=df,aes(x=date,y=value,color=item,shape=item))
p <- p + scale_y_log10()
p <- p + geom_point()
p 
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 18 rows containing missing values (geom_point).