In this note, I use the daily COVID-19 case and death counts in Vietnam to demonstrate time series analysis.

Loading packages

library(readxl)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.2
## Warning: package 'tibble' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'dplyr' was built under R version 4.0.2
## Warning: package 'stringr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.0.2
library(zoo)
## Warning: package 'zoo' was built under R version 4.0.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Reading data

# Load the daily cases/deaths spreadsheet and add a `date` column obtained by
# passing the sheet's `Date` column through as.Date(); the plots below use
# `date` on the x-axis.
vn <- read_excel("~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/Daily cases and deaths in Vietnam 11-2021.xlsx") %>%
  mutate(date = as.Date(Date))
# Lollipop charts of the daily counts: a vertical segment from 0 up to each
# day's value, capped with a point. Color is mapped to `date`, the x-axis is
# ticked every week with labels rotated 45 degrees, and the legend is hidden.
p1 <- ggplot(data = vn, aes(x = date, y = Cases, color = date)) +
  geom_segment(aes(x = date, xend = date, y = 0, yend = Cases)) +
  geom_point() +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", y = "Số ca dương tính", title = "Số ca dương tính mỗi ngày")

p2 <- ggplot(data = vn, aes(x = date, y = Deaths, color = date)) +
  geom_segment(aes(x = date, xend = date, y = 0, yend = Deaths)) +
  geom_point() +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = "Ngày, tính từ 1/9/2021", y = "Số ca tử vong", title = "Số ca tử vong mỗi ngày")

# Stack the cases panel above the deaths panel.
grid.arrange(p1, p2, nrow = 2)

Using ggplot2 with a LOESS smoother

# Scatter plots of the daily counts with a LOESS trend line on top.
# `se = FALSE` turns off the confidence band; it is spelled out in full
# because `F` is an ordinary variable that can be reassigned, whereas
# `FALSE` is a reserved word.
p1 <- ggplot(data = vn, aes(x = date, y = Cases, color = date)) +
  geom_point() +
  stat_smooth(color = "purple", method = "loess", se = FALSE) +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", y = "Số ca dương tính", title = "Số ca dương tính mỗi ngày")

p2 <- ggplot(data = vn, aes(x = date, y = Deaths, color = date)) +
  geom_point() +
  stat_smooth(method = "loess", color = "red", se = FALSE) +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = "Ngày, tính từ 1/9/2021", y = "Số ca tử vong", title = "Số ca tử vong mỗi ngày")

# Stack the cases panel above the deaths panel.
grid.arrange(p1, p2, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Using a 5-point simple moving average (TTR)

# 5-point simple moving averages of the daily counts. The first four values
# are NA because a full window is not yet available, which is why ggplot2
# warns about removing 4 rows when it draws each line.
vn$ma <- TTR::SMA(vn$Cases, n = 5)
vn$ma.deaths <- TTR::SMA(vn$Deaths, n = 5)

# The colors are fixed values, so they are set outside aes(). Inside aes(),
# "blue" and "purple" are treated as category labels and drawn with the
# default discrete palette instead of the named colors.
p1 <- ggplot(vn, aes(x = date)) +
  geom_point(aes(y = Cases), color = "blue") +
  geom_line(aes(y = ma), color = "purple") +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", y = "Số ca dương tính", title = "Số ca dương tính mỗi ngày")

p2 <- ggplot(vn, aes(x = date)) +
  geom_point(aes(y = Deaths)) +
  geom_line(aes(y = ma.deaths), color = "purple") +
  scale_x_date(date_breaks = "1 week") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", y = "Số ca tử vong", title = "Số ca tử vong mỗi ngày")

# Stack the cases panel above the deaths panel.
grid.arrange(p1, p2, nrow = 2)
## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 row(s) containing missing values (geom_path).