library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.2
## Warning: package 'tibble' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'dplyr' was built under R version 4.0.2
## Warning: package 'stringr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggthemes)
library(ggrepel)
require(scales)
## Loading required package: scales
## Warning: package 'scales' was built under R version 4.0.2
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
require(MASS)
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.0.2
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(gridExtra)
Empirical Bayes (EB) estimate of incidence of mortality is estimated by:
adj.rate_i = (w_i * y_i) + (1 - w_i)*mu
where
k = number of provinces = 62 r_i = number of deaths in the ith province p_i = number of cases y_i = r_i / p_i (case fatality rate) mu = sum(r_i) / sum(p_i) v_i = sum(p_i*(y_i-mu)^2) / sum(p_i) - mu / sum(p_i)/k w_i = v_i / (v_i + mu/p_i)
Ref: http://www.dpi.inpe.br/gilberto/tutorials/software/geoda/tutorials/w6_rates_slides.pdf
Vaccine data: https://vnexpress.net/covid-19/vaccine (9/11/2021)
df = read_excel("~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/CFR in VN 8-11-2021.xlsx")
df$Region = factor(df$Region, levels=c("Nam", "Trung", "Bắc"))
# EB estimate
k = length(df$Prov)
df$Rate = df$Cases/df$Pop
sumPop = sum(df$Pop)
sumCases = sum(df$Cases)
mu = sumCases / sumPop
pav = sumPop / k
df$v0 = df$Pop*(df$Rate-mu)^2
df$v = sum(df$v0)/sumPop - mu/pav
df$w = df$v/(df$v + mu/df$Pop)
df$adj.Rate = df$w*df$Rate + (1-df$w)*mu
# Number of cases
ggplot(data=df, aes(x=reorder(Prov, Cases), y=log(Cases), fill=Prov)) + geom_bar(stat="identity") + coord_flip() + geom_text(aes(label=Cases), hjust=1, size=3, color="white") + theme(legend.position="none") + labs(x=" ", y="Số ca (log)", title="Số ca nhiễm")
# Rate per 100 population
p1 = ggplot(data=df, aes(x=reorder(Prov, Rate), y=Rate*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ nhiễm trên 100 dân số", title="Tỉ lệ nhiễm tính theo dữ liệu thô")
p2 = ggplot(data=df, aes(x=reorder(Prov, adj.Rate), y=adj.Rate*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ nhiễm trên 100 dân số", title="Tỉ lệ nhiễm tính theo phương pháp Empirical Bayes")
grid.arrange(p1, p2, ncol=2)
ggplot(data=df, aes(x=Region, y=Full.vacc*100, col=Region)) + geom_jitter() + stat_summary(fun=median, fun.min= median,fun.max= median, geom="crossbar", width=0.5, color="purple") + labs(x="", y="Tỉ lệ tiêm vaccine 2 liều") + theme(legend.position="none")
# Thêm Pr
ggplot(data=df, aes(x=Region, y=Full.vacc*100, col=Region)) + geom_jitter() + geom_label_repel(aes(label = Pr)) + stat_summary(fun=median, fun.min= median,fun.max= median, geom="crossbar", width=0.5, color="purple") + labs(x="", y="Tỉ lệ tiêm vaccine 2 liều") + theme(legend.position="none")
df = read_excel("~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/CFR in VN 8-11-2021.xlsx")
df$New.cases = df$Cases-df$Cases.2809
df$New.deaths = df$Deaths-df$Deaths.2809
df$Mort1 = df$New.deaths / df$Cases.2809*100
df$Mort2 = df$New.deaths / df$New.cases*100
# Excluding TPHCM and EB analysis
df0 = df %>% filter(!Prov %in% c("TP. Hồ Chí Minh"))
df0$cfr = df0$Deaths/df0$Cases
k = length(df0$Prov)
sumCases = sum(df0$Cases)
sumDeaths = sum(df0$Deaths)
mu = sumDeaths / sumCases
pav = sumCases / k
df0$v0 = df0$Cases*(df0$cfr-mu)^2
df0$v = sum(df0$v0)/sumCases - mu/pav
df0$w = df0$v/(df0$v + mu/df0$Cases)
df0$adj.CFR = df0$w*df0$cfr + (1-df0$w)*mu
df0$d.cfr = df0$cfr-df0$adj.CFR
df0$Exp.Deaths = round(df0$Cases*df0$adj.CFR)
df0$Deviation = df0$Deaths-df0$Exp.Deaths
df0[, c("Prov", "cfr", "adj.CFR", "d.cfr", "Deaths", "Exp.Deaths", "Deviation")]
## # A tibble: 61 x 7
## Prov cfr adj.CFR d.cfr Deaths Exp.Deaths Deviation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 An Giang 0.0116 0.0115 0.0000870 156 155 1
## 2 Bà Rịa - Vũng Tàu 0.00958 0.00961 -0.0000280 53 53 0
## 3 Bắc Giang 0.00208 0.00285 -0.000771 13 18 -5
## 4 Bắc Kạn 0 0.00970 -0.00970 0 0 0
## 5 Bạc Liêu 0.00920 0.00927 -0.0000747 48 48 0
## 6 Bắc Ninh 0.00606 0.00688 -0.000824 15 17 -2
## 7 Bến Tre 0.0188 0.0170 0.00176 53 48 5
## 8 Bình Định 0.00964 0.00969 -0.0000528 18 18 0
## 9 Bình Dương 0.00879 0.00879 -0.00000255 2494 2495 -1
## 10 Bình Phước 0.00648 0.00725 -0.000771 15 17 -2
## # … with 51 more rows
df1 = as.data.frame(df0)
# Observed CFR
p1 = ggplot(data=df1, aes(x=reorder(Prov, CFR), y=CFR, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ tử vong (CFR, %)", title="Tỉ lệ tính theo dữ liệu thô")
# EB estimate of CFR
p2 = ggplot(data=df1, aes(x=reorder(Prov, CFR), y=adj.CFR*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ tử vong (CFR, %)", title="Tỉ lệ tính theo phương pháp Empirical Bayes")
grid.arrange(p1, p2, ncol=2)
north = subset(df0, Region=="Bắc")
cfr = sum(north$Deaths)/sum(north$Cases)
north$exp = north$Cases*cfr
test = (north$Deaths-north$exp)^2/north$exp
chisq= sum(test)
pchisq(chisq, df=27, lower.tail=FALSE)
## [1] 3.761014e-05
p = ggplot(data=north, aes(x=exp, y=Deaths, label=Pr, col=Pr)) + geom_point() + geom_label_repel(aes(label = Pr))
p + scale_x_continuous(breaks = seq(from = 0, to = 50, by = 10), limits=c(0, 50)) + scale_y_continuous(breaks = seq(from = 0, to = 50, by = 10), limits=c(0, 50)) + geom_abline(intercept = 0, slope = 1, size = 0.5, lty=2) + theme(legend.position="none") + labs(x="Số tử vong kì vọng", y="Số tử vong ghi nhận trong thực tế")