Loading libraries

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.2

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## Warning: package 'ggplot2' was built under R version 4.0.2

## Warning: package 'tibble' was built under R version 4.0.2

## Warning: package 'tidyr' was built under R version 4.0.2

## Warning: package 'readr' was built under R version 4.0.2

## Warning: package 'dplyr' was built under R version 4.0.2

## Warning: package 'stringr' was built under R version 4.0.2

## Warning: package 'forcats' was built under R version 4.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)
library(ggplot2)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(ggthemes)
library(ggrepel)
require(scales)

## Loading required package: scales

## Warning: package 'scales' was built under R version 4.0.2

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

require(MASS)

## Loading required package: MASS

## Warning: package 'MASS' was built under R version 4.0.2

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(gridExtra)

Empirical Bayes (EB) estimate of incidence of mortality is estimated by:

adj.rate_i = (w_i * y_i) + (1 - w_i)*mu

where

k = number of provinces = 62 r_i = number of deaths in the ith province p_i = number of cases y_i = r_i / p_i (case fatality rate) mu = sum(r_i) / sum(p_i) v_i = sum(p_i*(y_i-mu)^2) / sum(p_i) - mu / sum(p_i)/k w_i = v_i / (v_i + mu/p_i)

Ref: http://www.dpi.inpe.br/gilberto/tutorials/software/geoda/tutorials/w6_rates_slides.pdf

Vaccine data: https://vnexpress.net/covid-19/vaccine (9/11/2021)

Analysis of cases in relation to population

df = read_excel("~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/CFR in VN 8-11-2021.xlsx")

df$Region = factor(df$Region, levels=c("Nam", "Trung", "Bắc"))

# EB estimate 
k = length(df$Prov)
df$Rate = df$Cases/df$Pop
sumPop = sum(df$Pop)
sumCases = sum(df$Cases)
mu = sumCases / sumPop 
pav = sumPop / k
df$v0 = df$Pop*(df$Rate-mu)^2
df$v = sum(df$v0)/sumPop - mu/pav
df$w = df$v/(df$v + mu/df$Pop)
df$adj.Rate = df$w*df$Rate + (1-df$w)*mu

# Number of cases 
ggplot(data=df, aes(x=reorder(Prov, Cases), y=log(Cases), fill=Prov)) + geom_bar(stat="identity") + coord_flip() + geom_text(aes(label=Cases), hjust=1, size=3, color="white") + theme(legend.position="none") + labs(x=" ", y="Số ca (log)", title="Số ca nhiễm")

# Rate per 100 population 

p1 = ggplot(data=df, aes(x=reorder(Prov, Rate), y=Rate*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ nhiễm trên 100 dân số", title="Tỉ lệ nhiễm tính theo dữ liệu thô")

p2 = ggplot(data=df, aes(x=reorder(Prov, adj.Rate), y=adj.Rate*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ nhiễm trên 100 dân số", title="Tỉ lệ nhiễm tính theo phương pháp Empirical Bayes")

grid.arrange(p1, p2, ncol=2)

Vaccination rate

ggplot(data=df, aes(x=Region, y=Full.vacc*100, col=Region)) + geom_jitter() + stat_summary(fun=median, fun.min= median,fun.max= median, geom="crossbar", width=0.5, color="purple") + labs(x="", y="Tỉ lệ tiêm vaccine 2 liều") + theme(legend.position="none")

# Thêm Pr
ggplot(data=df, aes(x=Region, y=Full.vacc*100, col=Region)) + geom_jitter() + geom_label_repel(aes(label = Pr)) + stat_summary(fun=median, fun.min= median,fun.max= median, geom="crossbar", width=0.5, color="purple") + labs(x="", y="Tỉ lệ tiêm vaccine 2 liều") + theme(legend.position="none")

Analysis of mortality

df = read_excel("~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/CFR in VN 8-11-2021.xlsx")
df$New.cases = df$Cases-df$Cases.2809
df$New.deaths = df$Deaths-df$Deaths.2809
df$Mort1 = df$New.deaths / df$Cases.2809*100 
df$Mort2 = df$New.deaths / df$New.cases*100

# Excluding TPHCM and EB analysis
df0 = df %>% filter(!Prov %in% c("TP. Hồ Chí Minh"))
df0$cfr = df0$Deaths/df0$Cases
k = length(df0$Prov)
sumCases = sum(df0$Cases)
sumDeaths = sum(df0$Deaths)
mu = sumDeaths / sumCases
pav = sumCases / k

df0$v0 = df0$Cases*(df0$cfr-mu)^2
df0$v = sum(df0$v0)/sumCases - mu/pav
df0$w = df0$v/(df0$v + mu/df0$Cases)

df0$adj.CFR = df0$w*df0$cfr + (1-df0$w)*mu
df0$d.cfr = df0$cfr-df0$adj.CFR

df0$Exp.Deaths = round(df0$Cases*df0$adj.CFR) 
df0$Deviation = df0$Deaths-df0$Exp.Deaths

df0[, c("Prov", "cfr", "adj.CFR", "d.cfr", "Deaths", "Exp.Deaths", "Deviation")]

## # A tibble: 61 x 7
##    Prov                  cfr adj.CFR       d.cfr Deaths Exp.Deaths Deviation
##    <chr>               <dbl>   <dbl>       <dbl>  <dbl>      <dbl>     <dbl>
##  1 An Giang          0.0116  0.0115   0.0000870     156        155         1
##  2 Bà Rịa - Vũng Tàu 0.00958 0.00961 -0.0000280      53         53         0
##  3 Bắc Giang         0.00208 0.00285 -0.000771       13         18        -5
##  4 Bắc Kạn           0       0.00970 -0.00970         0          0         0
##  5 Bạc Liêu          0.00920 0.00927 -0.0000747      48         48         0
##  6 Bắc Ninh          0.00606 0.00688 -0.000824       15         17        -2
##  7 Bến Tre           0.0188  0.0170   0.00176        53         48         5
##  8 Bình Định         0.00964 0.00969 -0.0000528      18         18         0
##  9 Bình Dương        0.00879 0.00879 -0.00000255   2494       2495        -1
## 10 Bình Phước        0.00648 0.00725 -0.000771       15         17        -2
## # … with 51 more rows

df1 = as.data.frame(df0)

Plotting data

# Observed CFR 
p1 = ggplot(data=df1, aes(x=reorder(Prov, CFR), y=CFR, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ tử vong (CFR, %)", title="Tỉ lệ tính theo dữ liệu thô")

# EB estimate of CFR  
p2 = ggplot(data=df1, aes(x=reorder(Prov, CFR), y=adj.CFR*100, fill=Prov)) + geom_bar(stat="identity") + coord_flip() + theme(legend.position="none") + labs(x=" ", y="Tỉ lệ tử vong (CFR, %)", title="Tỉ lệ tính theo phương pháp Empirical Bayes")

grid.arrange(p1, p2, ncol=2)

Analysis of expected and observed deaths in the North

north = subset(df0, Region=="Bắc")
cfr = sum(north$Deaths)/sum(north$Cases)
north$exp = north$Cases*cfr
test = (north$Deaths-north$exp)^2/north$exp
chisq= sum(test)
pchisq(chisq, df=27, lower.tail=FALSE)

## [1] 3.761014e-05

p = ggplot(data=north, aes(x=exp, y=Deaths, label=Pr, col=Pr)) + geom_point() + geom_label_repel(aes(label = Pr))

p + scale_x_continuous(breaks = seq(from = 0, to = 50, by = 10), limits=c(0, 50)) + scale_y_continuous(breaks = seq(from = 0, to = 50, by = 10), limits=c(0, 50)) + geom_abline(intercept = 0, slope = 1, size = 0.5, lty=2) + theme(legend.position="none") + labs(x="Số tử vong kì vọng", y="Số tử vong ghi nhận trong thực tế")

Empirical Bayes analysis of covid mortality Vietnam

T Nguyen

08/11/2021