library(ggplot2); library(readxl); library(ggrepel); library(tidyverse); library(gridExtra); library(ggthemes)
## Warning: package 'ggplot2' was built under R version 4.0.2
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.2     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## Warning: package 'tibble' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'dplyr' was built under R version 4.0.2
## Warning: package 'stringr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Load the province-level spreadsheet (the file name suggests a snapshot dated
# 28-9-2021).
df <- read_excel(
  "~/Dropbox/Bao chi/Khoa hoc & Y te/Mortality analysis VN/CFR in VN 28-9-2021.xlsx"
)

# Analysis subset: keep rows with log(Cases) above 4.9 and at least one death.
# Rows for which either condition evaluates to NA are dropped as well.
df1 <- filter(df, log(Cases) > 4.9, Deaths > 0)

Số ca tử vong và số ca nhiễm

# Scatter of log(Deaths) against log(Cases) for every row of `df`. Each point
# is coloured and labelled by `Pr`; labels are repelled and the legend hidden.
ggplot(df, aes(x = log(Cases), y = log(Deaths), colour = Pr, label = Pr)) +
  geom_point() +
  geom_text_repel() +
  labs(x = "Số ca (log)", y = "Số tử vong  (log)") +
  theme(legend.position = "none")

Mật độ dân số và CFR

# Scatter of CFR against log(Density) for every row of `df`. Each point is
# coloured and labelled by `Pr`; labels are repelled and the legend hidden.
ggplot(df, aes(x = log(Density), y = CFR, colour = Pr, label = Pr)) +
  geom_point() +
  geom_text_repel() +
  labs(x = "Mật độ dân số (log)", y = "Tỉ lệ tử vong (CFR)") +
  theme(legend.position = "none")

# Scatter of log(Cases) against log(Density) for every row of `df`. Each point
# is coloured and labelled by `Pr`; labels are repelled and the legend hidden.
ggplot(df, aes(x = log(Density), y = log(Cases), colour = Pr, label = Pr)) +
  geom_point() +
  geom_text_repel() +
  labs(x = "Mật độ dân số (log)", y = "Số ca (log)") +
  theme(legend.position = "none")

Số ca tử vong và số ca nhiễm (chỉ các tỉnh có log(số ca) > 4.9 và có ít nhất một ca tử vong)

# Same log-log deaths/cases scatter, restricted to the filtered `df1`, with a
# linear-model smooth drawn over the points. Only the repelled labels are
# coloured by `Pr`; the points and the fitted line use the default colour.
ggplot(df1, aes(x = log(Cases), y = log(Deaths), label = Pr)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text_repel(aes(colour = Pr)) +
  labs(x = "Số ca (log)", y = "Số tử vong  (log)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

Poisson regression

# Poisson model of death counts by province (`Prov`) with log(Cases) as an
# offset, so the linear predictor is on the log scale of deaths per case.
# NOTE(review): the summary printed for this fit reports 0 residual degrees of
# freedom and all-zero deviance residuals, i.e. the model is saturated. Confirm
# that a per-province factor is really the intended predictor.
p1 <- glm(
  formula = Deaths ~ Prov + offset(log(Cases)),
  family = poisson(link = "log"),
  data = df1
)
summary(p1)
## 
## Call:
## glm(formula = Deaths ~ Prov + offset(log(Cases)), family = poisson(link = "log"), 
##     data = df1)
## 
## Deviance Residuals: 
##  [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [26]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -5.18448    0.20000 -25.922  < 2e-16 ***
## ProvBà Rịa - Vũng Tàu  0.64978    0.24847   2.615  0.00892 ** 
## ProvBắc Giang         -0.93091    0.34194  -2.722  0.00648 ** 
## ProvBạc Liêu          -0.14340    0.73485  -0.195  0.84528    
## ProvBắc Ninh           0.34187    0.32660   1.047  0.29522    
## ProvBến Tre            1.51154    0.24578   6.150 7.75e-10 ***
## ProvBình Định          0.54813    0.35119   1.561  0.11857    
## ProvBình Dương         0.49765    0.20133   2.472  0.01344 *  
## ProvBình Phước        -0.07227    0.42762  -0.169  0.86578    
## ProvBình Thuận         1.19645    0.23925   5.001 5.71e-07 ***
## ProvCà Mau             0.70998    0.53852   1.318  0.18737    
## ProvCần Thơ            1.05156    0.22635   4.646 3.39e-06 ***
## ProvĐà Nẵng            1.19309    0.22361   5.336 9.52e-08 ***
## ProvĐắk Lắk           -0.27511    0.42762  -0.643  0.52000    
## ProvĐắk Nông          -1.37938    1.01980  -1.353  0.17618    
## ProvĐồng Nai           0.39185    0.20641   1.898  0.05764 .  
## ProvĐồng Tháp          1.14203    0.21667   5.271 1.36e-07 ***
## ProvGia Lai           -0.47326    0.73485  -0.644  0.51956    
## ProvHà Nội             0.71362    0.24415   2.923  0.00347 ** 
## ProvHà Tĩnh            0.65403    0.48990   1.335  0.18187    
## ProvHải Dương         -1.67933    1.01980  -1.647  0.09961 .  
## ProvHậu Giang         -0.39147    0.73485  -0.533  0.59422    
## ProvHưng Yên          -0.70993    1.01980  -0.696  0.48634    
## ProvKhánh Hòa          0.70688    0.22635   3.123  0.00179 ** 
## ProvKiên Giang         0.43107    0.24754   1.741  0.08161 .  
## ProvLạng Sơn          -0.18616    1.01980  -0.183  0.85515    
## ProvLong An            0.80917    0.20611   3.926 8.64e-05 ***
## ProvNghệ An            0.44773    0.32016   1.398  0.16197    
## ProvNinh Thuận         0.43846    0.40620   1.079  0.28041    
## ProvPhú Yên            0.68271    0.26346   2.591  0.00956 ** 
## ProvQuảng Bình        -1.47482    0.73485  -2.007  0.04475 *  
## ProvQuảng Nam          0.57308    0.40620   1.411  0.15830    
## ProvQuảng Trị         -0.15786    1.01980  -0.155  0.87698    
## ProvSóc Trăng          1.19457    0.30000   3.982 6.84e-05 ***
## ProvTây Ninh           1.11384    0.21799   5.110 3.23e-07 ***
## ProvThanh Hóa          0.37433    0.53852   0.695  0.48699    
## ProvThừa Thiên Huế     0.87185    0.36181   2.410  0.01597 *  
## ProvTiền Giang         1.48069    0.20737   7.140 9.31e-13 ***
## ProvTP. Hồ Chí Minh    1.92827    0.20017   9.633  < 2e-16 ***
## ProvTrà Vinh           0.73150    0.31436   2.327  0.01997 *  
## ProvVĩnh Long          1.25631    0.25151   4.995 5.88e-07 ***
## ProvVĩnh Phúc          0.73791    0.61101   1.208  0.22717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance:  7.0311e+03  on 41  degrees of freedom
## Residual deviance: -4.4675e-13  on  0  degrees of freedom
## AIC: 286.17
## 
## Number of Fisher Scoring iterations: 3