library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(haven)
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Attaching package: 'rms'
##
## The following object is masked from 'package:lmtest':
##
## lrtest
##
## The following objects are masked from 'package:car':
##
## Predict, vif
B24 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B24_Ho_Muc6A1.dta") %>%
dplyr::select(MATINH, IDHO, M6A_MA) %>%
filter(MATINH %in% c("62", "64", "66", "67", "68"))
B26 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B26_Ho_ThanhVien.dta") %>%
dplyr::select(MATINH, IDHO, M2_C2A, M1A_C5, M2_C3, M3A_C7, M1A_C10, M2_C1, M1A_C3) %>%
filter(MATINH %in% c("62", "64", "66", "67", "68"))
B27 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B27_Ho_ThongTinHo.dta") %>%
dplyr::select(MATINH, IDHO, M3A_C1, M7_C4D, M7_C2, SONHANKHAU, M7_C13A, M7_C15, THUBQ, GIOITINHCHUHO, DANTOCCHUHO,TTNT) %>%
filter(MATINH %in% c("62", "64", "66", "67", "68"))
B24 <- B24 %>%
rename(
taisan=M6A_MA
)
B26 <- B26 %>%
rename(
trinhdogiaoduc=M2_C2A,
tuoi=M1A_C5,
hiencodihoc=M2_C3,
cothebaohiem=M3A_C7,
cosudunginternet=M1A_C10,
hochetlopmay=M2_C1,
quanhevoichuho=M1A_C3
)
B27 <- B27 %>%
rename(
tiepcanyte=M3A_C1,
loainha=M7_C4D,
tongdientichnha=M7_C2,
sonhankhau=SONHANKHAU,
nguonnuoc=M7_C13A,
loaihoxi=M7_C15,
thubq=THUBQ,
gioitinhchuho=GIOITINHCHUHO,
dantocchuho=DANTOCCHUHO,
thanhthinongthon=TTNT
)
Trình độ giáo dục thấp nhất của hộ (chỉ tính người từ 15 tuổi trở lên) ? Không nếu chưa tốt nghiệp cấp 2, Có nếu ngược lại
2022: M2_C2A ()
B26_trinhdogiaoduc <- B26 %>%
mutate(trinhdogiaoduc = case_when(
tuoi >= 15 & trinhdogiaoduc < 2 ~ 0,
tuoi >= 15 & trinhdogiaoduc >= 2 ~ 1,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
trinhdogiaoduc = ifelse(any(trinhdogiaoduc == 0| is.na(trinhdogiaoduc)), 0, 1)
)
Trẻ em từ 5 -15 tuổi có đang đi học không? Không - nếu có ít nhất 1 trẻ em không đi học, Ngược lại là Có
2022: M2_C3 ()
B26_hiencodihoc <- B26 %>%
mutate(hiencodihoc = case_when(
tuoi >= 5 & tuoi <= 15 & (hiencodihoc == 1 | hiencodihoc == 2) ~ 1,
tuoi >= 5 & tuoi <= 15 & hiencodihoc == 3 ~ 0,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
hiencodihoc = ifelse(any(hiencodihoc == 0| is.na(hiencodihoc)), 0, 1)
)
Có tiếp cận dịch vụ y tế không? Không - Nếu có thành viên có bệnh nặng nhưng không có tới y tế - Ngược lại là Có
2022: M3A_C1 ()
B27 <- B27 %>%
mutate(tiepcanyte = case_when(
tiepcanyte == 1 ~ 1,
tiepcanyte == 2 ~ 0,
TRUE ~ NA_real_
))
Có bảo hiểm y tế? Không - Nếu có 1 thành viên từ 6 tuổi nhưng không có bảo hiểm y tế/sức khỏe, Ngược lại là Có
2022: M3A_C7 ()
B26_cothebaohiem <- B26 %>%
mutate(cothebaohiem = case_when(
tuoi >= 6 & cothebaohiem == 1 ~ 1,
tuoi >= 6 & cothebaohiem == 2 ~ 0,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
cothebaohiem = ifelse(any(cothebaohiem == 0| is.na(cothebaohiem)), 0, 1)
)
Chất lượng/Loại nhà ở? Không nếu nhà tạm/dột nát - Có nếu nhà từ kiên cố trở lên
2022: M7_C4D ()
B27 <- B27 %>%
mutate(loainha = case_when(
loainha < 4 ~ 1,
loainha >= 4 ~ 0,
TRUE ~ NA_real_
))
Diện tích ở trung bình trên đầu người? Không nếu diện tích nhà ở trên đầu người < 8 m2; Có nếu ngược lại
2022: M7_C2 ()
B27 <- B27 %>%
mutate(dientichtrungbinh = tongdientichnha / sonhankhau) %>%
mutate(dientichtrungbinh = case_when(
dientichtrungbinh >= 8 ~ 1,
dientichtrungbinh < 8 ~ 0,
TRUE ~ NA_real_
))
Nguồn nước sinh hoạt có đủ vệ sinh không? Có; Không (giếng không được bảo vệ, nước suối không được bảo vệ, nước mưa, nước mua từ xe chở thô sơ, nước bề mặt, các nguồn nước khác)
2022: M7_C13A ()
B27 <- B27 %>%
mutate(nguonnuoc = case_when(
nguonnuoc = 7 | nguonnuoc >= 9 ~ 0,
nguonnuoc = 8 | nguonnuoc <= 6 ~ 1,
TRUE ~ NA_real_
))
Loại nhà vệ sinh? Có (Hợp) vệ sinh (tự hoại, thấm dội nước) hoặc Không hợp vệ sinh
2022: M7_C15 ()
B27 <- B27 %>%
mutate(loaihoxi = case_when(
loaihoxi < 6 ~ 1,
loaihoxi >= 6 ~ 0,
TRUE ~ NA_real_
))
Sử dụng điện thoại/ internet?
2022: M1A_C10 ()
B26_cosudunginternet <- B26 %>%
mutate(cosudunginternet = case_when(
cosudunginternet == 1 ~ 1,
cosudunginternet == 2 ~ 0,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
cosudunginternet = ifelse(any(cosudunginternet == 1), 1, first(na.omit(cosudunginternet))
))
Không có bất kỳ tài sản nào dưới đây: Tivi, Radio, Máy tính hoặc nghe đài phát thanh? Không nếu tất cả đồng thời là Không
B24 <- B24 %>%
mutate(taisan = case_when(
taisan %in% c(16, 17, 19, 21, 22, 23) ~ 1,
taisan %in% c(1:15, 18, 20, 24:37) ~ 0,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
taisan = ifelse(any(taisan == 1), 1, first(na.omit(taisan))
))
Thu nhập trung bình trên đầu người
2022: THUBQ ()
B27 <- B27 %>%
mutate(thunhapdaunguoi = thubq / sonhankhau)
Số năm đi học của Chủ hộ
2022: M2_C1 ()
B26_sonamdihoc_chuho <- B26 %>%
mutate(sonamdihoc_chuho = case_when(
quanhevoichuho == 1 ~ hochetlopmay,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
sonamdihoc_chuho = first(na.omit(sonamdihoc_chuho)))
Tuổi chủ hộ
2022: M1A_C5 ()
B26_tuoi_chuho <- B26 %>%
mutate(tuoi_chuho = case_when(
quanhevoichuho == 1 ~ tuoi,
TRUE ~ NA_real_
)) %>%
group_by(IDHO) %>%
summarise(
MATINH = first(MATINH),
tuoi_chuho = first(na.omit(tuoi_chuho)))
Giới tính chủ hộ
2022: GIOITINHCHUHO ()
B27 <- B27 %>%
mutate(gioitinhchuho = case_when(
gioitinhchuho == 1 ~ 1,
gioitinhchuho == 2 ~ 0,
TRUE ~ NA_real_
))
Dân tộc của chủ hộ
2022: DANTOCCHUHO
B27 <- B27 %>%
mutate(dantocchuho = case_when(
dantocchuho == 1 ~ 1,
dantocchuho > 1 ~ 0,
TRUE ~ NA_real_
))
Nơi ở của hộ: thành thị/ nông thôn, vùng-miền
2022: TTNT ()
B27 <- B27 %>%
mutate(ttnt = case_when(
thanhthinongthon == 1 ~ 1,
thanhthinongthon == 2 ~ 0,
TRUE ~ NA_real_
))
Bex <- readxl::read_xlsx ("D:\\STUDY\\HK5\\PTDLUONG\\DATA CK\\[PTĐL] Chỉ số vĩ mô (1).xlsx", sheet = "final")
B24$MATINH <- as.character(B24$MATINH)
B26_cosudunginternet$MATINH <- as.character(B26_cosudunginternet$MATINH)
B26_cothebaohiem$MATINH <- as.character(B26_cothebaohiem$MATINH)
B26_hiencodihoc$MATINH <- as.character(B26_hiencodihoc$MATINH)
B26_sonamdihoc_chuho$MATINH <- as.character(B26_sonamdihoc_chuho$MATINH)
B26_trinhdogiaoduc$MATINH <- as.character(B26_trinhdogiaoduc$MATINH)
B26_tuoi_chuho$MATINH <- as.character(B26_tuoi_chuho$MATINH)
B27$MATINH <- as.character(B27$MATINH)
Bex$MATINH <- as.character(Bex$MATINH)
Datafinal <- B27 %>%
left_join(Bex, by = "MATINH") %>%
left_join(B24, by = c("MATINH", "IDHO")) %>%
left_join(B26_cosudunginternet, by = c("MATINH", "IDHO")) %>%
left_join(B26_cothebaohiem, by = c("MATINH", "IDHO")) %>%
left_join(B26_hiencodihoc, by = c("MATINH", "IDHO")) %>%
left_join(B26_sonamdihoc_chuho, by = c("MATINH", "IDHO")) %>%
left_join(B26_trinhdogiaoduc, by = c("MATINH", "IDHO")) %>%
left_join(B26_tuoi_chuho, by = c("MATINH", "IDHO"))
Biến phụ thuộc: ngheodachieu
Biến y tế: tiepcanyte, cothebaohiem
Biến giáo dục: trinhdogiaoduc, hiencodihoc
Biến nhà ở: loainha, dientichtrungbinh
Biến vệ sinh: nguonnuoc, loaihoxi
Biến tiếp cận thông tin: cosudunginternet, taisan
Biến đặc điểm thu nhập hộ: thunhapdaunguoi
Biến đặc trưng nhân khẩu học: sonamdihoc_chuho, tuoi_chuho, gioitinhchuho, dantocchuho, ttnt
Biến vĩ mô: thatnghiep, thunhapbq, chatluongnguonnl, lamphat
Datafinal<- Datafinal %>%
mutate(GD=ifelse(trinhdogiaoduc==0 & hiencodihoc==0,0,
ifelse(trinhdogiaoduc==1 & hiencodihoc==0,1,
ifelse(trinhdogiaoduc==0 & hiencodihoc ==1,2,
ifelse(trinhdogiaoduc==1 & hiencodihoc ==1,3,NA)))))
Datafinal<- Datafinal %>%
mutate(YTE=ifelse(tiepcanyte==0 & cothebaohiem==0,0,
ifelse(tiepcanyte==1 & cothebaohiem==0,1,
ifelse(tiepcanyte==0 & cothebaohiem ==1,2,
ifelse(tiepcanyte==1 & cothebaohiem ==1,3,NA)))))
Datafinal<- Datafinal %>%
mutate(NHA=ifelse(loainha==0 & dientichtrungbinh==0,0,
ifelse(loainha==1 & dientichtrungbinh==0,1,
ifelse(loainha==0 & dientichtrungbinh ==1,2,
ifelse(loainha==1 & dientichtrungbinh ==1,3,NA)))))
Datafinal<- Datafinal %>%
mutate(VS=ifelse(nguonnuoc==0 & loaihoxi==0,0,
ifelse(nguonnuoc==1 & loaihoxi==0,1,
ifelse(nguonnuoc==0 & loaihoxi ==1,2,
ifelse(nguonnuoc==1 & loaihoxi ==1,3,NA)))))
Datafinal<- Datafinal %>%
mutate(TT=ifelse(cosudunginternet==0 & taisan==0,0,
ifelse(cosudunginternet==1 & taisan==0,1,
ifelse(cosudunginternet==0 & taisan ==1,2,
ifelse(cosudunginternet==1 & taisan ==1,3,NA)))))
Datafinal <- Datafinal %>%
rowwise() %>%
mutate(
sobienbang0 = sum(c_across(c(GD, YTE, NHA, VS, TT)) == 0),
ngheodc = case_when(
sobienbang0 %in% c(2, 3, 4) ~ 1,
sobienbang0 %in% c(0, 1) ~ 0,
TRUE ~ NA_real_))
Datafinal %>%
dplyr::select(ngheodachieu, GD, YTE, NHA, VS, TT, sonamdihoc_chuho, tuoi_chuho, gioitinhchuho, dantocchuho, thanhthinongthon, thunhapdaunguoi, thatnghiep, thunhapbq, chatluongnguonnl, lamphat) %>%
summary()
## ngheodachieu GD YTE NHA
## Min. : 3.61 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.: 8.18 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:2.000
## Median : 9.74 Median :0.0000 Median :2.000 Median :2.000
## Mean :11.32 Mean :0.1582 Mean :1.551 Mean :2.111
## 3rd Qu.:18.10 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :19.26 Max. :1.0000 Max. :3.000 Max. :3.000
##
## VS TT sonamdihoc_chuho tuoi_chuho
## Min. :0.000 Min. :0.00 Min. : 0.00 Min. :22.00
## 1st Qu.:2.000 1st Qu.:3.00 1st Qu.: 6.00 1st Qu.:40.00
## Median :2.000 Median :3.00 Median : 9.00 Median :50.00
## Mean :1.687 Mean :2.73 Mean :13.66 Mean :50.07
## 3rd Qu.:2.000 3rd Qu.:3.00 3rd Qu.:12.00 3rd Qu.:59.00
## Max. :2.000 Max. :3.00 Max. :99.00 Max. :91.00
## NA's :26
## gioitinhchuho dantocchuho thanhthinongthon thunhapdaunguoi
## Min. :0.0000 Min. :0.0000 Min. :1.000 Min. : 30.4
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.: 393.7
## Median :1.0000 Median :1.0000 Median :2.000 Median : 818.2
## Mean :0.7588 Mean :0.7281 Mean :1.696 Mean : 1289.0
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.: 1464.2
## Max. :1.0000 Max. :1.0000 Max. :2.000 Max. :56904.0
##
## thatnghiep thunhapbq chatluongnguonnl lamphat
## Min. :0.520 Min. :2.567 Min. :14.70 Min. :2.98
## 1st Qu.:0.640 1st Qu.:2.927 1st Qu.:15.30 1st Qu.:2.99
## Median :0.650 Median :3.138 Median :17.60 Median :3.48
## Mean :0.664 Mean :3.286 Mean :17.66 Mean :3.47
## 3rd Qu.:0.810 3rd Qu.:3.238 3rd Qu.:18.00 3rd Qu.:3.88
## Max. :0.810 Max. :4.428 Max. :21.80 Max. :3.88
##
library(flextable)
## Warning: package 'flextable' was built under R version 4.4.2
##
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
##
## compose
# Tỷ lệ % hộ nghèo đa chiều và cường độ thiếu hụt theo mã tỉnh
Datafinal %>%
group_by(MATINH) %>%
summarise(
ty_le_ho_ngheo = mean(ngheodachieu, na.rm = TRUE),
cuong_do_thieu_hut_tb = mean(sobienbang0, na.rm = TRUE) # Cường độ thiếu hụt trung bình
) %>%
arrange(desc(ty_le_ho_ngheo)) %>% # Sắp xếp theo tỷ lệ hộ nghèo giảm dần
flextable()
MATINH | ty_le_ho_ngheo | cuong_do_thieu_hut_tb |
|---|---|---|
62 | 19.26 | 1.3092784 |
64 | 18.10 | 1.5954198 |
67 | 9.74 | 1.3367347 |
66 | 8.18 | 1.2264151 |
68 | 3.61 | 0.9928571 |
62: Kon Tum 64: Gia Lai 67: Đắk Nông 66: Đắk Lắk 68: Lâm Đồng
logit1 <- glm(ngheodc ~ GD+YTE+NHA+VS+TT+ thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat,
family = binomial(link = "logit"),
data = Datafinal)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logit1)
##
## Call:
## glm(formula = ngheodc ~ GD + YTE + NHA + VS + TT + thunhapdaunguoi +
## sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho +
## thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl +
## lamphat, family = binomial(link = "logit"), data = Datafinal)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.478e+01 2.119e+03 0.026 0.979375
## GD -4.203e+01 3.550e+03 -0.012 0.990552
## YTE -5.614e+00 6.988e-01 -8.034 9.40e-16 ***
## NHA -2.445e+00 6.630e-01 -3.688 0.000226 ***
## VS -1.661e+01 1.059e+03 -0.016 0.987489
## TT -3.274e+00 5.907e-01 -5.542 2.98e-08 ***
## thunhapdaunguoi -1.267e-05 2.039e-04 -0.062 0.950476
## sonamdihoc_chuho 9.490e-03 1.246e-02 0.762 0.446157
## tuoi_chuho 1.514e-02 2.081e-02 0.728 0.466833
## gioitinhchuho 1.287e-02 6.535e-01 0.020 0.984281
## dantocchuho 1.170e+00 8.239e-01 1.420 0.155670
## thanhthinongthon -6.816e-01 5.993e-01 -1.137 0.255402
## thatnghiep -1.943e+00 5.665e+00 -0.343 0.731607
## thunhapbq 7.317e-01 1.769e+00 0.414 0.679129
## chatluongnguonnl -3.318e-01 3.555e-01 -0.933 0.350588
## lamphat 2.137e-01 1.951e+00 0.110 0.912779
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 788.04 on 624 degrees of freedom
## Residual deviance: 104.68 on 609 degrees of freedom
## (26 observations deleted due to missingness)
## AIC: 136.68
##
## Number of Fisher Scoring iterations: 21
probit1 <- glm(ngheodc ~ GD+YTE+NHA+VS+TT+ thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat,
family = binomial(link = "probit"),
data = Datafinal)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(probit1)
##
## Call:
## glm(formula = ngheodc ~ GD + YTE + NHA + VS + TT + thunhapdaunguoi +
## sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho +
## thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl +
## lamphat, family = binomial(link = "probit"), data = Datafinal)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.553e+01 2.696e+03 0.009 0.992
## GD -1.608e+01 4.535e+03 -0.004 0.997
## YTE -2.946e+00 3.154e-01 -9.340 < 2e-16 ***
## NHA -1.329e+00 3.101e-01 -4.285 1.83e-05 ***
## VS -7.118e+00 1.348e+03 -0.005 0.996
## TT -1.703e+00 2.861e-01 -5.952 2.64e-09 ***
## thunhapdaunguoi 3.992e-07 1.073e-04 0.004 0.997
## sonamdihoc_chuho 7.087e-03 6.522e-03 1.087 0.277
## tuoi_chuho 9.240e-03 1.055e-02 0.875 0.381
## gioitinhchuho 4.507e-02 3.242e-01 0.139 0.889
## dantocchuho 5.766e-01 3.779e-01 1.526 0.127
## thanhthinongthon -4.211e-01 2.968e-01 -1.419 0.156
## thatnghiep -7.014e-01 2.765e+00 -0.254 0.800
## thunhapbq 4.479e-01 8.749e-01 0.512 0.609
## chatluongnguonnl -1.954e-01 1.755e-01 -1.113 0.266
## lamphat 1.216e-01 9.593e-01 0.127 0.899
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 788.04 on 624 degrees of freedom
## Residual deviance: 108.75 on 609 degrees of freedom
## (26 observations deleted due to missingness)
## AIC: 140.75
##
## Number of Fisher Scoring iterations: 25
Models <- list(logit1,probit1)
modelsummary::modelsummary(Models,
statistic = c('{statistic} '),
stars = c("*"=0.1, "**"=0.05, "***"=0.01))
| (1) | (2) | |
|---|---|---|
| * p < 0.1, ** p < 0.05, *** p < 0.01 | ||
| (Intercept) | 54.779 | 25.530 |
| 0.026 | 0.009 | |
| GD | -42.034 | -16.079 |
| -0.012 | -0.004 | |
| YTE | -5.614*** | -2.946*** |
| -8.034 | -9.340 | |
| NHA | -2.445*** | -1.329*** |
| -3.688 | -4.285 | |
| VS | -16.613 | -7.118 |
| -0.016 | -0.005 | |
| TT | -3.274*** | -1.703*** |
| -5.542 | -5.952 | |
| thunhapdaunguoi | 0.000 | 0.000 |
| -0.062 | 0.004 | |
| sonamdihoc_chuho | 0.009 | 0.007 |
| 0.762 | 1.087 | |
| tuoi_chuho | 0.015 | 0.009 |
| 0.728 | 0.875 | |
| gioitinhchuho | 0.013 | 0.045 |
| 0.020 | 0.139 | |
| dantocchuho | 1.170 | 0.577 |
| 1.420 | 1.526 | |
| thanhthinongthon | -0.682 | -0.421 |
| -1.137 | -1.419 | |
| thatnghiep | -1.943 | -0.701 |
| -0.343 | -0.254 | |
| thunhapbq | 0.732 | 0.448 |
| 0.414 | 0.512 | |
| chatluongnguonnl | -0.332 | -0.195 |
| -0.933 | -1.113 | |
| lamphat | 0.214 | 0.122 |
| 0.110 | 0.127 | |
| Num.Obs. | 625 | 625 |
| AIC | 136.7 | 140.8 |
| BIC | 207.7 | 211.8 |
| Log.Lik. | -52.340 | -54.376 |
| F | 4.820 | 6.338 |
| RMSE | 0.14 | 0.15 |
Kiểm định quan trọng (sống/chết) của mô hình.
Nếu kiểm định này không có ý nghĩa thông kê ==> tốt
Nếu có ý nghĩa thống kê thì mô hình chưa tốt (cần xem xét lại: đặc trưng dữ liệu, các biến trong mô hình, mô hình lý thuyết)
performance::performance_hosmer(logit1, n_bins=2)
## # Hosmer-Lemeshow Goodness-of-Fit Test
##
## Chi-squared: 0.065
## df: 0
## p-value: 0.000
## Summary: model does not fit well.
performance::performance_hosmer(probit1, n_bins=2)
## # Hosmer-Lemeshow Goodness-of-Fit Test
##
## Chi-squared: 0.010
## df: 0
## p-value: 0.000
## Summary: model does not fit well.
logit.res <- rms::lrm(logit, data = Datafinal, y = TRUE, x = TRUE, linear.predictors = TRUE) residuals(logit.res, type = “gof”)
cbind(Estimate=round(coef(logit1),4),
OR=round(exp(coef(logit1)),4))
## Estimate OR
## (Intercept) 54.7793 6.170716e+23
## GD -42.0341 0.000000e+00
## YTE -5.6143 3.600000e-03
## NHA -2.4452 8.670000e-02
## VS -16.6129 0.000000e+00
## TT -3.2738 3.790000e-02
## thunhapdaunguoi 0.0000 1.000000e+00
## sonamdihoc_chuho 0.0095 1.009500e+00
## tuoi_chuho 0.0151 1.015300e+00
## gioitinhchuho 0.0129 1.013000e+00
## dantocchuho 1.1698 3.221200e+00
## thanhthinongthon -0.6816 5.058000e-01
## thatnghiep -1.9431 1.433000e-01
## thunhapbq 0.7317 2.078600e+00
## chatluongnguonnl -0.3318 7.176000e-01
## lamphat 0.2137 1.238300e+00
exp(cbind(OR = coef(logit1), confint(logit1)))
## Waiting for profiling to be done...
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## OR 2.5 % 97.5 %
## (Intercept) 6.170716e+23 1.214329e-09 Inf
## GD 5.556758e-19 0.000000e+00 2.417079e+55
## YTE 3.645392e-03 7.520513e-04 1.221021e-02
## NHA 8.671086e-02 1.965258e-02 2.711067e-01
## VS 6.096726e-08 2.275743e-190 6.794539e-29
## TT 3.786124e-02 1.051894e-02 1.102660e-01
## thunhapdaunguoi 9.999873e-01 9.995036e-01 1.000364e+00
## sonamdihoc_chuho 1.009535e+00 9.843764e-01 1.034557e+00
## tuoi_chuho 1.015254e+00 9.748477e-01 1.058634e+00
## gioitinhchuho 1.012958e+00 2.766254e-01 3.671994e+00
## dantocchuho 3.221189e+00 6.975420e-01 1.865457e+01
## thanhthinongthon 5.058174e-01 1.518720e-01 1.638473e+00
## thatnghiep 1.432550e-01 1.559268e-06 9.159186e+03
## thunhapbq 2.078643e+00 6.564700e-02 7.234190e+01
## chatluongnguonnl 7.176186e-01 3.537424e-01 1.434139e+00
## lamphat 1.238292e+00 2.596061e-02 6.032981e+01
Khi so sánh các mô hình cần xem xét các chỉ số AIC, BIC, RMSE và khả năng ước tính của mô hình, đặc biệt khi xem xét lựa chọn tiếp cận logit hay probit
Theo đó, AIC, BIC, RMSE càng thấp càng tốt, khả năng dự báo đúng càng cao càng tốt
invlogit = function (x) {1/(1+exp(-x))}
invlogit(coef(logit1)[1]+
coef(logit1)[2]*mean(Datafinal$GD)+
coef(logit1)[3]*mean(Datafinal$YTE)+
coef(logit1)[4]*mean(Datafinal$NHA)+
coef(logit1)[4]*mean(Datafinal$VS)+
coef(logit1)[4]*mean(Datafinal$TT)+
coef(logit1)[4]*mean(Datafinal$thunhapdaunguoi)+
coef(logit1)[4]*mean(Datafinal$sonamdihoc_chuho)+
coef(logit1)[4]*mean(Datafinal$tuoi_chuho)+
coef(logit1)[4]*mean(Datafinal$gioitinhchuho)+
coef(logit1)[4]*mean(Datafinal$dantocchuho)+
coef(logit1)[4]*mean(Datafinal$thanhthinongthon)+
coef(logit1)[4]*mean(Datafinal$thatnghiep)+
coef(logit1)[4]*mean(Datafinal$thunhapbq)+
coef(logit1)[4]*mean(Datafinal$chatluongnguonnl)+
coef(logit1)[4]*mean(Datafinal$lamphat))
## (Intercept)
## NA
models <- list(logit1,probit1)
modelsummary::modelsummary(models,
statistic = c('{statistic} '),
stars = c("*"=0.1, "**"=0.05, "***"=0.01))
| (1) | (2) | |
|---|---|---|
| * p < 0.1, ** p < 0.05, *** p < 0.01 | ||
| (Intercept) | 54.779 | 25.530 |
| 0.026 | 0.009 | |
| GD | -42.034 | -16.079 |
| -0.012 | -0.004 | |
| YTE | -5.614*** | -2.946*** |
| -8.034 | -9.340 | |
| NHA | -2.445*** | -1.329*** |
| -3.688 | -4.285 | |
| VS | -16.613 | -7.118 |
| -0.016 | -0.005 | |
| TT | -3.274*** | -1.703*** |
| -5.542 | -5.952 | |
| thunhapdaunguoi | 0.000 | 0.000 |
| -0.062 | 0.004 | |
| sonamdihoc_chuho | 0.009 | 0.007 |
| 0.762 | 1.087 | |
| tuoi_chuho | 0.015 | 0.009 |
| 0.728 | 0.875 | |
| gioitinhchuho | 0.013 | 0.045 |
| 0.020 | 0.139 | |
| dantocchuho | 1.170 | 0.577 |
| 1.420 | 1.526 | |
| thanhthinongthon | -0.682 | -0.421 |
| -1.137 | -1.419 | |
| thatnghiep | -1.943 | -0.701 |
| -0.343 | -0.254 | |
| thunhapbq | 0.732 | 0.448 |
| 0.414 | 0.512 | |
| chatluongnguonnl | -0.332 | -0.195 |
| -0.933 | -1.113 | |
| lamphat | 0.214 | 0.122 |
| 0.110 | 0.127 | |
| Num.Obs. | 625 | 625 |
| AIC | 136.7 | 140.8 |
| BIC | 207.7 | 211.8 |
| Log.Lik. | -52.340 | -54.376 |
| F | 4.820 | 6.338 |
| RMSE | 0.14 | 0.15 |
mfx::logitmfx(ngheodc ~ GD + YTE + NHA + VS + TT + + thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat, data = Datafinal)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## mfx::logitmfx(formula = ngheodc ~ GD + YTE + NHA + VS + TT +
## +thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho +
## dantocchuho + thanhthinongthon + thatnghiep + thunhapbq +
## chatluongnguonnl + lamphat, data = Datafinal)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## GD -4.6275e-01 7.5858e+01 -0.0061 0.9951
## YTE -5.4148e-03 2.4953e+00 -0.0022 0.9983
## NHA -2.3583e-03 1.0867e+00 -0.0022 0.9983
## VS -9.9999e-01 4.6332e-03 -215.8326 <2e-16 ***
## TT -3.1575e-03 1.4550e+00 -0.0022 0.9983
## thunhapdaunguoi -1.2215e-08 5.6324e-06 -0.0022 0.9983
## sonamdihoc_chuho 9.1528e-06 4.2178e-03 0.0022 0.9983
## tuoi_chuho 1.4601e-05 6.7285e-03 0.0022 0.9983
## gioitinhchuho 1.2376e-05 5.7374e-03 0.0022 0.9983
## dantocchuho 8.9744e-04 4.1365e-01 0.0022 0.9983
## thanhthinongthon -7.6065e-04 3.5039e-01 -0.0022 0.9983
## thatnghiep -1.8741e-03 8.6363e-01 -0.0022 0.9983
## thunhapbq 7.0571e-04 3.2521e-01 0.0022 0.9983
## chatluongnguonnl -3.2003e-04 1.4748e-01 -0.0022 0.9983
## lamphat 2.0614e-04 9.5012e-02 0.0022 0.9983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "GD" "VS" "gioitinhchuho" "dantocchuho"
## [5] "thanhthinongthon"
##1- Xác định xác suất theo từng quan sát từ mô hình
test.probs <-predict(logit1, Datafinal, type='response')
##2- Tạo biến dự báo (đặt giá trị 0)
Datafinal$pred.logit1 <- rep(0,length(test.probs))
##3- Chọn mức xác suất để xác định dự báo sang 1, mặc định là 0.5. Tùy theo lĩnh vực và hiểu biết trong lĩnh vực đó có thể điều chỉnh cho phù hợp.
Datafinal$pred.logit1[test.probs>=0.5] <- 1
##4- So sánh với giá trị thực: so sánh giữa biến ngheodc và biến dự báo pred.logit
Datafinal%>%
dplyr::select(ngheodc, pred.logit1) %>%
table() %>%
prop.table()* 100
## pred.logit1
## ngheodc 0 1
## 0 66.56 0.96
## 1 0.80 31.68
##5- Dùng lệnh test chi tiết Chuyển dữ liệu của các biến sang dạng factor (hiện ở dạng numeric 0.1)
Datafinal %>%
mutate(ngheodc=as.factor(ngheodc),pred.logit1=as.factor(pred.logit1))-> Datafinal
caret::confusionMatrix(Datafinal$ngheodc, Datafinal$pred.logit1)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 416 6
## 1 5 198
##
## Accuracy : 0.9824
## 95% CI : (0.9687, 0.9912)
## No Information Rate : 0.6736
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9599
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9881
## Specificity : 0.9706
## Pos Pred Value : 0.9858
## Neg Pred Value : 0.9754
## Prevalence : 0.6736
## Detection Rate : 0.6656
## Detection Prevalence : 0.6752
## Balanced Accuracy : 0.9794
##
## 'Positive' Class : 0
##