library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(tidyr)  
library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(plm)

## 
## Attaching package: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(haven)
library(rms)

## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Attaching package: 'rms'
## 
## The following object is masked from 'package:lmtest':
## 
##     lrtest
## 
## The following objects are masked from 'package:car':
## 
##     Predict, vif

Các biến vi mô

B24 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B24_Ho_Muc6A1.dta") %>%
  dplyr::select(MATINH, IDHO, M6A_MA) %>%
  filter(MATINH %in% c("62", "64", "66", "67", "68"))

B26 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B26_Ho_ThanhVien.dta") %>%
  dplyr::select(MATINH, IDHO, M2_C2A, M1A_C5, M2_C3, M3A_C7, M1A_C10, M2_C1, M1A_C3) %>%
  filter(MATINH %in% c("62", "64", "66", "67", "68"))

B27 <- haven::read_dta("D:\\STUDY\\HK5\\PTDLUONG\\DATA GK\\VHLSS2022_9k_hh_v1\\VHLSS2022_9k_hh_v1\\B27_Ho_ThongTinHo.dta") %>%
  dplyr::select(MATINH, IDHO, M3A_C1, M7_C4D, M7_C2, SONHANKHAU, M7_C13A, M7_C15, THUBQ, GIOITINHCHUHO, DANTOCCHUHO,TTNT) %>%
  filter(MATINH %in% c("62", "64", "66", "67", "68"))

B24 <- B24 %>%
  rename(
    taisan=M6A_MA
  )

B26 <- B26 %>%
  rename(
    trinhdogiaoduc=M2_C2A,
    tuoi=M1A_C5,
    hiencodihoc=M2_C3,
    cothebaohiem=M3A_C7,
    cosudunginternet=M1A_C10,
    hochetlopmay=M2_C1,
    quanhevoichuho=M1A_C3
  )

B27 <- B27 %>%
  rename(
    tiepcanyte=M3A_C1,
    loainha=M7_C4D,
    tongdientichnha=M7_C2,
    sonhankhau=SONHANKHAU,
    nguonnuoc=M7_C13A,
    loaihoxi=M7_C15,
    thubq=THUBQ,
    gioitinhchuho=GIOITINHCHUHO,
    dantocchuho=DANTOCCHUHO,
    thanhthinongthon=TTNT
  )

Giáo dục

Trình độ giáo dục thấp nhất của hộ (chỉ tính người từ 15 tuổi trở lên) ? Không nếu chưa tốt nghiệp cấp 2, Có nếu ngược lại

2022: M2_C2A ()

B26_trinhdogiaoduc <- B26 %>%
  mutate(trinhdogiaoduc = case_when(
    tuoi >= 15 & trinhdogiaoduc < 2 ~ 0,
    tuoi >= 15 & trinhdogiaoduc >= 2 ~ 1,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    trinhdogiaoduc = ifelse(any(trinhdogiaoduc == 0| is.na(trinhdogiaoduc)), 0, 1)
  )

Trẻ em từ 5 -15 tuổi có đang đi học không? Không - nếu có ít nhất 1 trẻ em không đi học, Ngược lại là Có

2022: M2_C3 ()

B26_hiencodihoc <- B26 %>%
  mutate(hiencodihoc = case_when(
    tuoi >= 5 & tuoi <= 15 & (hiencodihoc == 1 | hiencodihoc == 2) ~ 1,
    tuoi >= 5 & tuoi <= 15 & hiencodihoc == 3 ~ 0,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    hiencodihoc = ifelse(any(hiencodihoc == 0| is.na(hiencodihoc)), 0, 1)
  )

Y tế

Có tiếp cận dịch vụ y tế không? Không - Nếu có thành viên có bệnh nặng nhưng không có tới y tế - Ngược lại là Có

2022: M3A_C1 ()

B27 <- B27 %>%
  mutate(tiepcanyte = case_when(
    tiepcanyte == 1 ~ 1,
    tiepcanyte == 2 ~ 0,
    TRUE ~ NA_real_
  ))

Có bảo hiểm y tế? Không - Nếu có 1 thành viên từ 6 tuổi nhưng không có bảo hiểm y tế/sức khỏe, Ngược lại là Có

2022: M3A_C7 ()

B26_cothebaohiem <- B26 %>%
  mutate(cothebaohiem = case_when(
    tuoi >= 6 & cothebaohiem == 1 ~ 1,
    tuoi >= 6 & cothebaohiem == 2 ~ 0,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    cothebaohiem = ifelse(any(cothebaohiem == 0| is.na(cothebaohiem)), 0, 1)
  )

Nhà ở

Chất lượng/Loại nhà ở? Không nếu nhà tạm/dột nát - Có nếu nhà từ kiên cố trở lên

2022: M7_C4D ()

B27 <- B27 %>%
  mutate(loainha = case_when(
    loainha < 4 ~ 1,
    loainha >= 4 ~ 0,
    TRUE ~ NA_real_
  ))

Diện tích ở trung bình trên đầu người? Không nếu diện tích nhà ở trên đầu người < 8 m2; Có nếu ngược lại

2022: M7_C2 ()

B27 <- B27 %>%
  mutate(dientichtrungbinh = tongdientichnha / sonhankhau) %>%
  mutate(dientichtrungbinh = case_when(
    dientichtrungbinh >= 8 ~ 1,
    dientichtrungbinh < 8 ~ 0,
    TRUE ~ NA_real_
  ))

Nhóm biến vệ sinh và nước sạch

Nguồn nước sinh hoạt có đủ vệ sinh không? Có; Không (giếng không được bảo vệ, nước suối không được bảo vệ, nước mưa, nước mua từ xe chở thô sơ, nước bề mặt, các nguồn nước khác)

2022: M7_C13A ()

B27 <- B27 %>%
  mutate(nguonnuoc = case_when(
    nguonnuoc = 7 | nguonnuoc >= 9 ~ 0,
    nguonnuoc = 8 | nguonnuoc <= 6 ~ 1,
    TRUE ~ NA_real_ 
  ))

Loại nhà vệ sinh? Có (Hợp) vệ sinh (tự hoại, thấm dội nước) hoặc Không hợp vệ sinh

2022: M7_C15 ()

B27 <- B27 %>%
  mutate(loaihoxi = case_when(
    loaihoxi < 6 ~ 1,
    loaihoxi >= 6 ~ 0,
    TRUE ~ NA_real_ 
  ))

Nhóm biến Tiếp cận thông tin

Sử dụng điện thoại/ internet?

2022: M1A_C10 ()

B26_cosudunginternet <- B26 %>%
  mutate(cosudunginternet = case_when(
    cosudunginternet == 1 ~ 1,
    cosudunginternet == 2 ~ 0,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    cosudunginternet = ifelse(any(cosudunginternet == 1), 1, first(na.omit(cosudunginternet))
    ))

Không có bất kỳ tài sản nào dưới đây: Tivi, Radio, Máy tính hoặc nghe đài phát thanh? Không nếu tất cả đồng thời là Không

B24 <- B24 %>%
  mutate(taisan = case_when(
    taisan %in% c(16, 17, 19, 21, 22, 23) ~ 1,
    taisan %in% c(1:15, 18, 20, 24:37) ~ 0,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    taisan = ifelse(any(taisan == 1), 1, first(na.omit(taisan))
    ))

Nhóm biến đặc điểm thu nhập hộ

Thu nhập trung bình trên đầu người

2022: THUBQ ()

B27 <- B27 %>%
  mutate(thunhapdaunguoi = thubq / sonhankhau)

Nhóm biến đặc trưng nhân khẩu học:

Số năm đi học của Chủ hộ

2022: M2_C1 ()

B26_sonamdihoc_chuho <- B26 %>%
  mutate(sonamdihoc_chuho = case_when(
    quanhevoichuho == 1 ~ hochetlopmay,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    sonamdihoc_chuho = first(na.omit(sonamdihoc_chuho)))

Tuổi chủ hộ

2022: M1A_C5 ()

B26_tuoi_chuho <- B26 %>%
  mutate(tuoi_chuho = case_when(
    quanhevoichuho == 1 ~ tuoi,
    TRUE ~ NA_real_
  )) %>%
  group_by(IDHO) %>%
  summarise(
    MATINH = first(MATINH),
    tuoi_chuho = first(na.omit(tuoi_chuho)))

Giới tính chủ hộ

2022: GIOITINHCHUHO ()

B27 <- B27 %>%
  mutate(gioitinhchuho = case_when(
    gioitinhchuho == 1 ~ 1,
    gioitinhchuho == 2 ~ 0,
    TRUE ~ NA_real_
  ))

Dân tộc của chủ hộ

2022: DANTOCCHUHO

B27 <- B27 %>%
  mutate(dantocchuho = case_when(
    dantocchuho == 1 ~ 1,
    dantocchuho > 1 ~ 0,
    TRUE ~ NA_real_
  ))

Nơi ở của hộ: thành thị/ nông thôn, vùng-miền

2022: TTNT ()

B27 <- B27 %>%
  mutate(ttnt = case_when(
    thanhthinongthon == 1 ~ 1,
    thanhthinongthon == 2 ~ 0,
    TRUE ~ NA_real_
  ))

Các biến vĩ mô

Bex <- readxl::read_xlsx ("D:\\STUDY\\HK5\\PTDLUONG\\DATA CK\\[PTĐL] Chỉ số vĩ mô (1).xlsx", sheet = "final")

Thất nghiệp

Thu nhập bình quân

Chất lượng nguồn nhân lực

Lạm phát

B24$MATINH <- as.character(B24$MATINH)

B26_cosudunginternet$MATINH <- as.character(B26_cosudunginternet$MATINH)
B26_cothebaohiem$MATINH <- as.character(B26_cothebaohiem$MATINH)
B26_hiencodihoc$MATINH <- as.character(B26_hiencodihoc$MATINH)
B26_sonamdihoc_chuho$MATINH <- as.character(B26_sonamdihoc_chuho$MATINH)
B26_trinhdogiaoduc$MATINH <- as.character(B26_trinhdogiaoduc$MATINH)
B26_tuoi_chuho$MATINH <- as.character(B26_tuoi_chuho$MATINH)

B27$MATINH <- as.character(B27$MATINH)

Bex$MATINH <- as.character(Bex$MATINH)

Ghép data

Datafinal <- B27 %>%
  left_join(Bex, by = "MATINH") %>%
  left_join(B24, by = c("MATINH", "IDHO")) %>%
  left_join(B26_cosudunginternet, by = c("MATINH", "IDHO")) %>%
  left_join(B26_cothebaohiem, by = c("MATINH", "IDHO")) %>%
  left_join(B26_hiencodihoc, by = c("MATINH", "IDHO")) %>%
  left_join(B26_sonamdihoc_chuho, by = c("MATINH", "IDHO")) %>%
  left_join(B26_trinhdogiaoduc, by = c("MATINH", "IDHO")) %>%
  left_join(B26_tuoi_chuho, by = c("MATINH", "IDHO"))

Mô hình

Biến phụ thuộc: ngheodachieu

Biến y tế: tiepcanyte, cothebaohiem

Biến giáo dục: trinhdogiaoduc, hiencodihoc

Biến nhà ở: loainha, dientichtrungbinh

Biến vệ sinh: nguonnuoc, loaihoxi

Biến tiếp cận thông tin: cosudunginternet, taisan

Biến đặc điểm thu nhập hộ: thunhapdaunguoi

Biến đặc trưng nhân khẩu học: sonamdihoc_chuho, tuoi_chuho, gioitinhchuho, dantocchuho, ttnt

Biến vĩ mô: thatnghiep, thunhapbq, chatluongnguonnl, lamphat

Xử lý biến lớn

Datafinal<- Datafinal %>%
  mutate(GD=ifelse(trinhdogiaoduc==0 & hiencodihoc==0,0,
                   ifelse(trinhdogiaoduc==1 & hiencodihoc==0,1,
                          ifelse(trinhdogiaoduc==0 & hiencodihoc ==1,2,
                                 ifelse(trinhdogiaoduc==1 & hiencodihoc ==1,3,NA)))))

Datafinal<- Datafinal %>%
  mutate(YTE=ifelse(tiepcanyte==0 & cothebaohiem==0,0,
                    ifelse(tiepcanyte==1 & cothebaohiem==0,1,
                           ifelse(tiepcanyte==0 & cothebaohiem ==1,2,
                                ifelse(tiepcanyte==1 & cothebaohiem ==1,3,NA)))))

Datafinal<- Datafinal %>%
  mutate(NHA=ifelse(loainha==0 & dientichtrungbinh==0,0,
                    ifelse(loainha==1 & dientichtrungbinh==0,1,
                           ifelse(loainha==0 & dientichtrungbinh ==1,2,
                                  ifelse(loainha==1 & dientichtrungbinh ==1,3,NA)))))

Datafinal<- Datafinal %>% 
  mutate(VS=ifelse(nguonnuoc==0 & loaihoxi==0,0,
                   ifelse(nguonnuoc==1 & loaihoxi==0,1,
                          ifelse(nguonnuoc==0 & loaihoxi ==1,2,
                                 ifelse(nguonnuoc==1 & loaihoxi ==1,3,NA)))))

Datafinal<- Datafinal %>% 
  mutate(TT=ifelse(cosudunginternet==0 & taisan==0,0,
                   ifelse(cosudunginternet==1 & taisan==0,1,
                          ifelse(cosudunginternet==0 & taisan ==1,2,
                                 ifelse(cosudunginternet==1 & taisan ==1,3,NA)))))

Datafinal <- Datafinal %>%
  rowwise() %>%
  mutate(
    sobienbang0 = sum(c_across(c(GD, YTE, NHA, VS, TT)) == 0), 
    ngheodc = case_when(
      sobienbang0 %in% c(2, 3, 4) ~ 1,
      sobienbang0 %in% c(0, 1) ~ 0,  
      TRUE ~ NA_real_))

Thống kê mô tả

Datafinal %>%
  dplyr::select(ngheodachieu, GD, YTE, NHA, VS, TT, sonamdihoc_chuho, tuoi_chuho, gioitinhchuho, dantocchuho, thanhthinongthon, thunhapdaunguoi, thatnghiep, thunhapbq, chatluongnguonnl, lamphat)  %>%
  summary()

##   ngheodachieu         GD              YTE             NHA       
##  Min.   : 3.61   Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 8.18   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:2.000  
##  Median : 9.74   Median :0.0000   Median :2.000   Median :2.000  
##  Mean   :11.32   Mean   :0.1582   Mean   :1.551   Mean   :2.111  
##  3rd Qu.:18.10   3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :19.26   Max.   :1.0000   Max.   :3.000   Max.   :3.000  
##                                                                  
##        VS              TT       sonamdihoc_chuho   tuoi_chuho   
##  Min.   :0.000   Min.   :0.00   Min.   : 0.00    Min.   :22.00  
##  1st Qu.:2.000   1st Qu.:3.00   1st Qu.: 6.00    1st Qu.:40.00  
##  Median :2.000   Median :3.00   Median : 9.00    Median :50.00  
##  Mean   :1.687   Mean   :2.73   Mean   :13.66    Mean   :50.07  
##  3rd Qu.:2.000   3rd Qu.:3.00   3rd Qu.:12.00    3rd Qu.:59.00  
##  Max.   :2.000   Max.   :3.00   Max.   :99.00    Max.   :91.00  
##                  NA's   :26                                     
##  gioitinhchuho     dantocchuho     thanhthinongthon thunhapdaunguoi  
##  Min.   :0.0000   Min.   :0.0000   Min.   :1.000    Min.   :   30.4  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:1.000    1st Qu.:  393.7  
##  Median :1.0000   Median :1.0000   Median :2.000    Median :  818.2  
##  Mean   :0.7588   Mean   :0.7281   Mean   :1.696    Mean   : 1289.0  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:2.000    3rd Qu.: 1464.2  
##  Max.   :1.0000   Max.   :1.0000   Max.   :2.000    Max.   :56904.0  
##                                                                      
##    thatnghiep      thunhapbq     chatluongnguonnl    lamphat    
##  Min.   :0.520   Min.   :2.567   Min.   :14.70    Min.   :2.98  
##  1st Qu.:0.640   1st Qu.:2.927   1st Qu.:15.30    1st Qu.:2.99  
##  Median :0.650   Median :3.138   Median :17.60    Median :3.48  
##  Mean   :0.664   Mean   :3.286   Mean   :17.66    Mean   :3.47  
##  3rd Qu.:0.810   3rd Qu.:3.238   3rd Qu.:18.00    3rd Qu.:3.88  
##  Max.   :0.810   Max.   :4.428   Max.   :21.80    Max.   :3.88  
##

library(flextable)

## Warning: package 'flextable' was built under R version 4.4.2

## 
## Attaching package: 'flextable'

## The following object is masked from 'package:purrr':
## 
##     compose

# Tỷ lệ % hộ nghèo đa chiều và cường độ thiếu hụt theo mã tỉnh
Datafinal %>%
  group_by(MATINH) %>%
  summarise(
    ty_le_ho_ngheo = mean(ngheodachieu, na.rm = TRUE),
    cuong_do_thieu_hut_tb = mean(sobienbang0, na.rm = TRUE) # Cường độ thiếu hụt trung bình
  ) %>%
  arrange(desc(ty_le_ho_ngheo)) %>% # Sắp xếp theo tỷ lệ hộ nghèo giảm dần
  flextable()

MATINH	ty_le_ho_ngheo	cuong_do_thieu_hut_tb
62	19.26	1.3092784
64	18.10	1.5954198
67	9.74	1.3367347
66	8.18	1.2264151
68	3.61	0.9928571

62: Kon Tum 64: Gia Lai 67: Đắk Nông 66: Đắk Lắk 68: Lâm Đồng

Mô hình logit

logit1 <- glm(ngheodc ~ GD+YTE+NHA+VS+TT+ thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat, 
              family = binomial(link = "logit"), 
              data = Datafinal)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(logit1)

## 
## Call:
## glm(formula = ngheodc ~ GD + YTE + NHA + VS + TT + thunhapdaunguoi + 
##     sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + 
##     thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + 
##     lamphat, family = binomial(link = "logit"), data = Datafinal)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       5.478e+01  2.119e+03   0.026 0.979375    
## GD               -4.203e+01  3.550e+03  -0.012 0.990552    
## YTE              -5.614e+00  6.988e-01  -8.034 9.40e-16 ***
## NHA              -2.445e+00  6.630e-01  -3.688 0.000226 ***
## VS               -1.661e+01  1.059e+03  -0.016 0.987489    
## TT               -3.274e+00  5.907e-01  -5.542 2.98e-08 ***
## thunhapdaunguoi  -1.267e-05  2.039e-04  -0.062 0.950476    
## sonamdihoc_chuho  9.490e-03  1.246e-02   0.762 0.446157    
## tuoi_chuho        1.514e-02  2.081e-02   0.728 0.466833    
## gioitinhchuho     1.287e-02  6.535e-01   0.020 0.984281    
## dantocchuho       1.170e+00  8.239e-01   1.420 0.155670    
## thanhthinongthon -6.816e-01  5.993e-01  -1.137 0.255402    
## thatnghiep       -1.943e+00  5.665e+00  -0.343 0.731607    
## thunhapbq         7.317e-01  1.769e+00   0.414 0.679129    
## chatluongnguonnl -3.318e-01  3.555e-01  -0.933 0.350588    
## lamphat           2.137e-01  1.951e+00   0.110 0.912779    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 788.04  on 624  degrees of freedom
## Residual deviance: 104.68  on 609  degrees of freedom
##   (26 observations deleted due to missingness)
## AIC: 136.68
## 
## Number of Fisher Scoring iterations: 21

Mô hình probit

probit1 <- glm(ngheodc ~ GD+YTE+NHA+VS+TT+ thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat, 
              family = binomial(link = "probit"), 
              data = Datafinal)

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(probit1)

## 
## Call:
## glm(formula = ngheodc ~ GD + YTE + NHA + VS + TT + thunhapdaunguoi + 
##     sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + 
##     thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + 
##     lamphat, family = binomial(link = "probit"), data = Datafinal)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       2.553e+01  2.696e+03   0.009    0.992    
## GD               -1.608e+01  4.535e+03  -0.004    0.997    
## YTE              -2.946e+00  3.154e-01  -9.340  < 2e-16 ***
## NHA              -1.329e+00  3.101e-01  -4.285 1.83e-05 ***
## VS               -7.118e+00  1.348e+03  -0.005    0.996    
## TT               -1.703e+00  2.861e-01  -5.952 2.64e-09 ***
## thunhapdaunguoi   3.992e-07  1.073e-04   0.004    0.997    
## sonamdihoc_chuho  7.087e-03  6.522e-03   1.087    0.277    
## tuoi_chuho        9.240e-03  1.055e-02   0.875    0.381    
## gioitinhchuho     4.507e-02  3.242e-01   0.139    0.889    
## dantocchuho       5.766e-01  3.779e-01   1.526    0.127    
## thanhthinongthon -4.211e-01  2.968e-01  -1.419    0.156    
## thatnghiep       -7.014e-01  2.765e+00  -0.254    0.800    
## thunhapbq         4.479e-01  8.749e-01   0.512    0.609    
## chatluongnguonnl -1.954e-01  1.755e-01  -1.113    0.266    
## lamphat           1.216e-01  9.593e-01   0.127    0.899    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 788.04  on 624  degrees of freedom
## Residual deviance: 108.75  on 609  degrees of freedom
##   (26 observations deleted due to missingness)
## AIC: 140.75
## 
## Number of Fisher Scoring iterations: 25

So sánh:

Models <- list(logit1,probit1)
modelsummary::modelsummary(Models,
                           statistic = c('{statistic} '),
                           stars = c("*"=0.1, "**"=0.05, "***"=0.01))

	(1)	(2)
* p < 0.1, p < 0.05, * p < 0.01
(Intercept)	54.779	25.530
	0.026	0.009
GD	-42.034	-16.079
	-0.012	-0.004
YTE	-5.614***	-2.946***
	-8.034	-9.340
NHA	-2.445***	-1.329***
	-3.688	-4.285
VS	-16.613	-7.118
	-0.016	-0.005
TT	-3.274***	-1.703***
	-5.542	-5.952
thunhapdaunguoi	0.000	0.000
	-0.062	0.004
sonamdihoc_chuho	0.009	0.007
	0.762	1.087
tuoi_chuho	0.015	0.009
	0.728	0.875
gioitinhchuho	0.013	0.045
	0.020	0.139
dantocchuho	1.170	0.577
	1.420	1.526
thanhthinongthon	-0.682	-0.421
	-1.137	-1.419
thatnghiep	-1.943	-0.701
	-0.343	-0.254
thunhapbq	0.732	0.448
	0.414	0.512
chatluongnguonnl	-0.332	-0.195
	-0.933	-1.113
lamphat	0.214	0.122
	0.110	0.127
Num.Obs.	625	625
AIC	136.7	140.8
BIC	207.7	211.8
Log.Lik.	-52.340	-54.376
F	4.820	6.338
RMSE	0.14	0.15

Kiểm định/xem xét

Hosmer-Lemeshow GOF test

Kiểm định quan trọng (sống/chết) của mô hình.

Nếu kiểm định này không có ý nghĩa thông kê ==> tốt

Nếu có ý nghĩa thống kê thì mô hình chưa tốt (cần xem xét lại: đặc trưng dữ liệu, các biến trong mô hình, mô hình lý thuyết)

performance::performance_hosmer(logit1, n_bins=2)

## # Hosmer-Lemeshow Goodness-of-Fit Test
## 
##   Chi-squared: 0.065
##            df: 0    
##       p-value: 0.000

## Summary: model does not fit well.

performance::performance_hosmer(probit1, n_bins=2)

## # Hosmer-Lemeshow Goodness-of-Fit Test
## 
##   Chi-squared: 0.010
##            df: 0    
##       p-value: 0.000

## Summary: model does not fit well.

logit.res <- rms::lrm(logit, data = Datafinal, y = TRUE, x = TRUE, linear.predictors = TRUE) residuals(logit.res, type = “gof”)

OR

cbind(Estimate=round(coef(logit1),4),
      OR=round(exp(coef(logit1)),4))

##                  Estimate           OR
## (Intercept)       54.7793 6.170716e+23
## GD               -42.0341 0.000000e+00
## YTE               -5.6143 3.600000e-03
## NHA               -2.4452 8.670000e-02
## VS               -16.6129 0.000000e+00
## TT                -3.2738 3.790000e-02
## thunhapdaunguoi    0.0000 1.000000e+00
## sonamdihoc_chuho   0.0095 1.009500e+00
## tuoi_chuho         0.0151 1.015300e+00
## gioitinhchuho      0.0129 1.013000e+00
## dantocchuho        1.1698 3.221200e+00
## thanhthinongthon  -0.6816 5.058000e-01
## thatnghiep        -1.9431 1.433000e-01
## thunhapbq          0.7317 2.078600e+00
## chatluongnguonnl  -0.3318 7.176000e-01
## lamphat            0.2137 1.238300e+00

exp(cbind(OR = coef(logit1), confint(logit1)))

## Waiting for profiling to be done...

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

##                            OR         2.5 %       97.5 %
## (Intercept)      6.170716e+23  1.214329e-09          Inf
## GD               5.556758e-19  0.000000e+00 2.417079e+55
## YTE              3.645392e-03  7.520513e-04 1.221021e-02
## NHA              8.671086e-02  1.965258e-02 2.711067e-01
## VS               6.096726e-08 2.275743e-190 6.794539e-29
## TT               3.786124e-02  1.051894e-02 1.102660e-01
## thunhapdaunguoi  9.999873e-01  9.995036e-01 1.000364e+00
## sonamdihoc_chuho 1.009535e+00  9.843764e-01 1.034557e+00
## tuoi_chuho       1.015254e+00  9.748477e-01 1.058634e+00
## gioitinhchuho    1.012958e+00  2.766254e-01 3.671994e+00
## dantocchuho      3.221189e+00  6.975420e-01 1.865457e+01
## thanhthinongthon 5.058174e-01  1.518720e-01 1.638473e+00
## thatnghiep       1.432550e-01  1.559268e-06 9.159186e+03
## thunhapbq        2.078643e+00  6.564700e-02 7.234190e+01
## chatluongnguonnl 7.176186e-01  3.537424e-01 1.434139e+00
## lamphat          1.238292e+00  2.596061e-02 6.032981e+01

Hypothesis and specification tests

Khi so sánh các mô hình cần xem xét các chỉ số AIC, BIC, RMSE và khả năng ước tính của mô hình, đặc biệt khi xem xét lựa chọn tiếp cận logit hay probit

Theo đó, AIC, BIC, RMSE càng thấp càng tốt, khả năng dự báo đúng càng cao càng tốt

invlogit = function (x) {1/(1+exp(-x))}
invlogit(coef(logit1)[1]+
 coef(logit1)[2]*mean(Datafinal$GD)+
 coef(logit1)[3]*mean(Datafinal$YTE)+
 coef(logit1)[4]*mean(Datafinal$NHA)+
   coef(logit1)[4]*mean(Datafinal$VS)+
   coef(logit1)[4]*mean(Datafinal$TT)+
   coef(logit1)[4]*mean(Datafinal$thunhapdaunguoi)+
   coef(logit1)[4]*mean(Datafinal$sonamdihoc_chuho)+
   coef(logit1)[4]*mean(Datafinal$tuoi_chuho)+
   coef(logit1)[4]*mean(Datafinal$gioitinhchuho)+
   coef(logit1)[4]*mean(Datafinal$dantocchuho)+
   coef(logit1)[4]*mean(Datafinal$thanhthinongthon)+
   coef(logit1)[4]*mean(Datafinal$thatnghiep)+
   coef(logit1)[4]*mean(Datafinal$thunhapbq)+
   coef(logit1)[4]*mean(Datafinal$chatluongnguonnl)+
   coef(logit1)[4]*mean(Datafinal$lamphat))

## (Intercept) 
##          NA

models <- list(logit1,probit1)
modelsummary::modelsummary(models,
                           statistic = c('{statistic} '),
                           stars = c("*"=0.1, "**"=0.05, "***"=0.01))

	(1)	(2)
* p < 0.1, p < 0.05, * p < 0.01
(Intercept)	54.779	25.530
	0.026	0.009
GD	-42.034	-16.079
	-0.012	-0.004
YTE	-5.614***	-2.946***
	-8.034	-9.340
NHA	-2.445***	-1.329***
	-3.688	-4.285
VS	-16.613	-7.118
	-0.016	-0.005
TT	-3.274***	-1.703***
	-5.542	-5.952
thunhapdaunguoi	0.000	0.000
	-0.062	0.004
sonamdihoc_chuho	0.009	0.007
	0.762	1.087
tuoi_chuho	0.015	0.009
	0.728	0.875
gioitinhchuho	0.013	0.045
	0.020	0.139
dantocchuho	1.170	0.577
	1.420	1.526
thanhthinongthon	-0.682	-0.421
	-1.137	-1.419
thatnghiep	-1.943	-0.701
	-0.343	-0.254
thunhapbq	0.732	0.448
	0.414	0.512
chatluongnguonnl	-0.332	-0.195
	-0.933	-1.113
lamphat	0.214	0.122
	0.110	0.127
Num.Obs.	625	625
AIC	136.7	140.8
BIC	207.7	211.8
Log.Lik.	-52.340	-54.376
F	4.820	6.338
RMSE	0.14	0.15

Marginal effects

mfx::logitmfx(ngheodc ~ GD + YTE + NHA + VS + TT + + thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + chatluongnguonnl + lamphat, data = Datafinal)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Call:
## mfx::logitmfx(formula = ngheodc ~ GD + YTE + NHA + VS + TT + 
##     +thunhapdaunguoi + sonamdihoc_chuho + tuoi_chuho + gioitinhchuho + 
##     dantocchuho + thanhthinongthon + thatnghiep + thunhapbq + 
##     chatluongnguonnl + lamphat, data = Datafinal)
## 
## Marginal Effects:
##                        dF/dx   Std. Err.         z  P>|z|    
## GD               -4.6275e-01  7.5858e+01   -0.0061 0.9951    
## YTE              -5.4148e-03  2.4953e+00   -0.0022 0.9983    
## NHA              -2.3583e-03  1.0867e+00   -0.0022 0.9983    
## VS               -9.9999e-01  4.6332e-03 -215.8326 <2e-16 ***
## TT               -3.1575e-03  1.4550e+00   -0.0022 0.9983    
## thunhapdaunguoi  -1.2215e-08  5.6324e-06   -0.0022 0.9983    
## sonamdihoc_chuho  9.1528e-06  4.2178e-03    0.0022 0.9983    
## tuoi_chuho        1.4601e-05  6.7285e-03    0.0022 0.9983    
## gioitinhchuho     1.2376e-05  5.7374e-03    0.0022 0.9983    
## dantocchuho       8.9744e-04  4.1365e-01    0.0022 0.9983    
## thanhthinongthon -7.6065e-04  3.5039e-01   -0.0022 0.9983    
## thatnghiep       -1.8741e-03  8.6363e-01   -0.0022 0.9983    
## thunhapbq         7.0571e-04  3.2521e-01    0.0022 0.9983    
## chatluongnguonnl -3.2003e-04  1.4748e-01   -0.0022 0.9983    
## lamphat           2.0614e-04  9.5012e-02    0.0022 0.9983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## dF/dx is for discrete change for the following variables:
## 
## [1] "GD"               "VS"               "gioitinhchuho"    "dantocchuho"     
## [5] "thanhthinongthon"

Classification: Xem xét khả năng dự báo của mô hình

##1- Xác định xác suất theo từng quan sát từ mô hình

test.probs <-predict(logit1, Datafinal, type='response')

##2- Tạo biến dự báo (đặt giá trị 0)

Datafinal$pred.logit1 <- rep(0,length(test.probs))

##3- Chọn mức xác suất để xác định dự báo sang 1, mặc định là 0.5. Tùy theo lĩnh vực và hiểu biết trong lĩnh vực đó có thể điều chỉnh cho phù hợp.

Datafinal$pred.logit1[test.probs>=0.5] <- 1

##4- So sánh với giá trị thực: so sánh giữa biến ngheodc và biến dự báo pred.logit

Datafinal%>%
  dplyr::select(ngheodc, pred.logit1) %>%
  table() %>%
  prop.table()* 100

##        pred.logit1
## ngheodc     0     1
##       0 66.56  0.96
##       1  0.80 31.68

##5- Dùng lệnh test chi tiết Chuyển dữ liệu của các biến sang dạng factor (hiện ở dạng numeric 0.1)

Datafinal %>%
  mutate(ngheodc=as.factor(ngheodc),pred.logit1=as.factor(pred.logit1))-> Datafinal

Dùng lệnh kiểm tra

caret::confusionMatrix(Datafinal$ngheodc, Datafinal$pred.logit1)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 416   6
##          1   5 198
##                                           
##                Accuracy : 0.9824          
##                  95% CI : (0.9687, 0.9912)
##     No Information Rate : 0.6736          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9599          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9881          
##             Specificity : 0.9706          
##          Pos Pred Value : 0.9858          
##          Neg Pred Value : 0.9754          
##              Prevalence : 0.6736          
##          Detection Rate : 0.6656          
##    Detection Prevalence : 0.6752          
##       Balanced Accuracy : 0.9794          
##                                           
##        'Positive' Class : 0               
##

Nhom3

2024-11-19