polytomius (Multinonimal) Logistic Regression

tai = read.dta13("D:\\\\OneDrive - UMP\\\\R - lenh 2016\\\\hc.dta")
## Warning in read.dta13("D:\\\\OneDrive - UMP\\\\R - lenh 2016\\\\hc.dta"): 
##    Factor codes of type double or float detected in variables
## 
##    nhomtuoi, xlloiich, xlhanche, xltacdong,
##    xlnhucau
## 
##    No labels have been assigned.
##    Set option 'nonint.factors = TRUE' to assign labels anyway.
attach(tai)
table1(~ xlnhucau + nganh)
Overall
(N=362)
xlnhucau
Mean (SD) 0.823 (0.455)
Median [Min, Max] 1.00 [0, 2.00]
nganh
ktha 60 (16.6%)
y da khoa 248 (68.5%)
rang ham mat 54 (14.9%)
tai$nhucau = factor(tai$xlnhucau, levels = c(0,1,2),
labels=c("kem", "trungbinh", "tot"))
tai$xlnhucau.1 = relevel(tai$nhucau, ref="kem")
attach(tai)
## The following objects are masked from tai (pos = 3):
## 
##     baigiang, benhvien, chucvu, chuyenkhoa, dtbhanche, dtbloiich,
##     dtbnhucau, dtbtacdong, gioitinh, h1, h2, h3, h4, h5, h6, hanche100,
##     hieubiet, hoctap, hoinghikh, khoahockhmt, khoahocnnlt, kinhte, l1,
##     l10, l11, l12, l13, l14, l15, l16, l17, l2, l3, l4, l5, l6, l7, l8,
##     l9, loiich100, n1, n10, n11, n12, n13, n2, n3, n4, n5, n6, n7, n8,
##     n9, namsinh, nganh, nguon, nhomtuoi, nhucau100, nnlt, quantamcn,
##     quequan, songchung, stt, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9,
##     tacdong100, thamgiaclb, truonghoc, tuoi, ungdung, ungdungbv,
##     xlhanche, xlloiich, xlnhucau, xltacdong
bang.1 = crosstable(tai, c(nganh, gioitinh, hoctap),
by = xlnhucau.1, total="both",
percent_pattern="{n} ({p_row})",
percent_digits=0)%>% as_flextable(compact=T, header_show_n =1:2)
bang.1

xlnhucau.1

Total

kem (N=75)

trungbinh (N=276)

tot (N=11)

nganh

ktha

11 (18%)

49 (82%)

0 (0%)

60 (17%)

y da khoa

56 (23%)

184 (74%)

8 (3%)

248 (69%)

rang ham mat

8 (15%)

43 (80%)

3 (6%)

54 (15%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

gioitinh

nu

56 (26%)

154 (72%)

3 (1%)

213 (59%)

nam

19 (13%)

122 (82%)

8 (5%)

149 (41%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

hoctap

xuat sac/gioi

9 (25%)

26 (72%)

1 (3%)

36 (10%)

kha

35 (19%)

141 (78%)

5 (3%)

181 (50%)

trung binh

27 (21%)

98 (75%)

5 (4%)

130 (36%)

yeu/kem

4 (27%)

11 (73%)

0 (0%)

15 (4%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

bang.2 = crosstable(tai, c(nganh, gioitinh, hoctap),
by = xlnhucau.1, total="both",
percent_pattern="{n} ({p_col})",
percent_digits=0)%>% as_flextable(compact=T, header_show_n =1:2)
bang.2

xlnhucau.1

Total

kem (N=75)

trungbinh (N=276)

tot (N=11)

nganh

ktha

11 (15%)

49 (18%)

0 (0%)

60 (17%)

y da khoa

56 (75%)

184 (67%)

8 (73%)

248 (69%)

rang ham mat

8 (11%)

43 (16%)

3 (27%)

54 (15%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

gioitinh

nu

56 (75%)

154 (56%)

3 (27%)

213 (59%)

nam

19 (25%)

122 (44%)

8 (73%)

149 (41%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

hoctap

xuat sac/gioi

9 (12%)

26 (9%)

1 (9%)

36 (10%)

kha

35 (47%)

141 (51%)

5 (45%)

181 (50%)

trung binh

27 (36%)

98 (36%)

5 (45%)

130 (36%)

yeu/kem

4 (5%)

11 (4%)

0 (0%)

15 (4%)

Total

75 (21%)

276 (76%)

11 (3%)

362 (100%)

percent_pattern=“{n}({p_row}/{p_col})”

m = multinom(xlnhucau.1 ~ factor(gioitinh), data=tai)
## # weights:  9 (4 variable)
## initial  value 397.697648 
## iter  10 value 224.466565
## final  value 224.466560 
## converged
summary(m)
## Call:
## multinom(formula = xlnhucau.1 ~ factor(gioitinh), data = tai)
## 
## Coefficients:
##           (Intercept) factor(gioitinh)nam
## trungbinh    1.011810           0.8476004
## tot         -2.926037           2.0608997
## 
## Std. Errors:
##           (Intercept) factor(gioitinh)nam
## trungbinh   0.1560552           0.2918457
## tot         0.5924634           0.7270734
## 
## Residual Deviance: 448.9331 
## AIC: 456.9331
exp(coef(m))
##           (Intercept) factor(gioitinh)nam
## trungbinh  2.75057604            2.334039
## tot        0.05360909            7.853032
exp(confint(m))
## , , trungbinh
## 
##                        2.5 %   97.5 %
## (Intercept)         2.025766 3.734720
## factor(gioitinh)nam 1.317312 4.135497
## 
## , , tot
## 
##                          2.5 %    97.5 %
## (Intercept)         0.01678544  0.171216
## factor(gioitinh)nam 1.88864953 32.653018
m.1 = multinom(xlnhucau.1 ~ nganh + gioitinh + hoctap + chucvu + kinhte, data = tai)
## # weights:  33 (20 variable)
## initial  value 397.697648 
## iter  10 value 225.909819
## iter  20 value 218.693014
## iter  30 value 218.507276
## iter  40 value 218.504093
## iter  40 value 218.504091
## iter  40 value 218.504091
## final  value 218.504091 
## converged
summary(m.1)
## Call:
## multinom(formula = xlnhucau.1 ~ nganh + gioitinh + hoctap + chucvu + 
##     kinhte, data = tai)
## 
## Coefficients:
##           (Intercept) nganhy da khoa nganhrang ham mat gioitinhnam hoctapkha
## trungbinh     1.31774     -0.1417783         0.0968795   0.8223389 0.2780041
## tot         -18.39218     16.0331577        16.5154341   2.0859500 0.3598141
##           hoctaptrung binh hoctapyeu/kem    chucvuco kinhtebinh thuong
## trungbinh        0.1740502   -0.02906254 -0.02117045        -0.4798408
## tot              0.5321315  -12.51943311  0.85806504        -1.2216755
##           kinhtekho khan
## trungbinh     -0.5541384
## tot          -15.5268462
## 
## Std. Errors:
##           (Intercept) nganhy da khoa nganhrang ham mat gioitinhnam hoctapkha
## trungbinh   0.5710167      0.3882190         0.5228865   0.2969057 0.4466749
## tot         0.8612916      0.5311888         0.6574520   0.7488156 1.2231452
##           hoctaptrung binh hoctapyeu/kem  chucvuco kinhtebinh thuong
## trungbinh        0.4629306  7.195699e-01 0.3702689         0.3898000
## tot              1.2322892  6.265447e-06 0.7951604         0.8062072
##           kinhtekho khan
## trungbinh   7.581794e-01
## tot         2.483571e-07
## 
## Residual Deviance: 437.0082 
## AIC: 477.0082
exp(coef(m.1))
##            (Intercept) nganhy da khoa nganhrang ham mat gioitinhnam hoctapkha
## trungbinh 3.734970e+00   8.678137e-01      1.101728e+00    2.275816  1.320492
## tot       1.028911e-08   9.185693e+06      1.487859e+07    8.052238  1.433063
##           hoctaptrung binh hoctapyeu/kem  chucvuco kinhtebinh thuong
## trungbinh         1.190115  9.713557e-01 0.9790521         0.6188819
## tot               1.702557  3.654932e-06 2.3585925         0.2947359
##           kinhtekho khan
## trungbinh   5.745671e-01
## tot         1.806244e-07
exp(confint(m.1))
## , , trungbinh
## 
##                       2.5 %    97.5 %
## (Intercept)       1.2196545 11.437665
## nganhy da khoa    0.4054841  1.857287
## nganhrang ham mat 0.3953597  3.070125
## gioitinhnam       1.2717760  4.072526
## hoctapkha         0.5502059  3.169174
## hoctaptrung binh  0.4803322  2.948739
## hoctapyeu/kem     0.2370715  3.979947
## chucvuco          0.4738406  2.022923
## kinhtebinh thuong 0.2882766  1.328636
## kinhtekho khan    0.1300102  2.539242
## 
## , , tot
## 
##                          2.5 %       97.5 %
## (Intercept)       1.902151e-09 5.565582e-08
## nganhy da khoa    3.243121e+06 2.601721e+07
## nganhrang ham mat 4.101453e+06 5.397418e+07
## gioitinhnam       1.855768e+00 3.493893e+01
## hoctapkha         1.303533e-01 1.575464e+01
## hoctaptrung binh  1.521161e-01 1.905585e+01
## hoctapyeu/kem     3.654887e-06 3.654977e-06
## chucvuco          4.963764e-01 1.120714e+01
## kinhtebinh thuong 6.069994e-02 1.431126e+00
## kinhtekho khan    1.806243e-07 1.806245e-07

Giá trị p

MASS::dropterm(m.1, trace=T, test="Chisq")
## trying - nganh
## # weights:  27 (16 variable)
## initial  value 397.697648 
## iter  10 value 223.962057
## iter  20 value 221.039365
## iter  30 value 220.979555
## final  value 220.979278 
## converged
## trying - gioitinh
## # weights:  30 (18 variable)
## initial  value 397.697648 
## iter  10 value 231.273310
## iter  20 value 225.154891
## iter  30 value 224.919525
## final  value 224.917393 
## converged
## trying - hoctap
## # weights:  24 (14 variable)
## initial  value 397.697648 
## iter  10 value 223.446955
## iter  20 value 219.484619
## iter  30 value 219.403053
## iter  30 value 219.403053
## final  value 219.403053 
## converged
## trying - chucvu
## # weights:  30 (18 variable)
## initial  value 397.697648 
## iter  10 value 226.349160
## iter  20 value 219.313088
## iter  30 value 219.147380
## final  value 219.145426 
## converged
## trying - kinhte
## # weights:  27 (16 variable)
## initial  value 397.697648 
## iter  10 value 223.748929
## iter  20 value 220.570933
## iter  30 value 220.463176
## final  value 220.462831 
## converged
## Single term deletions
## 
## Model:
## xlnhucau.1 ~ nganh + gioitinh + hoctap + chucvu + kinhte
##          Df    AIC     LRT Pr(Chi)   
## <none>      477.01                   
## nganh     4 473.96  4.9504 0.29243   
## gioitinh  2 485.83 12.8266 0.00164 **
## hoctap    6 466.81  1.7979 0.93731   
## chucvu    2 474.29  1.2827 0.52659   
## kinhte    4 472.93  3.9175 0.41729   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Mô tả

Hmisc::describe(xlnhucau.1 ~ nganh + gioitinh + hoctap + chucvu + kinhte, data = tai)
## xlnhucau.1 ~ nganh + gioitinh + hoctap + chucvu + kinhte 
## 
##  6  Variables      362  Observations
## --------------------------------------------------------------------------------
## xlnhucau.1 
##        n  missing distinct 
##      362        0        3 
##                                         
## Value            kem trungbinh       tot
## Frequency         75       276        11
## Proportion     0.207     0.762     0.030
## --------------------------------------------------------------------------------
## nganh 
##        n  missing distinct 
##      362        0        3 
##                                                  
## Value              ktha    y da khoa rang ham mat
## Frequency            60          248           54
## Proportion        0.166        0.685        0.149
## --------------------------------------------------------------------------------
## gioitinh 
##        n  missing distinct 
##      362        0        2 
##                       
## Value         nu   nam
## Frequency    213   149
## Proportion 0.588 0.412
## --------------------------------------------------------------------------------
## hoctap 
##        n  missing distinct 
##      362        0        4 
##                                                                   
## Value      xuat sac/gioi           kha    trung binh       yeu/kem
## Frequency             36           181           130            15
## Proportion         0.099         0.500         0.359         0.041
## --------------------------------------------------------------------------------
## chucvu 
##        n  missing distinct 
##      362        0        2 
##                       
## Value      khong    co
## Frequency    307    55
## Proportion 0.848 0.152
## --------------------------------------------------------------------------------
## kinhte 
##        n  missing distinct 
##      362        0        3 
##                                               
## Value          kha gia binh thuong    kho khan
## Frequency           69         280          13
## Proportion       0.191       0.773       0.036
## --------------------------------------------------------------------------------
library(biostat3)
eform(m.1, method ="Profile")
## Warning in cbind(coef = coef(object), estfun(object, level = level)): number of
## rows of result is not a multiple of vector length (arg 2)
##              exp(beta) nganhy da khoa nganhrang ham mat gioitinhnam hoctapkha
## trungbinh 3.734970e+00   8.678137e-01      1.101728e+00    2.275816  1.320492
## tot       1.028911e-08   9.185693e+06      1.487859e+07    8.052238  1.433063
##           hoctaptrung binh hoctapyeu/kem  chucvuco kinhtebinh thuong
## trungbinh         1.190115  9.713557e-01 0.9790521         0.6188819
## tot               1.702557  3.654932e-06 2.3585925         0.2947359
##           kinhtekho khan          
## trungbinh   5.745671e-01 1.2196545
## tot         1.806244e-07 0.4054841
library(effects)
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
plot(allEffects(m.1), style = "stacked")

#exp(coef(m.1)) exp(confint(m.1))
zvalues = summary(m)$coefficients/summary(m)$standard.errors
pnorm(abs(zvalues), lower.tail = F)*2
##            (Intercept) factor(gioitinh)nam
## trungbinh 8.951756e-11         0.003681041
## tot       7.861960e-07         0.004589544
predicted = fitted(m)
head(predicted)
##         kem trungbinh        tot
## 1 0.2628684 0.7230395 0.01409214
## 2 0.1275356 0.8187727 0.05369172
## 3 0.2628684 0.7230395 0.01409214
## 4 0.2628684 0.7230395 0.01409214
## 5 0.1275356 0.8187727 0.05369172
## 6 0.1275356 0.8187727 0.05369172
zvalues = summary(m.1)$coefficients/summary(m.1)$standard.errors
pnorm(abs(zvalues), lower.tail = F)*2
##             (Intercept) nganhy da khoa nganhrang ham mat gioitinhnam hoctapkha
## trungbinh  2.101538e-02   7.149608e-01      8.530108e-01 0.005610845 0.5336883
## tot       3.565466e-101  3.895716e-200     2.980099e-139 0.005341790 0.7686271
##           hoctaptrung binh hoctapyeu/kem  chucvuco kinhtebinh thuong
## trungbinh        0.7069358     0.9677832 0.9544051         0.2183257
## tot              0.6658697     0.0000000 0.2805390         0.1296871
##           kinhtekho khan
## trungbinh      0.4648522
## tot            0.0000000
predicted.1 = fitted(m.1)
head(predicted.1)
##         kem trungbinh          tot
## 1 0.2911424 0.6950496 1.380798e-02
## 2 0.1360593 0.8202053 4.373539e-02
## 3 0.2710933 0.7180848 1.082198e-02
## 4 0.2890860 0.7109140 7.072265e-09
## 5 0.1360593 0.8202053 4.373539e-02
## 6 0.1360593 0.8202053 4.373539e-02
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## Attaching package: 'rms'
## The following objects are masked from 'package:car':
## 
##     Predict, vif
m2 = lrm (xlnhucau.1 ~ nganh + gioitinh + hoctap, data = tai)
m2
## Logistic Regression Model
## 
## lrm(formula = xlnhucau.1 ~ nganh + gioitinh + hoctap, data = tai)
## 
##                       Model Likelihood      Discrimination    Rank Discrim.    
##                             Ratio Test             Indexes          Indexes    
## Obs           362    LR chi2     14.90      R2       0.056    C       0.621    
##  kem           75    d.f.            6      R2(6,362)0.024    Dxy     0.243    
##  trungbinh    276    Pr(> chi2) 0.0211    R2(6,198.3)0.044    gamma   0.270    
##  tot           11                           Brier    0.159    tau-a   0.091    
## max |deriv| 1e-09                                                              
## 
##                    Coef    S.E.   Wald Z Pr(>|Z|)
## y>=trungbinh        0.8970 0.4945  1.81  0.0697  
## y>=tot             -4.1024 0.5912 -6.94  <0.0001 
## nganh=y da khoa    -0.0189 0.3407 -0.06  0.9557  
## nganh=rang ham mat  0.3164 0.4656  0.68  0.4967  
## gioitinh=nam        0.9283 0.2787  3.33  0.0009  
## hoctap=kha          0.1633 0.4240  0.39  0.7002  
## hoctap=trung binh   0.0940 0.4376  0.21  0.8299  
## hoctap=yeu/kem     -0.2727 0.6797 -0.40  0.6883

hết