Dataset –> Diabetes Health Indicator

Link Dataset –> https://www.kaggle.com/datasets/michealsamuelw/3-diabetes-012-health-indicators-brfss2015

Variabel

Diabetes_012 = Respon

HighBP = Prediktor

HighChol = Prediktor

CholCheck = Prediktor

BMI = Prediktor

GenHlth = Prediktor

HvyAlcoholConsump = Prediktor

Sex = Prediktor

Age = Prediktor

Ordinal

Library

library(ggcorrplot)
library(ordinal)
library(biotools)
library(glmnet)
library(MVN)
library(MASS)
library(ggplot2)

Membaca File

# Load the BRFSS 2015 diabetes indicators. read.csv() already defaults to a
# header row and a comma separator, so neither needs to be spelled out.
data <- read.csv("diabetes_012_health_indicators_BRFSS2015.csv")
head(data, n = 6L)
  Diabetes_012 HighBP HighChol CholCheck BMI Smoker Stroke HeartDiseaseorAttack
1            0      1        1         1  40      1      0                    0
2            0      0        0         0  25      1      0                    0
3            0      1        1         1  28      0      0                    0
4            0      1        0         1  27      0      0                    0
5            0      1        1         1  24      0      0                    0
6            0      1        1         1  25      1      0                    0
  PhysActivity Fruits Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost
1            0      0       1                 0             1           0
2            1      0       0                 0             0           1
3            0      1       0                 0             1           1
4            1      1       1                 0             1           0
5            1      1       1                 0             1           0
6            1      1       1                 0             1           0
  GenHlth MentHlth PhysHlth DiffWalk Sex Age Education Income
1       5       18       15        1   0   9         4      3
2       3        0        0        0   0   7         6      1
3       5       30       30        1   0   9         4      8
4       2        0        0        0   0  11         3      6
5       2        3        0        0   0  11         5      4
6       2        0        2        0   1  10         6      8

Preprocessing

# Predictor-only copy: drop the first column (the response, Diabetes_012),
# then count the missing values in each remaining column.
data2 <- data[-1L]
colSums(is.na(data2))
              HighBP             HighChol            CholCheck 
                   0                    0                    0 
                 BMI               Smoker               Stroke 
                   0                    0                    0 
HeartDiseaseorAttack         PhysActivity               Fruits 
                   0                    0                    0 
             Veggies    HvyAlcoholConsump        AnyHealthcare 
                   0                    0                    0 
         NoDocbcCost              GenHlth             MentHlth 
                   0                    0                    0 
            PhysHlth             DiffWalk                  Sex 
                   0                    0                    0 
                 Age            Education               Income 
                   0                    0                    0 
# Encode the response as an ordered factor with levels 0 < 1 < 2.
data$Diabetes_012 <- ordered(data$Diabetes_012, levels = 0:2)

Uji Multikolinearitas

# Pairwise correlation matrix of the predictors in data2 (cor() defaults to
# Pearson); printed in full below.
cor(data2)
                           HighBP    HighChol    CholCheck         BMI
HighBP                1.000000000  0.29819930  0.098508273  0.21374812
HighChol              0.298199295  1.00000000  0.085642228  0.10672208
CholCheck             0.098508273  0.08564223  1.000000000  0.03449509
BMI                   0.213748120  0.10672208  0.034495087  1.00000000
Smoker                0.096991467  0.09129936 -0.009928878  0.01380447
Stroke                0.129574913  0.09262007  0.024157667  0.02015266
HeartDiseaseorAttack  0.209361211  0.18076535  0.044205810  0.05290426
PhysActivity         -0.125266866 -0.07804619  0.004189617 -0.14729363
Fruits               -0.040554659 -0.04085908  0.023849406 -0.08751812
Veggies              -0.061266165 -0.03987361  0.006121010 -0.06227519
HvyAlcoholConsump    -0.003971574 -0.01154252 -0.023730091 -0.04873628
AnyHealthcare         0.038424769  0.04222986  0.117625625 -0.01847079
NoDocbcCost           0.017357984  0.01331016 -0.058255084  0.05820629
GenHlth               0.300529631  0.20842555  0.046588865  0.23918537
MentHlth              0.056455917  0.06206915 -0.008365598  0.08531016
PhysHlth              0.161211571  0.12175053  0.031774808  0.12114111
DiffWalk              0.223618466  0.14467154  0.040585057  0.19707776
Sex                   0.052206961  0.03120533 -0.022115036  0.04295030
Age                   0.344452330  0.27231823  0.090321114 -0.03661764
Education            -0.141357934 -0.07080189  0.001510491 -0.10393202
Income               -0.171234581 -0.08545931  0.014258747 -0.10006871
                           Smoker       Stroke HeartDiseaseorAttack
HighBP                0.096991467  0.129574913           0.20936121
HighChol              0.091299357  0.092620074           0.18076535
CholCheck            -0.009928878  0.024157667           0.04420581
BMI                   0.013804467  0.020152661           0.05290426
Smoker                1.000000000  0.061172675           0.11444122
Stroke                0.061172675  1.000000000           0.20300194
HeartDiseaseorAttack  0.114441218  0.203001940           1.00000000
PhysActivity         -0.087401163 -0.069151416          -0.08729899
Fruits               -0.077665839 -0.013389353          -0.01979035
Veggies              -0.030677710 -0.041124225          -0.03916741
HvyAlcoholConsump     0.101618687 -0.016950330          -0.02899052
AnyHealthcare        -0.023250803  0.008775925           0.01873419
NoDocbcCost           0.048945823  0.034804106           0.03099970
GenHlth               0.163143067  0.177942260           0.25838341
MentHlth              0.092196474  0.070171812           0.06462129
PhysHlth              0.116459714  0.148944169           0.18169754
DiffWalk              0.122463215  0.176566917           0.21270870
Sex                   0.093662361  0.002978288           0.08609551
Age                   0.120641084  0.126973699           0.22161763
Education            -0.161955255 -0.076008557          -0.09959992
Income               -0.123937229 -0.128598578          -0.14101123
                     PhysActivity      Fruits      Veggies HvyAlcoholConsump
HighBP               -0.125266866 -0.04055466 -0.061266165      -0.003971574
HighChol             -0.078046186 -0.04085908 -0.039873607      -0.011542519
CholCheck             0.004189617  0.02384941  0.006121010      -0.023730091
BMI                  -0.147293634 -0.08751812 -0.062275194      -0.048736275
Smoker               -0.087401163 -0.07766584 -0.030677710       0.101618687
Stroke               -0.069151416 -0.01338935 -0.041124225      -0.016950330
HeartDiseaseorAttack -0.087298987 -0.01979035 -0.039167409      -0.028990516
PhysActivity          1.000000000  0.14275586  0.153149570       0.012392236
Fruits                0.142755863  1.00000000  0.254342244      -0.035287733
Veggies               0.153149570  0.25434224  1.000000000       0.021064481
HvyAlcoholConsump     0.012392236 -0.03528773  0.021064481       1.000000000
AnyHealthcare         0.035504737  0.03154392  0.029583817      -0.010488085
NoDocbcCost          -0.061638387 -0.04424269 -0.032231705       0.004683595
GenHlth              -0.266185624 -0.10385417 -0.123066330      -0.036723570
MentHlth             -0.125587088 -0.06821738 -0.058883553       0.024715803
PhysHlth             -0.219229522 -0.04463332 -0.064290327      -0.026415474
DiffWalk             -0.253174007 -0.04835167 -0.080505717      -0.037668174
Sex                   0.032481686 -0.09117487 -0.064765156       0.005740219
Age                  -0.092510633  0.06454722 -0.009771198      -0.034577637
Education             0.199658057  0.11018710  0.154329262       0.023996867
Income                0.198539455  0.07992931  0.151086944       0.053618566
                     AnyHealthcare  NoDocbcCost      GenHlth     MentHlth
HighBP                 0.038424769  0.017357984  0.300529631  0.056455917
HighChol               0.042229862  0.013310163  0.208425550  0.062069154
CholCheck              0.117625625 -0.058255084  0.046588865 -0.008365598
BMI                   -0.018470787  0.058206290  0.239185373  0.085310159
Smoker                -0.023250803  0.048945823  0.163143067  0.092196474
Stroke                 0.008775925  0.034804106  0.177942260  0.070171812
HeartDiseaseorAttack   0.018734186  0.030999705  0.258383409  0.064621292
PhysActivity           0.035504737 -0.061638387 -0.266185624 -0.125587088
Fruits                 0.031543919 -0.044242689 -0.103854171 -0.068217375
Veggies                0.029583817 -0.032231705 -0.123066330 -0.058883553
HvyAlcoholConsump     -0.010488085  0.004683595 -0.036723570  0.024715803
AnyHealthcare          1.000000000 -0.232532105 -0.040817072 -0.052706597
NoDocbcCost           -0.232532105  1.000000000  0.166397186  0.192106853
GenHlth               -0.040817072  0.166397186  1.000000000  0.301674393
MentHlth              -0.052706597  0.192106853  0.301674393  1.000000000
PhysHlth              -0.008276167  0.148997564  0.524363644  0.353618868
DiffWalk               0.007074092  0.118446862  0.456919503  0.233688079
Sex                   -0.019405465 -0.044931366 -0.006091004 -0.080704863
Age                    0.138045679 -0.119777068  0.152449830 -0.092068024
Education              0.122514239 -0.100701002 -0.284911532 -0.101829695
Income                 0.157999279 -0.203182369 -0.370013734 -0.209806127
                         PhysHlth     DiffWalk          Sex          Age
HighBP                0.161211571  0.223618466  0.052206961  0.344452330
HighChol              0.121750528  0.144671538  0.031205330  0.272318226
CholCheck             0.031774808  0.040585057 -0.022115036  0.090321114
BMI                   0.121141107  0.197077760  0.042950303 -0.036617635
Smoker                0.116459714  0.122463215  0.093662361  0.120641084
Stroke                0.148944169  0.176566917  0.002978288  0.126973699
HeartDiseaseorAttack  0.181697536  0.212708695  0.086095508  0.221617632
PhysActivity         -0.219229522 -0.253174007  0.032481686 -0.092510633
Fruits               -0.044633325 -0.048351675 -0.091174865  0.064547217
Veggies              -0.064290327 -0.080505717 -0.064765156 -0.009771198
HvyAlcoholConsump    -0.026415474 -0.037668174  0.005740219 -0.034577637
AnyHealthcare        -0.008276167  0.007074092 -0.019405465  0.138045679
NoDocbcCost           0.148997564  0.118446862 -0.044931366 -0.119777068
GenHlth               0.524363644  0.456919503 -0.006091004  0.152449830
MentHlth              0.353618868  0.233688079 -0.080704863 -0.092068024
PhysHlth              1.000000000  0.478416619 -0.043136502  0.099129925
DiffWalk              0.478416619  1.000000000 -0.070298902  0.204450090
Sex                  -0.043136502 -0.070298902  1.000000000 -0.027340383
Age                   0.099129925  0.204450090 -0.027340383  1.000000000
Education            -0.155092517 -0.192642100  0.019479786 -0.101901070
Income               -0.266798962 -0.320124244  0.127141058 -0.127775278
                        Education      Income
HighBP               -0.141357934 -0.17123458
HighChol             -0.070801887 -0.08545931
CholCheck             0.001510491  0.01425875
BMI                  -0.103932022 -0.10006871
Smoker               -0.161955255 -0.12393723
Stroke               -0.076008557 -0.12859858
HeartDiseaseorAttack -0.099599915 -0.14101123
PhysActivity          0.199658057  0.19853946
Fruits                0.110187097  0.07992931
Veggies               0.154329262  0.15108694
HvyAlcoholConsump     0.023996867  0.05361857
AnyHealthcare         0.122514239  0.15799928
NoDocbcCost          -0.100701002 -0.20318237
GenHlth              -0.284911532 -0.37001373
MentHlth             -0.101829695 -0.20980613
PhysHlth             -0.155092517 -0.26679896
DiffWalk             -0.192642100 -0.32012424
Sex                   0.019479786  0.12714106
Age                  -0.101901070 -0.12777528
Education             1.000000000  0.44910642
Income                0.449106424  1.00000000
# Compute the predictor correlation matrix once and reuse it for both the
# heatmap and the CSV export instead of recomputing it for each call.
corr_matrix <- cor(data2)
ggcorrplot(corr_matrix)

write.csv(corr_matrix, "corr matrix.csv") # save to CSV so the matrix is easier to read

Distribusi Target

# Number of observations in each response class.
table(data$Diabetes_012)

     0      1      2 
213703   4631  35346 
# Share of observations in each response class.
prop.table(table(data$Diabetes_012))

         0          1          2 
0.84241170 0.01825528 0.13933302 

Feature Selection

# Multinomial lasso used as a screening step for the predictors.
# Design matrix: every column except the response, with the intercept column
# dropped.
x <- model.matrix(Diabetes_012 ~ ., data)[, -1]
y <- as.factor(data$Diabetes_012)
# cv.glmnet() assigns observations to folds at random; fixing the seed makes
# the chosen lambda.min, and therefore the coefficients printed for it,
# repeatable from run to run.
set.seed(2015)
cv <- cv.glmnet(x, y, family = "multinomial")
# Per-class coefficients at the lambda with the lowest cross-validated error;
# "." marks a coefficient shrunk to exactly zero.
coef(cv, s = "lambda.min")
$`0`
22 x 1 sparse Matrix of class "dgCMatrix"
                       lambda.min
(Intercept)           5.193120329
HighBP               -0.376904342
HighChol             -0.568022370
CholCheck            -0.827981393
BMI                  -0.050838688
Smoker                0.006120385
Stroke                .          
HeartDiseaseorAttack  .          
PhysActivity          0.015146047
Fruits                0.031375959
Veggies               0.034205991
HvyAlcoholConsump     0.166269615
AnyHealthcare         .          
NoDocbcCost          -0.027353855
GenHlth              -0.311896360
MentHlth              .          
PhysHlth              0.004528470
DiffWalk              .          
Sex                  -0.085396602
Age                  -0.118079568
Education             0.034806329
Income                0.054668059

$`1`
22 x 1 sparse Matrix of class "dgCMatrix"
                       lambda.min
(Intercept)          -2.551375247
HighBP                .          
HighChol              .          
CholCheck             .          
BMI                   .          
Smoker                .          
Stroke               -0.051129745
HeartDiseaseorAttack  .          
PhysActivity          .          
Fruits                .          
Veggies               .          
HvyAlcoholConsump     .          
AnyHealthcare        -0.033697970
NoDocbcCost           0.273324785
GenHlth               .          
MentHlth              0.004600636
PhysHlth              .          
DiffWalk              .          
Sex                   .          
Age                   .          
Education            -0.025447010
Income               -0.005532654

$`2`
22 x 1 sparse Matrix of class "dgCMatrix"
                       lambda.min
(Intercept)          -2.641745082
HighBP                0.387664939
HighChol              0.026457753
CholCheck             0.372767234
BMI                   0.012337291
Smoker                .          
Stroke                0.126643110
HeartDiseaseorAttack  0.221517489
PhysActivity         -0.032201846
Fruits               -0.012954387
Veggies               .          
HvyAlcoholConsump    -0.582318395
AnyHealthcare         0.068434585
NoDocbcCost           .          
GenHlth               0.228509564
MentHlth             -0.003073044
PhysHlth             -0.002275171
DiffWalk              0.118733066
Sex                   0.167645285
Age                   0.008395125
Education             .          
Income                .          
# Hand-picked predictor subset; the same eight variables are written out in
# the clm() formula of the ordinal model.
selected <- c(
  "HighBP", "HighChol", "CholCheck", "BMI",
  "GenHlth", "Age", "HvyAlcoholConsump", "Sex"
)

Ordinal Logistic Regression

# Fit a cumulative link model (ordinal logistic regression) of the ordered
# response Diabetes_012 on the eight selected predictors, then print the
# coefficient table and the threshold (cut-point) estimates.
model_final <- clm(
  Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + 
  GenHlth + Age + HvyAlcoholConsump + Sex,
  data = data
)
summary(model_final)
formula: 
Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
data:    data

 link  threshold nobs   logLik     AIC       niter max.grad cond.H 
 logit flexible  253680 -102252.30 204524.61 9(2)  9.21e-10 4.0e+05

Coefficients:
                    Estimate Std. Error z value Pr(>|z|)    
HighBP             0.7515114  0.0137236   54.76   <2e-16 ***
HighChol           0.6009777  0.0127221   47.24   <2e-16 ***
CholCheck          1.1505472  0.0613949   18.74   <2e-16 ***
BMI                0.0630349  0.0008571   73.55   <2e-16 ***
GenHlth            0.5703493  0.0059115   96.48   <2e-16 ***
Age                0.1382938  0.0024697   55.99   <2e-16 ***
HvyAlcoholConsump -0.7250897  0.0347425  -20.87   <2e-16 ***
Sex                0.2116692  0.0121134   17.47   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Threshold coefficients:
    Estimate Std. Error z value
0|1  8.28624    0.07284   113.8
1|2  8.46260    0.07295   116.0

Nominal and Scale Test

# Likelihood-ratio tests of nominal effects, one predictor at a time: each row
# compares model_final with a model that relaxes proportional odds for that term.
nominal_test(model_final)
Tests of nominal effects

formula: Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
                  Df  logLik    AIC    LRT  Pr(>Chi)    
<none>               -102252 204525                     
HighBP             1 -102214 204450 76.227 < 2.2e-16 ***
HighChol           1 -102252 204527  0.083 0.7734635    
CholCheck          1 -102248 204519  8.049 0.0045518 ** 
BMI                1 -102251 204525  1.802 0.1794971    
GenHlth            1 -102223 204468 59.013 1.566e-14 ***
Age                1 -102245 204512 14.278 0.0001577 ***
HvyAlcoholConsump  1 -102237 204495 31.468 2.028e-08 ***
Sex                1 -102240 204502 24.982 5.788e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio tests of scale effects, one predictor at a time.
scale_test(model_final)
Tests of scale effects

formula: Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
                  Df  logLik    AIC    LRT  Pr(>Chi)    
<none>               -102252 204525                     
HighBP             1 -102145 204311 215.37 < 2.2e-16 ***
HighChol           1 -102234 204490  37.03 1.162e-09 ***
CholCheck          1 -102246 204515  11.75 0.0006080 ***
BMI                1 -101877 203776 750.73 < 2.2e-16 ***
GenHlth            1 -101979 203979 547.20 < 2.2e-16 ***
Age                1 -102058 204138 388.76 < 2.2e-16 ***
HvyAlcoholConsump  1 -102252 204526   0.63 0.4280663    
Sex                1 -102245 204512  14.15 0.0001687 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Odds Ratio

# Odds ratios for the predictors only. coef(model_final) also contains the
# threshold (cut-point) parameters 0|1 and 1|2; exponentiating those does not
# give an odds ratio, so only the regression coefficients (beta) are used.
exp(model_final$beta)
           HighBP          HighChol         CholCheck               BMI 
        2.1202020         1.8239012         3.1599215         1.0650640 
          GenHlth               Age HvyAlcoholConsump               Sex 
        1.7688847         1.1483129         0.4842811         1.2357391 

Diskriminan

Read File

# Load the sample used for the discriminant analysis and keep the six
# predictor columns that the assumption checks are run on.
data_disk <- read.csv("sample_diabetes_100_each.csv")
fitur2 <- data_disk[c("HighBP", "HighChol", "BMI", "GenHlth", "Age", "Sex")]

Uji Homogenitas Matriks Kovarians

# Box's M test of equal covariance matrices of the six predictors across the
# Diabetes_012 groups.
boxM(fitur2, data_disk$Diabetes_012)

    Box's M-test for Homogeneity of Covariance Matrices

data:  fitur2
Chi-Sq (approx.) = 64.462, df = 42, p-value = 0.01448

Uji Multivariate Normal

# Multivariate and univariate normality checks on the six predictors, applied
# to the pooled sample (no grouping variable is passed). The result is not
# assigned, so the whole returned object is printed.
mvn(fitur2)
$multivariate_normality
           Test Statistic p.value     Method          MVN
1 Henze-Zirkler     4.019  <0.001 asymptotic ✗ Not normal

$univariate_normality
              Test Variable Statistic p.value    Normality
1 Anderson-Darling   HighBP    57.438  <0.001 ✗ Not normal
2 Anderson-Darling HighChol    56.243  <0.001 ✗ Not normal
3 Anderson-Darling      BMI     4.835  <0.001 ✗ Not normal
4 Anderson-Darling  GenHlth     9.754  <0.001 ✗ Not normal
5 Anderson-Darling      Age     3.562  <0.001 ✗ Not normal
6 Anderson-Darling      Sex    55.575  <0.001 ✗ Not normal

$descriptives
  Variable   n   Mean Std.Dev Median Min Max 25th  75th   Skew Kurtosis
1   HighBP 300  0.613   0.488      1   0   1    0  1.00 -0.465    1.217
2 HighChol 300  0.593   0.492      1   0   1    0  1.00 -0.380    1.144
3      BMI 300 30.947   7.408     30  13  77   26 34.25  1.621    8.970
4  GenHlth 300  2.927   1.131      3   1   5    2  4.00  0.019    2.269
5      Age 300  8.863   2.753      9   1  13    7 11.00 -0.572    2.995
6      Sex 300  0.420   0.494      0   0   1    0  1.00  0.324    1.105

$data
    HighBP HighChol BMI GenHlth Age Sex
1        1        1  25       5  10   1
2        0        0  24       1   5   0
3        1        0  28       2   9   0
4        0        0  25       2  10   0
5        0        1  27       3  12   1
6        1        1  39       3  10   0
7        0        0  24       2   2   1
8        1        1  28       3  13   0
9        0        0  19       2   6   0
10       0        0  25       1   6   0
11       1        1  31       2   7   1
12       1        0  27       3  12   0
13       1        1  33       2  11   0
14       0        0  77       3   3   0
15       1        1  22       3  13   0
16       1        1  31       2   7   1
17       0        1  28       1   7   0
18       0        1  29       2  13   1
19       1        0  24       2  13   1
20       0        0  27       1   7   1
21       1        1  22       2  12   1
22       0        0  24       3   4   0
23       0        0  20       2   8   0
24       0        0  22       1   2   0
25       1        1  32       3   7   1
26       0        1  36       3   3   0
27       1        1  26       1   7   0
28       1        1  28       2  12   0
29       1        0  25       1  11   0
30       0        0  26       1   8   1
31       0        0  13       4   9   1
32       1        0  32       2  10   0
33       1        0  25       3  13   1
34       1        1  28       2  13   0
35       0        0  23       2  11   1
36       1        0  29       2  10   0
37       0        0  19       1   4   0
38       1        0  24       2  12   1
39       0        0  23       1   5   1
40       1        0  30       3  10   0
41       0        0  26       2   9   1
42       1        1  27       2   9   0
43       1        1  30       4   7   1
44       0        1  32       1   9   0
45       1        1  39       4  10   0
46       0        0  24       3   8   1
47       0        0  20       2  10   0
48       0        0  23       5   2   0
49       0        0  19       1  10   0
50       0        0  25       1   8   1
51       1        1  27       3  10   0
52       1        1  30       2  13   0
53       0        0  35       2   6   0
54       0        0  35       2  10   0
55       0        1  24       3  12   1
56       0        1  23       1  10   0
57       1        1  23       2  13   1
58       0        0  25       1   8   0
59       1        1  30       2  13   1
60       0        0  26       2   5   1
61       0        0  23       2   9   0
62       1        1  26       4   9   1
63       0        0  24       2   7   0
64       0        1  35       3   5   0
65       0        0  34       2   3   0
66       0        0  20       1  10   0
67       0        0  31       1   6   0
68       0        0  27       3   5   0
69       1        0  31       4   8   0
70       0        0  22       5   1   1
71       0        1  19       4  13   0
72       0        1  27       1   8   1
73       0        0  31       2   7   0
74       1        1  31       2  10   0
75       0        0  20       1   9   0
76       0        1  33       3  10   1
77       0        0  29       1   4   1
78       1        1  30       3   8   0
79       1        1  25       3   9   0
80       0        0  29       2   9   1
81       1        1  25       2  10   0
82       1        1  21       5   4   0
83       0        0  31       3   2   1
84       1        1  30       4  12   1
85       0        0  41       3  12   0
86       1        0  32       1  11   0
87       0        1  27       2  10   0
88       1        1  39       3   9   1
89       0        0  31       3   8   1
90       1        1  25       4  11   0
91       1        0  27       3   9   1
92       1        0  26       2  10   1
93       0        0  27       2   8   0
94       0        0  39       3   6   0
95       0        0  29       1   2   1
96       0        1  23       4   4   0
97       0        0  26       2   9   0
98       0        0  43       2   9   0
99       0        1  25       1   6   0
100      0        0  22       1   9   0
101      1        1  34       2  13   0
102      1        0  36       2  11   0
103      1        0  36       4   8   0
104      0        0  34       2  10   1
105      0        0  32       4   9   0
106      0        0  36       3   6   0
107      1        1  23       3  12   1
108      1        1  26       2  10   1
109      0        1  23       3  12   0
110      1        1  23       3  11   1
111      0        0  24       1   7   1
112      1        1  44       2   7   0
113      1        0  27       3  12   0
114      1        1  28       3   8   1
115      1        1  45       4   9   1
116      1        1  36       4  10   0
117      1        0  28       4  13   0
118      1        1  39       3   6   1
119      0        1  29       3   8   1
120      0        1  50       2   9   0
121      0        1  28       3   7   1
122      0        1  30       3   7   0
123      1        1  31       3  12   0
124      0        0  32       4   5   0
125      1        0  38       3   9   1
126      0        1  30       3  11   0
127      1        1  37       4   8   0
128      1        0  32       4   8   1
129      0        1  24       3   7   0
130      1        1  30       4   7   1
131      1        1  36       4  10   0
132      0        1  42       4   5   0
133      1        1  39       2   1   0
134      1        1  31       3   9   1
135      1        0  31       4  10   0
136      0        0  50       3   6   0
137      1        1  32       4  10   1
138      0        1  31       4  10   1
139      1        1  34       3   7   0
140      1        1  33       4  13   0
141      0        0  33       2   9   1
142      1        0  39       5   6   1
143      1        1  24       5  10   1
144      0        1  31       4   7   1
145      1        1  35       5   7   1
146      1        1  40       3  10   1
147      0        1  33       2  10   1
148      0        0  41       2  10   1
149      1        1  31       5   8   0
150      0        1  30       4   6   1
151      1        1  28       3  11   1
152      1        0  21       4  12   0
153      0        0  29       3   5   1
154      1        0  22       1  13   0
155      1        1  35       3   1   1
156      0        0  31       3   1   1
157      1        1  27       5   7   0
158      0        0  32       1  10   0
159      1        0  35       4   5   0
160      1        1  29       4  13   1
161      0        0  29       3   7   0
162      0        0  39       3   4   0
163      1        1  36       2  12   0
164      1        0  37       3   8   1
165      0        1  29       4   9   1
166      1        1  33       4   9   1
167      0        1  30       3   6   0
168      0        1  25       3   7   0
169      1        1  21       2  10   1
170      0        1  24       3  13   1
171      1        0  39       3   8   0
172      0        1  33       3  10   1
173      1        1  27       3  10   1
174      0        1  24       1   8   0
175      0        0  24       3  13   0
176      0        1  30       3   7   1
177      1        0  39       5   8   1
178      1        0  30       2  13   1
179      1        1  37       3  12   0
180      0        1  32       2  10   0
181      0        0  38       3   7   0
182      0        0  38       2   5   0
183      1        1  33       2  10   1
184      1        0  27       2  11   0
185      1        1  46       5   6   1
186      1        0  44       4   6   1
187      1        1  33       4  11   0
188      1        1  27       2  13   1
189      1        0  25       3  13   1
190      1        1  29       3   5   1
191      1        1  43       5   8   0
192      1        0  30       3  12   1
193      1        1  38       4   9   1
194      1        1  34       3   5   0
195      0        0  24       3  13   0
196      0        1  24       1  11   0
197      1        1  34       4   9   0
198      1        1  33       3   7   0
199      1        1  34       4   9   0
200      1        1  34       4   7   0
201      1        1  23       3   9   0
202      1        1  49       4   6   0
203      1        1  26       5  13   0
204      1        1  28       2  13   1
205      1        0  32       4  11   0
206      0        1  31       3   7   1
207      1        1  31       4  11   0
208      0        1  30       3   8   1
209      1        0  32       2  12   0
210      1        0  27       5  12   0
211      0        1  35       3   5   0
212      1        0  43       3  10   0
213      1        1  37       3   5   1
214      1        1  33       2  12   0
215      0        0  24       3   9   0
216      1        0  25       2  13   0
217      1        1  43       5   9   1
218      1        0  25       3   7   1
219      1        1  36       3   8   0
220      1        1  36       3  10   1
221      1        1  35       5  10   1
222      1        1  45       4  10   0
223      1        1  28       5   7   1
224      1        1  33       4  10   0
225      1        1  34       4   8   0
226      1        1  27       4  10   0
227      1        0  35       5   9   1
228      0        1  36       3   5   1
229      1        0  23       5  11   0
230      0        1  39       4   4   0
231      1        1  35       4   9   0
232      0        1  36       3   9   0
233      1        1  65       4   7   0
234      1        1  35       3   9   0
235      1        1  35       4   8   0
236      1        1  55       3  10   0
237      1        1  28       3  10   0
238      1        0  26       3  10   1
239      1        1  33       4   9   1
240      1        1  47       2   9   0
241      1        1  24       4  12   0
242      1        0  52       4  10   1
243      0        0  30       4  10   0
244      1        1  32       3  10   0
245      1        1  37       2   5   0
246      1        1  34       4  10   1
247      1        1  41       5   6   1
248      1        1  34       5  11   0
249      1        0  34       4   9   0
250      1        1  39       4  11   0
251      0        1  28       4   8   0
252      1        0  26       1   9   1
253      1        1  32       1  10   1
254      1        1  26       2  13   0
255      1        0  33       4   8   0
256      1        1  26       3   9   1
257      1        0  26       3  12   0
258      1        1  26       2  10   1
259      0        0  35       3   7   1
260      1        1  33       5  11   1
261      1        0  32       2  11   1
262      1        1  30       4  12   0
263      1        1  24       3  10   1
264      1        1  24       3   8   1
265      0        1  25       3   9   0
266      1        1  27       1  11   1
267      1        1  50       5   6   0
268      1        0  43       3   5   0
269      0        0  27       1  11   1
270      1        1  32       4  11   0
271      1        0  29       3  11   0
272      1        1  27       4  11   0
273      1        0  25       3  12   0
274      0        0  29       4  13   0
275      0        1  24       1   9   1
276      1        1  27       5   8   1
277      1        1  24       5  12   1
278      1        0  27       2  12   1
279      0        1  27       1   6   0
280      1        1  31       4  13   0
281      1        1  30       4  11   0
282      1        1  28       3   9   0
283      1        1  27       2  11   0
284      1        1  27       3  11   1
285      1        1  32       4   5   1
286      1        1  33       3  12   1
287      0        1  29       3  10   1
288      1        1  33       2  10   0
289      1        0  33       2   9   1
290      0        1  32       4   7   0
291      1        0  37       2   9   1
292      1        0  52       4   5   1
293      1        0  27       3  12   1
294      1        1  39       3  10   0
295      0        1  38       5   8   0
296      1        1  44       2   9   0
297      1        1  36       2  13   0
298      1        1  32       4  10   0
299      1        1  26       4  11   1
300      0        1  26       3   6   1

$subset
NULL

$outlierMethod
[1] "none"

attr(,"class")
[1] "mvn"

Linear Discriminant Analysis

# Treat the response as a factor, fit the linear discriminant model on the
# six predictors, and print the fitted object.
data_disk[["Diabetes_012"]] <- as.factor(data_disk[["Diabetes_012"]])
lda_model <- lda(
  Diabetes_012 ~ HighBP + HighChol + BMI + GenHlth + Age + Sex,
  data = data_disk
)
print(lda_model)
Call:
lda(Diabetes_012 ~ HighBP + HighChol + BMI + GenHlth + Age + 
    Sex, data = data_disk)

Prior probabilities of groups:
        0         1         2 
0.3333333 0.3333333 0.3333333 

Group means:
  HighBP HighChol   BMI GenHlth  Age  Sex
0   0.41     0.42 27.79    2.31 8.40 0.37
1   0.62     0.64 32.36    3.16 8.75 0.48
2   0.81     0.72 32.69    3.31 9.44 0.41

Coefficients of linear discriminants:
                LD1         LD2
HighBP   0.49234725  1.64573028
HighChol 0.53916647 -0.01527861
BMI      0.07205744 -0.05240978
GenHlth  0.53188262 -0.28336149
Age      0.11740466  0.04197276
Sex      0.28806418 -1.10334856

Proportion of trace:
 LD1  LD2 
0.94 0.06 
# Score every observation on the two discriminant axes, store the scores next
# to the data, and plot LD1 against LD2 colored by response class.
lda_values <- predict(lda_model)
data_disk[["LD1"]] <- lda_values$x[, 1]
data_disk[["LD2"]] <- lda_values$x[, 2]
ggplot(data_disk, aes(x = LD1, y = LD2, color = Diabetes_012)) +
  geom_point(size = 3) +
  labs(
    x = "First Discriminant Function",
    y = "Second Discriminant Function",
    title = "Discriminant Function Plot"
  )

```