Dataset: Diabetes Health Indicators (BRFSS 2015)
Link Dataset: https://www.kaggle.com/datasets/michealsamuelw/3-diabetes-012-health-indicators-brfss2015
Diabetes_012 = Respon
HighBP = Prediktor
HighChol = Prediktor
CholCheck = Prediktor
BMI = Prediktor
GenHlth = Prediktor
HvyAlcoholConsump = Prediktor
Sex = Prediktor
Age = Prediktor
library(ggcorrplot)
library(ordinal)
library(biotools)
library(glmnet)
library(MVN)
library(MASS)
library(ggplot2)
# Load the full BRFSS 2015 table; read.csv() already assumes a header row and
# comma separators, so those arguments are left at their defaults.
data <- read.csv("diabetes_012_health_indicators_BRFSS2015.csv")
# Preview the first six rows.
head(data, n = 6L)
Diabetes_012 HighBP HighChol CholCheck BMI Smoker Stroke HeartDiseaseorAttack
1 0 1 1 1 40 1 0 0
2 0 0 0 0 25 1 0 0
3 0 1 1 1 28 0 0 0
4 0 1 0 1 27 0 0 0
5 0 1 1 1 24 0 0 0
6 0 1 1 1 25 1 0 0
PhysActivity Fruits Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost
1 0 0 1 0 1 0
2 1 0 0 0 0 1
3 0 1 0 0 1 1
4 1 1 1 0 1 0
5 1 1 1 0 1 0
6 1 1 1 0 1 0
GenHlth MentHlth PhysHlth DiffWalk Sex Age Education Income
1 5 18 15 1 0 9 4 3
2 3 0 0 0 0 7 6 1
3 5 30 30 1 0 9 4 8
4 2 0 0 0 0 11 3 6
5 2 3 0 0 0 11 5 4
6 2 0 2 0 1 10 6 8
# Predictor-only copy: remove the first column (the Diabetes_012 response).
data2 <- data[, -1L, drop = FALSE]
# Number of missing values in each predictor column.
missing_mask <- is.na(data2)
colSums(missing_mask)
HighBP HighChol CholCheck
0 0 0
BMI Smoker Stroke
0 0 0
HeartDiseaseorAttack PhysActivity Fruits
0 0 0
Veggies HvyAlcoholConsump AnyHealthcare
0 0 0
NoDocbcCost GenHlth MentHlth
0 0 0
PhysHlth DiffWalk Sex
0 0 0
Age Education Income
0 0 0
# Encode the response as an ordered factor with levels 0 < 1 < 2.
data$Diabetes_012 <- ordered(data$Diabetes_012, levels = c(0, 1, 2))
# Pairwise correlations between the predictors.
cor(x = data2)
HighBP HighChol CholCheck BMI
HighBP 1.000000000 0.29819930 0.098508273 0.21374812
HighChol 0.298199295 1.00000000 0.085642228 0.10672208
CholCheck 0.098508273 0.08564223 1.000000000 0.03449509
BMI 0.213748120 0.10672208 0.034495087 1.00000000
Smoker 0.096991467 0.09129936 -0.009928878 0.01380447
Stroke 0.129574913 0.09262007 0.024157667 0.02015266
HeartDiseaseorAttack 0.209361211 0.18076535 0.044205810 0.05290426
PhysActivity -0.125266866 -0.07804619 0.004189617 -0.14729363
Fruits -0.040554659 -0.04085908 0.023849406 -0.08751812
Veggies -0.061266165 -0.03987361 0.006121010 -0.06227519
HvyAlcoholConsump -0.003971574 -0.01154252 -0.023730091 -0.04873628
AnyHealthcare 0.038424769 0.04222986 0.117625625 -0.01847079
NoDocbcCost 0.017357984 0.01331016 -0.058255084 0.05820629
GenHlth 0.300529631 0.20842555 0.046588865 0.23918537
MentHlth 0.056455917 0.06206915 -0.008365598 0.08531016
PhysHlth 0.161211571 0.12175053 0.031774808 0.12114111
DiffWalk 0.223618466 0.14467154 0.040585057 0.19707776
Sex 0.052206961 0.03120533 -0.022115036 0.04295030
Age 0.344452330 0.27231823 0.090321114 -0.03661764
Education -0.141357934 -0.07080189 0.001510491 -0.10393202
Income -0.171234581 -0.08545931 0.014258747 -0.10006871
Smoker Stroke HeartDiseaseorAttack
HighBP 0.096991467 0.129574913 0.20936121
HighChol 0.091299357 0.092620074 0.18076535
CholCheck -0.009928878 0.024157667 0.04420581
BMI 0.013804467 0.020152661 0.05290426
Smoker 1.000000000 0.061172675 0.11444122
Stroke 0.061172675 1.000000000 0.20300194
HeartDiseaseorAttack 0.114441218 0.203001940 1.00000000
PhysActivity -0.087401163 -0.069151416 -0.08729899
Fruits -0.077665839 -0.013389353 -0.01979035
Veggies -0.030677710 -0.041124225 -0.03916741
HvyAlcoholConsump 0.101618687 -0.016950330 -0.02899052
AnyHealthcare -0.023250803 0.008775925 0.01873419
NoDocbcCost 0.048945823 0.034804106 0.03099970
GenHlth 0.163143067 0.177942260 0.25838341
MentHlth 0.092196474 0.070171812 0.06462129
PhysHlth 0.116459714 0.148944169 0.18169754
DiffWalk 0.122463215 0.176566917 0.21270870
Sex 0.093662361 0.002978288 0.08609551
Age 0.120641084 0.126973699 0.22161763
Education -0.161955255 -0.076008557 -0.09959992
Income -0.123937229 -0.128598578 -0.14101123
PhysActivity Fruits Veggies HvyAlcoholConsump
HighBP -0.125266866 -0.04055466 -0.061266165 -0.003971574
HighChol -0.078046186 -0.04085908 -0.039873607 -0.011542519
CholCheck 0.004189617 0.02384941 0.006121010 -0.023730091
BMI -0.147293634 -0.08751812 -0.062275194 -0.048736275
Smoker -0.087401163 -0.07766584 -0.030677710 0.101618687
Stroke -0.069151416 -0.01338935 -0.041124225 -0.016950330
HeartDiseaseorAttack -0.087298987 -0.01979035 -0.039167409 -0.028990516
PhysActivity 1.000000000 0.14275586 0.153149570 0.012392236
Fruits 0.142755863 1.00000000 0.254342244 -0.035287733
Veggies 0.153149570 0.25434224 1.000000000 0.021064481
HvyAlcoholConsump 0.012392236 -0.03528773 0.021064481 1.000000000
AnyHealthcare 0.035504737 0.03154392 0.029583817 -0.010488085
NoDocbcCost -0.061638387 -0.04424269 -0.032231705 0.004683595
GenHlth -0.266185624 -0.10385417 -0.123066330 -0.036723570
MentHlth -0.125587088 -0.06821738 -0.058883553 0.024715803
PhysHlth -0.219229522 -0.04463332 -0.064290327 -0.026415474
DiffWalk -0.253174007 -0.04835167 -0.080505717 -0.037668174
Sex 0.032481686 -0.09117487 -0.064765156 0.005740219
Age -0.092510633 0.06454722 -0.009771198 -0.034577637
Education 0.199658057 0.11018710 0.154329262 0.023996867
Income 0.198539455 0.07992931 0.151086944 0.053618566
AnyHealthcare NoDocbcCost GenHlth MentHlth
HighBP 0.038424769 0.017357984 0.300529631 0.056455917
HighChol 0.042229862 0.013310163 0.208425550 0.062069154
CholCheck 0.117625625 -0.058255084 0.046588865 -0.008365598
BMI -0.018470787 0.058206290 0.239185373 0.085310159
Smoker -0.023250803 0.048945823 0.163143067 0.092196474
Stroke 0.008775925 0.034804106 0.177942260 0.070171812
HeartDiseaseorAttack 0.018734186 0.030999705 0.258383409 0.064621292
PhysActivity 0.035504737 -0.061638387 -0.266185624 -0.125587088
Fruits 0.031543919 -0.044242689 -0.103854171 -0.068217375
Veggies 0.029583817 -0.032231705 -0.123066330 -0.058883553
HvyAlcoholConsump -0.010488085 0.004683595 -0.036723570 0.024715803
AnyHealthcare 1.000000000 -0.232532105 -0.040817072 -0.052706597
NoDocbcCost -0.232532105 1.000000000 0.166397186 0.192106853
GenHlth -0.040817072 0.166397186 1.000000000 0.301674393
MentHlth -0.052706597 0.192106853 0.301674393 1.000000000
PhysHlth -0.008276167 0.148997564 0.524363644 0.353618868
DiffWalk 0.007074092 0.118446862 0.456919503 0.233688079
Sex -0.019405465 -0.044931366 -0.006091004 -0.080704863
Age 0.138045679 -0.119777068 0.152449830 -0.092068024
Education 0.122514239 -0.100701002 -0.284911532 -0.101829695
Income 0.157999279 -0.203182369 -0.370013734 -0.209806127
PhysHlth DiffWalk Sex Age
HighBP 0.161211571 0.223618466 0.052206961 0.344452330
HighChol 0.121750528 0.144671538 0.031205330 0.272318226
CholCheck 0.031774808 0.040585057 -0.022115036 0.090321114
BMI 0.121141107 0.197077760 0.042950303 -0.036617635
Smoker 0.116459714 0.122463215 0.093662361 0.120641084
Stroke 0.148944169 0.176566917 0.002978288 0.126973699
HeartDiseaseorAttack 0.181697536 0.212708695 0.086095508 0.221617632
PhysActivity -0.219229522 -0.253174007 0.032481686 -0.092510633
Fruits -0.044633325 -0.048351675 -0.091174865 0.064547217
Veggies -0.064290327 -0.080505717 -0.064765156 -0.009771198
HvyAlcoholConsump -0.026415474 -0.037668174 0.005740219 -0.034577637
AnyHealthcare -0.008276167 0.007074092 -0.019405465 0.138045679
NoDocbcCost 0.148997564 0.118446862 -0.044931366 -0.119777068
GenHlth 0.524363644 0.456919503 -0.006091004 0.152449830
MentHlth 0.353618868 0.233688079 -0.080704863 -0.092068024
PhysHlth 1.000000000 0.478416619 -0.043136502 0.099129925
DiffWalk 0.478416619 1.000000000 -0.070298902 0.204450090
Sex -0.043136502 -0.070298902 1.000000000 -0.027340383
Age 0.099129925 0.204450090 -0.027340383 1.000000000
Education -0.155092517 -0.192642100 0.019479786 -0.101901070
Income -0.266798962 -0.320124244 0.127141058 -0.127775278
Education Income
HighBP -0.141357934 -0.17123458
HighChol -0.070801887 -0.08545931
CholCheck 0.001510491 0.01425875
BMI -0.103932022 -0.10006871
Smoker -0.161955255 -0.12393723
Stroke -0.076008557 -0.12859858
HeartDiseaseorAttack -0.099599915 -0.14101123
PhysActivity 0.199658057 0.19853946
Fruits 0.110187097 0.07992931
Veggies 0.154329262 0.15108694
HvyAlcoholConsump 0.023996867 0.05361857
AnyHealthcare 0.122514239 0.15799928
NoDocbcCost -0.100701002 -0.20318237
GenHlth -0.284911532 -0.37001373
MentHlth -0.101829695 -0.20980613
PhysHlth -0.155092517 -0.26679896
DiffWalk -0.192642100 -0.32012424
Sex 0.019479786 0.12714106
Age -0.101901070 -0.12777528
Education 1.000000000 0.44910642
Income 0.449106424 1.00000000
# Compute the predictor correlation matrix once and reuse it for the heatmap
# and the CSV export.
corr_matrix <- cor(data2)
ggcorrplot(corr_matrix)
# Save to CSV so the matrix is easier to read.
write.csv(corr_matrix, file = "corr matrix.csv")
# Class counts of the response.
table(data[["Diabetes_012"]])
0 1 2
213703 4631 35346
# Share of observations falling in each response class.
class_counts <- table(data$Diabetes_012)
class_counts / sum(class_counts)
0 1 2
0.84241170 0.01825528 0.13933302
# Multinomial LASSO used to screen the predictors.
# cv.glmnet() assigns observations to cross-validation folds at random, so the
# RNG is seeded first; without a seed, lambda.min and the set of non-zero
# coefficients can change from one run to the next.
set.seed(123)
# Design matrix built from every column except the response; the first column
# of model.matrix() output (the intercept) is dropped because glmnet adds its own.
x <- model.matrix(Diabetes_012 ~ ., data)[, -1]
y <- as.factor(data$Diabetes_012)
cv <- cv.glmnet(x, y, family = "multinomial")
# Per-class coefficients at the lambda with the lowest cross-validated error.
coef(cv, s = "lambda.min")
$`0`
22 x 1 sparse Matrix of class "dgCMatrix"
lambda.min
(Intercept) 5.193120329
HighBP -0.376904342
HighChol -0.568022370
CholCheck -0.827981393
BMI -0.050838688
Smoker 0.006120385
Stroke .
HeartDiseaseorAttack .
PhysActivity 0.015146047
Fruits 0.031375959
Veggies 0.034205991
HvyAlcoholConsump 0.166269615
AnyHealthcare .
NoDocbcCost -0.027353855
GenHlth -0.311896360
MentHlth .
PhysHlth 0.004528470
DiffWalk .
Sex -0.085396602
Age -0.118079568
Education 0.034806329
Income 0.054668059
$`1`
22 x 1 sparse Matrix of class "dgCMatrix"
lambda.min
(Intercept) -2.551375247
HighBP .
HighChol .
CholCheck .
BMI .
Smoker .
Stroke -0.051129745
HeartDiseaseorAttack .
PhysActivity .
Fruits .
Veggies .
HvyAlcoholConsump .
AnyHealthcare -0.033697970
NoDocbcCost 0.273324785
GenHlth .
MentHlth 0.004600636
PhysHlth .
DiffWalk .
Sex .
Age .
Education -0.025447010
Income -0.005532654
$`2`
22 x 1 sparse Matrix of class "dgCMatrix"
lambda.min
(Intercept) -2.641745082
HighBP 0.387664939
HighChol 0.026457753
CholCheck 0.372767234
BMI 0.012337291
Smoker .
Stroke 0.126643110
HeartDiseaseorAttack 0.221517489
PhysActivity -0.032201846
Fruits -0.012954387
Veggies .
HvyAlcoholConsump -0.582318395
AnyHealthcare 0.068434585
NoDocbcCost .
GenHlth 0.228509564
MentHlth -0.003073044
PhysHlth -0.002275171
DiffWalk 0.118733066
Sex 0.167645285
Age 0.008395125
Education .
Income .
# Names of the predictors carried into the ordinal model; they match the
# terms written out in the formula below.
selected <- c(
  "HighBP", "HighChol", "CholCheck", "BMI",
  "GenHlth", "Age", "HvyAlcoholConsump", "Sex"
)

# Cumulative link model for the ordered response Diabetes_012, fitted on the
# full data set with clm()'s default settings.
model_final <- clm(
  Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age +
    HvyAlcoholConsump + Sex,
  data = data
)
summary(model_final)
formula:
Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
data: data
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 253680 -102252.30 204524.61 9(2) 9.21e-10 4.0e+05
Coefficients:
Estimate Std. Error z value Pr(>|z|)
HighBP 0.7515114 0.0137236 54.76 <2e-16 ***
HighChol 0.6009777 0.0127221 47.24 <2e-16 ***
CholCheck 1.1505472 0.0613949 18.74 <2e-16 ***
BMI 0.0630349 0.0008571 73.55 <2e-16 ***
GenHlth 0.5703493 0.0059115 96.48 <2e-16 ***
Age 0.1382938 0.0024697 55.99 <2e-16 ***
HvyAlcoholConsump -0.7250897 0.0347425 -20.87 <2e-16 ***
Sex 0.2116692 0.0121134 17.47 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
0|1 8.28624 0.07284 113.8
1|2 8.46260 0.07295 116.0
# Likelihood-ratio tests of nominal effects, one predictor at a time.
nominal_test(object = model_final)
Tests of nominal effects
formula: Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
Df logLik AIC LRT Pr(>Chi)
<none> -102252 204525
HighBP 1 -102214 204450 76.227 < 2.2e-16 ***
HighChol 1 -102252 204527 0.083 0.7734635
CholCheck 1 -102248 204519 8.049 0.0045518 **
BMI 1 -102251 204525 1.802 0.1794971
GenHlth 1 -102223 204468 59.013 1.566e-14 ***
Age 1 -102245 204512 14.278 0.0001577 ***
HvyAlcoholConsump 1 -102237 204495 31.468 2.028e-08 ***
Sex 1 -102240 204502 24.982 5.788e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio tests of scale effects, one predictor at a time.
scale_test(object = model_final)
Tests of scale effects
formula: Diabetes_012 ~ HighBP + HighChol + CholCheck + BMI + GenHlth + Age + HvyAlcoholConsump + Sex
Df logLik AIC LRT Pr(>Chi)
<none> -102252 204525
HighBP 1 -102145 204311 215.37 < 2.2e-16 ***
HighChol 1 -102234 204490 37.03 1.162e-09 ***
CholCheck 1 -102246 204515 11.75 0.0006080 ***
BMI 1 -101877 203776 750.73 < 2.2e-16 ***
GenHlth 1 -101979 203979 547.20 < 2.2e-16 ***
Age 1 -102058 204138 388.76 < 2.2e-16 ***
HvyAlcoholConsump 1 -102252 204526 0.63 0.4280663
Sex 1 -102245 204512 14.15 0.0001687 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiate every fitted coefficient of the cumulative link model.
# NOTE(review): coef() also returns the 0|1 and 1|2 threshold terms, so only
# the predictor entries should be read as odds ratios -- confirm when reporting.
exp_coefs <- exp(coef(model_final))
exp_coefs
0|1 1|2 HighBP HighChol
3968.8946270 4734.3282212 2.1202020 1.8239012
CholCheck BMI GenHlth Age
3.1599215 1.0650640 1.7688847 1.1483129
HvyAlcoholConsump Sex
0.4842811 1.2357391
# Smaller sample used for the discriminant analysis; read.csv() defaults already
# cover the header row and comma separator.
data_disk <- read.csv("sample_diabetes_100_each.csv")
# Feature columns used for the assumption checks.
fitur2 <- data_disk[c("HighBP", "HighChol", "BMI", "GenHlth", "Age", "Sex")]
# Box's M test for equal covariance matrices across the response groups.
boxM(data = fitur2, grouping = data_disk$Diabetes_012)
Box's M-test for Homogeneity of Covariance Matrices
data: fitur2
Chi-Sq (approx.) = 64.462, df = 42, p-value = 0.01448
# Multivariate and univariate normality checks on the selected features.
mvn(data = fitur2)
$multivariate_normality
Test Statistic p.value Method MVN
1 Henze-Zirkler 4.019 <0.001 asymptotic ✗ Not normal
$univariate_normality
Test Variable Statistic p.value Normality
1 Anderson-Darling HighBP 57.438 <0.001 ✗ Not normal
2 Anderson-Darling HighChol 56.243 <0.001 ✗ Not normal
3 Anderson-Darling BMI 4.835 <0.001 ✗ Not normal
4 Anderson-Darling GenHlth 9.754 <0.001 ✗ Not normal
5 Anderson-Darling Age 3.562 <0.001 ✗ Not normal
6 Anderson-Darling Sex 55.575 <0.001 ✗ Not normal
$descriptives
Variable n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
1 HighBP 300 0.613 0.488 1 0 1 0 1.00 -0.465 1.217
2 HighChol 300 0.593 0.492 1 0 1 0 1.00 -0.380 1.144
3 BMI 300 30.947 7.408 30 13 77 26 34.25 1.621 8.970
4 GenHlth 300 2.927 1.131 3 1 5 2 4.00 0.019 2.269
5 Age 300 8.863 2.753 9 1 13 7 11.00 -0.572 2.995
6 Sex 300 0.420 0.494 0 0 1 0 1.00 0.324 1.105
$data
HighBP HighChol BMI GenHlth Age Sex
1 1 1 25 5 10 1
2 0 0 24 1 5 0
3 1 0 28 2 9 0
4 0 0 25 2 10 0
5 0 1 27 3 12 1
6 1 1 39 3 10 0
7 0 0 24 2 2 1
8 1 1 28 3 13 0
9 0 0 19 2 6 0
10 0 0 25 1 6 0
11 1 1 31 2 7 1
12 1 0 27 3 12 0
13 1 1 33 2 11 0
14 0 0 77 3 3 0
15 1 1 22 3 13 0
16 1 1 31 2 7 1
17 0 1 28 1 7 0
18 0 1 29 2 13 1
19 1 0 24 2 13 1
20 0 0 27 1 7 1
21 1 1 22 2 12 1
22 0 0 24 3 4 0
23 0 0 20 2 8 0
24 0 0 22 1 2 0
25 1 1 32 3 7 1
26 0 1 36 3 3 0
27 1 1 26 1 7 0
28 1 1 28 2 12 0
29 1 0 25 1 11 0
30 0 0 26 1 8 1
31 0 0 13 4 9 1
32 1 0 32 2 10 0
33 1 0 25 3 13 1
34 1 1 28 2 13 0
35 0 0 23 2 11 1
36 1 0 29 2 10 0
37 0 0 19 1 4 0
38 1 0 24 2 12 1
39 0 0 23 1 5 1
40 1 0 30 3 10 0
41 0 0 26 2 9 1
42 1 1 27 2 9 0
43 1 1 30 4 7 1
44 0 1 32 1 9 0
45 1 1 39 4 10 0
46 0 0 24 3 8 1
47 0 0 20 2 10 0
48 0 0 23 5 2 0
49 0 0 19 1 10 0
50 0 0 25 1 8 1
51 1 1 27 3 10 0
52 1 1 30 2 13 0
53 0 0 35 2 6 0
54 0 0 35 2 10 0
55 0 1 24 3 12 1
56 0 1 23 1 10 0
57 1 1 23 2 13 1
58 0 0 25 1 8 0
59 1 1 30 2 13 1
60 0 0 26 2 5 1
61 0 0 23 2 9 0
62 1 1 26 4 9 1
63 0 0 24 2 7 0
64 0 1 35 3 5 0
65 0 0 34 2 3 0
66 0 0 20 1 10 0
67 0 0 31 1 6 0
68 0 0 27 3 5 0
69 1 0 31 4 8 0
70 0 0 22 5 1 1
71 0 1 19 4 13 0
72 0 1 27 1 8 1
73 0 0 31 2 7 0
74 1 1 31 2 10 0
75 0 0 20 1 9 0
76 0 1 33 3 10 1
77 0 0 29 1 4 1
78 1 1 30 3 8 0
79 1 1 25 3 9 0
80 0 0 29 2 9 1
81 1 1 25 2 10 0
82 1 1 21 5 4 0
83 0 0 31 3 2 1
84 1 1 30 4 12 1
85 0 0 41 3 12 0
86 1 0 32 1 11 0
87 0 1 27 2 10 0
88 1 1 39 3 9 1
89 0 0 31 3 8 1
90 1 1 25 4 11 0
91 1 0 27 3 9 1
92 1 0 26 2 10 1
93 0 0 27 2 8 0
94 0 0 39 3 6 0
95 0 0 29 1 2 1
96 0 1 23 4 4 0
97 0 0 26 2 9 0
98 0 0 43 2 9 0
99 0 1 25 1 6 0
100 0 0 22 1 9 0
101 1 1 34 2 13 0
102 1 0 36 2 11 0
103 1 0 36 4 8 0
104 0 0 34 2 10 1
105 0 0 32 4 9 0
106 0 0 36 3 6 0
107 1 1 23 3 12 1
108 1 1 26 2 10 1
109 0 1 23 3 12 0
110 1 1 23 3 11 1
111 0 0 24 1 7 1
112 1 1 44 2 7 0
113 1 0 27 3 12 0
114 1 1 28 3 8 1
115 1 1 45 4 9 1
116 1 1 36 4 10 0
117 1 0 28 4 13 0
118 1 1 39 3 6 1
119 0 1 29 3 8 1
120 0 1 50 2 9 0
121 0 1 28 3 7 1
122 0 1 30 3 7 0
123 1 1 31 3 12 0
124 0 0 32 4 5 0
125 1 0 38 3 9 1
126 0 1 30 3 11 0
127 1 1 37 4 8 0
128 1 0 32 4 8 1
129 0 1 24 3 7 0
130 1 1 30 4 7 1
131 1 1 36 4 10 0
132 0 1 42 4 5 0
133 1 1 39 2 1 0
134 1 1 31 3 9 1
135 1 0 31 4 10 0
136 0 0 50 3 6 0
137 1 1 32 4 10 1
138 0 1 31 4 10 1
139 1 1 34 3 7 0
140 1 1 33 4 13 0
141 0 0 33 2 9 1
142 1 0 39 5 6 1
143 1 1 24 5 10 1
144 0 1 31 4 7 1
145 1 1 35 5 7 1
146 1 1 40 3 10 1
147 0 1 33 2 10 1
148 0 0 41 2 10 1
149 1 1 31 5 8 0
150 0 1 30 4 6 1
151 1 1 28 3 11 1
152 1 0 21 4 12 0
153 0 0 29 3 5 1
154 1 0 22 1 13 0
155 1 1 35 3 1 1
156 0 0 31 3 1 1
157 1 1 27 5 7 0
158 0 0 32 1 10 0
159 1 0 35 4 5 0
160 1 1 29 4 13 1
161 0 0 29 3 7 0
162 0 0 39 3 4 0
163 1 1 36 2 12 0
164 1 0 37 3 8 1
165 0 1 29 4 9 1
166 1 1 33 4 9 1
167 0 1 30 3 6 0
168 0 1 25 3 7 0
169 1 1 21 2 10 1
170 0 1 24 3 13 1
171 1 0 39 3 8 0
172 0 1 33 3 10 1
173 1 1 27 3 10 1
174 0 1 24 1 8 0
175 0 0 24 3 13 0
176 0 1 30 3 7 1
177 1 0 39 5 8 1
178 1 0 30 2 13 1
179 1 1 37 3 12 0
180 0 1 32 2 10 0
181 0 0 38 3 7 0
182 0 0 38 2 5 0
183 1 1 33 2 10 1
184 1 0 27 2 11 0
185 1 1 46 5 6 1
186 1 0 44 4 6 1
187 1 1 33 4 11 0
188 1 1 27 2 13 1
189 1 0 25 3 13 1
190 1 1 29 3 5 1
191 1 1 43 5 8 0
192 1 0 30 3 12 1
193 1 1 38 4 9 1
194 1 1 34 3 5 0
195 0 0 24 3 13 0
196 0 1 24 1 11 0
197 1 1 34 4 9 0
198 1 1 33 3 7 0
199 1 1 34 4 9 0
200 1 1 34 4 7 0
201 1 1 23 3 9 0
202 1 1 49 4 6 0
203 1 1 26 5 13 0
204 1 1 28 2 13 1
205 1 0 32 4 11 0
206 0 1 31 3 7 1
207 1 1 31 4 11 0
208 0 1 30 3 8 1
209 1 0 32 2 12 0
210 1 0 27 5 12 0
211 0 1 35 3 5 0
212 1 0 43 3 10 0
213 1 1 37 3 5 1
214 1 1 33 2 12 0
215 0 0 24 3 9 0
216 1 0 25 2 13 0
217 1 1 43 5 9 1
218 1 0 25 3 7 1
219 1 1 36 3 8 0
220 1 1 36 3 10 1
221 1 1 35 5 10 1
222 1 1 45 4 10 0
223 1 1 28 5 7 1
224 1 1 33 4 10 0
225 1 1 34 4 8 0
226 1 1 27 4 10 0
227 1 0 35 5 9 1
228 0 1 36 3 5 1
229 1 0 23 5 11 0
230 0 1 39 4 4 0
231 1 1 35 4 9 0
232 0 1 36 3 9 0
233 1 1 65 4 7 0
234 1 1 35 3 9 0
235 1 1 35 4 8 0
236 1 1 55 3 10 0
237 1 1 28 3 10 0
238 1 0 26 3 10 1
239 1 1 33 4 9 1
240 1 1 47 2 9 0
241 1 1 24 4 12 0
242 1 0 52 4 10 1
243 0 0 30 4 10 0
244 1 1 32 3 10 0
245 1 1 37 2 5 0
246 1 1 34 4 10 1
247 1 1 41 5 6 1
248 1 1 34 5 11 0
249 1 0 34 4 9 0
250 1 1 39 4 11 0
251 0 1 28 4 8 0
252 1 0 26 1 9 1
253 1 1 32 1 10 1
254 1 1 26 2 13 0
255 1 0 33 4 8 0
256 1 1 26 3 9 1
257 1 0 26 3 12 0
258 1 1 26 2 10 1
259 0 0 35 3 7 1
260 1 1 33 5 11 1
261 1 0 32 2 11 1
262 1 1 30 4 12 0
263 1 1 24 3 10 1
264 1 1 24 3 8 1
265 0 1 25 3 9 0
266 1 1 27 1 11 1
267 1 1 50 5 6 0
268 1 0 43 3 5 0
269 0 0 27 1 11 1
270 1 1 32 4 11 0
271 1 0 29 3 11 0
272 1 1 27 4 11 0
273 1 0 25 3 12 0
274 0 0 29 4 13 0
275 0 1 24 1 9 1
276 1 1 27 5 8 1
277 1 1 24 5 12 1
278 1 0 27 2 12 1
279 0 1 27 1 6 0
280 1 1 31 4 13 0
281 1 1 30 4 11 0
282 1 1 28 3 9 0
283 1 1 27 2 11 0
284 1 1 27 3 11 1
285 1 1 32 4 5 1
286 1 1 33 3 12 1
287 0 1 29 3 10 1
288 1 1 33 2 10 0
289 1 0 33 2 9 1
290 0 1 32 4 7 0
291 1 0 37 2 9 1
292 1 0 52 4 5 1
293 1 0 27 3 12 1
294 1 1 39 3 10 0
295 0 1 38 5 8 0
296 1 1 44 2 9 0
297 1 1 36 2 13 0
298 1 1 32 4 10 0
299 1 1 26 4 11 1
300 0 1 26 3 6 1
$subset
NULL
$outlierMethod
[1] "none"
attr(,"class")
[1] "mvn"
# The grouping variable must be categorical for lda().
data_disk$Diabetes_012 <- factor(data_disk$Diabetes_012)
# Linear discriminant analysis on the six selected features.
lda_model <- lda(
  Diabetes_012 ~ HighBP + HighChol + BMI + GenHlth + Age + Sex,
  data = data_disk
)
print(lda_model)
Call:
lda(Diabetes_012 ~ HighBP + HighChol + BMI + GenHlth + Age +
Sex, data = data_disk)
Prior probabilities of groups:
0 1 2
0.3333333 0.3333333 0.3333333
Group means:
HighBP HighChol BMI GenHlth Age Sex
0 0.41 0.42 27.79 2.31 8.40 0.37
1 0.62 0.64 32.36 3.16 8.75 0.48
2 0.81 0.72 32.69 3.31 9.44 0.41
Coefficients of linear discriminants:
LD1 LD2
HighBP 0.49234725 1.64573028
HighChol 0.53916647 -0.01527861
BMI 0.07205744 -0.05240978
GenHlth 0.53188262 -0.28336149
Age 0.11740466 0.04197276
Sex 0.28806418 -1.10334856
Proportion of trace:
LD1 LD2
0.94 0.06
# Discriminant scores for the observations the model was fitted on.
lda_values <- predict(lda_model)
lda_scores <- lda_values$x
data_disk[["LD1"]] <- lda_scores[, 1]
data_disk[["LD2"]] <- lda_scores[, 2]

# Scatter plot of the two discriminant functions, coloured by response class.
ggplot(data = data_disk,
       mapping = aes(x = LD1, y = LD2, color = Diabetes_012)) +
  geom_point(size = 3) +
  labs(
    title = "Discriminant Function Plot",
    x = "First Discriminant Function",
    y = "Second Discriminant Function"
  )
```