Analisis multivariat merupakan metode statistik yang digunakan untuk menganalisis hubungan beberapa variabel secara bersamaan. Dalam penelitian ini digunakan metode Analisis Diskriminan dan Regresi Logistik Ordinal untuk menganalisis tingkat performa fisik berdasarkan beberapa variabel kesehatan dan kebugaran tubuh.
Dataset yang digunakan merupakan data Body Performance yang berisi informasi mengenai usia, tinggi badan, berat badan, body fat, tekanan darah, grip force, sit-up, broad jump, dan kategori performa fisik.
Analisis diskriminan digunakan untuk mengklasifikasikan individu ke dalam kelompok Fit dan Unfit, sedangkan regresi logistik ordinal digunakan untuk memprediksi tingkatan performa fisik yang bersifat ordinal yaitu class A, B, C, dan D.
library(readr) # membaca data
library(dplyr) # manipulasi data
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # visualisasi data
library(MASS) # analisis diskriminan dan regresi ordinal
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(caret) # evaluasi model
## Loading required package: lattice
library(biotools) # uji Box's M
## ---
## biotools version 4.3
library(corrplot) # visualisasi korelasi
## corrplot 0.95 loaded
data <- read.csv("bodyPerformance.csv")
head(data)
## age gender height_cm weight_kg body.fat_. diastolic systolic gripForce
## 1 27 M 172.3 75.24 21.3 80 130 54.9
## 2 25 M 165.0 55.80 15.7 77 126 36.4
## 3 31 M 179.6 78.00 20.1 92 152 44.8
## 4 32 M 174.5 71.10 18.4 76 147 41.4
## 5 28 M 173.8 67.70 17.1 70 127 43.5
## 6 36 F 165.4 55.40 22.0 64 119 23.8
## sit.and.bend.forward_cm sit.ups.counts broad.jump_cm class
## 1 18.4 60 217 C
## 2 16.3 53 229 A
## 3 12.0 49 181 C
## 4 15.2 53 219 B
## 5 27.1 45 217 B
## 6 21.0 27 153 B
Digunakan untuk melihat tipe data masing-masing variabel
str(data)
## 'data.frame': 13393 obs. of 12 variables:
## $ age : num 27 25 31 32 28 36 42 33 54 28 ...
## $ gender : chr "M" "M" "M" "M" ...
## $ height_cm : num 172 165 180 174 174 ...
## $ weight_kg : num 75.2 55.8 78 71.1 67.7 ...
## $ body.fat_. : num 21.3 15.7 20.1 18.4 17.1 22 32.2 36.9 27.6 14.4 ...
## $ diastolic : num 80 77 92 76 70 64 72 84 85 81 ...
## $ systolic : num 130 126 152 147 127 119 135 137 165 156 ...
## $ gripForce : num 54.9 36.4 44.8 41.4 43.5 23.8 22.7 45.9 40.4 57.9 ...
## $ sit.and.bend.forward_cm: num 18.4 16.3 12 15.2 27.1 21 0.8 12.3 18.6 12.1 ...
## $ sit.ups.counts : num 60 53 49 53 45 27 18 42 34 55 ...
## $ broad.jump_cm : num 217 229 181 219 217 153 146 234 148 213 ...
## $ class : chr "C" "A" "C" "B" ...
summary(data)
## age gender height_cm weight_kg
## Min. :21.00 Length:13393 Min. :125.0 Min. : 26.30
## 1st Qu.:25.00 Class :character 1st Qu.:162.4 1st Qu.: 58.20
## Median :32.00 Mode :character Median :169.2 Median : 67.40
## Mean :36.78 Mean :168.6 Mean : 67.45
## 3rd Qu.:48.00 3rd Qu.:174.8 3rd Qu.: 75.30
## Max. :64.00 Max. :193.8 Max. :138.10
## body.fat_. diastolic systolic gripForce
## Min. : 3.00 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.:18.00 1st Qu.: 71.0 1st Qu.:120.0 1st Qu.:27.50
## Median :22.80 Median : 79.0 Median :130.0 Median :37.90
## Mean :23.24 Mean : 78.8 Mean :130.2 Mean :36.96
## 3rd Qu.:28.00 3rd Qu.: 86.0 3rd Qu.:141.0 3rd Qu.:45.20
## Max. :78.40 Max. :156.2 Max. :201.0 Max. :70.50
## sit.and.bend.forward_cm sit.ups.counts broad.jump_cm class
## Min. :-25.00 Min. : 0.00 Min. : 0.0 Length:13393
## 1st Qu.: 10.90 1st Qu.:30.00 1st Qu.:162.0 Class :character
## Median : 16.20 Median :41.00 Median :193.0 Mode :character
## Mean : 15.21 Mean :39.77 Mean :190.1
## 3rd Qu.: 20.70 3rd Qu.:50.00 3rd Qu.:221.0
## Max. :213.00 Max. :80.00 Max. :303.0
Tahap preprocessing dilakukan untuk mempersiapkan data sebelum proses analisis ### Mengecek Missing Value
colSums(is.na(data))
## age gender height_cm
## 0 0 0
## weight_kg body.fat_. diastolic
## 0 0 0
## systolic gripForce sit.and.bend.forward_cm
## 0 0 0
## sit.ups.counts broad.jump_cm class
## 0 0 0
Nama variabel diubah agar lebih mudah digunakan dalam analisis.
colnames(data) <- c(
"age",
"gender",
"height_cm",
"weight_kg",
"body_fat",
"diastolic",
"systolic",
"gripForce",
"sit_bend",
"sit_ups",
"broad_jump",
"class")
Variabel kategorik diubah menjadi factor.
# Variabel Gender
data$gender <- as.factor(data$gender)
# Variabel Class
data$class <- as.factor(data$class)
Kategori A dan B dimasukkan ke kelompok Fit, sedangkan C dan D dimasukkan ke kelompok Unfit
data$Fitness_Group <- ifelse(
data$class %in% c("A", "B"),
"Fit",
"Unfit"
)
Variabel class diubah menjadi ordinal karena memiliki tingkatan.
data$Fitness_Group <- as.factor(
data$Fitness_Group
)
data$class <- ordered(
data$class,
levels = c("D", "C", "B", "A")
)
Digunakan untuk melihat jumlah data pada setiap kategori.
table(data$class)
##
## D C B A
## 3349 3349 3347 3348
prop.table(
table(data$class)
) * 100
##
## D C B A
## 25.00560 25.00560 24.99067 24.99813
Histogram digunakan untuk melihat distribusi umur
hist(
data$age,
main = "Distribusi Umur",
xlab = "Umur",
col = "lightblue"
)
### Histogram Body Fat
hist(
data$body_fat,
main = "Distribusi Body Fat",
xlab = "Body Fat (%)",
col = "lightgreen"
)
### Distribusi Gender
barplot(
table(data$gender),
main = "Distribusi Gender",
col = "orange"
)
### Boxplot Grip Force terhadap class Digunakan untuk melihat perbedaan
grip force antar class
boxplot(
gripForce ~ class,
data = data,
main = "Grip Force berdasarkan Class",
xlab = "Class",
ylab = "Grip Force",
col = "cyan"
)
### Boxplot Body Fat terhadap class
boxplot(
body_fat ~ class,
data = data,
main = "Body Fat berdasarkan Class",
xlab = "Class",
ylab = "Body Fat",
col = "yellow"
)
### Scatter Plot Grip Force dan Sit-Up Digunakan untuk melihat hubungan
antara grip force dan sit-up.
plot(
data$gripForce,
data$sit_ups,
main = "Grip Force vs Sit-Up",
xlab = "Grip Force",
ylab = "Sit-Up",
col = "blue",
pch = 16
)
numeric_data <- data[, c(
"age",
"height_cm",
"weight_kg",
"body_fat",
"diastolic",
"systolic",
"gripForce",
"sit_bend",
"sit_ups",
"broad_jump"
)]
Digunakan untuk melihat hubungan antar variabel numerik.
cor(numeric_data)
## age height_cm weight_kg body_fat diastolic
## age 1.00000000 -0.2939805 -0.09996575 0.24230221 0.15850789
## height_cm -0.29398048 1.0000000 0.73490928 -0.51544035 0.14593270
## weight_kg -0.09996575 0.7349093 1.00000000 -0.08406528 0.26231672
## body_fat 0.24230221 -0.5154404 -0.08406528 1.00000000 0.04805944
## diastolic 0.15850789 0.1459327 0.26231672 0.04805944 1.00000000
## systolic 0.21116733 0.2101855 0.33894315 -0.03037637 0.67630860
## gripForce -0.17958278 0.7350238 0.70011890 -0.54178817 0.20206216
## sit_bend -0.07003333 -0.2219699 -0.29624858 -0.07122516 -0.07209778
## sit_ups -0.54458109 0.5004235 0.29489883 -0.60891188 0.01654669
## broad_jump -0.43517159 0.6745891 0.47956401 -0.67327318 0.09724306
## systolic gripForce sit_bend sit_ups broad_jump
## age 0.21116733 -0.1795828 -0.07003333 -0.54458109 -0.43517159
## height_cm 0.21018555 0.7350238 -0.22196991 0.50042351 0.67458913
## weight_kg 0.33894315 0.7001189 -0.29624858 0.29489883 0.47956401
## body_fat -0.03037637 -0.5417882 -0.07122516 -0.60891188 -0.67327318
## diastolic 0.67630860 0.2020622 -0.07209778 0.01654669 0.09724306
## systolic 1.00000000 0.2860123 -0.08243446 0.05627629 0.15289398
## gripForce 0.28601233 1.0000000 -0.11257669 0.57666946 0.74685329
## sit_bend -0.08243446 -0.1125767 1.00000000 0.17715333 0.02648747
## sit_ups 0.05627629 0.5766695 0.17715333 1.00000000 0.74827281
## broad_jump 0.15289398 0.7468533 0.02648747 0.74827281 1.00000000
corrplot(
cor(numeric_data),
method = "color"
)
## Uji Asumsi Analisis Diskriminan
set.seed(123)
sample_bodyfat <- sample(
data$body_fat,
5000
)
shapiro.test(sample_bodyfat)
##
## Shapiro-Wilk normality test
##
## data: sample_bodyfat
## W = 0.99197, p-value = 3.223e-16
sample_grip <- sample(
data$gripForce,
5000 )
shapiro.test(sample_grip)
##
## Shapiro-Wilk normality test
##
## data: sample_grip
## W = 0.97864, p-value < 2.2e-16
sample_situp <- sample(
data$sit_ups,
5000 )
shapiro.test(sample_situp)
##
## Shapiro-Wilk normality test
##
## data: sample_situp
## W = 0.98119, p-value < 2.2e-16
hist(data$body_fat)
qqnorm(data$body_fat)
qqline(data$body_fat)
### Uji Homogenitas Kovarian
boxM_result <- boxM(
numeric_data,
data$Fitness_Group
)
boxM_result
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: numeric_data
## Chi-Sq (approx.) = 3772.1, df = 55, p-value < 2.2e-16
cor(numeric_data)
## age height_cm weight_kg body_fat diastolic
## age 1.00000000 -0.2939805 -0.09996575 0.24230221 0.15850789
## height_cm -0.29398048 1.0000000 0.73490928 -0.51544035 0.14593270
## weight_kg -0.09996575 0.7349093 1.00000000 -0.08406528 0.26231672
## body_fat 0.24230221 -0.5154404 -0.08406528 1.00000000 0.04805944
## diastolic 0.15850789 0.1459327 0.26231672 0.04805944 1.00000000
## systolic 0.21116733 0.2101855 0.33894315 -0.03037637 0.67630860
## gripForce -0.17958278 0.7350238 0.70011890 -0.54178817 0.20206216
## sit_bend -0.07003333 -0.2219699 -0.29624858 -0.07122516 -0.07209778
## sit_ups -0.54458109 0.5004235 0.29489883 -0.60891188 0.01654669
## broad_jump -0.43517159 0.6745891 0.47956401 -0.67327318 0.09724306
## systolic gripForce sit_bend sit_ups broad_jump
## age 0.21116733 -0.1795828 -0.07003333 -0.54458109 -0.43517159
## height_cm 0.21018555 0.7350238 -0.22196991 0.50042351 0.67458913
## weight_kg 0.33894315 0.7001189 -0.29624858 0.29489883 0.47956401
## body_fat -0.03037637 -0.5417882 -0.07122516 -0.60891188 -0.67327318
## diastolic 0.67630860 0.2020622 -0.07209778 0.01654669 0.09724306
## systolic 1.00000000 0.2860123 -0.08243446 0.05627629 0.15289398
## gripForce 0.28601233 1.0000000 -0.11257669 0.57666946 0.74685329
## sit_bend -0.08243446 -0.1125767 1.00000000 0.17715333 0.02648747
## sit_ups 0.05627629 0.5766695 0.17715333 1.00000000 0.74827281
## broad_jump 0.15289398 0.7468533 0.02648747 0.74827281 1.00000000
lda_model <- lda(
Fitness_Group ~ age +
height_cm +
weight_kg +
body_fat +
gripForce +
sit_ups +
broad_jump,
data = data
)
lda_model
## Call:
## lda(Fitness_Group ~ age + height_cm + weight_kg + body_fat +
## gripForce + sit_ups + broad_jump, data = data)
##
## Prior probabilities of groups:
## Fit Unfit
## 0.499888 0.500112
##
## Group means:
## age height_cm weight_kg body_fat gripForce sit_ups broad_jump
## Fit 36.17102 168.2256 65.51514 21.28946 38.26313 45.24412 199.0388
## Unfit 37.37892 168.8938 69.37863 25.19000 35.66520 34.30078 181.2244
##
## Coefficients of linear discriminants:
## LD1
## age -0.039422165
## height_cm 0.036082391
## weight_kg 0.063036667
## body_fat -0.018080027
## gripForce -0.028589789
## sit_ups -0.087885374
## broad_jump -0.005381281
lda_pred <- predict(lda_model)
table(
Prediksi = lda_pred$class,
Aktual = data$Fitness_Group
)
## Aktual
## Prediksi Fit Unfit
## Fit 5229 2000
## Unfit 1466 4698
confusionMatrix(
lda_pred$class,
data$Fitness_Group
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Fit Unfit
## Fit 5229 2000
## Unfit 1466 4698
##
## Accuracy : 0.7412
## 95% CI : (0.7337, 0.7486)
## No Information Rate : 0.5001
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4824
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7810
## Specificity : 0.7014
## Pos Pred Value : 0.7233
## Neg Pred Value : 0.7622
## Prevalence : 0.4999
## Detection Rate : 0.3904
## Detection Prevalence : 0.5398
## Balanced Accuracy : 0.7412
##
## 'Positive' Class : Fit
##
ordinal_model <- polr(
class ~ age +
body_fat +
gripForce +
sit_ups +
broad_jump,
data = data,
Hess = TRUE
)
summary(ordinal_model)
## Call:
## polr(formula = class ~ age + body_fat + gripForce + sit_ups +
## broad_jump, data = data, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## age 0.050710 0.0015939 31.814
## body_fat -0.053561 0.0032876 -16.292
## gripForce -0.061498 0.0025100 -24.501
## sit_ups 0.115766 0.0022472 51.516
## broad_jump -0.002727 0.0008319 -3.279
##
## Intercepts:
## Value Std. Error t value
## D|C 1.0215 0.2020 5.0560
## C|B 2.4806 0.2029 12.2278
## B|A 3.8613 0.2046 18.8690
##
## Residual Deviance: 32083.02
## AIC: 32099.02
ctable <- coef(summary(ordinal_model))
p_value <- pnorm(
abs(ctable[, "t value"]),
lower.tail = FALSE
) * 2
(ctable <- cbind(
ctable,
"p value" = p_value
))
## Value Std. Error t value p value
## age 0.050709849 0.0015939272 31.814407 4.091593e-222
## body_fat -0.053561293 0.0032875939 -16.291943 1.125943e-59
## gripForce -0.061498367 0.0025100274 -24.501073 1.438735e-132
## sit_ups 0.115765884 0.0022471755 51.516174 0.000000e+00
## broad_jump -0.002727386 0.0008318524 -3.278690 1.042900e-03
## D|C 1.021452700 0.2020280499 5.055994 4.281538e-07
## C|B 2.480566065 0.2028621373 12.227842 2.207261e-34
## B|A 3.861318193 0.2046381571 18.869004 2.051285e-79
exp(coef(ordinal_model))
## age body_fat gripForce sit_ups broad_jump
## 1.0520176 0.9478478 0.9403545 1.1227330 0.9972763
prediksi_ordinal <- predict(
ordinal_model,
type = "class"
)
table(
Prediksi = prediksi_ordinal,
Aktual = data$class
)
## Aktual
## Prediksi D C B A
## D 2023 846 407 69
## C 705 1104 1011 633
## B 331 680 832 830
## A 290 719 1097 1816
confusionMatrix(
prediksi_ordinal,
data$class
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction D C B A
## D 2023 846 407 69
## C 705 1104 1011 633
## B 331 680 832 830
## A 290 719 1097 1816
##
## Overall Statistics
##
## Accuracy : 0.4312
## 95% CI : (0.4228, 0.4396)
## No Information Rate : 0.2501
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2416
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: D Class: C Class: B Class: A
## Sensitivity 0.6041 0.32965 0.24858 0.5424
## Specificity 0.8684 0.76613 0.81674 0.7903
## Pos Pred Value 0.6048 0.31972 0.31126 0.4630
## Neg Pred Value 0.8680 0.77414 0.76539 0.8382
## Prevalence 0.2501 0.25006 0.24991 0.2500
## Detection Rate 0.1510 0.08243 0.06212 0.1356
## Detection Prevalence 0.2498 0.25782 0.19958 0.2928
## Balanced Accuracy 0.7362 0.54789 0.53266 0.6664
summary(ordinal_model)
## Call:
## polr(formula = class ~ age + body_fat + gripForce + sit_ups +
## broad_jump, data = data, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## age 0.050710 0.0015939 31.814
## body_fat -0.053561 0.0032876 -16.292
## gripForce -0.061498 0.0025100 -24.501
## sit_ups 0.115766 0.0022472 51.516
## broad_jump -0.002727 0.0008319 -3.279
##
## Intercepts:
## Value Std. Error t value
## D|C 1.0215 0.2020 5.0560
## C|B 2.4806 0.2029 12.2278
## B|A 3.8613 0.2046 18.8690
##
## Residual Deviance: 32083.02
## AIC: 32099.02