#install.packages("heplots")
#install.packages(c("MASS","dplyr","MVN","biotools","car"))
library(MASS)
library(dplyr)
library(MVN)
library(biotools)
library(car)
library(heplots)
# Location of the predictive-maintenance CSV. The original absolute path stays
# the default; set the PREDICTIVE_MAINTENANCE_CSV environment variable to run
# the analysis on another machine without editing this script.
data_path <- Sys.getenv(
  "PREDICTIVE_MAINTENANCE_CSV",
  unset = "D:/SEMESTER 4/Materi Sofia/AnMul/Modul 4_klasifikasi/predictive_maintenance1.csv"
)
# Fail early with a readable message instead of read.csv()'s connection error.
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}
data <- read.csv(data_path)
# Inspect the structure of the data
str(data)
## 'data.frame': 10000 obs. of 10 variables:
## $ UDI : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Product.ID : chr "M14860" "L47181" "L47182" "L47183" ...
## $ Type : chr "M" "L" "L" "L" ...
## $ Air.temperature..K. : num 298 298 298 298 298 ...
## $ Process.temperature..K.: num 309 309 308 309 309 ...
## $ Rotational.speed..rpm. : int 1551 1408 1498 1433 1408 1425 1558 1527 1667 1741 ...
## $ Torque..Nm. : num 42.8 46.3 49.4 39.5 40 41.9 42.4 40.2 28.6 28 ...
## $ Tool.wear..min. : int 0 3 5 7 9 11 14 16 18 21 ...
## $ Target : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Failure.Type : chr "No Failure" "No Failure" "No Failure" "No Failure" ...
# Preview the first rows of the raw data
head(data)
## UDI Product.ID Type Air.temperature..K. Process.temperature..K.
## 1 1 M14860 M 298.1 308.6
## 2 2 L47181 L 298.2 308.7
## 3 3 L47182 L 298.1 308.5
## 4 4 L47183 L 298.2 308.6
## 5 5 L47184 L 298.2 308.7
## 6 6 M14865 M 298.1 308.6
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min. Target Failure.Type
## 1 1551 42.8 0 0 No Failure
## 2 1408 46.3 3 0 No Failure
## 3 1498 49.4 5 0 No Failure
## 4 1433 39.5 7 0 No Failure
## 5 1408 40.0 9 0 No Failure
## 6 1425 41.9 11 0 No Failure
# General summary of every column
summary(data)
## UDI Product.ID Type Air.temperature..K.
## Min. : 1 Length:10000 Length:10000 Min. :295.3
## 1st Qu.: 2501 Class :character Class :character 1st Qu.:298.3
## Median : 5000 Mode :character Mode :character Median :300.1
## Mean : 5000 Mean :300.0
## 3rd Qu.: 7500 3rd Qu.:301.5
## Max. :10000 Max. :304.5
## Process.temperature..K. Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## Min. :305.7 Min. :1168 Min. : 3.80 Min. : 0
## 1st Qu.:308.8 1st Qu.:1423 1st Qu.:33.20 1st Qu.: 53
## Median :310.1 Median :1503 Median :40.10 Median :108
## Mean :310.0 Mean :1539 Mean :39.99 Mean :108
## 3rd Qu.:311.1 3rd Qu.:1612 3rd Qu.:46.80 3rd Qu.:162
## Max. :313.8 Max. :2886 Max. :76.60 Max. :253
## Target Failure.Type
## Min. :0.0000 Length:10000
## 1st Qu.:0.0000 Class :character
## Median :0.0000 Mode :character
## Mean :0.0339
## 3rd Qu.:0.0000
## Max. :1.0000
# Summary statistics for the numeric columns only.
# vapply() guarantees a logical mask whatever the input, and drop = FALSE
# keeps a data frame even if only one numeric column is present.
summary(data[, vapply(data, is.numeric, logical(1)), drop = FALSE])
## UDI Air.temperature..K. Process.temperature..K.
## Min. : 1 Min. :295.3 Min. :305.7
## 1st Qu.: 2501 1st Qu.:298.3 1st Qu.:308.8
## Median : 5000 Median :300.1 Median :310.1
## Mean : 5000 Mean :300.0 Mean :310.0
## 3rd Qu.: 7500 3rd Qu.:301.5 3rd Qu.:311.1
## Max. :10000 Max. :304.5 Max. :313.8
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min. Target
## Min. :1168 Min. : 3.80 Min. : 0 Min. :0.0000
## 1st Qu.:1423 1st Qu.:33.20 1st Qu.: 53 1st Qu.:0.0000
## Median :1503 Median :40.10 Median :108 Median :0.0000
## Mean :1539 Mean :39.99 Mean :108 Mean :0.0339
## 3rd Qu.:1612 3rd Qu.:46.80 3rd Qu.:162 3rd Qu.:0.0000
## Max. :2886 Max. :76.60 Max. :253 Max. :1.0000
# Distribution of the dependent variable.
# str(data) lists the column as `Failure.Type`; the previous `data$Failure_Type`
# does not exist, so table() returned an empty table and prop.table() returned
# numeric(0).
table(data$Failure.Type)
prop.table(table(data$Failure.Type))
# NOTE: re-run to record the class counts and proportions; the output stored
# here earlier came from the misspelled column name and was empty.
# Convert the two categorical columns to factors, then list the column names
factor_cols <- c("Failure.Type", "Type")
data[factor_cols] <- lapply(data[factor_cols], as.factor)
names(data)
## [1] "UDI" "Product.ID"
## [3] "Type" "Air.temperature..K."
## [5] "Process.temperature..K." "Rotational.speed..rpm."
## [7] "Torque..Nm." "Tool.wear..min."
## [9] "Target" "Failure.Type"
# Drop the columns that are not used in the analysis (row id, product id and
# the binary Target flag), then re-check the structure
data <- data[, setdiff(names(data), c("UDI", "Product.ID", "Target"))]
str(data)
## 'data.frame': 10000 obs. of 7 variables:
## $ Type : Factor w/ 3 levels "H","L","M": 3 2 2 2 2 3 2 2 3 3 ...
## $ Air.temperature..K. : num 298 298 298 298 298 ...
## $ Process.temperature..K.: num 309 309 308 309 309 ...
## $ Rotational.speed..rpm. : int 1551 1408 1498 1433 1408 1425 1558 1527 1667 1741 ...
## $ Torque..Nm. : num 42.8 46.3 49.4 39.5 40 41.9 42.4 40.2 28.6 28 ...
## $ Tool.wear..min. : int 0 3 5 7 9 11 14 16 18 21 ...
## $ Failure.Type : Factor w/ 6 levels "Heat Dissipation Failure",..: 2 2 2 2 2 2 2 2 2 2 ...
# Logical mask of the numeric columns (reused by later sections as `num_cols`)
num_cols <- vapply(data, is.numeric, logical(1))
# Standardise those columns in place (centre and scale, the scale() defaults)
data[, num_cols] <- scale(data[, num_cols], center = TRUE, scale = TRUE)
# Three box plots side by side. The columns were standardised just above, so
# the plotted values are z-scores even though the titles mention raw units.
par(mfrow = c(1, 3))
invisible(Map(
  function(column, title, fill) {
    boxplot(data[[column]], main = title, col = fill)
  },
  c("Rotational.speed..rpm.", "Torque..Nm.", "Tool.wear..min."),
  c("Rotational Speed [rpm]", "Torque [Nm]", "Tool Wear [min]"),
  c("lightblue", "lightgreen", "pink")
))
# Back to a single-panel layout
par(mfrow = c(1, 1))
cat("UJI NORMALITAS MULTIVARIAT PER KELOMPOK")
## UJI NORMALITAS MULTIVARIAT PER KELOMPOK
# Multivariate normality test for each Failure.Type group.
# The callback now returns the result table instead of printing it: the
# earlier version printed inside the callback and the `by` object was printed
# again afterwards, so every table appeared twice (first without group labels).
# The explicit print() shows the labelled result exactly once.
print(by(data[, num_cols], data$Failure.Type, function(sub) {
  MVN::mvn(sub)$multivariate_normality
}))
## data$Failure.Type: Heat Dissipation Failure
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 1.574 <0.001 asymptotic ✗ Not normal
## ------------------------------------------------------------
## data$Failure.Type: No Failure
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 11.997 <0.001 asymptotic ✗ Not normal
## ------------------------------------------------------------
## data$Failure.Type: Overstrain Failure
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 1.153 <0.001 asymptotic ✗ Not normal
## ------------------------------------------------------------
## data$Failure.Type: Power Failure
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 1.744 <0.001 asymptotic ✗ Not normal
## ------------------------------------------------------------
## data$Failure.Type: Random Failures
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 0.746 0.572 asymptotic ✓ Normal
## ------------------------------------------------------------
## data$Failure.Type: Tool Wear Failure
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 1.074 0.002 asymptotic ✗ Not normal
cat("UJI HOMOGENITAS KOVARIANS (BOX M)")
## UJI HOMOGENITAS KOVARIANS (BOX M)
# Box's M test of equal covariance matrices across the Failure.Type groups,
# using the standardised numeric predictors.
print(biotools::boxM(data[, num_cols], data$Failure.Type))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data[, num_cols]
## Chi-Sq (approx.) = 1948.3, df = 75, p-value < 2.2e-16
cat("UJI MULTIKOLINEARITAS (VIF)")
## UJI MULTIKOLINEARITAS (VIF)
# VIF has to be computed with ALL predictors on the right-hand side.
# The earlier version used Air.temperature..K. as the response, so that
# variable received no VIF and its collinearity with Process.temperature..K.
# (r is about 0.88 in the correlation matrix printed by this script) was hidden.
# For a linear model the VIFs depend only on the predictors, so the response
# here (numeric class codes) is just a placeholder.
vif_model <- lm(
  as.numeric(Failure.Type) ~ Air.temperature..K. + Process.temperature..K. +
    Rotational.speed..rpm. + Torque..Nm. + Tool.wear..min.,
  data = data
)
print(car::vif(vif_model))
# NOTE: re-run to record the five VIF values; the table stored here earlier
# came from the four-predictor model and omitted Air.temperature..K.
cat("UJI INDEPENDENSI (KORELASI)")
## UJI INDEPENDENSI (KORELASI)
# Pairwise Pearson correlations between the numeric predictors
print(cor(x = data[, num_cols], method = "pearson"))
## Air.temperature..K. Process.temperature..K.
## Air.temperature..K. 1.00000000 0.87610716
## Process.temperature..K. 0.87610716 1.00000000
## Rotational.speed..rpm. 0.02267046 0.01927671
## Torque..Nm. -0.01377782 -0.01406061
## Tool.wear..min. 0.01385283 0.01348752
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## Air.temperature..K. 0.0226704588 -0.013777823 0.0138528277
## Process.temperature..K. 0.0192767139 -0.014060613 0.0134875171
## Rotational.speed..rpm. 1.0000000000 -0.875027086 0.0002230848
## Torque..Nm. -0.8750270863 1.000000000 -0.0030927814
## Tool.wear..min. 0.0002230848 -0.003092781 1.0000000000
cat("UJI OUTLIER (MAHALANOBIS)")
## UJI OUTLIER (MAHALANOBIS)
# Squared Mahalanobis distance of every row from the overall centroid,
# compared with the 99.9% chi-square quantile (df = number of predictors).
X <- data[, num_cols]
mahal <- mahalanobis(x = X, center = colMeans(X), cov = cov(X))
cut <- qchisq(p = 0.999, df = ncol(X))
# Keep only the rows below the cutoff; this filtered set feeds the LDA.
# NOTE(review): centroid and covariance are pooled over all classes, so rows
# from rare failure classes may be removed more often - confirm this is wanted.
data_lda <- data[mahal < cut, ]
cat("Jumlah data setelah buang outlier:", nrow(data_lda), "\n")
## Jumlah data setelah buang outlier: 9896
# Fit a linear discriminant analysis of Failure.Type on all remaining columns
# of the outlier-filtered data (Type enters as dummy variables), then print it.
lda_model <- lda(Failure.Type ~ ., data = data_lda)
lda_model
## Call:
## lda(Failure.Type ~ ., data = data_lda)
##
## Prior probabilities of groups:
## Heat Dissipation Failure No Failure Overstrain Failure
## 0.011317704 0.969078416 0.007881973
## Power Failure Random Failures Tool Wear Failure
## 0.005456750 0.001818917 0.004446241
##
## Group means:
## TypeL TypeM Air.temperature..K.
## Heat Dissipation Failure 0.6607143 0.26785714 1.28129785
## No Failure 0.5961418 0.30260688 -0.01674855
## Overstrain Failure 0.9358974 0.05128205 -0.06848178
## Power Failure 0.6481481 0.29629630 0.04289982
## Random Failures 0.6666667 0.11111111 0.38081908
## Tool Wear Failure 0.5454545 0.31818182 0.16115052
## Process.temperature..K. Rotational.speed..rpm.
## Heat Dissipation Failure 0.534831058 -1.12007601
## No Failure -0.008238127 -0.02035194
## Overstrain Failure 0.030815527 -1.02927429
## Power Failure -0.054919490 -0.97640494
## Random Failures 0.505478371 -0.27515913
## Tool Wear Failure 0.112666589 0.08909825
## Torque..Nm. Tool.wear..min.
## Heat Dissipation Failure 1.28315242 -0.009609968
## No Failure -0.02051908 -0.020572084
## Overstrain Failure 1.69439336 1.575183299
## Power Failure 2.47529512 -0.071670289
## Random Failures 0.35463293 0.187542988
## Tool Wear Failure -0.22894771 1.705650504
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4
## TypeL 0.28685528 0.090528731 -0.19209211 -1.2263328
## TypeM 0.04201076 -0.008112212 0.16980290 0.1077969
## Air.temperature..K. 0.44139104 -1.701190289 -0.17549133 0.8090318
## Process.temperature..K. -0.31818396 1.225018035 0.05627724 -0.7320050
## Rotational.speed..rpm. 1.40005319 1.174611761 1.07563831 1.1915889
## Torque..Nm. 2.00481187 0.789819677 0.90453763 0.5653581
## Tool.wear..min. 0.24145276 0.299466588 -0.89832783 0.2808569
## LD5
## TypeL -2.16485549
## TypeM -2.61054305
## Air.temperature..K. -0.34765882
## Process.temperature..K. 0.97818948
## Rotational.speed..rpm. 0.21871272
## Torque..Nm. 0.15650605
## Tool.wear..min. -0.04424669
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5
## 0.6497 0.1997 0.1336 0.0129 0.0042
# Coefficients of the linear discriminants (one column per LD function)
lda_model[["scaling"]]
## LD1 LD2 LD3 LD4
## TypeL 0.28685528 0.090528731 -0.19209211 -1.2263328
## TypeM 0.04201076 -0.008112212 0.16980290 0.1077969
## Air.temperature..K. 0.44139104 -1.701190289 -0.17549133 0.8090318
## Process.temperature..K. -0.31818396 1.225018035 0.05627724 -0.7320050
## Rotational.speed..rpm. 1.40005319 1.174611761 1.07563831 1.1915889
## Torque..Nm. 2.00481187 0.789819677 0.90453763 0.5653581
## Tool.wear..min. 0.24145276 0.299466588 -0.89832783 0.2808569
## LD5
## TypeL -2.16485549
## TypeM -2.61054305
## Air.temperature..K. -0.34765882
## Process.temperature..K. 0.97818948
## Rotational.speed..rpm. 0.21871272
## Torque..Nm. 0.15650605
## Tool.wear..min. -0.04424669
# Numeric predictors of the outlier-filtered data.
# The mask is built from `data_lda` itself; building it from `data` (as before)
# only works while both data frames have identical columns and breaks as soon
# as a column is added to `data`.
X <- data_lda[, vapply(data_lda, is.numeric, logical(1))]
# MANOVA of the numeric predictors on Failure.Type
manova_model <- manova(as.matrix(X) ~ data_lda$Failure.Type)
# Overall significance test (Wilks' Lambda)
summary(manova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## data_lda$Failure.Type 5 0.81211 84.651 25 36726 < 2.2e-16 ***
## Residuals 9890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up univariate ANOVA table for each response of the MANOVA model
summary.aov(manova_model)
## Response Air.temperature..K. :
## Df Sum Sq Mean Sq F value Pr(>F)
## data_lda$Failure.Type 5 190.8 38.156 38.949 < 2.2e-16 ***
## Residuals 9890 9688.5 0.980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Process.temperature..K. :
## Df Sum Sq Mean Sq F value Pr(>F)
## data_lda$Failure.Type 5 38.1 7.6158 7.6567 3.415e-07 ***
## Residuals 9890 9837.2 0.9947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Rotational.speed..rpm. :
## Df Sum Sq Mean Sq F value Pr(>F)
## data_lda$Failure.Type 5 259.4 51.884 71.047 < 2.2e-16 ***
## Residuals 9890 7222.5 0.730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Torque..Nm. :
## Df Sum Sq Mean Sq F value Pr(>F)
## data_lda$Failure.Type 5 743.4 148.679 173.52 < 2.2e-16 ***
## Residuals 9890 8474.4 0.857
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Tool.wear..min. :
## Df Sum Sq Mean Sq F value Pr(>F)
## data_lda$Failure.Type 5 326.5 65.304 67.588 < 2.2e-16 ***
## Residuals 9890 9555.7 0.966
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hasil uji MANOVA (Wilks' Lambda = 0,812; p-value < 0,05) menunjukkan bahwa vektor rata-rata variabel independen berbeda secara signifikan antar kelompok Failure Type, sehingga variabel-variabel tersebut secara simultan mampu membedakan kelompok.
# Class predictions from the fitted LDA. No `newdata` is supplied, so the
# predictions are for the same rows the model was fitted on.
pred <- predict(object = lda_model)
# Cross-tabulate predicted against actual classes (rows = predicted)
conf_matrix <- table(
  pred$class,
  data_lda$Failure.Type,
  dnn = c("Predicted", "Actual")
)
conf_matrix
## Actual
## Predicted Heat Dissipation Failure No Failure
## Heat Dissipation Failure 3 0
## No Failure 106 9558
## Overstrain Failure 3 4
## Power Failure 0 28
## Random Failures 0 0
## Tool Wear Failure 0 0
## Actual
## Predicted Overstrain Failure Power Failure Random Failures
## Heat Dissipation Failure 0 0 0
## No Failure 60 1 17
## Overstrain Failure 17 0 1
## Power Failure 1 53 0
## Random Failures 0 0 0
## Tool Wear Failure 0 0 0
## Actual
## Predicted Tool Wear Failure
## Heat Dissipation Failure 0
## No Failure 44
## Overstrain Failure 0
## Power Failure 0
## Random Failures 0
## Tool Wear Failure 0
library(ggplot2)
# Long-format data frame (Predicted, Actual, Freq) of the confusion matrix
cm_df <- as.data.frame(conf_matrix)
# Heatmap of raw counts, with each count written in its tile
ggplot(data = cm_df, mapping = aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq), colour = "black") +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(
    title = "Confusion Matrix Heatmap",
    x = "Actual",
    y = "Predicted"
  ) +
  theme_minimal()
library(ggplot2)
library(dplyr)
library(scales)
# Long-format data frame (Predicted, Actual, Freq) of the confusion matrix
cm_df <- as.data.frame(conf_matrix)
# Share of each cell within its ACTUAL class (each actual class sums to 1),
# so small classes are judged on their own scale. ungroup() is added so the
# grouping does not leak into any later use of cm_df.
cm_df <- cm_df %>%
  group_by(Actual) %>%
  mutate(Percentage = Freq / sum(Freq)) %>%
  ungroup()
# Heatmap coloured by that share; each tile shows the count and the percentage.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for tile
# borders - confirm the installed version before changing it.
ggplot(cm_df, aes(x = Actual, y = Predicted, fill = Percentage)) +
  geom_tile(color = "white", size = 0.5) +
  geom_text(
    aes(label = paste0(Freq, "\n(", percent(Percentage, accuracy = 0.1), ")")),
    size = 3.5
  ) +
  scale_fill_gradient(low = "#f7fbff", high = "#08306b") +
  labs(
    title = "Confusion Matrix (LDA Model)",
    subtitle = "Count dan Persentase per Kelas Aktual",
    x = "Actual Class",
    y = "Predicted Class",
    fill = "Proportion"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    axis.text.x = element_text(angle = 30, hjust = 1),
    panel.grid = element_blank()
  )
Visualisasi confusion matrix dalam bentuk heatmap menunjukkan bahwa model memiliki tingkat akurasi tinggi pada kelas “No Failure”, namun masih mengalami kesalahan klasifikasi pada kelas kegagalan lainnya, terutama pada kelas dengan jumlah data yang lebih sedikit.
# Apparent error rate (APER): misclassified count divided by total count
n <- sum(conf_matrix)
# Off-diagonal cells are the misclassified observations
error <- sum(conf_matrix[row(conf_matrix) != col(conf_matrix)])
correct <- n - error
APER <- error / n
APER
## [1] 0.0267785
library(caret)
# Full confusion-matrix report (overall and per-class statistics) from caret
cm <- caret::confusionMatrix(
  data = pred$class,
  reference = data_lda$Failure.Type
)
cm
## Confusion Matrix and Statistics
##
## Reference
## Prediction Heat Dissipation Failure No Failure
## Heat Dissipation Failure 3 0
## No Failure 106 9558
## Overstrain Failure 3 4
## Power Failure 0 28
## Random Failures 0 0
## Tool Wear Failure 0 0
## Reference
## Prediction Overstrain Failure Power Failure Random Failures
## Heat Dissipation Failure 0 0 0
## No Failure 60 1 17
## Overstrain Failure 17 0 1
## Power Failure 1 53 0
## Random Failures 0 0 0
## Tool Wear Failure 0 0 0
## Reference
## Prediction Tool Wear Failure
## Heat Dissipation Failure 0
## No Failure 44
## Overstrain Failure 0
## Power Failure 0
## Random Failures 0
## Tool Wear Failure 0
##
## Overall Statistics
##
## Accuracy : 0.9732
## 95% CI : (0.9698, 0.9763)
## No Information Rate : 0.9691
## P-Value [Acc > NIR] : 0.008292
##
## Kappa : 0.3567
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Heat Dissipation Failure Class: No Failure
## Sensitivity 0.0267857 0.9967
## Specificity 1.0000000 0.2549
## Pos Pred Value 1.0000000 0.9767
## Neg Pred Value 0.9889821 0.7091
## Prevalence 0.0113177 0.9691
## Detection Rate 0.0003032 0.9658
## Detection Prevalence 0.0003032 0.9889
## Balanced Accuracy 0.5133929 0.6258
## Class: Overstrain Failure Class: Power Failure
## Sensitivity 0.217949 0.981481
## Specificity 0.999185 0.997053
## Pos Pred Value 0.680000 0.646341
## Neg Pred Value 0.993820 0.999898
## Prevalence 0.007882 0.005457
## Detection Rate 0.001718 0.005356
## Detection Prevalence 0.002526 0.008286
## Balanced Accuracy 0.608567 0.989267
## Class: Random Failures Class: Tool Wear Failure
## Sensitivity 0.000000 0.000000
## Specificity 1.000000 1.000000
## Pos Pred Value NaN NaN
## Neg Pred Value 0.998181 0.995554
## Prevalence 0.001819 0.004446
## Detection Rate 0.000000 0.000000
## Detection Prevalence 0.000000 0.000000
## Balanced Accuracy 0.500000 0.500000
# Overall accuracy
cm$overall['Accuracy']
## Accuracy
## 0.9732215
# Precision per class (NA where a class was never predicted)
cm$byClass[, "Precision"]
## Class: Heat Dissipation Failure Class: No Failure
## 1.0000000 0.9767014
## Class: Overstrain Failure Class: Power Failure
## 0.6800000 0.6463415
## Class: Random Failures Class: Tool Wear Failure
## NA NA
# Recall per class
cm$byClass[, "Recall"]
## Class: Heat Dissipation Failure Class: No Failure
## 0.02678571 0.99666319
## Class: Overstrain Failure Class: Power Failure
## 0.21794872 0.98148148
## Class: Random Failures Class: Tool Wear Failure
## 0.00000000 0.00000000
# F1 score per class (NA where precision is NA)
cm$byClass[, "F1"]
## Class: Heat Dissipation Failure Class: No Failure
## 0.05217391 0.98658134
## Class: Overstrain Failure Class: Power Failure
## 0.33009709 0.77941176
## Class: Random Failures Class: Tool Wear Failure
## NA NA
# Collect precision, recall and F1 per class into one table
metrics <- local({
  by_class <- cm$byClass
  data.frame(
    Precision = by_class[, "Precision"],
    Recall = by_class[, "Recall"],
    F1_Score = by_class[, "F1"]
  )
})
metrics
## Precision Recall F1_Score
## Class: Heat Dissipation Failure 1.0000000 0.02678571 0.05217391
## Class: No Failure 0.9767014 0.99666319 0.98658134
## Class: Overstrain Failure 0.6800000 0.21794872 0.33009709
## Class: Power Failure 0.6463415 0.98148148 0.77941176
## Class: Random Failures NA 0.00000000 NA
## Class: Tool Wear Failure NA 0.00000000 NA
Nilai APER sebesar 0.0267785 menunjukkan bahwa tingkat kesalahan klasifikasi model relatif kecil. Namun, berdasarkan evaluasi lebih lanjut menggunakan precision, recall, dan F1-score, diketahui bahwa performa model cenderung lebih baik pada kelas mayoritas dibandingkan kelas minoritas. Hal ini menunjukkan adanya pengaruh ketidakseimbangan data terhadap kinerja model, sehingga evaluasi tidak hanya bergantung pada akurasi, tetapi juga mempertimbangkan metrik lain seperti recall dan F1-score.
cat("UJI LINEARITY LOGIT (BOX-TIDWELL)")
## UJI LINEARITY LOGIT (BOX-TIDWELL)
# Box-Tidwell check for linearity of the logit in air temperature.
# The column was standardised earlier, so it has negative values; log() of it
# is NaN for every below-average observation and the earlier fit silently lost
# those rows. Shift the variable so it is strictly positive (minimum 1) first.
# NOTE(review): the raw Kelvin values would be the more natural input, but
# they were overwritten by the scaling step.
air_temp_shifted <- data$Air.temperature..K. -
  min(data$Air.temperature..K., na.rm = TRUE) + 1
# Kept under its original name for any later code that reads it
data$logAirTemp <- log(air_temp_shifted)
# The Box-Tidwell term is x * ln(x), not ln(x) alone; a significant
# coefficient on it indicates a non-linear logit.
data$btAirTemp <- air_temp_shifted * data$logAirTemp
bt_model <- nnet::multinom(
  Failure.Type ~ Air.temperature..K. + btAirTemp,
  data = data
)
print(summary(bt_model))
# NOTE: re-run to record the fit trace and coefficient table; the output
# stored here earlier came from a model fitted on only part of the rows.
cat("UJI MULTIKOLINEARITAS (VIF)")
## UJI MULTIKOLINEARITAS (VIF)
# VIF for the five predictors of the multinomial model. All of them must be on
# the right-hand side: with Air.temperature..K. as the response (as before) it
# received no VIF and its collinearity with Process.temperature..K. was hidden.
# For a linear model the VIFs depend only on the predictors, so the response
# here (numeric class codes) is just a placeholder.
vif_multi <- lm(
  as.numeric(Failure.Type) ~ Air.temperature..K. + Process.temperature..K. +
    Rotational.speed..rpm. + Torque..Nm. + Tool.wear..min.,
  data = data
)
print(car::vif(vif_multi))
# NOTE: re-run to record the five VIF values; the table stored here earlier
# came from the four-predictor model and omitted Air.temperature..K.
cat("UJI OUTLIER (MAHALANOBIS DISTANCE)")
## UJI OUTLIER (MAHALANOBIS DISTANCE)
# Outlier screening for the multinomial-regression section.
# The models in this section are fitted on `data`, so the distances are
# computed on `data` as well. The earlier version reused `X` from the LDA
# section, i.e. the already outlier-filtered rows, which is why the printed
# row names and positions disagreed.
predictor_cols <- c(
  "Air.temperature..K.", "Process.temperature..K.",
  "Rotational.speed..rpm.", "Torque..Nm.", "Tool.wear..min."
)
X <- data[, predictor_cols]
# Squared Mahalanobis distance from the overall centroid
mahal <- mahalanobis(X, colMeans(X), cov(X))
# 99.9% chi-square cutoff with df = number of predictors
cut <- qchisq(0.999, df = ncol(X))
# Row positions (named by row name) of the observations beyond the cutoff
outlier_index <- which(mahal > cut)
print(outlier_index)
# NOTE: re-run to record the outlier list; the indices stored here earlier
# referred to the filtered LDA data, not to `data`.
cat("UJI INDEPENDENSI (DURBIN WATSON)")
## UJI INDEPENDENSI (DURBIN WATSON)
# Durbin-Watson test on the residuals of a linear regression of air
# temperature on process temperature, in the row order of `data`.
# NOTE(review): this checks serial correlation between two predictors, not
# the residuals of the multinomial model - confirm this is the intended check.
dw2 <- car::durbinWatsonTest(
  lm(formula = Air.temperature..K. ~ Process.temperature..K., data = data)
)
print(dw2)
## lag Autocorrelation D-W Statistic p-value
## 1 0.9965106 0.006941408 0
## Alternative hypothesis: rho != 0
#install.packages("brant")
#install.packages("mlogit")
#install.packages("lmtest")
library(brant)
library(mlogit)
library(lmtest)
#install.packages("nnet")
library(nnet)
# Multinomial logistic regression of Failure.Type on the five standardised
# numeric predictors. The coefficient rows printed by summary() omit
# "Heat Dissipation Failure", the first factor level, which is the baseline.
model_multi <- multinom(Failure.Type ~ Air.temperature..K. + Process.temperature..K. + Rotational.speed..rpm. + Torque..Nm. + Tool.wear..min., data=data)
## # weights: 42 (30 variable)
## initial value 17917.594692
## iter 10 value 1395.056284
## iter 20 value 736.107756
## iter 30 value 667.320320
## iter 40 value 661.834858
## iter 50 value 661.652225
## final value 661.652148
## converged
# Coefficients, standard errors, residual deviance and AIC
summary(model_multi)
## Call:
## multinom(formula = Failure.Type ~ Air.temperature..K. + Process.temperature..K. +
## Rotational.speed..rpm. + Torque..Nm. + Tool.wear..min., data = data)
##
## Coefficients:
## (Intercept) Air.temperature..K. Process.temperature..K.
## No Failure 20.423995 -12.55882 8.225194
## Overstrain Failure 1.224271 -13.03811 8.614050
## Power Failure 3.460588 -12.35137 7.920669
## Random Failures 13.912758 -12.74912 8.914013
## Tool Wear Failure 7.696795 -12.32795 8.035346
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## No Failure 9.253133 0.638611 0.01696826
## Overstrain Failure 9.286277 5.028972 7.74574403
## Power Failure 17.539888 11.479828 0.75052882
## Random Failures 9.431519 1.231135 0.24499545
## Tool Wear Failure 9.129206 0.357392 5.82782283
##
## Std. Errors:
## (Intercept) Air.temperature..K. Process.temperature..K.
## No Failure 1.670248 1.109202 0.7353608
## Overstrain Failure 2.273132 1.149168 0.7994375
## Power Failure 2.249545 1.174815 0.8353486
## Random Failures 1.694523 1.208829 0.8794252
## Tool Wear Failure 1.972589 1.148366 0.7999995
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## No Failure 0.9444391 0.3211476 0.1525835
## Overstrain Failure 1.1328747 0.5421566 0.7911798
## Power Failure 1.2210925 1.0487645 0.2770055
## Random Failures 1.1243377 0.6084331 0.2866690
## Tool Wear Failure 1.0200043 0.5323467 0.6384166
##
## Residual Deviance: 1323.304
## AIC: 1383.304
# Intercept-only (null) multinomial model used as the comparison baseline
model_multi_null <- multinom(Failure.Type ~ 1, data=data)
## # weights: 12 (5 variable)
## initial value 17917.594692
## iter 10 value 2896.881855
## iter 20 value 2182.892751
## iter 30 value 2022.832095
## final value 2022.831790
## converged
# Likelihood ratio test: null model versus the model with all five predictors
lrtest(model_multi_null, model_multi)
## Likelihood ratio test
##
## Model 1: Failure.Type ~ 1
## Model 2: Failure.Type ~ Air.temperature..K. + Process.temperature..K. +
## Rotational.speed..rpm. + Torque..Nm. + Tool.wear..min.
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -2022.83
## 2 30 -661.65 25 2722.4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Berdasarkan hasil Likelihood Ratio Test diperoleh nilai Chi-Square sebesar 2722,4 dengan p-value < 0,05. Hasil tersebut menunjukkan bahwa model regresi multinomial dengan variabel independen memiliki performa yang secara signifikan lebih baik dibandingkan model tanpa variabel independen (null model). Dengan demikian, variabel air temperature, process temperature, rotational speed, torque, dan tool wear secara simultan berpengaruh signifikan terhadap klasifikasi jenis kegagalan mesin (Failure Type).
# Wald z statistics and two-sided p-values for every coefficient.
# summary() is computed once and reused instead of being evaluated twice.
model_multi_summary <- summary(model_multi)
z_stats <- model_multi_summary$coefficients /
  model_multi_summary$standard.errors
# The upper tail is taken directly: `1 - pnorm(|z|)` rounds to exactly 0 (or to
# machine epsilon) for large |z|, which is why the previously recorded table
# shows entries of 0 and 2.220446e-16. On a re-run those entries print as very
# small positive numbers; no significance conclusion changes.
p_values <- 2 * pnorm(abs(z_stats), mean = 0, sd = 1, lower.tail = FALSE)
print(p_values)
## (Intercept) Air.temperature..K. Process.temperature..K.
## No Failure 0.000000e+00 0 0
## Overstrain Failure 5.901743e-01 0 0
## Power Failure 1.239631e-01 0 0
## Random Failures 2.220446e-16 0 0
## Tool Wear Failure 9.545082e-05 0 0
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## No Failure 0.000000e+00 0.04675336 0.911452662
## Overstrain Failure 2.220446e-16 0.00000000 0.000000000
## Power Failure 0.000000e+00 0.00000000 0.006739761
## Random Failures 0.000000e+00 0.04302660 0.392756995
## Tool Wear Failure 0.000000e+00 0.50199633 0.000000000
Berdasarkan hasil uji parsial pada regresi multinomial, sebagian besar variabel independen memiliki nilai p-value kurang dari 0,05, sehingga berpengaruh signifikan terhadap klasifikasi jenis kegagalan mesin. Variabel air temperature, process temperature, dan rotational speed menunjukkan pengaruh yang sangat signifikan pada hampir seluruh kategori kegagalan. Namun, terdapat beberapa variabel yang tidak signifikan pada kategori tertentu, misalnya tool wear pada kelas No Failure dan Random Failures, serta torque pada kelas Tool Wear Failure.
# Odds ratios: exponentiated multinomial coefficients. Each row is a failure
# category compared with the baseline category (the one absent from the rows).
# The predictors were standardised, so each ratio is per one-SD increase.
exp(coef(model_multi))
## (Intercept) Air.temperature..K. Process.temperature..K.
## No Failure 7.413589e+08 3.513781e-06 3733.847
## Overstrain Failure 3.401686e+00 2.175806e-06 5508.514
## Power Failure 3.183568e+01 4.323821e-06 2753.613
## Random Failures 1.102133e+06 2.904881e-06 7435.437
## Tool Wear Failure 2.201281e+03 4.426287e-06 3088.208
## Rotational.speed..rpm. Torque..Nm. Tool.wear..min.
## No Failure 10437.212 1.893848 1.017113
## Overstrain Failure 10788.941 152.775887 2311.712862
## Power Failure 41445412.622 96744.390036 2.118120
## Random Failures 12475.466 3.425115 1.277615
## Tool Wear Failure 9220.696 1.429596 339.618468
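Berdasarkan hasil odds ratio, terlihat bahwa beberapa variabel memiliki pengaruh yang cukup besar terhadap peluang suatu observasi masuk ke jenis kegagalan tertentu. Perlu dicatat bahwa seluruh odds ratio dibandingkan terhadap kategori acuan Heat Dissipation Failure, dan karena variabel numerik telah distandardisasi, setiap nilai menyatakan perubahan odds untuk kenaikan satu simpangan baku. Variabel rotational speed dan process temperature memiliki nilai odds ratio yang sangat tinggi pada seluruh kategori (termasuk No Failure), yang menunjukkan bahwa peningkatan kedua variabel tersebut meningkatkan peluang kategori-kategori tersebut dibandingkan kategori acuan. Sebaliknya, air temperature memiliki odds ratio yang sangat kecil (mendekati nol) pada seluruh kategori, sehingga kenaikan air temperature justru memperbesar peluang Heat Dissipation Failure dibandingkan kategori lainnya. Selain itu, variabel torque juga menunjukkan pengaruh yang sangat besar terutama pada kategori Power Failure dan Overstrain Failure. Sementara itu, variabel tool wear memiliki pengaruh paling dominan pada kategori Overstrain Failure dan Tool Wear Failure, yang ditunjukkan oleh nilai odds ratio yang tinggi.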
Sumber dataset: https://www.kaggle.com/datasets/shivamb/machine-predictive-maintenance-classification