# ==========================================================
# 1. INSTALL & LOAD PACKAGES
# ==========================================================
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
## Warning: package 'pacman' was built under R version 4.5.3
pacman::p_load(tidyverse, nnet, car, broom)
# ==========================================================
# 2. LOAD DATASET
# ==========================================================
# Memastikan kolom teks dibaca sebagai factor
df_pm <- read.csv("predictive_maintenance (1).csv", stringsAsFactors = TRUE)
# Membersihkan nama kolom agar tidak ada spasi atau karakter spesial
colnames(df_pm) <- make.names(colnames(df_pm))
# Menampilkan struktur data
str(df_pm)
## 'data.frame': 10000 obs. of 10 variables:
## $ UDI : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Product.ID : Factor w/ 10000 levels "H29424","H29425",..: 7004 1004 1005 1006 1007 7005 1008 1009 7006 7007 ...
## $ Type : Factor w/ 3 levels "H","L","M": 3 2 2 2 2 3 2 2 3 3 ...
## $ Air.temperature..K. : num 298 298 298 298 298 ...
## $ Process.temperature..K.: num 309 309 308 309 309 ...
## $ Rotational.speed..rpm. : int 1551 1408 1498 1433 1408 1425 1558 1527 1667 1741 ...
## $ Torque..Nm. : num 42.8 46.3 49.4 39.5 40 41.9 42.4 40.2 28.6 28 ...
## $ Tool.wear..min. : int 0 3 5 7 9 11 14 16 18 21 ...
## $ Target : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Failure.Type : Factor w/ 6 levels "Heat Dissipation Failure",..: 2 2 2 2 2 2 2 2 2 2 ...
# ==========================================================
# 3. PENGECEKAN TARGET (Failure.Type)
# ==========================================================
# Menghapus baris jika ada Failure.Type yang tidak jelas (jika ada)
# Menampilkan jumlah kategori (Nominal > 2)
table(df_pm$Failure.Type)
##
## Heat Dissipation Failure No Failure Overstrain Failure
## 112 9652 78
## Power Failure Random Failures Tool Wear Failure
## 95 18 45
# ==========================================================
# 4. UJI ASUMSI MULTIKOLINEARITAS (VIF)
# ==========================================================
# Kita menggunakan model linear bantuan untuk mengecek nilai VIF
# Mengecek apakah variabel suhu, kecepatan, torsi, dll saling tumpang tindih
vif_model <- lm(as.numeric(Failure.Type) ~ Type + Air.temperature..K. +
Process.temperature..K. + Rotational.speed..rpm. +
Torque..Nm. + Tool.wear..min., data = df_pm)
print("--- Hasil Uji VIF ---")
## [1] "--- Hasil Uji VIF ---"
vif(vif_model)
## GVIF Df GVIF^(1/(2*Df))
## Type 1.000842 2 1.000211
## Air.temperature..K. 4.304971 1 2.074842
## Process.temperature..K. 4.302882 1 2.074339
## Rotational.speed..rpm. 4.270917 1 2.066620
## Torque..Nm. 4.269660 1 2.066316
## Tool.wear..min. 1.000306 1 1.000153
# Interpretasi: Jika VIF < 5 atau 10, maka asumsi terpenuhi.
# ==========================================================
# 5. UJI LINEARITAS LOGIT (BOX-TIDWELL)
# ==========================================================
# Mengecek apakah variabel numerik linear dengan logitnya
# Kita buat variabel interaksi: Variabel * log(Variabel)
df_linear <- df_pm %>%
# Filter nilai 0 pada Tool.wear agar tidak error saat di-log
filter(Tool.wear..min. > 0) %>%
mutate(
air_temp_log = Air.temperature..K. * log(Air.temperature..K.),
torque_log = Torque..Nm. * log(Torque..Nm.),
tool_wear_log = Tool.wear..min. * log(Tool.wear..min.)
)
# Lakukan cek pada salah satu kategori utama kegagalan (misal: Power Failure)
# Jika p-value variabel *_log > 0.05, maka asumsi linearitas terpenuhi
model_linear_check <- glm(I(Failure.Type == "Power Failure") ~
Air.temperature..K. + air_temp_log +
Torque..Nm. + torque_log,
data = df_linear, family = binomial)
print("--- Hasil Uji Linearitas (Box-Tidwell) ---")
## [1] "--- Hasil Uji Linearitas (Box-Tidwell) ---"
summary(model_linear_check)
##
## Call:
## glm(formula = I(Failure.Type == "Power Failure") ~ Air.temperature..K. +
## air_temp_log + Torque..Nm. + torque_log, family = binomial,
## data = df_linear)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4649.15939 6867.45180 0.677 0.498
## Air.temperature..K. -102.40052 153.42395 -0.667 0.504
## air_temp_log 15.25347 22.88522 0.667 0.505
## Torque..Nm. -5.24687 0.41740 -12.570 <2e-16 ***
## torque_log 1.14412 0.09098 12.576 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1043.61 on 9879 degrees of freedom
## Residual deviance: 253.92 on 9875 degrees of freedom
## AIC: 263.92
##
## Number of Fisher Scoring iterations: 11
# ==========================================================
# 6. UJI OUTLIER (Z-SCORE)
# ==========================================================
# Mengecek apakah ada data ekstrem pada variabel numerik utama (misal: Torque)
mean_torque <- mean(df_pm$Torque..Nm., na.rm = TRUE)
sd_torque <- sd(df_pm$Torque..Nm., na.rm = TRUE)
outliers <- which(abs(df_pm$Torque..Nm. - mean_torque) > 3 * sd_torque)
cat("Jumlah outlier yang ditemukan pada variabel Torque:", length(outliers), "\n")
## Jumlah outlier yang ditemukan pada variabel Torque: 25
# Opsional: Visualisasi Boxplot untuk melihat outlier secara manual
boxplot(df_pm$Torque..Nm., main="Boxplot Torque untuk Deteksi Outlier", col="orange")

# ==========================================================
# 1. TRANSFORMASI DATA (LOG TRANSFORMATION)
# ==========================================================
# Kita buat kolom baru 'Log_Torque'
# Catatan: Torsi di dataset ini tidak ada yang nol, jadi aman untuk di-log
df_pm <- df_pm %>%
mutate(Log_Torque = log(Torque..Nm.))
# ==========================================================
# 2. CEK ULANG LINEARITAS (BOX-TIDWELL) SETELAH TRANSFORMASI
# ==========================================================
# Kita buat variabel interaksi baru untuk Log_Torque
df_linear_check <- df_pm %>%
mutate(log_torque_interaction = Log_Torque * log(Log_Torque))
# Jalankan model pengecekan pada kategori yang sama (Power Failure)
model_transformed_check <- glm(I(Failure.Type == "Power Failure") ~
Log_Torque + log_torque_interaction,
data = df_linear_check, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
print("--- Hasil Uji Linearitas Setelah Transformasi ---")
## [1] "--- Hasil Uji Linearitas Setelah Transformasi ---"
summary(model_transformed_check)
##
## Call:
## glm(formula = I(Failure.Type == "Power Failure") ~ Log_Torque +
## log_torque_interaction, family = binomial, data = df_linear_check)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 506.48 41.67 12.15 <2e-16 ***
## Log_Torque -343.83 28.23 -12.18 <2e-16 ***
## log_torque_interaction 155.58 12.78 12.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1073.8 on 9999 degrees of freedom
## Residual deviance: 255.4 on 9997 degrees of freedom
## AIC: 261.4
##
## Number of Fisher Scoring iterations: 11
# ==========================================================
# 3. VISUALISASI PERBANDINGAN (OPSIONAL)
# ==========================================================
par(mfrow=c(1,2))
hist(df_pm$Torque..Nm., main="Original Torque", col="gray")
hist(df_pm$Log_Torque, main="Log Transformed Torque", col="lightblue")

# ==========================================================
# 7. PEMODELAN MULTINOMIAL LOGISTIC REGRESSION (FINAL)
# ==========================================================
# Set Referensi ke "No Failure"
df_pm$Failure.Type <- relevel(df_pm$Failure.Type, ref = "No Failure")
# Menjalankan Model dengan Log_Torque
model_final <- multinom(Failure.Type ~ Type + Air.temperature..K. +
Process.temperature..K. + Rotational.speed..rpm. +
Log_Torque + Tool.wear..min., data = df_pm)
## # weights: 54 (40 variable)
## initial value 17917.594692
## iter 10 value 2915.954527
## iter 20 value 2296.310647
## iter 30 value 1429.837113
## iter 40 value 1075.766345
## iter 50 value 868.208668
## iter 60 value 846.334052
## iter 70 value 838.201515
## iter 80 value 833.045876
## iter 90 value 831.207365
## iter 100 value 830.551558
## final value 830.551558
## stopped after 100 iterations
# Menghitung P-Value
summary_mnl <- summary(model_final)
z_stats <- summary_mnl$coefficients / summary_mnl$standard.errors
p_values <- (1 - pnorm(abs(z_stats), 0, 1)) * 2
# Menghitung Odds Ratio (OR)
OR <- exp(coef(model_final))
# ==========================================================
# 8. OUTPUT UNTUK LAPORAN
# ==========================================================
print("--- HASIL ODDS RATIO ---")
## [1] "--- HASIL ODDS RATIO ---"
round(OR, 4)
## (Intercept) TypeL TypeM Air.temperature..K.
## Heat Dissipation Failure 0.0000 1.3282 1.0615 503.8088
## Overstrain Failure 0.0000 78.0099 0.9511 0.7548
## Power Failure 0.0000 3.2690 3.3032 1.1100
## Random Failures 0.0000 0.5040 0.1626 1.0089
## Tool Wear Failure 5.0446 0.6869 0.9427 1.1442
## Process.temperature..K. Rotational.speed..rpm.
## Heat Dissipation Failure 0.0033 0.9493
## Overstrain Failure 1.1433 0.9956
## Power Failure 0.8373 1.0355
## Random Failures 1.1483 0.9979
## Tool Wear Failure 0.8503 0.9967
## Log_Torque Tool.wear..min.
## Heat Dissipation Failure 1.800000e-02 0.9989
## Overstrain Failure 8.318628e+09 1.1528
## Power Failure 4.053522e+10 1.0016
## Random Failures 1.322000e+00 1.0035
## Tool Wear Failure 7.730000e-02 1.0968
print("--- HASIL P-VALUE ---")
## [1] "--- HASIL P-VALUE ---"
round(p_values, 4)
## (Intercept) TypeL TypeM Air.temperature..K.
## Heat Dissipation Failure 0 0.0930 0.7094 0.0000
## Overstrain Failure 0 0.0000 0.8885 0.1264
## Power Failure 0 0.0000 0.0000 0.4118
## Random Failures 0 0.1053 0.0000 0.9697
## Tool Wear Failure 0 0.0411 0.7291 0.3539
## Process.temperature..K. Rotational.speed..rpm.
## Heat Dissipation Failure 0.0000 0.0000
## Overstrain Failure 0.4537 0.2281
## Power Failure 0.1488 0.0000
## Random Failures 0.5389 0.2598
## Tool Wear Failure 0.2502 0.0003
## Log_Torque Tool.wear..min.
## Heat Dissipation Failure 0 0.6302
## Overstrain Failure 0 0.0000
## Power Failure 0 0.4237
## Random Failures 0 0.3560
## Tool Wear Failure 0 0.0000
# Menghitung Pseudo R-Square
log_null <- multinom(Failure.Type ~ 1, data = df_pm)$Deviance
## # weights: 12 (5 variable)
## initial value 17917.594692
## iter 10 value 2637.138759
## iter 20 value 2033.611318
## iter 30 value 2022.831790
## iter 30 value 2022.831790
## iter 30 value 2022.831790
## final value 2022.831790
## converged
log_model <- model_final$Deviance
pseudo_r2 <- 1 - (log_model / log_null)
cat("McFadden Pseudo R-Square:", round(pseudo_r2, 4))
## McFadden Pseudo R-Square:
# 1. Bagi data menjadi 80% Latih, 20% Uji
# ==========================================================
# 9. PEMBAGIAN DATA & CEK AKURASI (DATA SPLITTING)
# ==========================================================
# Memuat paket yang diperlukan
if (!require("caret")) install.packages("caret")
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.5.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(caret)
# Membagi data: 80% Latih (Train), 20% Uji (Test)
set.seed(123) # Agar hasil pembagian konsisten jika dirunning ulang
train_index <- createDataPartition(df_pm$Failure.Type, p = 0.8, list = FALSE)
train_data <- df_pm[train_index, ]
test_data <- df_pm[-train_index, ]
# Latih ulang model menggunakan train_data saja
model_train <- nnet::multinom(Failure.Type ~ Type + Air.temperature..K. +
Process.temperature..K. + Rotational.speed..rpm. +
Log_Torque + Tool.wear..min., data = train_data, trace = FALSE)
# Melakukan prediksi pada "New Data" (test_data)
prediksi_uji <- predict(model_train, newdata = test_data)
# Membuat Confusion Matrix
cm <- confusionMatrix(prediksi_uji, test_data$Failure.Type)
# Menampilkan Hasil Akurasi
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Failure Heat Dissipation Failure
## No Failure 1927 11
## Heat Dissipation Failure 2 10
## Overstrain Failure 0 1
## Power Failure 1 0
## Random Failures 0 0
## Tool Wear Failure 0 0
## Reference
## Prediction Overstrain Failure Power Failure Random Failures
## No Failure 6 8 3
## Heat Dissipation Failure 0 2 0
## Overstrain Failure 9 3 0
## Power Failure 0 6 0
## Random Failures 0 0 0
## Tool Wear Failure 0 0 0
## Reference
## Prediction Tool Wear Failure
## No Failure 9
## Heat Dissipation Failure 0
## Overstrain Failure 0
## Power Failure 0
## Random Failures 0
## Tool Wear Failure 0
##
## Overall Statistics
##
## Accuracy : 0.977
## 95% CI : (0.9694, 0.9831)
## No Information Rate : 0.966
## P-Value [Acc > NIR] : 0.002643
##
## Kappa : 0.5424
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: No Failure Class: Heat Dissipation Failure
## Sensitivity 0.9984 0.454545
## Specificity 0.4559 0.997976
## Pos Pred Value 0.9812 0.714286
## Neg Pred Value 0.9118 0.993952
## Prevalence 0.9660 0.011011
## Detection Rate 0.9645 0.005005
## Detection Prevalence 0.9830 0.007007
## Balanced Accuracy 0.7272 0.726261
## Class: Overstrain Failure Class: Power Failure
## Sensitivity 0.600000 0.315789
## Specificity 0.997983 0.999495
## Pos Pred Value 0.692308 0.857143
## Neg Pred Value 0.996977 0.993471
## Prevalence 0.007508 0.009510
## Detection Rate 0.004505 0.003003
## Detection Prevalence 0.006507 0.003504
## Balanced Accuracy 0.798991 0.657642
## Class: Random Failures Class: Tool Wear Failure
## Sensitivity 0.000000 0.000000
## Specificity 1.000000 1.000000
## Pos Pred Value NaN NaN
## Neg Pred Value 0.998498 0.995495
## Prevalence 0.001502 0.004505
## Detection Rate 0.000000 0.000000
## Detection Prevalence 0.000000 0.000000
## Balanced Accuracy 0.500000 0.500000