ORDINAL LOGISTIC REGRESSION
Baca file dataset
cat("\nMemuat Data\n")
##
## Memuat Data
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
data <- read_csv("D:/Kuliah Semester 4/Analisis Multivariat/stress_detection_data.csv")
## Rows: 773 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Gender, Occupation, Marital_Status, Smoking_Habit, Meditation_Pra...
## dbl (13): Age, Sleep_Duration, Sleep_Quality, Physical_Activity, Screen_Tim...
## time (2): Wake_Up_Time, Bed_Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cat("Dimensi data:", dim(data), "\n")
## Dimensi data: 773 22
Cek isi data
data
## # A tibble: 773 × 22
## Age Gender Occupation Marital_Status Sleep_Duration Sleep_Quality
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 30 Male Software Engineer Single 7 4
## 2 35 Female Marketing Manager Married 6 3
## 3 40 Male Data Scientist Divorced 7 4
## 4 35 Male Software Engineer Single 7 4
## 5 29 Female Teacher Single 8 5
## 6 45 Male Doctor Married 6 3
## 7 32 Female Graphic Designer Single 7 4
## 8 37 Male Civil Engineer Married 7.5 4
## 9 50 Male Business Owner Married 5.5 2
## 10 40 Female Nurse Divorced 6 3
## # ℹ 763 more rows
## # ℹ 16 more variables: Wake_Up_Time <time>, Bed_Time <time>,
## # Physical_Activity <dbl>, Screen_Time <dbl>, Caffeine_Intake <dbl>,
## # Alcohol_Intake <dbl>, Smoking_Habit <chr>, Work_Hours <dbl>,
## # Travel_Time <dbl>, Social_Interactions <dbl>, Meditation_Practice <chr>,
## # Exercise_Type <chr>, Blood_Pressure <dbl>, Cholesterol_Level <dbl>,
## # Blood_Sugar_Level <dbl>, Stress_Detection <chr>
Siapkan library
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(rlang)
Identifikasi tipe data
numeric_vars <- names(data)[sapply(data, is.numeric)]
categorical_vars <- names(data)[sapply(data, is.character)]
cat("\nVariabel numerik:\n")
##
## Variabel numerik:
print(numeric_vars)
## [1] "Age" "Sleep_Duration" "Sleep_Quality"
## [4] "Physical_Activity" "Screen_Time" "Caffeine_Intake"
## [7] "Alcohol_Intake" "Work_Hours" "Travel_Time"
## [10] "Social_Interactions" "Blood_Pressure" "Cholesterol_Level"
## [13] "Blood_Sugar_Level"
cat("\nVariabel kategorikal:\n")
##
## Variabel kategorikal:
print(categorical_vars)
## [1] "Gender" "Occupation" "Marital_Status"
## [4] "Smoking_Habit" "Meditation_Practice" "Exercise_Type"
## [7] "Stress_Detection"
Statistik deskriptif / EDA variabell numerik dan kategorik
# Statistik Deskriptif untuk variabel numerik
cat("\nStatistik Deskriptif (Numerik):\n")
##
## Statistik Deskriptif (Numerik):
print(summary(data[numeric_vars]))
## Age Sleep_Duration Sleep_Quality Physical_Activity
## Min. :18.00 Min. :3.500 Min. :2.000 Min. :1.000
## 1st Qu.:33.00 1st Qu.:6.000 1st Qu.:3.600 1st Qu.:2.000
## Median :39.00 Median :6.300 Median :3.900 Median :3.000
## Mean :38.89 Mean :6.338 Mean :3.848 Mean :2.979
## 3rd Qu.:45.00 3rd Qu.:7.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :60.00 Max. :8.000 Max. :5.000 Max. :5.000
## Screen_Time Caffeine_Intake Alcohol_Intake Work_Hours
## Min. :2.000 Min. :0.000 Min. :0.0000 Min. : 6.000
## 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.: 8.000
## Median :4.000 Median :2.000 Median :1.0000 Median : 8.000
## Mean :4.105 Mean :1.819 Mean :0.8887 Mean : 8.259
## 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.: 9.000
## Max. :8.000 Max. :4.000 Max. :2.0000 Max. :14.000
## Travel_Time Social_Interactions Blood_Pressure Cholesterol_Level
## Min. :0.500 Min. :1.000 Min. :110.0 Min. :150.0
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:130.0 1st Qu.:210.0
## Median :3.000 Median :3.000 Median :140.0 Median :220.0
## Mean :2.858 Mean :3.197 Mean :137.9 Mean :220.8
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:150.0 3rd Qu.:230.0
## Max. :5.000 Max. :5.000 Max. :170.0 Max. :290.0
## Blood_Sugar_Level
## Min. : 80.0
## 1st Qu.:105.0
## Median :115.0
## Mean :111.8
## 3rd Qu.:120.0
## Max. :150.0
# Frekuensi untuk variabel kategorikal
cat("\nDistribusi Kategorikal:\n")
##
## Distribusi Kategorikal:
for (var in categorical_vars) {
cat(paste0("\n", var, ":\n"))
print(table(data[[var]]))
}
##
## Gender:
##
## Female Male
## 384 389
##
## Occupation:
##
## Account Manager Accountant
## 1 11
## Actor Advertising Executive
## 4 1
## Advertising Manager Architect
## 1 17
## Artist Baker
## 7 1
## Bakery Owner Bank Manager
## 2 3
## Banker Bartender
## 3 2
## Biologist Blacksmith
## 1 3
## Brand Manager Bus Driver
## 1 2
## Business Analyst Business Consultant
## 8 4
## Business Owner Carpenter
## 1 5
## CEO Chef
## 1 26
## Civil Engineer Civil Servant
## 12 1
## Cleaner Cobbler
## 1 2
## Construction Engineer Construction Manager
## 1 6
## Construction Worker Consultant
## 14 3
## Content Creator Content Strategist
## 2 1
## Content Writer Copywriter
## 6 3
## Courier Customer Support
## 1 3
## Data Analyst Data Engineer
## 10 1
## Data Scientist Database Administrator
## 10 1
## Delivery Driver Dentist
## 1 3
## Designer Developer
## 2 1
## Digital Marketer Doctor
## 2 8
## Driver Editor
## 4 2
## Electrical Engineer Electrical Technician
## 3 1
## Electrician Engineer
## 17 11
## Entrepreneur Event Coordinator
## 3 1
## Event Manager Event Planner
## 1 6
## Executive Director Factory Worker
## 1 1
## Farmer Fashion Designer
## 8 8
## Financial Advisor Financial Analyst
## 2 7
## Financial Planner Firefighter
## 1 1
## Fisherwoman Fitness Instructor
## 2 1
## Fitness Trainer Flower Seller
## 4 2
## Freelancer Graphic Designer
## 5 18
## Hair Stylist Handicrafts Maker
## 1 2
## Healthcare Assistant HR Executive
## 1 2
## HR Manager HR Specialist
## 13 8
## Human Resources Human Resources Manager
## 3 1
## Insurance Agent Interior Designer
## 1 7
## IT Consultant IT Manager
## 6 1
## IT Specialist IT Support
## 2 1
## IT Support Specialist Janitor
## 1 1
## Journalist Laboratory Technician
## 16 1
## Lawyer Librarian
## 7 7
## Manager Marketing Director
## 1 1
## Marketing Executive Marketing Manager
## 8 11
## Marketing Specialist Mechanic
## 7 8
## Mechanical Engineer Medical Assistant
## 4 1
## Musician Nanny
## 3 2
## Network Administrator Network Engineer
## 2 1
## Nurse Nurse Practitioner
## 23 1
## Nutritional Specialist Nutritionist
## 1 1
## Operations Manager Painter
## 3 2
## Personal Trainer Pharmacist
## 1 6
## Photographer Physician
## 22 4
## Physicist Physiotherapist
## 1 2
## Pilot Plumber
## 1 9
## Police Officer Potter
## 6 2
## Primary School Teacher Product Designer
## 2 1
## Product Manager Program Manager
## 3 1
## Project Coordinator Project Manager
## 1 15
## Psychologist Public Relations Specialist
## 6 4
## Real Estate Agent Receptionist
## 7 1
## Research Analyst Research Assistant
## 1 3
## Research Scientist Researcher
## 5 2
## Restaurant Manager Retail Manager
## 2 4
## Retail Worker Retired
## 2 1
## Sales Executive Sales Manager
## 1 9
## Sales Representative Salesperson
## 6 1
## Scientist Seamstress
## 17 2
## Secretary Security Guard
## 2 3
## Security Officer SEO Specialist
## 1 5
## Shopkeeper Social Worker
## 2 5
## Software Architect Software Developer
## 2 15
## Software Engineer Software Tester
## 16 2
## Street Vendor Student
## 2 3
## Surgeon Tailor
## 1 5
## Taxi Driver Teacher
## 2 37
## Technician Therapist
## 1 1
## Truck Driver UX Designer
## 9 1
## Vegetable Vendor Veterinarian
## 2 8
## Waitress Warehouse Worker
## 3 1
## Weaver Web Developer
## 2 14
## Writer
## 5
##
## Marital_Status:
##
## Divorced Married Single
## 60 360 353
##
## Smoking_Habit:
##
## No Yes
## 358 415
##
## Meditation_Practice:
##
## No Yes
## 292 481
##
## Exercise_Type:
##
## Aerobics Cardio Meditation Pilates
## 5 215 24 88
## Strength Training Walking Yoga
## 245 5 191
##
## Stress_Detection:
##
## High Low Medium
## 301 162 310
Korelasi antar variabel numerik
cat("\nKorelasi Variabel Numerik:\n")
##
## Korelasi Variabel Numerik:
cor_matrix <- cor(data[numeric_vars], use = "complete.obs")
print(round(cor_matrix, 2))
## Age Sleep_Duration Sleep_Quality Physical_Activity
## Age 1.00 -0.17 0.07 0.29
## Sleep_Duration -0.17 1.00 0.21 -0.19
## Sleep_Quality 0.07 0.21 1.00 0.10
## Physical_Activity 0.29 -0.19 0.10 1.00
## Screen_Time 0.35 -0.25 0.02 0.72
## Caffeine_Intake 0.33 -0.25 0.09 0.73
## Alcohol_Intake 0.25 -0.24 0.15 0.59
## Work_Hours 0.25 0.24 0.01 0.12
## Travel_Time 0.24 -0.32 -0.07 0.68
## Social_Interactions 0.10 0.15 0.33 0.09
## Blood_Pressure 0.40 -0.31 0.07 0.66
## Cholesterol_Level 0.46 -0.29 0.15 0.59
## Blood_Sugar_Level 0.50 -0.22 0.16 0.61
## Screen_Time Caffeine_Intake Alcohol_Intake Work_Hours
## Age 0.35 0.33 0.25 0.25
## Sleep_Duration -0.25 -0.25 -0.24 0.24
## Sleep_Quality 0.02 0.09 0.15 0.01
## Physical_Activity 0.72 0.73 0.59 0.12
## Screen_Time 1.00 0.68 0.54 0.31
## Caffeine_Intake 0.68 1.00 0.65 0.08
## Alcohol_Intake 0.54 0.65 1.00 -0.03
## Work_Hours 0.31 0.08 -0.03 1.00
## Travel_Time 0.50 0.72 0.57 -0.11
## Social_Interactions 0.10 -0.08 -0.02 0.45
## Blood_Pressure 0.61 0.74 0.61 -0.05
## Cholesterol_Level 0.62 0.59 0.45 0.24
## Blood_Sugar_Level 0.64 0.63 0.47 0.23
## Travel_Time Social_Interactions Blood_Pressure
## Age 0.24 0.10 0.40
## Sleep_Duration -0.32 0.15 -0.31
## Sleep_Quality -0.07 0.33 0.07
## Physical_Activity 0.68 0.09 0.66
## Screen_Time 0.50 0.10 0.61
## Caffeine_Intake 0.72 -0.08 0.74
## Alcohol_Intake 0.57 -0.02 0.61
## Work_Hours -0.11 0.45 -0.05
## Travel_Time 1.00 -0.19 0.74
## Social_Interactions -0.19 1.00 -0.21
## Blood_Pressure 0.74 -0.21 1.00
## Cholesterol_Level 0.52 0.09 0.81
## Blood_Sugar_Level 0.57 0.03 0.83
## Cholesterol_Level Blood_Sugar_Level
## Age 0.46 0.50
## Sleep_Duration -0.29 -0.22
## Sleep_Quality 0.15 0.16
## Physical_Activity 0.59 0.61
## Screen_Time 0.62 0.64
## Caffeine_Intake 0.59 0.63
## Alcohol_Intake 0.45 0.47
## Work_Hours 0.24 0.23
## Travel_Time 0.52 0.57
## Social_Interactions 0.09 0.03
## Blood_Pressure 0.81 0.83
## Cholesterol_Level 1.00 0.94
## Blood_Sugar_Level 0.94 1.00
corrplot(cor_matrix, method = "color", type = "upper", tl.cex = 0.8)

Cek missing value
cat("\nCek Missing Values:\n")
##
## Cek Missing Values:
colSums(is.na(data))
## Age Gender Occupation Marital_Status
## 0 0 0 0
## Sleep_Duration Sleep_Quality Wake_Up_Time Bed_Time
## 0 0 0 0
## Physical_Activity Screen_Time Caffeine_Intake Alcohol_Intake
## 0 0 0 0
## Smoking_Habit Work_Hours Travel_Time Social_Interactions
## 0 0 0 0
## Meditation_Practice Exercise_Type Blood_Pressure Cholesterol_Level
## 0 0 0 0
## Blood_Sugar_Level Stress_Detection
## 0 0
Cek distribusi target
cat("\nDistribusi Target Variable (Stress_Detection):\n")
##
## Distribusi Target Variable (Stress_Detection):
print(table(data$Stress_Detection))
##
## High Low Medium
## 301 162 310
barplot(table(data$Stress_Detection), col = "orange", main = "Distribusi Stress Detection")

Normalisasi
# Fungsi normalisasi min-max
normalize <- function(x) {
return((x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE)))
}
# Normalisasi semua variabel numerik
data[numeric_vars] <- lapply(data[numeric_vars], normalize)
# Cek hasil normalisasi (optional)
summary(data[numeric_vars])
## Age Sleep_Duration Sleep_Quality Physical_Activity
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3571 1st Qu.:0.5556 1st Qu.:0.5333 1st Qu.:0.2500
## Median :0.5000 Median :0.6222 Median :0.6333 Median :0.5000
## Mean :0.4973 Mean :0.6308 Mean :0.6160 Mean :0.4948
## 3rd Qu.:0.6429 3rd Qu.:0.7778 3rd Qu.:0.6667 3rd Qu.:0.7500
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Screen_Time Caffeine_Intake Alcohol_Intake Work_Hours
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3333 1st Qu.:0.2500 1st Qu.:0.0000 1st Qu.:0.2500
## Median :0.3333 Median :0.5000 Median :0.5000 Median :0.2500
## Mean :0.3509 Mean :0.4547 Mean :0.4444 Mean :0.2823
## 3rd Qu.:0.5000 3rd Qu.:0.5000 3rd Qu.:0.5000 3rd Qu.:0.3750
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Travel_Time Social_Interactions Blood_Pressure Cholesterol_Level
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3333 1st Qu.:0.5000 1st Qu.:0.3333 1st Qu.:0.4286
## Median :0.5556 Median :0.5000 Median :0.5000 Median :0.5000
## Mean :0.5241 Mean :0.5492 Mean :0.4657 Mean :0.5060
## 3rd Qu.:0.7778 3rd Qu.:0.7500 3rd Qu.:0.6667 3rd Qu.:0.5714
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Blood_Sugar_Level
## Min. :0.0000
## 1st Qu.:0.3571
## Median :0.5000
## Mean :0.4538
## 3rd Qu.:0.5714
## Max. :1.0000
Encoding (factor)
categorical_vars_except_target <- setdiff(c("Gender", "Occupation", "Marital_Status", "Smoking_Habit", "Meditation_Practice", "Exercise_Type"), "Stress_Detection")
for (var in categorical_vars_except_target) {
data[[var]] <- as.factor(data[[var]])
}
Validasi target
data$Stress_Level <- factor(data$Stress_Detection,
levels = c("Low", "Medium", "High"),
ordered = TRUE)
Load library uji dan model
library(MASS) # untuk polr
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car) # untuk VIF
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(brant) # untuk Brant test
## Warning: package 'brant' was built under R version 4.4.3
Inisialisasi variabel predictor
predictors <- c(numeric_vars, categorical_vars_except_target)
Load Model
formula_olr <- as.formula(
paste("Stress_Level ~", paste(predictors, collapse = " + "))
)
Uji Asumsi
Uji Homogenitas Levene Test
# Langkah ini HARUS dilakukan dulu:
data_reduced <- data[, !(names(data) %in% c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))]
# Ambil variabel numerik selain target
numeric_vars <- names(Filter(is.numeric, data_reduced))
numeric_vars <- setdiff(numeric_vars, "Stress_Level")
# Inisialisasi dataframe kosong untuk menyimpan hasil
levene_results <- data.frame(
Variabel = character(),
Df_Group = integer(),
Df_Residual = integer(),
F_value = numeric(),
p_value = numeric(),
Significance = character(),
stringsAsFactors = FALSE
)
# Loop tiap variabel numerik dan simpan hasil ke dalam dataframe
for (var in numeric_vars) {
formula_levene <- as.formula(paste(var, "~ Stress_Level"))
test_result <- leveneTest(formula_levene, data = data_reduced)
# Ambil nilai dari hasil test
df_group <- test_result[1, "Df"]
df_residual <- test_result[2, "Df"]
f_value <- test_result[1, "F value"]
p_value <- test_result[1, "Pr(>F)"]
# Tentukan simbol signifikansi
sig <- if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else if (p_value < 0.1) {
"."
} else {
""
}
# Tambahkan baris ke dataframe
levene_results <- rbind(levene_results, data.frame(
Variabel = var,
Df_Group = df_group,
Df_Residual = df_residual,
F_value = round(f_value, 4),
p_value = signif(p_value, 5),
Significance = sig
))
}
print(levene_results)
## Variabel Df_Group Df_Residual F_value p_value Significance
## 1 Age 2 770 1.7085 1.8182e-01
## 2 Sleep_Duration 2 770 2.3866 9.2618e-02 .
## 3 Sleep_Quality 2 770 8.6998 1.8355e-04 ***
## 4 Physical_Activity 2 770 4.1469 1.6168e-02 *
## 5 Screen_Time 2 770 11.7555 9.3554e-06 ***
## 6 Caffeine_Intake 2 770 14.0041 1.0619e-06 ***
## 7 Alcohol_Intake 2 770 22.0597 4.8305e-10 ***
## 8 Work_Hours 2 770 0.0326 9.6789e-01
## 9 Travel_Time 2 770 6.4970 1.5919e-03 **
## 10 Social_Interactions 2 770 10.1524 4.4461e-05 ***
## 11 Blood_Pressure 2 770 6.4483 1.6701e-03 **
Uji Multikolinearitas VIF
formula_lm <- as.formula(
paste("as.numeric(Stress_Level) ~", paste(predictors, collapse = " + "))
)
model_lm <- lm(formula_lm, data = data)
cat("\nUji Multikolinearitas (VIF):\n")
##
## Uji Multikolinearitas (VIF):
vif_values <- vif(model_lm)
print(vif_values)
## GVIF Df GVIF^(1/(2*Df))
## Age 1.909720 1 1.381926
## Sleep_Duration 2.196792 1 1.482158
## Sleep_Quality 2.005374 1 1.416112
## Physical_Activity 4.937866 1 2.222131
## Screen_Time 4.408499 1 2.099643
## Caffeine_Intake 4.999134 1 2.235874
## Alcohol_Intake 3.039647 1 1.743458
## Work_Hours 3.178136 1 1.782733
## Travel_Time 4.686248 1 2.164774
## Social_Interactions 2.823614 1 1.680361
## Blood_Pressure 11.127018 1 3.335718
## Cholesterol_Level 17.341192 1 4.164276
## Blood_Sugar_Level 16.584033 1 4.072350
## Gender 2.140902 1 1.463182
## Occupation 2768.354669 168 1.023870
## Marital_Status 2.741720 2 1.286784
## Smoking_Habit 1.832087 1 1.353546
## Meditation_Practice 2.004254 1 1.415717
## Exercise_Type 26.452024 6 1.313827
Penanganan Multikolinearitas
vars_check <- c("Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level")
data_num <- data[vars_check]
cor_matrix_check <- cor(data_num, use = "complete.obs")
print(round(cor_matrix_check, 2))
## Blood_Pressure Cholesterol_Level Blood_Sugar_Level
## Blood_Pressure 1.00 0.81 0.83
## Cholesterol_Level 0.81 1.00 0.94
## Blood_Sugar_Level 0.83 0.94 1.00
# Visualisasi korelasi dengan corrplot
library(corrplot)
corrplot(cor_matrix_check, method = "number", type = "upper")

# Pastikan dplyr sudah di-load (tidak wajib jika pakai base R)
library(dplyr)
# Hapus kolom Occupation, Cholesterol_Level, dan Blood_Sugar_Level dari data
data_reduced <- data[, !(names(data) %in% c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))]
# Update variabel prediktor, hapus 'Occupation', 'Cholesterol_Level', dan 'Blood_Sugar_Level'
predictors_reduced <- setdiff(predictors, c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))
# Buat formula baru untuk model OLR dan linear (cek VIF)
formula_olr_reduced <- as.formula(
paste("Stress_Level ~", paste(predictors_reduced, collapse = " + "))
)
formula_lm_reduced <- as.formula(
paste("as.numeric(Stress_Level) ~", paste(predictors_reduced, collapse = " + "))
)
Uji Model
# Model linear untuk cek VIF
model_lm_reduced <- lm(formula_lm_reduced, data = data_reduced)
cat("\nVIF setelah hapus Occupation, Cholesterol_Level, dan Blood_Sugar_Level:\n")
##
## VIF setelah hapus Occupation, Cholesterol_Level, dan Blood_Sugar_Level:
print(vif(model_lm_reduced))
## GVIF Df GVIF^(1/(2*Df))
## Age 1.458957 1 1.207873
## Sleep_Duration 1.445326 1 1.202217
## Sleep_Quality 1.468714 1 1.211905
## Physical_Activity 3.585626 1 1.893575
## Screen_Time 3.132329 1 1.769839
## Caffeine_Intake 3.739988 1 1.933905
## Alcohol_Intake 2.244304 1 1.498100
## Work_Hours 2.145756 1 1.464840
## Travel_Time 3.611850 1 1.900487
## Social_Interactions 2.017837 1 1.420506
## Blood_Pressure 4.002524 1 2.000631
## Gender 1.335309 1 1.155556
## Marital_Status 1.509696 2 1.108466
## Smoking_Habit 1.350746 1 1.162216
## Meditation_Practice 1.436075 1 1.198364
## Exercise_Type 2.517047 6 1.079960
# Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level
model_olr_reduced <- polr(formula_olr_reduced, data = data_reduced, Hess = TRUE)
cat("\nSummary Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level:\n")
##
## Summary Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level:
print(summary(model_olr_reduced))
## Call:
## polr(formula = formula_olr_reduced, data = data_reduced, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Age -0.53885 0.4973 -1.0835
## Sleep_Duration -2.58445 0.5705 -4.5303
## Sleep_Quality -0.85584 0.5129 -1.6686
## Physical_Activity -1.09287 0.7423 -1.4723
## Screen_Time 2.39654 0.9909 2.4186
## Caffeine_Intake 0.18292 0.6982 0.2620
## Alcohol_Intake 0.79914 0.3609 2.2145
## Work_Hours 4.90542 0.8924 5.4966
## Travel_Time 0.08261 0.6105 0.1353
## Social_Interactions 1.29990 0.5309 2.4486
## Blood_Pressure 5.17643 0.7355 7.0382
## GenderMale -0.36254 0.1777 -2.0404
## Marital_StatusMarried 0.09306 0.3185 0.2922
## Marital_StatusSingle -0.29668 0.3193 -0.9292
## Smoking_HabitYes 0.26530 0.1753 1.5136
## Meditation_PracticeYes 0.63085 0.1832 3.4434
## Exercise_TypeCardio -2.28877 0.9860 -2.3212
## Exercise_TypeMeditation -0.86557 1.0738 -0.8061
## Exercise_TypePilates -2.39004 0.9890 -2.4167
## Exercise_TypeStrength Training -1.64813 0.9837 -1.6754
## Exercise_TypeWalking -2.51186 1.5769 -1.5929
## Exercise_TypeYoga -2.11336 0.9830 -2.1499
##
## Intercepts:
## Value Std. Error t value
## Low|Medium -0.9444 1.1366 -0.8310
## Medium|High 1.7433 1.1361 1.5345
##
## Residual Deviance: 1203.024
## AIC: 1251.024
Uji Brant Test
library(brant)
brant_result <- brant(model_olr_reduced) # model tanpa Occupation
## --------------------------------------------------------------------
## Test for X2 df probability
## --------------------------------------------------------------------
## Omnibus 43.38 22 0
## Age 0.38 1 0.54
## Sleep_Duration 0.11 1 0.74
## Sleep_Quality 1.52 1 0.22
## Physical_Activity 2.3 1 0.13
## Screen_Time 0 1 0.97
## Caffeine_Intake 0.52 1 0.47
## Alcohol_Intake 0.17 1 0.68
## Work_Hours 5.89 1 0.02
## Travel_Time 0.29 1 0.59
## Social_Interactions 0.02 1 0.89
## Blood_Pressure 0.9 1 0.34
## GenderMale 0.09 1 0.77
## Marital_StatusMarried 0.33 1 0.57
## Marital_StatusSingle 0.23 1 0.63
## Smoking_HabitYes 0.51 1 0.48
## Meditation_PracticeYes 5.49 1 0.02
## Exercise_TypeCardio 0 1 0.99
## Exercise_TypeMeditation 0 1 0.99
## Exercise_TypePilates 0 1 0.99
## Exercise_TypeStrength Training 0 1 0.99
## Exercise_TypeWalking 0 1 1
## Exercise_TypeYoga 0 1 0.99
## --------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
## Warning in brant(model_olr_reduced): 291 combinations in table(dv,ivs) do not
## occur. Because of that, the test results might be invalid.
print(brant_result)
## X2 df probability
## Omnibus 4.338153e+01 22 0.004234094
## Age 3.821521e-01 1 0.536453690
## Sleep_Duration 1.097887e-01 1 0.740384781
## Sleep_Quality 1.522510e+00 1 0.217240046
## Physical_Activity 2.303068e+00 1 0.129118777
## Screen_Time 1.001217e-03 1 0.974757535
## Caffeine_Intake 5.235043e-01 1 0.469350661
## Alcohol_Intake 1.690718e-01 1 0.680938052
## Work_Hours 5.889732e+00 1 0.015229416
## Travel_Time 2.857900e-01 1 0.592931098
## Social_Interactions 1.793536e-02 1 0.893463498
## Blood_Pressure 8.967436e-01 1 0.343656386
## GenderMale 8.627875e-02 1 0.768962361
## Marital_StatusMarried 3.279669e-01 1 0.566858706
## Marital_StatusSingle 2.264632e-01 1 0.634158806
## Smoking_HabitYes 5.068980e-01 1 0.476484760
## Meditation_PracticeYes 5.488855e+00 1 0.019138071
## Exercise_TypeCardio 1.678269e-04 1 0.989663854
## Exercise_TypeMeditation 1.577229e-04 1 0.989979809
## Exercise_TypePilates 1.638582e-04 1 0.989786791
## Exercise_TypeStrength Training 1.715674e-04 1 0.989549308
## Exercise_TypeWalking 2.723550e-07 1 0.999583603
## Exercise_TypeYoga 1.767143e-04 1 0.989393720
model_null <- polr(Stress_Level ~ 1, data = data_reduced, Hess = TRUE)
anova_result <- anova(model_null, model_olr_reduced, test = "Chisq")
print(anova_result)
## Likelihood ratio tests of ordinal regression models
##
## Response: Stress_Level
## Model
## 1 1
## 2 Age + Sleep_Duration + Sleep_Quality + Physical_Activity + Screen_Time + Caffeine_Intake + Alcohol_Intake + Work_Hours + Travel_Time + Social_Interactions + Blood_Pressure + Gender + Marital_Status + Smoking_Habit + Meditation_Practice + Exercise_Type
## Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 771 1640.595
## 2 749 1203.024 1 vs 2 22 437.5708 0
Cek koefisien model
exp_coef <- exp(coef(model_olr_reduced))
print(exp_coef)
## Age Sleep_Duration
## 0.58341749 0.07543769
## Sleep_Quality Physical_Activity
## 0.42492652 0.33525253
## Screen_Time Caffeine_Intake
## 10.98506882 1.20071877
## Alcohol_Intake Work_Hours
## 2.22362045 135.01986788
## Travel_Time Social_Interactions
## 1.08611331 3.66894638
## Blood_Pressure GenderMale
## 177.04897441 0.69590562
## Marital_StatusMarried Marital_StatusSingle
## 1.09752446 0.74327879
## Smoking_HabitYes Meditation_PracticeYes
## 1.30381637 1.87920855
## Exercise_TypeCardio Exercise_TypeMeditation
## 0.10139134 0.42081203
## Exercise_TypePilates Exercise_TypeStrength Training
## 0.09162601 0.19241009
## Exercise_TypeWalking Exercise_TypeYoga
## 0.08111703 0.12083094
Prediksi dan Evaluasi
# Prediksi kategori stress
predicted_stress <- predict(model_olr_reduced, newdata = data_reduced)
# Convert predicted_stress to an ordered factor with the same levels as Stress_Level
predicted_stress <- factor(predicted_stress,
levels = levels(data_reduced$Stress_Level),
ordered = TRUE)
# Buat confusion matrix
table(Predicted = predicted_stress, Actual = data_reduced$Stress_Level)
## Actual
## Predicted Low Medium High
## Low 70 34 1
## Medium 79 222 89
## High 13 54 211
# Hitung akurasi sederhana
mean(predicted_stress == data_reduced$Stress_Level)
## [1] 0.6507115
Confusion Matrix
library(ggplot2)
conf_matrix <- table(Predicted = predicted_stress, Actual = data_reduced$Stress_Level)
conf_df <- as.data.frame(conf_matrix)
ggplot(conf_df, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), color = "white", size = 5) +
scale_fill_gradient(low = "blue", high = "red") +
theme_minimal() +
labs(title = "Confusion Matrix: Prediksi vs Aktual")

Uji kemungkinan model
library(MASS) # Untuk polr (Ordinal Logistic Regression)
library(car) # Untuk VIF
library(brant) # Untuk Brant test
library(ggplot2) # Untuk visualisasi
library(dplyr) # Untuk manipulasi data
library(gridExtra) # Untuk menampilkan plot secara bersamaan
# Definisikan variabel prediktor berdasarkan tabel
# Variabel yang digunakan setelah penghapusan (berdasarkan data_reduced)
predictors_reduced <- setdiff(names(data_reduced), c("Stress_Level", "Stress_Detection"))
# Subset prediktor numerik dan kategorikal (tanpa Occupation, Cholesterol_Level, Blood_Sugar_Level)
numeric_vars_reduced <- names(Filter(is.numeric, data_reduced))
numeric_vars_reduced <- setdiff(numeric_vars_reduced, c("Stress_Level", "Stress_Detection"))
categorical_vars_reduced <- names(Filter(is.factor, data_reduced))
categorical_vars_reduced <- setdiff(categorical_vars_reduced, c("Stress_Level", "Stress_Detection"))
Model OLR Biasa
predictors_olr_biasa <- c("Sleep_Quality", "Screen_Time", "Age", "Sleep_Duration", "Physical_Activity", "Caffeine_Intake", "Alcohol_Intake", "Travel_Time", "Social_Interactions", "Blood_Pressure", "Gender", "Marital_Status","Smoking_Habit", "Exercise_Type")
formula_olr_biasa <- as.formula(
paste("Stress_Level ~", paste(predictors_olr_biasa, collapse = " + "))
)
model_olr_biasa <- polr(formula_olr_biasa, data = data_reduced, Hess = TRUE)
# Prediksi dan akurasi
pred_olr_biasa <- predict(model_olr_biasa, newdata = data_reduced)
pred_olr_biasa <- factor(pred_olr_biasa, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_biasa <- mean(pred_olr_biasa == data_reduced$Stress_Level)
Model OLR + Partial
# Gunakan semua variabel prediktor + Work_Hours sebagai mediator
predictors_olr_partial <- predictors_reduced
formula_olr_partial <- as.formula(
paste("Stress_Level ~", paste(predictors_olr_partial, collapse = " + "))
)
model_olr_partial <- polr(formula_olr_partial, data = data_reduced, Hess = TRUE)
# Prediksi dan akurasi
pred_olr_partial <- predict(model_olr_partial, newdata = data_reduced)
pred_olr_partial <- factor(pred_olr_partial, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_partial <- mean(pred_olr_partial == data_reduced$Stress_Level)
Model OLR Parsial
# Hanya gunakan variabel paling signifikan (misalnya berdasarkan summary model_olr_partial)
predictors_olr_parsimoni <- c("Work_Hours", "Meditation_Practice") # Sesuaikan berdasarkan signifikansi
formula_olr_parsimoni <- as.formula(
paste("Stress_Level ~", paste(predictors_olr_parsimoni, collapse = " + "))
)
model_olr_parsimoni <- polr(formula_olr_parsimoni, data = data_reduced, Hess = TRUE)
# Prediksi dan akurasi
pred_olr_parsimoni <- predict(model_olr_parsimoni, newdata = data_reduced)
pred_olr_parsimoni <- factor(pred_olr_parsimoni, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_parsimoni <- mean(pred_olr_parsimoni == data_reduced$Stress_Level)
Perbandingan model
model_comparison <- data.frame(
Model = c("OLR Biasa", "OLR + Partial", "OLR Parsial"),
AIC = c(AIC(model_olr_biasa), AIC(model_olr_partial), AIC(model_olr_parsimoni)),
Accuracy = c(acc_olr_biasa, acc_olr_partial, acc_olr_parsimoni)
)
# Tampilkan perbandingan
cat("\n[Perbandingan Model]\n")
##
## [Perbandingan Model]
print(model_comparison)
## Model AIC Accuracy
## 1 OLR Biasa 1292.412 0.6287193
## 2 OLR + Partial 1242.585 0.6636481
## 3 OLR Parsial 1561.791 0.5355757
# OLR Biasa
conf_matrix_biasa <- table(Predicted = pred_olr_biasa, Actual = data_reduced$Stress_Level)
conf_df_biasa <- as.data.frame(conf_matrix_biasa)
p1 <- ggplot(conf_df_biasa, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), color = "white", size = 5) +
scale_fill_gradient(low = "blue", high = "red") +
theme_minimal() +
labs(title = "Confusion Matrix: OLR Biasa")
# OLR + Partial
conf_matrix_partial <- table(Predicted = pred_olr_partial, Actual = data_reduced$Stress_Level)
conf_df_partial <- as.data.frame(conf_matrix_partial)
p2 <- ggplot(conf_df_partial, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), color = "white", size = 5) +
scale_fill_gradient(low = "blue", high = "red") +
theme_minimal() +
labs(title = "Confusion Matrix: OLR + Partial")
# OLR Parsial
conf_matrix_parsimoni <- table(Predicted = pred_olr_parsimoni, Actual = data_reduced$Stress_Level)
conf_df_parsimoni <- as.data.frame(conf_matrix_parsimoni)
p3 <- ggplot(conf_df_parsimoni, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), color = "white", size = 5) +
scale_fill_gradient(low = "blue", high = "red") +
theme_minimal() +
labs(title = "Confusion Matrix: OLR Parsial")
# Tampilkan semua plot
grid.arrange(p1, p2, p3, ncol = 2)

ANALISIS DISKRIMINAN
Inisialisasi Library
library(dplyr)
library(MASS)
library(ggplot2)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(car)
#install.packages("heplots")
library(heplots)
## Warning: package 'heplots' was built under R version 4.4.3
## Loading required package: broom
## Warning: package 'broom' was built under R version 4.4.3
#install.packages("MVN")
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
library(stats)
#install.packages("ggfortify")
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.4.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.3
load Data
data <- read.csv("D:/Kuliah Semester 4/Analisis Multivariat/stress_detection_data.csv")
categorical_vars <- c("Gender", "Occupation", "Marital_Status", "Smoking_Habit",
"Meditation_Practice", "Exercise_Type", "Stress_Detection")
data[categorical_vars] <- lapply(data[categorical_vars], factor)
Pre-Processing
time_to_decimal <- function(time_str) {
parsed <- strptime(time_str, format = "%I:%M %p")
hour_decimal <- as.numeric(format(parsed, "%H")) + as.numeric(format(parsed, "%M")) / 60
return(hour_decimal)
}
data$Wake_Up_Time_num <- time_to_decimal(data$Wake_Up_Time)
data$Bed_Time_num <- time_to_decimal(data$Bed_Time)
# Variabel numerik
numerik_vars <- c("Age", "Sleep_Duration", "Sleep_Quality", "Physical_Activity", "Screen_Time",
"Caffeine_Intake", "Alcohol_Intake", "Work_Hours", "Travel_Time",
"Social_Interactions", "Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level",
"Wake_Up_Time_num", "Bed_Time_num")
# Ubah Stress_Detection ke numerik
data$Stress_Num <- as.numeric(data$Stress_Detection)
Seleksi Fitur
# Korelasi numerik dengan target
cor_values <- sapply(data[, numerik_vars], function(x) cor(x, data$Stress_Num, use = "complete.obs"))
selected_numeric <- names(cor_values[abs(cor_values) > 0.1])
cat("Fitur numerik lolos korelasi:\n")
## Fitur numerik lolos korelasi:
print(selected_numeric)
## [1] "Age" "Sleep_Duration" "Physical_Activity"
## [4] "Screen_Time" "Caffeine_Intake" "Alcohol_Intake"
## [7] "Work_Hours" "Travel_Time" "Social_Interactions"
## [10] "Blood_Pressure" "Cholesterol_Level" "Blood_Sugar_Level"
## [13] "Wake_Up_Time_num"
# Uji Chi-square fitur kategorikal dan tampilkan p-value
selected_categorical <- c()
for (v in categorical_vars[-length(categorical_vars)]) {
tbl <- table(data[[v]], data$Stress_Detection)
chi <- suppressWarnings(chisq.test(tbl))
pval <- chi$p.value
if (pval < 0.05) {
selected_categorical <- c(selected_categorical, v)
}
cat(sprintf("- %s: p-value = %.2e %s\n", v, pval,
ifelse(pval < 0.05, "--> Lolos", "--> Tidak Lolos")))
}
## - Gender: p-value = 2.57e-04 --> Lolos
## - Occupation: p-value = 1.11e-02 --> Lolos
## - Marital_Status: p-value = 6.80e-14 --> Lolos
## - Smoking_Habit: p-value = 2.50e-17 --> Lolos
## - Meditation_Practice: p-value = 6.36e-19 --> Lolos
## - Exercise_Type: p-value = 4.48e-08 --> Lolos
cat("\nFitur kategorikal yang lolos uji Chi-square:\n")
##
## Fitur kategorikal yang lolos uji Chi-square:
print(selected_categorical)
## [1] "Gender" "Occupation" "Marital_Status"
## [4] "Smoking_Habit" "Meditation_Practice" "Exercise_Type"
selected_features <- c(selected_numeric, selected_categorical)
data_selected <- data[, c(selected_features, "Stress_Detection")]
# Distribusi Kelas
class_dist <- data_selected %>%
count(Stress_Detection) %>%
mutate(Percent = round(n / sum(n) * 100, 1))
cat("\n Distribusi kelas:\n")
##
## Distribusi kelas:
print(class_dist)
## Stress_Detection n Percent
## 1 High 301 38.9
## 2 Low 162 21.0
## 3 Medium 310 40.1
Feature Engineering
# Pisah numerik dan kategorikal
numeric_selected <- intersect(selected_features, numerik_vars)
categorical_selected <- setdiff(selected_features, numeric_selected)
# Skewness numerik
cat("\n Skewness Fitur Numerik:\n")
##
## Skewness Fitur Numerik:
for (var in numeric_selected) {
sk <- skewness(data_selected[[var]], na.rm = TRUE)
tag <- ifelse(abs(sk) > 0.5, "Bisa dinormalisasi", "Standarisasi cukup")
cat(sprintf(" - %s: skewness = %.2f --> %s\n", var, sk, tag))
}
## - Age: skewness = -0.01 --> Standarisasi cukup
## - Sleep_Duration: skewness = -0.80 --> Bisa dinormalisasi
## - Physical_Activity: skewness = -0.23 --> Standarisasi cukup
## - Screen_Time: skewness = -0.03 --> Standarisasi cukup
## - Caffeine_Intake: skewness = 0.03 --> Standarisasi cukup
## - Alcohol_Intake: skewness = 0.14 --> Standarisasi cukup
## - Work_Hours: skewness = 0.77 --> Bisa dinormalisasi
## - Travel_Time: skewness = 0.10 --> Standarisasi cukup
## - Social_Interactions: skewness = 0.13 --> Standarisasi cukup
## - Blood_Pressure: skewness = 0.06 --> Standarisasi cukup
## - Cholesterol_Level: skewness = -0.12 --> Standarisasi cukup
## - Blood_Sugar_Level: skewness = -0.18 --> Standarisasi cukup
## - Wake_Up_Time_num: skewness = 0.18 --> Standarisasi cukup
# Transformasi numerik
to_standardize <- c("Age", "Physical_Activity", "Screen_Time", "Caffeine_Intake",
"Alcohol_Intake", "Travel_Time", "Social_Interactions",
"Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level",
"Wake_Up_Time_num")
to_normalize <- c("Sleep_Duration", "Work_Hours")
# Standarisasi
data_selected[to_standardize] <- scale(data_selected[to_standardize])
# Normalisasi
minmax_norm <- function(x) {(x - min(x)) / (max(x) - min(x))}
data_selected[to_normalize] <- lapply(data_selected[to_normalize], minmax_norm)
cat("\nSkewness fitur numerik setelah transformasi:\n")
##
## Skewness fitur numerik setelah transformasi:
for (var in numeric_selected) {
sk <- skewness(data_selected[[var]], na.rm = TRUE)
tag <- ifelse(abs(sk) > 0.5, "Masih miring (perlu perhatian)", "Sudah cukup normal")
cat(sprintf(" - %s: skewness = %.2f --> %s\n", var, sk, tag))
}
## - Age: skewness = -0.01 --> Sudah cukup normal
## - Sleep_Duration: skewness = -0.80 --> Masih miring (perlu perhatian)
## - Physical_Activity: skewness = -0.23 --> Sudah cukup normal
## - Screen_Time: skewness = -0.03 --> Sudah cukup normal
## - Caffeine_Intake: skewness = 0.03 --> Sudah cukup normal
## - Alcohol_Intake: skewness = 0.14 --> Sudah cukup normal
## - Work_Hours: skewness = 0.77 --> Masih miring (perlu perhatian)
## - Travel_Time: skewness = 0.10 --> Sudah cukup normal
## - Social_Interactions: skewness = 0.13 --> Sudah cukup normal
## - Blood_Pressure: skewness = 0.06 --> Sudah cukup normal
## - Cholesterol_Level: skewness = -0.12 --> Sudah cukup normal
## - Blood_Sugar_Level: skewness = -0.18 --> Sudah cukup normal
## - Wake_Up_Time_num: skewness = 0.18 --> Sudah cukup normal
# Boxcox (karena masih terlalu miring pada beberapa fitur)
boxcox_transform <- function(x) {
x_shifted <- x + 0.001
bc <- boxcox(x_shifted ~ 1, lambda = seq(-2, 2, 0.1), plotit = FALSE)
lambda_opt <- bc$x[which.max(bc$y)]
if (abs(lambda_opt) < 1e-5) {
return(log(x_shifted))
} else {
return((x_shifted^lambda_opt - 1) / lambda_opt)
}
}
# Transformasi tambahan dengan Box-Cox ke Sleep_Duration dan Work_Hours
data_selected$Sleep_Duration_boxcox <- boxcox_transform(data_selected$Sleep_Duration)
data_selected$Work_Hours_boxcox <- boxcox_transform(data_selected$Work_Hours)
cat("Distribusi numerik Sleep_Duration (Box-Cox):\n")
## Distribusi numerik Sleep_Duration (Box-Cox):
print(summary(data_selected$Sleep_Duration_boxcox))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.7142 -0.3998 -0.3458 -0.3311 -0.2110 0.0010
cat(sprintf("Skewness: %.4f\n\n", skewness(data_selected$Sleep_Duration_boxcox, na.rm = TRUE)))
## Skewness: -0.0238
cat("Distribusi numerik Work_Hours (Box-Cox):\n")
## Distribusi numerik Work_Hours (Box-Cox):
print(summary(data_selected$Work_Hours_boxcox))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4172239 -0.8857293 -0.8857293 -0.8525954 -0.7082382 0.0009999
cat(sprintf("Skewness: %.4f\n", skewness(data_selected$Work_Hours_boxcox, na.rm = TRUE)))
## Skewness: 0.1167
Uji Asumsi Analisis Diskriminan
# Normalitas multivariat per kelas (Mardia test)
cat("\nUji Normalitas Multivariat (Mardia Test) per kelas:\n")
##
## Uji Normalitas Multivariat (Mardia Test) per kelas:
library(MVN) # Pastikan paket MVN sudah dimuat
for (grp in levels(data_selected$Stress_Detection)) {
cat("\nKelas:", grp, "\n")
subset_data <- data_selected[data_selected$Stress_Detection == grp, numeric_selected]
mvn_test <- mvn(subset_data) # Hapus mvnTest = "mardia"
print(mvn_test$multivariateNormality)
}
##
## Kelas: High
## NULL
##
## Kelas: Low
## NULL
##
## Kelas: Medium
## NULL
# Homogenitas Kovarians (Box's M test)
cat("\n Uji Homogenitas Kovarians (Box's M Test):\n")
##
## Uji Homogenitas Kovarians (Box's M Test):
boxm_res <- boxM(data_selected[, numeric_selected], data_selected$Stress_Detection)
print(boxm_res)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data_selected[, numeric_selected]
## Chi-Sq (approx.) = 844.6, df = 182, p-value < 2.2e-16
# Multikolinearitas (VIF)
cat("\n Uji Multikolinearitas (VIF):\n")
##
## Uji Multikolinearitas (VIF):
vif_values <- sapply(numeric_selected, function(var) {
others <- setdiff(numeric_selected, var)
form <- as.formula(paste(var, "~", paste(others, collapse = "+")))
vif_model <- lm(form, data = data_selected)
vif_val <- vif(vif_model)[1]
return(vif_val)
})
print(vif_values)
## Age.Sleep_Duration Sleep_Duration.Age Physical_Activity.Age
## 1.542308 1.399028 1.428361
## Screen_Time.Age Caffeine_Intake.Age Alcohol_Intake.Age
## 1.429564 1.428860 1.429927
## Work_Hours.Age Travel_Time.Age Social_Interactions.Age
## 1.392150 1.424854 1.421479
## Blood_Pressure.Age Cholesterol_Level.Age Blood_Sugar_Level.Age
## 1.419996 1.410630 1.359280
## Wake_Up_Time_num.Age
## 1.430795
# Linearitas
cat("\n Visualisasi Linearitas antar variabel numerik:\n")
##
## Visualisasi Linearitas antar variabel numerik:
pairs(data_selected[, numeric_selected], col = as.numeric(data_selected$Stress_Detection),
pch=19, main = "Scatterplot Matrix - Cek Linearitas antar Variabel Numerik")

Pemodelan
# LDA modeling dan evaluasi
cat("\n Training model LDA dan evaluasi:\n")
##
## Training model LDA dan evaluasi:
model_lda <- lda(Stress_Detection ~ ., data = data_selected)
pred <- predict(model_lda)
# Confusion matrix
conf_matrix <- table(Predicted = pred$class, Actual = data_selected$Stress_Detection)
accuracy <- mean(pred$class == data_selected$Stress_Detection)
cat("\n Confusion Matrix LDA:\n")
##
## Confusion Matrix LDA:
print(conf_matrix)
## Actual
## Predicted High Low Medium
## High 249 9 31
## Low 9 102 27
## Medium 43 51 252
cat(sprintf("\n Akurasi LDA: %.4f\n", accuracy))
##
## Akurasi LDA: 0.7801
Perceptual Mapping
# MDS
lda_df <- data.frame(LD1 = pred$x[,1],
LD2 = pred$x[,2],
Stress_Detection = data_selected$Stress_Detection)
ggplot(lda_df, aes(x = LD1, y = LD2, color = Stress_Detection)) +
geom_point(alpha = 0.7, size = 3) +
labs(title = "Visualisasi LDA: Persebaran Kelas Stress pada LD1 dan LD2",
x = "Linear Discriminant 1",
y = "Linear Discriminant 2") +
theme_minimal() +
theme(legend.position = "right")

dist_matrix <- dist(scale(data_selected[, numeric_selected]))
mds_res <- cmdscale(dist_matrix, k = 2)
mds_df <- data.frame(MDS1 = mds_res[,1],
MDS2 = mds_res[,2],
Stress_Detection = data_selected$Stress_Detection)
ggplot(mds_df, aes(x = MDS1, y = MDS2, color = Stress_Detection)) +
geom_point(alpha = 0.7, size = 3) +
labs(title = "Perceptual Mapping dengan MDS",
x = "Dimensi 1",
y = "Dimensi 2") +
theme_minimal() +
theme(legend.position = "right")

# Biplot
pca_res <- prcomp(scale(data_selected[, numeric_selected]), center = TRUE, scale. = TRUE)
loadings <- data.frame(pca_res$rotation[, 1:2])
loadings$Feature <- rownames(loadings)
ggplot(loadings, aes(x = PC1, y = PC2)) +
geom_segment(aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.1, "cm")), color = "blue") +
geom_text_repel(aes(label = Feature), size = 3, max.overlaps = 20) +
xlim(c(min(loadings$PC1) - 0.1, max(loadings$PC1) + 0.1)) +
ylim(c(min(loadings$PC2) - 0.1, max(loadings$PC2) + 0.1)) +
ggtitle("PCA Loading Biplot") +
theme_minimal()
