ORDINAL LOGISTIC REGRESSION

Baca file dataset

cat("\nMemuat Data\n")

## 
## Memuat Data

library(readr)

## Warning: package 'readr' was built under R version 4.4.3

data <- read_csv("D:/Kuliah Semester 4/Analisis Multivariat/stress_detection_data.csv")

## Rows: 773 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (7): Gender, Occupation, Marital_Status, Smoking_Habit, Meditation_Pra...
## dbl  (13): Age, Sleep_Duration, Sleep_Quality, Physical_Activity, Screen_Tim...
## time  (2): Wake_Up_Time, Bed_Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cat("Dimensi data:", dim(data), "\n")

## Dimensi data: 773 22

Cek isi data

data

## # A tibble: 773 × 22
##      Age Gender Occupation        Marital_Status Sleep_Duration Sleep_Quality
##    <dbl> <chr>  <chr>             <chr>                   <dbl>         <dbl>
##  1    30 Male   Software Engineer Single                    7               4
##  2    35 Female Marketing Manager Married                   6               3
##  3    40 Male   Data Scientist    Divorced                  7               4
##  4    35 Male   Software Engineer Single                    7               4
##  5    29 Female Teacher           Single                    8               5
##  6    45 Male   Doctor            Married                   6               3
##  7    32 Female Graphic Designer  Single                    7               4
##  8    37 Male   Civil Engineer    Married                   7.5             4
##  9    50 Male   Business Owner    Married                   5.5             2
## 10    40 Female Nurse             Divorced                  6               3
## # ℹ 763 more rows
## # ℹ 16 more variables: Wake_Up_Time <time>, Bed_Time <time>,
## #   Physical_Activity <dbl>, Screen_Time <dbl>, Caffeine_Intake <dbl>,
## #   Alcohol_Intake <dbl>, Smoking_Habit <chr>, Work_Hours <dbl>,
## #   Travel_Time <dbl>, Social_Interactions <dbl>, Meditation_Practice <chr>,
## #   Exercise_Type <chr>, Blood_Pressure <dbl>, Cholesterol_Level <dbl>,
## #   Blood_Sugar_Level <dbl>, Stress_Detection <chr>

Siapkan library

library(ggplot2)
library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.4.3

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.3

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.4.3

## corrplot 0.95 loaded

library(rlang)

Identifikasi tipe data

numeric_vars <- names(data)[sapply(data, is.numeric)]
categorical_vars <- names(data)[sapply(data, is.character)]

cat("\nVariabel numerik:\n")

## 
## Variabel numerik:

print(numeric_vars)

##  [1] "Age"                 "Sleep_Duration"      "Sleep_Quality"      
##  [4] "Physical_Activity"   "Screen_Time"         "Caffeine_Intake"    
##  [7] "Alcohol_Intake"      "Work_Hours"          "Travel_Time"        
## [10] "Social_Interactions" "Blood_Pressure"      "Cholesterol_Level"  
## [13] "Blood_Sugar_Level"

cat("\nVariabel kategorikal:\n")

## 
## Variabel kategorikal:

print(categorical_vars)

## [1] "Gender"              "Occupation"          "Marital_Status"     
## [4] "Smoking_Habit"       "Meditation_Practice" "Exercise_Type"      
## [7] "Stress_Detection"

Plot histogram variabel numerik

cat("\nVisualisasi Histogram (Numerik)\n")

## 
## Visualisasi Histogram (Numerik)

for (var in numeric_vars) {
  p <- ggplot(data, aes(x = !!sym(var))) +
    geom_histogram(fill = "skyblue", color = "black", bins = 30) +
    labs(title = paste("Distribusi:", var), x = var, y = "Frekuensi") +
    theme_minimal()
  print(p)
}

Plot bar chart variabel kategorikal

cat("\nVisualisasi Bar Plot (Kategorikal)\n")

## 
## Visualisasi Bar Plot (Kategorikal)

for (var in categorical_vars) {
  if (length(unique(data[[var]])) < 20) {  # batasi yang kategorinya terlalu banyak
    p <- ggplot(data, aes(x = !!sym(var))) +
      geom_bar(fill = "salmon", color = "black") +
      labs(title = paste("Distribusi:", var), x = var, y = "Jumlah") +
      theme_minimal()
    print(p)
  } else {
    cat(paste("Lewati visualisasi", var, "- terlalu banyak kategori.\n"))
  }
}

## Lewati visualisasi Occupation - terlalu banyak kategori.

Statistik deskriptif / EDA variabell numerik dan kategorik

# Statistik Deskriptif untuk variabel numerik
cat("\nStatistik Deskriptif (Numerik):\n")

## 
## Statistik Deskriptif (Numerik):

print(summary(data[numeric_vars]))

##       Age        Sleep_Duration  Sleep_Quality   Physical_Activity
##  Min.   :18.00   Min.   :3.500   Min.   :2.000   Min.   :1.000    
##  1st Qu.:33.00   1st Qu.:6.000   1st Qu.:3.600   1st Qu.:2.000    
##  Median :39.00   Median :6.300   Median :3.900   Median :3.000    
##  Mean   :38.89   Mean   :6.338   Mean   :3.848   Mean   :2.979    
##  3rd Qu.:45.00   3rd Qu.:7.000   3rd Qu.:4.000   3rd Qu.:4.000    
##  Max.   :60.00   Max.   :8.000   Max.   :5.000   Max.   :5.000    
##   Screen_Time    Caffeine_Intake Alcohol_Intake     Work_Hours    
##  Min.   :2.000   Min.   :0.000   Min.   :0.0000   Min.   : 6.000  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.: 8.000  
##  Median :4.000   Median :2.000   Median :1.0000   Median : 8.000  
##  Mean   :4.105   Mean   :1.819   Mean   :0.8887   Mean   : 8.259  
##  3rd Qu.:5.000   3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.: 9.000  
##  Max.   :8.000   Max.   :4.000   Max.   :2.0000   Max.   :14.000  
##   Travel_Time    Social_Interactions Blood_Pressure  Cholesterol_Level
##  Min.   :0.500   Min.   :1.000       Min.   :110.0   Min.   :150.0    
##  1st Qu.:2.000   1st Qu.:3.000       1st Qu.:130.0   1st Qu.:210.0    
##  Median :3.000   Median :3.000       Median :140.0   Median :220.0    
##  Mean   :2.858   Mean   :3.197       Mean   :137.9   Mean   :220.8    
##  3rd Qu.:4.000   3rd Qu.:4.000       3rd Qu.:150.0   3rd Qu.:230.0    
##  Max.   :5.000   Max.   :5.000       Max.   :170.0   Max.   :290.0    
##  Blood_Sugar_Level
##  Min.   : 80.0    
##  1st Qu.:105.0    
##  Median :115.0    
##  Mean   :111.8    
##  3rd Qu.:120.0    
##  Max.   :150.0

# Frekuensi untuk variabel kategorikal
cat("\nDistribusi Kategorikal:\n")

## 
## Distribusi Kategorikal:

for (var in categorical_vars) {
  cat(paste0("\n", var, ":\n"))
  print(table(data[[var]]))
}

## 
## Gender:
## 
## Female   Male 
##    384    389 
## 
## Occupation:
## 
##             Account Manager                  Accountant 
##                           1                          11 
##                       Actor       Advertising Executive 
##                           4                           1 
##         Advertising Manager                   Architect 
##                           1                          17 
##                      Artist                       Baker 
##                           7                           1 
##                Bakery Owner                Bank Manager 
##                           2                           3 
##                      Banker                   Bartender 
##                           3                           2 
##                   Biologist                  Blacksmith 
##                           1                           3 
##               Brand Manager                  Bus Driver 
##                           1                           2 
##            Business Analyst         Business Consultant 
##                           8                           4 
##              Business Owner                   Carpenter 
##                           1                           5 
##                         CEO                        Chef 
##                           1                          26 
##              Civil Engineer               Civil Servant 
##                          12                           1 
##                     Cleaner                     Cobbler 
##                           1                           2 
##       Construction Engineer        Construction Manager 
##                           1                           6 
##         Construction Worker                  Consultant 
##                          14                           3 
##             Content Creator          Content Strategist 
##                           2                           1 
##              Content Writer                  Copywriter 
##                           6                           3 
##                     Courier            Customer Support 
##                           1                           3 
##                Data Analyst               Data Engineer 
##                          10                           1 
##              Data Scientist      Database Administrator 
##                          10                           1 
##             Delivery Driver                     Dentist 
##                           1                           3 
##                    Designer                   Developer 
##                           2                           1 
##            Digital Marketer                      Doctor 
##                           2                           8 
##                      Driver                      Editor 
##                           4                           2 
##         Electrical Engineer       Electrical Technician 
##                           3                           1 
##                 Electrician                    Engineer 
##                          17                          11 
##                Entrepreneur           Event Coordinator 
##                           3                           1 
##               Event Manager               Event Planner 
##                           1                           6 
##          Executive Director              Factory Worker 
##                           1                           1 
##                      Farmer            Fashion Designer 
##                           8                           8 
##           Financial Advisor           Financial Analyst 
##                           2                           7 
##           Financial Planner                 Firefighter 
##                           1                           1 
##                 Fisherwoman          Fitness Instructor 
##                           2                           1 
##             Fitness Trainer               Flower Seller 
##                           4                           2 
##                  Freelancer            Graphic Designer 
##                           5                          18 
##                Hair Stylist           Handicrafts Maker 
##                           1                           2 
##        Healthcare Assistant                HR Executive 
##                           1                           2 
##                  HR Manager               HR Specialist 
##                          13                           8 
##             Human Resources     Human Resources Manager 
##                           3                           1 
##             Insurance Agent           Interior Designer 
##                           1                           7 
##               IT Consultant                  IT Manager 
##                           6                           1 
##               IT Specialist                  IT Support 
##                           2                           1 
##       IT Support Specialist                     Janitor 
##                           1                           1 
##                  Journalist       Laboratory Technician 
##                          16                           1 
##                      Lawyer                   Librarian 
##                           7                           7 
##                     Manager          Marketing Director 
##                           1                           1 
##         Marketing Executive           Marketing Manager 
##                           8                          11 
##        Marketing Specialist                    Mechanic 
##                           7                           8 
##         Mechanical Engineer           Medical Assistant 
##                           4                           1 
##                    Musician                       Nanny 
##                           3                           2 
##       Network Administrator            Network Engineer 
##                           2                           1 
##                       Nurse          Nurse Practitioner 
##                          23                           1 
##      Nutritional Specialist                Nutritionist 
##                           1                           1 
##          Operations Manager                     Painter 
##                           3                           2 
##            Personal Trainer                  Pharmacist 
##                           1                           6 
##                Photographer                   Physician 
##                          22                           4 
##                   Physicist             Physiotherapist 
##                           1                           2 
##                       Pilot                     Plumber 
##                           1                           9 
##              Police Officer                      Potter 
##                           6                           2 
##      Primary School Teacher            Product Designer 
##                           2                           1 
##             Product Manager             Program Manager 
##                           3                           1 
##         Project Coordinator             Project Manager 
##                           1                          15 
##                Psychologist Public Relations Specialist 
##                           6                           4 
##           Real Estate Agent                Receptionist 
##                           7                           1 
##            Research Analyst          Research Assistant 
##                           1                           3 
##          Research Scientist                  Researcher 
##                           5                           2 
##          Restaurant Manager              Retail Manager 
##                           2                           4 
##               Retail Worker                     Retired 
##                           2                           1 
##             Sales Executive               Sales Manager 
##                           1                           9 
##        Sales Representative                 Salesperson 
##                           6                           1 
##                   Scientist                  Seamstress 
##                          17                           2 
##                   Secretary              Security Guard 
##                           2                           3 
##            Security Officer              SEO Specialist 
##                           1                           5 
##                  Shopkeeper               Social Worker 
##                           2                           5 
##          Software Architect          Software Developer 
##                           2                          15 
##           Software Engineer             Software Tester 
##                          16                           2 
##               Street Vendor                     Student 
##                           2                           3 
##                     Surgeon                      Tailor 
##                           1                           5 
##                 Taxi Driver                     Teacher 
##                           2                          37 
##                  Technician                   Therapist 
##                           1                           1 
##                Truck Driver                 UX Designer 
##                           9                           1 
##            Vegetable Vendor                Veterinarian 
##                           2                           8 
##                    Waitress            Warehouse Worker 
##                           3                           1 
##                      Weaver               Web Developer 
##                           2                          14 
##                      Writer 
##                           5 
## 
## Marital_Status:
## 
## Divorced  Married   Single 
##       60      360      353 
## 
## Smoking_Habit:
## 
##  No Yes 
## 358 415 
## 
## Meditation_Practice:
## 
##  No Yes 
## 292 481 
## 
## Exercise_Type:
## 
##          Aerobics            Cardio        Meditation           Pilates 
##                 5               215                24                88 
## Strength Training           Walking              Yoga 
##               245                 5               191 
## 
## Stress_Detection:
## 
##   High    Low Medium 
##    301    162    310

Korelasi antar variabel numerik

cat("\nKorelasi Variabel Numerik:\n")

## 
## Korelasi Variabel Numerik:

cor_matrix <- cor(data[numeric_vars], use = "complete.obs")
print(round(cor_matrix, 2))

##                       Age Sleep_Duration Sleep_Quality Physical_Activity
## Age                  1.00          -0.17          0.07              0.29
## Sleep_Duration      -0.17           1.00          0.21             -0.19
## Sleep_Quality        0.07           0.21          1.00              0.10
## Physical_Activity    0.29          -0.19          0.10              1.00
## Screen_Time          0.35          -0.25          0.02              0.72
## Caffeine_Intake      0.33          -0.25          0.09              0.73
## Alcohol_Intake       0.25          -0.24          0.15              0.59
## Work_Hours           0.25           0.24          0.01              0.12
## Travel_Time          0.24          -0.32         -0.07              0.68
## Social_Interactions  0.10           0.15          0.33              0.09
## Blood_Pressure       0.40          -0.31          0.07              0.66
## Cholesterol_Level    0.46          -0.29          0.15              0.59
## Blood_Sugar_Level    0.50          -0.22          0.16              0.61
##                     Screen_Time Caffeine_Intake Alcohol_Intake Work_Hours
## Age                        0.35            0.33           0.25       0.25
## Sleep_Duration            -0.25           -0.25          -0.24       0.24
## Sleep_Quality              0.02            0.09           0.15       0.01
## Physical_Activity          0.72            0.73           0.59       0.12
## Screen_Time                1.00            0.68           0.54       0.31
## Caffeine_Intake            0.68            1.00           0.65       0.08
## Alcohol_Intake             0.54            0.65           1.00      -0.03
## Work_Hours                 0.31            0.08          -0.03       1.00
## Travel_Time                0.50            0.72           0.57      -0.11
## Social_Interactions        0.10           -0.08          -0.02       0.45
## Blood_Pressure             0.61            0.74           0.61      -0.05
## Cholesterol_Level          0.62            0.59           0.45       0.24
## Blood_Sugar_Level          0.64            0.63           0.47       0.23
##                     Travel_Time Social_Interactions Blood_Pressure
## Age                        0.24                0.10           0.40
## Sleep_Duration            -0.32                0.15          -0.31
## Sleep_Quality             -0.07                0.33           0.07
## Physical_Activity          0.68                0.09           0.66
## Screen_Time                0.50                0.10           0.61
## Caffeine_Intake            0.72               -0.08           0.74
## Alcohol_Intake             0.57               -0.02           0.61
## Work_Hours                -0.11                0.45          -0.05
## Travel_Time                1.00               -0.19           0.74
## Social_Interactions       -0.19                1.00          -0.21
## Blood_Pressure             0.74               -0.21           1.00
## Cholesterol_Level          0.52                0.09           0.81
## Blood_Sugar_Level          0.57                0.03           0.83
##                     Cholesterol_Level Blood_Sugar_Level
## Age                              0.46              0.50
## Sleep_Duration                  -0.29             -0.22
## Sleep_Quality                    0.15              0.16
## Physical_Activity                0.59              0.61
## Screen_Time                      0.62              0.64
## Caffeine_Intake                  0.59              0.63
## Alcohol_Intake                   0.45              0.47
## Work_Hours                       0.24              0.23
## Travel_Time                      0.52              0.57
## Social_Interactions              0.09              0.03
## Blood_Pressure                   0.81              0.83
## Cholesterol_Level                1.00              0.94
## Blood_Sugar_Level                0.94              1.00

corrplot(cor_matrix, method = "color", type = "upper", tl.cex = 0.8)

Cek missing value

cat("\nCek Missing Values:\n")

## 
## Cek Missing Values:

colSums(is.na(data))

##                 Age              Gender          Occupation      Marital_Status 
##                   0                   0                   0                   0 
##      Sleep_Duration       Sleep_Quality        Wake_Up_Time            Bed_Time 
##                   0                   0                   0                   0 
##   Physical_Activity         Screen_Time     Caffeine_Intake      Alcohol_Intake 
##                   0                   0                   0                   0 
##       Smoking_Habit          Work_Hours         Travel_Time Social_Interactions 
##                   0                   0                   0                   0 
## Meditation_Practice       Exercise_Type      Blood_Pressure   Cholesterol_Level 
##                   0                   0                   0                   0 
##   Blood_Sugar_Level    Stress_Detection 
##                   0                   0

Cek outlier

cat("\nBoxplot untuk Deteksi Outlier:\n")

## 
## Boxplot untuk Deteksi Outlier:

for (var in numeric_vars) {
  p <- ggplot(data, aes(y = !!sym(var))) +
    geom_boxplot(fill = "lightgreen") +
    labs(title = paste("Boxplot:", var), y = var) +
    theme_minimal()
  print(p)
}

Cek distribusi target

cat("\nDistribusi Target Variable (Stress_Detection):\n")

## 
## Distribusi Target Variable (Stress_Detection):

print(table(data$Stress_Detection))

## 
##   High    Low Medium 
##    301    162    310

barplot(table(data$Stress_Detection), col = "orange", main = "Distribusi Stress Detection")

Normalisasi

# Fungsi normalisasi min-max
normalize <- function(x) {
  return((x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE)))
}

# Normalisasi semua variabel numerik
data[numeric_vars] <- lapply(data[numeric_vars], normalize)

# Cek hasil normalisasi (optional)
summary(data[numeric_vars])

##       Age         Sleep_Duration   Sleep_Quality    Physical_Activity
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:0.3571   1st Qu.:0.5556   1st Qu.:0.5333   1st Qu.:0.2500   
##  Median :0.5000   Median :0.6222   Median :0.6333   Median :0.5000   
##  Mean   :0.4973   Mean   :0.6308   Mean   :0.6160   Mean   :0.4948   
##  3rd Qu.:0.6429   3rd Qu.:0.7778   3rd Qu.:0.6667   3rd Qu.:0.7500   
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   
##   Screen_Time     Caffeine_Intake  Alcohol_Intake     Work_Hours    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3333   1st Qu.:0.2500   1st Qu.:0.0000   1st Qu.:0.2500  
##  Median :0.3333   Median :0.5000   Median :0.5000   Median :0.2500  
##  Mean   :0.3509   Mean   :0.4547   Mean   :0.4444   Mean   :0.2823  
##  3rd Qu.:0.5000   3rd Qu.:0.5000   3rd Qu.:0.5000   3rd Qu.:0.3750  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##   Travel_Time     Social_Interactions Blood_Pressure   Cholesterol_Level
##  Min.   :0.0000   Min.   :0.0000      Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:0.3333   1st Qu.:0.5000      1st Qu.:0.3333   1st Qu.:0.4286   
##  Median :0.5556   Median :0.5000      Median :0.5000   Median :0.5000   
##  Mean   :0.5241   Mean   :0.5492      Mean   :0.4657   Mean   :0.5060   
##  3rd Qu.:0.7778   3rd Qu.:0.7500      3rd Qu.:0.6667   3rd Qu.:0.5714   
##  Max.   :1.0000   Max.   :1.0000      Max.   :1.0000   Max.   :1.0000   
##  Blood_Sugar_Level
##  Min.   :0.0000   
##  1st Qu.:0.3571   
##  Median :0.5000   
##  Mean   :0.4538   
##  3rd Qu.:0.5714   
##  Max.   :1.0000

Encoding (factor)

categorical_vars_except_target <- setdiff(c("Gender", "Occupation", "Marital_Status", "Smoking_Habit", "Meditation_Practice", "Exercise_Type"), "Stress_Detection")

for (var in categorical_vars_except_target) {
  data[[var]] <- as.factor(data[[var]])
}

Validasi target

data$Stress_Level <- factor(data$Stress_Detection,
                           levels = c("Low", "Medium", "High"),
                           ordered = TRUE)

Load library uji dan model

library(MASS)    # untuk polr

## Warning: package 'MASS' was built under R version 4.4.3

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(car)     # untuk VIF

## Warning: package 'car' was built under R version 4.4.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(brant)   # untuk Brant test

## Warning: package 'brant' was built under R version 4.4.3

Inisialisasi variabel predictor

predictors <- c(numeric_vars, categorical_vars_except_target)

Load Model

formula_olr <- as.formula(
  paste("Stress_Level ~", paste(predictors, collapse = " + "))
)

Uji Asumsi

Uji Homogenitas Levene Test

# Langkah ini HARUS dilakukan dulu:
data_reduced <- data[, !(names(data) %in% c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))]

# Ambil variabel numerik selain target
numeric_vars <- names(Filter(is.numeric, data_reduced))
numeric_vars <- setdiff(numeric_vars, "Stress_Level")

# Inisialisasi dataframe kosong untuk menyimpan hasil
levene_results <- data.frame(
  Variabel = character(),
  Df_Group = integer(),
  Df_Residual = integer(),
  F_value = numeric(),
  p_value = numeric(),
  Significance = character(),
  stringsAsFactors = FALSE
)

# Loop tiap variabel numerik dan simpan hasil ke dalam dataframe
for (var in numeric_vars) {
  formula_levene <- as.formula(paste(var, "~ Stress_Level"))
  test_result <- leveneTest(formula_levene, data = data_reduced)
  
  # Ambil nilai dari hasil test
  df_group <- test_result[1, "Df"]
  df_residual <- test_result[2, "Df"]
  f_value <- test_result[1, "F value"]
  p_value <- test_result[1, "Pr(>F)"]
  
  # Tentukan simbol signifikansi
  sig <- if (p_value < 0.001) {
    "***"
  } else if (p_value < 0.01) {
    "**"
  } else if (p_value < 0.05) {
    "*"
  } else if (p_value < 0.1) {
    "."
  } else {
    ""
  }
  
  # Tambahkan baris ke dataframe
  levene_results <- rbind(levene_results, data.frame(
    Variabel = var,
    Df_Group = df_group,
    Df_Residual = df_residual,
    F_value = round(f_value, 4),
    p_value = signif(p_value, 5),
    Significance = sig
  ))
}

print(levene_results)

##               Variabel Df_Group Df_Residual F_value    p_value Significance
## 1                  Age        2         770  1.7085 1.8182e-01             
## 2       Sleep_Duration        2         770  2.3866 9.2618e-02            .
## 3        Sleep_Quality        2         770  8.6998 1.8355e-04          ***
## 4    Physical_Activity        2         770  4.1469 1.6168e-02            *
## 5          Screen_Time        2         770 11.7555 9.3554e-06          ***
## 6      Caffeine_Intake        2         770 14.0041 1.0619e-06          ***
## 7       Alcohol_Intake        2         770 22.0597 4.8305e-10          ***
## 8           Work_Hours        2         770  0.0326 9.6789e-01             
## 9          Travel_Time        2         770  6.4970 1.5919e-03           **
## 10 Social_Interactions        2         770 10.1524 4.4461e-05          ***
## 11      Blood_Pressure        2         770  6.4483 1.6701e-03           **

Uji Multikolinearitas VIF

formula_lm <- as.formula(
  paste("as.numeric(Stress_Level) ~", paste(predictors, collapse = " + "))
)
model_lm <- lm(formula_lm, data = data)

cat("\nUji Multikolinearitas (VIF):\n")

## 
## Uji Multikolinearitas (VIF):

vif_values <- vif(model_lm)
print(vif_values)

##                            GVIF  Df GVIF^(1/(2*Df))
## Age                    1.909720   1        1.381926
## Sleep_Duration         2.196792   1        1.482158
## Sleep_Quality          2.005374   1        1.416112
## Physical_Activity      4.937866   1        2.222131
## Screen_Time            4.408499   1        2.099643
## Caffeine_Intake        4.999134   1        2.235874
## Alcohol_Intake         3.039647   1        1.743458
## Work_Hours             3.178136   1        1.782733
## Travel_Time            4.686248   1        2.164774
## Social_Interactions    2.823614   1        1.680361
## Blood_Pressure        11.127018   1        3.335718
## Cholesterol_Level     17.341192   1        4.164276
## Blood_Sugar_Level     16.584033   1        4.072350
## Gender                 2.140902   1        1.463182
## Occupation          2768.354669 168        1.023870
## Marital_Status         2.741720   2        1.286784
## Smoking_Habit          1.832087   1        1.353546
## Meditation_Practice    2.004254   1        1.415717
## Exercise_Type         26.452024   6        1.313827

Penanganan Multikolinearitas

vars_check <- c("Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level")

data_num <- data[vars_check]

cor_matrix_check <- cor(data_num, use = "complete.obs")

print(round(cor_matrix_check, 2))

##                   Blood_Pressure Cholesterol_Level Blood_Sugar_Level
## Blood_Pressure              1.00              0.81              0.83
## Cholesterol_Level           0.81              1.00              0.94
## Blood_Sugar_Level           0.83              0.94              1.00

# Visualisasi korelasi dengan corrplot
library(corrplot)
corrplot(cor_matrix_check, method = "number", type = "upper")

# Pastikan dplyr sudah di-load (tidak wajib jika pakai base R)
library(dplyr)

# Hapus kolom Occupation, Cholesterol_Level, dan Blood_Sugar_Level dari data
data_reduced <- data[, !(names(data) %in% c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))]

# Update variabel prediktor, hapus 'Occupation', 'Cholesterol_Level', dan 'Blood_Sugar_Level'
predictors_reduced <- setdiff(predictors, c("Occupation", "Cholesterol_Level", "Blood_Sugar_Level"))

# Buat formula baru untuk model OLR dan linear (cek VIF)
formula_olr_reduced <- as.formula(
  paste("Stress_Level ~", paste(predictors_reduced, collapse = " + "))
)

formula_lm_reduced <- as.formula(
  paste("as.numeric(Stress_Level) ~", paste(predictors_reduced, collapse = " + "))
)

Uji Model

# Model linear untuk cek VIF
model_lm_reduced <- lm(formula_lm_reduced, data = data_reduced)
cat("\nVIF setelah hapus Occupation, Cholesterol_Level, dan Blood_Sugar_Level:\n")

## 
## VIF setelah hapus Occupation, Cholesterol_Level, dan Blood_Sugar_Level:

print(vif(model_lm_reduced))

##                         GVIF Df GVIF^(1/(2*Df))
## Age                 1.458957  1        1.207873
## Sleep_Duration      1.445326  1        1.202217
## Sleep_Quality       1.468714  1        1.211905
## Physical_Activity   3.585626  1        1.893575
## Screen_Time         3.132329  1        1.769839
## Caffeine_Intake     3.739988  1        1.933905
## Alcohol_Intake      2.244304  1        1.498100
## Work_Hours          2.145756  1        1.464840
## Travel_Time         3.611850  1        1.900487
## Social_Interactions 2.017837  1        1.420506
## Blood_Pressure      4.002524  1        2.000631
## Gender              1.335309  1        1.155556
## Marital_Status      1.509696  2        1.108466
## Smoking_Habit       1.350746  1        1.162216
## Meditation_Practice 1.436075  1        1.198364
## Exercise_Type       2.517047  6        1.079960

# Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level
model_olr_reduced <- polr(formula_olr_reduced, data = data_reduced, Hess = TRUE)
cat("\nSummary Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level:\n")

## 
## Summary Model OLR tanpa Occupation, Cholesterol_Level, dan Blood_Sugar_Level:

print(summary(model_olr_reduced))

## Call:
## polr(formula = formula_olr_reduced, data = data_reduced, Hess = TRUE)
## 
## Coefficients:
##                                   Value Std. Error t value
## Age                            -0.53885     0.4973 -1.0835
## Sleep_Duration                 -2.58445     0.5705 -4.5303
## Sleep_Quality                  -0.85584     0.5129 -1.6686
## Physical_Activity              -1.09287     0.7423 -1.4723
## Screen_Time                     2.39654     0.9909  2.4186
## Caffeine_Intake                 0.18292     0.6982  0.2620
## Alcohol_Intake                  0.79914     0.3609  2.2145
## Work_Hours                      4.90542     0.8924  5.4966
## Travel_Time                     0.08261     0.6105  0.1353
## Social_Interactions             1.29990     0.5309  2.4486
## Blood_Pressure                  5.17643     0.7355  7.0382
## GenderMale                     -0.36254     0.1777 -2.0404
## Marital_StatusMarried           0.09306     0.3185  0.2922
## Marital_StatusSingle           -0.29668     0.3193 -0.9292
## Smoking_HabitYes                0.26530     0.1753  1.5136
## Meditation_PracticeYes          0.63085     0.1832  3.4434
## Exercise_TypeCardio            -2.28877     0.9860 -2.3212
## Exercise_TypeMeditation        -0.86557     1.0738 -0.8061
## Exercise_TypePilates           -2.39004     0.9890 -2.4167
## Exercise_TypeStrength Training -1.64813     0.9837 -1.6754
## Exercise_TypeWalking           -2.51186     1.5769 -1.5929
## Exercise_TypeYoga              -2.11336     0.9830 -2.1499
## 
## Intercepts:
##             Value   Std. Error t value
## Low|Medium  -0.9444  1.1366    -0.8310
## Medium|High  1.7433  1.1361     1.5345
## 
## Residual Deviance: 1203.024 
## AIC: 1251.024

Uji Brant Test

library(brant)

brant_result <- brant(model_olr_reduced)  # model tanpa Occupation

## -------------------------------------------------------------------- 
## Test for             X2  df  probability 
## -------------------------------------------------------------------- 
## Omnibus                  43.38   22  0
## Age                  0.38    1   0.54
## Sleep_Duration               0.11    1   0.74
## Sleep_Quality                1.52    1   0.22
## Physical_Activity            2.3 1   0.13
## Screen_Time              0   1   0.97
## Caffeine_Intake          0.52    1   0.47
## Alcohol_Intake               0.17    1   0.68
## Work_Hours               5.89    1   0.02
## Travel_Time              0.29    1   0.59
## Social_Interactions          0.02    1   0.89
## Blood_Pressure               0.9 1   0.34
## GenderMale               0.09    1   0.77
## Marital_StatusMarried            0.33    1   0.57
## Marital_StatusSingle         0.23    1   0.63
## Smoking_HabitYes         0.51    1   0.48
## Meditation_PracticeYes       5.49    1   0.02
## Exercise_TypeCardio          0   1   0.99
## Exercise_TypeMeditation      0   1   0.99
## Exercise_TypePilates         0   1   0.99
## Exercise_TypeStrength Training   0   1   0.99
## Exercise_TypeWalking         0   1   1
## Exercise_TypeYoga            0   1   0.99
## -------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds

## Warning in brant(model_olr_reduced): 291 combinations in table(dv,ivs) do not
## occur. Because of that, the test results might be invalid.

print(brant_result)

##                                          X2 df probability
## Omnibus                        4.338153e+01 22 0.004234094
## Age                            3.821521e-01  1 0.536453690
## Sleep_Duration                 1.097887e-01  1 0.740384781
## Sleep_Quality                  1.522510e+00  1 0.217240046
## Physical_Activity              2.303068e+00  1 0.129118777
## Screen_Time                    1.001217e-03  1 0.974757535
## Caffeine_Intake                5.235043e-01  1 0.469350661
## Alcohol_Intake                 1.690718e-01  1 0.680938052
## Work_Hours                     5.889732e+00  1 0.015229416
## Travel_Time                    2.857900e-01  1 0.592931098
## Social_Interactions            1.793536e-02  1 0.893463498
## Blood_Pressure                 8.967436e-01  1 0.343656386
## GenderMale                     8.627875e-02  1 0.768962361
## Marital_StatusMarried          3.279669e-01  1 0.566858706
## Marital_StatusSingle           2.264632e-01  1 0.634158806
## Smoking_HabitYes               5.068980e-01  1 0.476484760
## Meditation_PracticeYes         5.488855e+00  1 0.019138071
## Exercise_TypeCardio            1.678269e-04  1 0.989663854
## Exercise_TypeMeditation        1.577229e-04  1 0.989979809
## Exercise_TypePilates           1.638582e-04  1 0.989786791
## Exercise_TypeStrength Training 1.715674e-04  1 0.989549308
## Exercise_TypeWalking           2.723550e-07  1 0.999583603
## Exercise_TypeYoga              1.767143e-04  1 0.989393720

model_null <- polr(Stress_Level ~ 1, data = data_reduced, Hess = TRUE)
anova_result <- anova(model_null, model_olr_reduced, test = "Chisq")
print(anova_result)

## Likelihood ratio tests of ordinal regression models
## 
## Response: Stress_Level
##                                                                                                                                                                                                                                                         Model
## 1                                                                                                                                                                                                                                                           1
## 2 Age + Sleep_Duration + Sleep_Quality + Physical_Activity + Screen_Time + Caffeine_Intake + Alcohol_Intake + Work_Hours + Travel_Time + Social_Interactions + Blood_Pressure + Gender + Marital_Status + Smoking_Habit + Meditation_Practice + Exercise_Type
##   Resid. df Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1       771   1640.595                              
## 2       749   1203.024 1 vs 2    22 437.5708       0

Cek koefisien model

exp_coef <- exp(coef(model_olr_reduced))
print(exp_coef)

##                            Age                 Sleep_Duration 
##                     0.58341749                     0.07543769 
##                  Sleep_Quality              Physical_Activity 
##                     0.42492652                     0.33525253 
##                    Screen_Time                Caffeine_Intake 
##                    10.98506882                     1.20071877 
##                 Alcohol_Intake                     Work_Hours 
##                     2.22362045                   135.01986788 
##                    Travel_Time            Social_Interactions 
##                     1.08611331                     3.66894638 
##                 Blood_Pressure                     GenderMale 
##                   177.04897441                     0.69590562 
##          Marital_StatusMarried           Marital_StatusSingle 
##                     1.09752446                     0.74327879 
##               Smoking_HabitYes         Meditation_PracticeYes 
##                     1.30381637                     1.87920855 
##            Exercise_TypeCardio        Exercise_TypeMeditation 
##                     0.10139134                     0.42081203 
##           Exercise_TypePilates Exercise_TypeStrength Training 
##                     0.09162601                     0.19241009 
##           Exercise_TypeWalking              Exercise_TypeYoga 
##                     0.08111703                     0.12083094

Prediksi dan Evaluasi

# Prediksi kategori stress
predicted_stress <- predict(model_olr_reduced, newdata = data_reduced)

# Convert predicted_stress to an ordered factor with the same levels as Stress_Level
predicted_stress <- factor(predicted_stress, 
                          levels = levels(data_reduced$Stress_Level), 
                          ordered = TRUE)

# Buat confusion matrix
table(Predicted = predicted_stress, Actual = data_reduced$Stress_Level)

##          Actual
## Predicted Low Medium High
##    Low     70     34    1
##    Medium  79    222   89
##    High    13     54  211

# Hitung akurasi sederhana
mean(predicted_stress == data_reduced$Stress_Level)

## [1] 0.6507115

Confusion Matrix

library(ggplot2)
conf_matrix <- table(Predicted = predicted_stress, Actual = data_reduced$Stress_Level)

conf_df <- as.data.frame(conf_matrix)
ggplot(conf_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), color = "white", size = 5) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  labs(title = "Confusion Matrix: Prediksi vs Aktual")

Uji kemungkinan model

library(MASS)    # Untuk polr (Ordinal Logistic Regression)
library(car)     # Untuk VIF
library(brant)   # Untuk Brant test
library(ggplot2) # Untuk visualisasi
library(dplyr)   # Untuk manipulasi data
library(gridExtra) # Untuk menampilkan plot secara bersamaan

# Definisikan variabel prediktor berdasarkan tabel
# Variabel yang digunakan setelah penghapusan (berdasarkan data_reduced)
predictors_reduced <- setdiff(names(data_reduced), c("Stress_Level", "Stress_Detection"))

# Subset prediktor numerik dan kategorikal (tanpa Occupation, Cholesterol_Level, Blood_Sugar_Level)
numeric_vars_reduced <- names(Filter(is.numeric, data_reduced))
numeric_vars_reduced <- setdiff(numeric_vars_reduced, c("Stress_Level", "Stress_Detection"))
categorical_vars_reduced <- names(Filter(is.factor, data_reduced))
categorical_vars_reduced <- setdiff(categorical_vars_reduced, c("Stress_Level", "Stress_Detection"))

Model OLR Biasa

predictors_olr_biasa <- c("Sleep_Quality", "Screen_Time", "Age", "Sleep_Duration", "Physical_Activity", "Caffeine_Intake", "Alcohol_Intake", "Travel_Time", "Social_Interactions", "Blood_Pressure", "Gender", "Marital_Status","Smoking_Habit", "Exercise_Type")
formula_olr_biasa <- as.formula(
  paste("Stress_Level ~", paste(predictors_olr_biasa, collapse = " + "))
)
model_olr_biasa <- polr(formula_olr_biasa, data = data_reduced, Hess = TRUE)
# Prediksi dan akurasi
pred_olr_biasa <- predict(model_olr_biasa, newdata = data_reduced)
pred_olr_biasa <- factor(pred_olr_biasa, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_biasa <- mean(pred_olr_biasa == data_reduced$Stress_Level)

Model OLR + Partial

# Gunakan semua variabel prediktor + Work_Hours sebagai mediator
predictors_olr_partial <- predictors_reduced
formula_olr_partial <- as.formula(
  paste("Stress_Level ~", paste(predictors_olr_partial, collapse = " + "))
)
model_olr_partial <- polr(formula_olr_partial, data = data_reduced, Hess = TRUE)

# Prediksi dan akurasi
pred_olr_partial <- predict(model_olr_partial, newdata = data_reduced)
pred_olr_partial <- factor(pred_olr_partial, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_partial <- mean(pred_olr_partial == data_reduced$Stress_Level)

Model OLR Parsial

# Hanya gunakan variabel paling signifikan (misalnya berdasarkan summary model_olr_partial)
predictors_olr_parsimoni <- c("Work_Hours", "Meditation_Practice") # Sesuaikan berdasarkan signifikansi
formula_olr_parsimoni <- as.formula(
  paste("Stress_Level ~", paste(predictors_olr_parsimoni, collapse = " + "))
)
model_olr_parsimoni <- polr(formula_olr_parsimoni, data = data_reduced, Hess = TRUE)

# Prediksi dan akurasi
pred_olr_parsimoni <- predict(model_olr_parsimoni, newdata = data_reduced)
pred_olr_parsimoni <- factor(pred_olr_parsimoni, levels = levels(data_reduced$Stress_Level), ordered = TRUE)
acc_olr_parsimoni <- mean(pred_olr_parsimoni == data_reduced$Stress_Level)

Perbandingan model

model_comparison <- data.frame(
  Model = c("OLR Biasa", "OLR + Partial", "OLR Parsial"),
  AIC = c(AIC(model_olr_biasa), AIC(model_olr_partial), AIC(model_olr_parsimoni)),
  Accuracy = c(acc_olr_biasa, acc_olr_partial, acc_olr_parsimoni)
)

# Tampilkan perbandingan
cat("\n[Perbandingan Model]\n")

## 
## [Perbandingan Model]

print(model_comparison)

##           Model      AIC  Accuracy
## 1     OLR Biasa 1292.412 0.6287193
## 2 OLR + Partial 1242.585 0.6636481
## 3   OLR Parsial 1561.791 0.5355757

# OLR Biasa
conf_matrix_biasa <- table(Predicted = pred_olr_biasa, Actual = data_reduced$Stress_Level)
conf_df_biasa <- as.data.frame(conf_matrix_biasa)
p1 <- ggplot(conf_df_biasa, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), color = "white", size = 5) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  labs(title = "Confusion Matrix: OLR Biasa")

# OLR + Partial
conf_matrix_partial <- table(Predicted = pred_olr_partial, Actual = data_reduced$Stress_Level)
conf_df_partial <- as.data.frame(conf_matrix_partial)
p2 <- ggplot(conf_df_partial, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), color = "white", size = 5) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  labs(title = "Confusion Matrix: OLR + Partial")

# OLR Parsial
conf_matrix_parsimoni <- table(Predicted = pred_olr_parsimoni, Actual = data_reduced$Stress_Level)
conf_df_parsimoni <- as.data.frame(conf_matrix_parsimoni)
p3 <- ggplot(conf_df_parsimoni, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), color = "white", size = 5) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  labs(title = "Confusion Matrix: OLR Parsial")

# Tampilkan semua plot
grid.arrange(p1, p2, p3, ncol = 2)

ANALISIS DISKRIMINAN

Inisialisasi Library

library(dplyr)
library(MASS)
library(ggplot2)
library(tidyr)

## Warning: package 'tidyr' was built under R version 4.4.3

library(e1071)

## Warning: package 'e1071' was built under R version 4.4.3

library(car)       
#install.packages("heplots")
library(heplots)

## Warning: package 'heplots' was built under R version 4.4.3

## Loading required package: broom

## Warning: package 'broom' was built under R version 4.4.3

#install.packages("MVN")
library(MVN)

## Warning: package 'MVN' was built under R version 4.4.3

library(stats)
#install.packages("ggfortify")
library(ggfortify)

## Warning: package 'ggfortify' was built under R version 4.4.3

library(ggrepel)

## Warning: package 'ggrepel' was built under R version 4.4.3

load Data

data <- read.csv("D:/Kuliah Semester 4/Analisis Multivariat/stress_detection_data.csv")

categorical_vars <- c("Gender", "Occupation", "Marital_Status", "Smoking_Habit", 
                      "Meditation_Practice", "Exercise_Type", "Stress_Detection")
data[categorical_vars] <- lapply(data[categorical_vars], factor)

Pre-Processing

time_to_decimal <- function(time_str) {
  parsed <- strptime(time_str, format = "%I:%M %p")
  hour_decimal <- as.numeric(format(parsed, "%H")) + as.numeric(format(parsed, "%M")) / 60
  return(hour_decimal)
}
data$Wake_Up_Time_num <- time_to_decimal(data$Wake_Up_Time)
data$Bed_Time_num <- time_to_decimal(data$Bed_Time)

# Variabel numerik
numerik_vars <- c("Age", "Sleep_Duration", "Sleep_Quality", "Physical_Activity", "Screen_Time",
                  "Caffeine_Intake", "Alcohol_Intake", "Work_Hours", "Travel_Time",
                  "Social_Interactions", "Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level",
                  "Wake_Up_Time_num", "Bed_Time_num")

# Ubah Stress_Detection ke numerik 
data$Stress_Num <- as.numeric(data$Stress_Detection)

Seleksi Fitur

# Korelasi numerik dengan target
cor_values <- sapply(data[, numerik_vars], function(x) cor(x, data$Stress_Num, use = "complete.obs"))
selected_numeric <- names(cor_values[abs(cor_values) > 0.1])

cat("Fitur numerik lolos korelasi:\n")

## Fitur numerik lolos korelasi:

print(selected_numeric)

##  [1] "Age"                 "Sleep_Duration"      "Physical_Activity"  
##  [4] "Screen_Time"         "Caffeine_Intake"     "Alcohol_Intake"     
##  [7] "Work_Hours"          "Travel_Time"         "Social_Interactions"
## [10] "Blood_Pressure"      "Cholesterol_Level"   "Blood_Sugar_Level"  
## [13] "Wake_Up_Time_num"

# Uji Chi-square fitur kategorikal dan tampilkan p-value
selected_categorical <- c()
for (v in categorical_vars[-length(categorical_vars)]) {
  tbl <- table(data[[v]], data$Stress_Detection)
  
  chi <- suppressWarnings(chisq.test(tbl))
  pval <- chi$p.value
  
  if (pval < 0.05) {
    selected_categorical <- c(selected_categorical, v)
  }
  
  cat(sprintf("- %s: p-value = %.2e %s\n", v, pval,
              ifelse(pval < 0.05, "--> Lolos", "--> Tidak Lolos")))
}

## - Gender: p-value = 2.57e-04 --> Lolos
## - Occupation: p-value = 1.11e-02 --> Lolos
## - Marital_Status: p-value = 6.80e-14 --> Lolos
## - Smoking_Habit: p-value = 2.50e-17 --> Lolos
## - Meditation_Practice: p-value = 6.36e-19 --> Lolos
## - Exercise_Type: p-value = 4.48e-08 --> Lolos

cat("\nFitur kategorikal yang lolos uji Chi-square:\n")

## 
## Fitur kategorikal yang lolos uji Chi-square:

print(selected_categorical)

## [1] "Gender"              "Occupation"          "Marital_Status"     
## [4] "Smoking_Habit"       "Meditation_Practice" "Exercise_Type"

selected_features <- c(selected_numeric, selected_categorical)
data_selected <- data[, c(selected_features, "Stress_Detection")]

# Distribusi Kelas
class_dist <- data_selected %>%
  count(Stress_Detection) %>%
  mutate(Percent = round(n / sum(n) * 100, 1))

cat("\n Distribusi kelas:\n")

## 
##  Distribusi kelas:

print(class_dist)

##   Stress_Detection   n Percent
## 1             High 301    38.9
## 2              Low 162    21.0
## 3           Medium 310    40.1

Feature Engineering

# Pisah numerik dan kategorikal
numeric_selected <- intersect(selected_features, numerik_vars)
categorical_selected <- setdiff(selected_features, numeric_selected)

# Skewness numerik
cat("\n Skewness Fitur Numerik:\n")

## 
##  Skewness Fitur Numerik:

for (var in numeric_selected) {
  sk <- skewness(data_selected[[var]], na.rm = TRUE)
  tag <- ifelse(abs(sk) > 0.5, "Bisa dinormalisasi", "Standarisasi cukup")
  cat(sprintf(" - %s: skewness = %.2f --> %s\n", var, sk, tag))
}

##  - Age: skewness = -0.01 --> Standarisasi cukup
##  - Sleep_Duration: skewness = -0.80 --> Bisa dinormalisasi
##  - Physical_Activity: skewness = -0.23 --> Standarisasi cukup
##  - Screen_Time: skewness = -0.03 --> Standarisasi cukup
##  - Caffeine_Intake: skewness = 0.03 --> Standarisasi cukup
##  - Alcohol_Intake: skewness = 0.14 --> Standarisasi cukup
##  - Work_Hours: skewness = 0.77 --> Bisa dinormalisasi
##  - Travel_Time: skewness = 0.10 --> Standarisasi cukup
##  - Social_Interactions: skewness = 0.13 --> Standarisasi cukup
##  - Blood_Pressure: skewness = 0.06 --> Standarisasi cukup
##  - Cholesterol_Level: skewness = -0.12 --> Standarisasi cukup
##  - Blood_Sugar_Level: skewness = -0.18 --> Standarisasi cukup
##  - Wake_Up_Time_num: skewness = 0.18 --> Standarisasi cukup

# Transformasi numerik
to_standardize <- c("Age", "Physical_Activity", "Screen_Time", "Caffeine_Intake", 
                    "Alcohol_Intake", "Travel_Time", "Social_Interactions",
                    "Blood_Pressure", "Cholesterol_Level", "Blood_Sugar_Level", 
                    "Wake_Up_Time_num")
to_normalize <- c("Sleep_Duration", "Work_Hours")

# Standarisasi
data_selected[to_standardize] <- scale(data_selected[to_standardize])

# Normalisasi
minmax_norm <- function(x) {(x - min(x)) / (max(x) - min(x))}
data_selected[to_normalize] <- lapply(data_selected[to_normalize], minmax_norm)

cat("\nSkewness fitur numerik setelah transformasi:\n")

## 
## Skewness fitur numerik setelah transformasi:

for (var in numeric_selected) {
  sk <- skewness(data_selected[[var]], na.rm = TRUE)
  tag <- ifelse(abs(sk) > 0.5, "Masih miring (perlu perhatian)", "Sudah cukup normal")
  cat(sprintf(" - %s: skewness = %.2f --> %s\n", var, sk, tag))
}

##  - Age: skewness = -0.01 --> Sudah cukup normal
##  - Sleep_Duration: skewness = -0.80 --> Masih miring (perlu perhatian)
##  - Physical_Activity: skewness = -0.23 --> Sudah cukup normal
##  - Screen_Time: skewness = -0.03 --> Sudah cukup normal
##  - Caffeine_Intake: skewness = 0.03 --> Sudah cukup normal
##  - Alcohol_Intake: skewness = 0.14 --> Sudah cukup normal
##  - Work_Hours: skewness = 0.77 --> Masih miring (perlu perhatian)
##  - Travel_Time: skewness = 0.10 --> Sudah cukup normal
##  - Social_Interactions: skewness = 0.13 --> Sudah cukup normal
##  - Blood_Pressure: skewness = 0.06 --> Sudah cukup normal
##  - Cholesterol_Level: skewness = -0.12 --> Sudah cukup normal
##  - Blood_Sugar_Level: skewness = -0.18 --> Sudah cukup normal
##  - Wake_Up_Time_num: skewness = 0.18 --> Sudah cukup normal

# Boxcox (karena masih terlalu miring pada beberapa fitur)
boxcox_transform <- function(x) {
  x_shifted <- x + 0.001
  bc <- boxcox(x_shifted ~ 1, lambda = seq(-2, 2, 0.1), plotit = FALSE)
  lambda_opt <- bc$x[which.max(bc$y)]
  
  if (abs(lambda_opt) < 1e-5) {
    return(log(x_shifted))
  } else {
    return((x_shifted^lambda_opt - 1) / lambda_opt)
  }
}

# Transformasi tambahan dengan Box-Cox ke Sleep_Duration dan Work_Hours
data_selected$Sleep_Duration_boxcox <- boxcox_transform(data_selected$Sleep_Duration)
data_selected$Work_Hours_boxcox <- boxcox_transform(data_selected$Work_Hours)

cat("Distribusi numerik Sleep_Duration (Box-Cox):\n")

## Distribusi numerik Sleep_Duration (Box-Cox):

print(summary(data_selected$Sleep_Duration_boxcox))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.7142 -0.3998 -0.3458 -0.3311 -0.2110  0.0010

cat(sprintf("Skewness: %.4f\n\n", skewness(data_selected$Sleep_Duration_boxcox, na.rm = TRUE)))

## Skewness: -0.0238

cat("Distribusi numerik Work_Hours (Box-Cox):\n")

## Distribusi numerik Work_Hours (Box-Cox):

print(summary(data_selected$Work_Hours_boxcox))

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -1.4172239 -0.8857293 -0.8857293 -0.8525954 -0.7082382  0.0009999

cat(sprintf("Skewness: %.4f\n", skewness(data_selected$Work_Hours_boxcox, na.rm = TRUE)))

## Skewness: 0.1167

Uji Asumsi Analisis Diskriminan

# Normalitas multivariat per kelas (Mardia test)
cat("\nUji Normalitas Multivariat (Mardia Test) per kelas:\n")

## 
## Uji Normalitas Multivariat (Mardia Test) per kelas:

library(MVN)  # Pastikan paket MVN sudah dimuat
for (grp in levels(data_selected$Stress_Detection)) {
  cat("\nKelas:", grp, "\n")
  subset_data <- data_selected[data_selected$Stress_Detection == grp, numeric_selected]
  mvn_test <- mvn(subset_data)  # Hapus mvnTest = "mardia"
  print(mvn_test$multivariateNormality)
}

## 
## Kelas: High 
## NULL
## 
## Kelas: Low 
## NULL
## 
## Kelas: Medium 
## NULL

# Homogenitas Kovarians (Box's M test)
cat("\n Uji Homogenitas Kovarians (Box's M Test):\n")

## 
##  Uji Homogenitas Kovarians (Box's M Test):

boxm_res <- boxM(data_selected[, numeric_selected], data_selected$Stress_Detection)
print(boxm_res)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data_selected[, numeric_selected]
## Chi-Sq (approx.) = 844.6, df = 182, p-value < 2.2e-16

# Multikolinearitas (VIF)
cat("\n Uji Multikolinearitas (VIF):\n")

## 
##  Uji Multikolinearitas (VIF):

vif_values <- sapply(numeric_selected, function(var) {
  others <- setdiff(numeric_selected, var)
  form <- as.formula(paste(var, "~", paste(others, collapse = "+")))
  vif_model <- lm(form, data = data_selected)
  vif_val <- vif(vif_model)[1]
  return(vif_val)
})
print(vif_values)

##      Age.Sleep_Duration      Sleep_Duration.Age   Physical_Activity.Age 
##                1.542308                1.399028                1.428361 
##         Screen_Time.Age     Caffeine_Intake.Age      Alcohol_Intake.Age 
##                1.429564                1.428860                1.429927 
##          Work_Hours.Age         Travel_Time.Age Social_Interactions.Age 
##                1.392150                1.424854                1.421479 
##      Blood_Pressure.Age   Cholesterol_Level.Age   Blood_Sugar_Level.Age 
##                1.419996                1.410630                1.359280 
##    Wake_Up_Time_num.Age 
##                1.430795

# Linearitas 
cat("\n Visualisasi Linearitas antar variabel numerik:\n")

## 
##  Visualisasi Linearitas antar variabel numerik:

pairs(data_selected[, numeric_selected], col = as.numeric(data_selected$Stress_Detection),
      pch=19, main = "Scatterplot Matrix - Cek Linearitas antar Variabel Numerik")

Pemodelan

# LDA modeling dan evaluasi
cat("\n Training model LDA dan evaluasi:\n")

## 
##  Training model LDA dan evaluasi:

model_lda <- lda(Stress_Detection ~ ., data = data_selected)
pred <- predict(model_lda)

# Confusion matrix 
conf_matrix <- table(Predicted = pred$class, Actual = data_selected$Stress_Detection)
accuracy <- mean(pred$class == data_selected$Stress_Detection)

cat("\n Confusion Matrix LDA:\n")

## 
##  Confusion Matrix LDA:

print(conf_matrix)

##          Actual
## Predicted High Low Medium
##    High    249   9     31
##    Low       9 102     27
##    Medium   43  51    252

cat(sprintf("\n Akurasi LDA: %.4f\n", accuracy))

## 
##  Akurasi LDA: 0.7801

Perceptual Mapping

# MDS
lda_df <- data.frame(LD1 = pred$x[,1],
                     LD2 = pred$x[,2],
                     Stress_Detection = data_selected$Stress_Detection)

ggplot(lda_df, aes(x = LD1, y = LD2, color = Stress_Detection)) +
  geom_point(alpha = 0.7, size = 3) +
  labs(title = "Visualisasi LDA: Persebaran Kelas Stress pada LD1 dan LD2",
       x = "Linear Discriminant 1",
       y = "Linear Discriminant 2") +
  theme_minimal() +
  theme(legend.position = "right")

dist_matrix <- dist(scale(data_selected[, numeric_selected]))
mds_res <- cmdscale(dist_matrix, k = 2)

mds_df <- data.frame(MDS1 = mds_res[,1],
                     MDS2 = mds_res[,2],
                     Stress_Detection = data_selected$Stress_Detection)

ggplot(mds_df, aes(x = MDS1, y = MDS2, color = Stress_Detection)) +
  geom_point(alpha = 0.7, size = 3) +
  labs(title = "Perceptual Mapping dengan MDS",
       x = "Dimensi 1",
       y = "Dimensi 2") +
  theme_minimal() +
  theme(legend.position = "right")

# Biplot 
pca_res <- prcomp(scale(data_selected[, numeric_selected]), center = TRUE, scale. = TRUE)
loadings <- data.frame(pca_res$rotation[, 1:2])
loadings$Feature <- rownames(loadings)

ggplot(loadings, aes(x = PC1, y = PC2)) +
  geom_segment(aes(x = 0, y = 0, xend = PC1, yend = PC2), 
               arrow = arrow(length = unit(0.1, "cm")), color = "blue") +
  geom_text_repel(aes(label = Feature), size = 3, max.overlaps = 20) +
  xlim(c(min(loadings$PC1) - 0.1, max(loadings$PC1) + 0.1)) +
  ylim(c(min(loadings$PC2) - 0.1, max(loadings$PC2) + 0.1)) +
  ggtitle("PCA Loading Biplot") +
  theme_minimal()

Perbandingan Analisis Diskriminan dan Regresi Logistik Ordinal dalam Prediksi Tingkat Stres Berdasarkan Faktor Gaya Hidup

Muhammad Faiz Munif Billah (028), Nadia Nafista (095), Monika Intan Puspitasari (189)

2025-06-02

ORDINAL LOGISTIC REGRESSION

Baca file dataset

Cek isi data

Siapkan library

Identifikasi tipe data

Plot histogram variabel numerik

Plot bar chart variabel kategorikal

Statistik deskriptif / EDA variabell numerik dan kategorik

Korelasi antar variabel numerik

Cek missing value

Cek outlier

Cek distribusi target

Normalisasi

Encoding (factor)

Validasi target

Load library uji dan model

Inisialisasi variabel predictor

Load Model

Uji Asumsi

Uji Homogenitas Levene Test

Uji Multikolinearitas VIF

Penanganan Multikolinearitas

Uji Model

Uji Brant Test

Cek koefisien model

Prediksi dan Evaluasi

Confusion Matrix

Uji kemungkinan model

Model OLR Biasa

Model OLR + Partial

Model OLR Parsial

Perbandingan model

ANALISIS DISKRIMINAN

Inisialisasi Library

load Data

Pre-Processing

Seleksi Fitur

Feature Engineering

Uji Asumsi Analisis Diskriminan

Pemodelan

Perceptual Mapping