Analisis pada modul ini bertujuan untuk mengeksplorasi data, menguji asumsi, serta menerapkan metode Two-Way MANOVA, MANCOVA, dan ANCOVA untuk menganalisis pengaruh faktor terhadap variabel dependen dengan kovariat. Dataset yang digunakan adalah Sleep Health and Lifestyle Dataset yang memuat informasi terkait durasi tidur, kualitas tidur, tingkat stres, serta karakteristik individu.

Dataset dapat diakses melalui link berikut: https://www.kaggle.com/datasets/adilshamim8/sleep-cycle-and-productivity

1. Import dan Persiapan Data

1.1 Import Library

library(readr)
library(ggplot2)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
## 
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
## 
##     mardia
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## ---
## biotools version 4.3
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(tidyr)
library(corrplot)
## corrplot 0.95 loaded
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1.2 Persiapan Data

data <- read_csv("Sleep_Health_and_Lifestyle_Dataset.csv")
## Rows: 374 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Gender, Occupation, BMI Category, Blood Pressure, Sleep Disorder
## dbl (8): Person ID, Age, Sleep Duration, Quality of Sleep, Physical Activity...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(data) <- make.names(names(data))

data$Gender <- as.factor(data$Gender)
data$BMI.Category <- as.factor(data$BMI.Category)

data_analisis <- data %>%
  dplyr::select(
    Sleep.Duration,
    Quality.of.Sleep,
    Gender,
    BMI.Category,
    Stress.Level
  )

head(data_analisis)
## # A tibble: 6 × 5
##   Sleep.Duration Quality.of.Sleep Gender BMI.Category Stress.Level
##            <dbl>            <dbl> <fct>  <fct>               <dbl>
## 1            6.1                6 Male   Overweight              6
## 2            6.2                6 Male   Normal                  8
## 3            6.2                6 Male   Normal                  8
## 4            5.9                4 Male   Obese                   8
## 5            5.9                4 Male   Obese                   8
## 6            5.9                4 Male   Obese                   8

1.3 Cek Missing Value

colSums(is.na(data_analisis))
##   Sleep.Duration Quality.of.Sleep           Gender     BMI.Category 
##                0                0                0                0 
##     Stress.Level 
##                0

1.4 Penanganan Missing Value

data_analisis <- na.omit(data_analisis)

dim(data_analisis)
## [1] 374   5
summary(data_analisis)
##  Sleep.Duration  Quality.of.Sleep    Gender           BMI.Category
##  Min.   :5.800   Min.   :4.000    Female:185   Normal       :195  
##  1st Qu.:6.400   1st Qu.:6.000    Male  :189   Normal Weight: 21  
##  Median :7.200   Median :7.000                 Obese        : 10  
##  Mean   :7.132   Mean   :7.313                 Overweight   :148  
##  3rd Qu.:7.800   3rd Qu.:8.000                                    
##  Max.   :8.500   Max.   :9.000                                    
##   Stress.Level  
##  Min.   :3.000  
##  1st Qu.:4.000  
##  Median :5.000  
##  Mean   :5.385  
##  3rd Qu.:7.000  
##  Max.   :8.000

1.5 Statistika Deskriptif

describe(data_analisis)
##                  vars   n mean   sd median trimmed  mad min max range  skew
## Sleep.Duration      1 374 7.13 0.80    7.2    7.12 1.04 5.8 8.5   2.7  0.04
## Quality.of.Sleep    2 374 7.31 1.20    7.0    7.32 1.48 4.0 9.0   5.0 -0.21
## Gender*             3 374 1.51 0.50    2.0    1.51 0.00 1.0 2.0   1.0 -0.02
## BMI.Category*       4 374 2.30 1.43    1.0    2.25 0.00 1.0 4.0   3.0  0.28
## Stress.Level        5 374 5.39 1.77    5.0    5.36 2.97 3.0 8.0   5.0  0.15
##                  kurtosis   se
## Sleep.Duration      -1.29 0.04
## Quality.of.Sleep    -0.77 0.06
## Gender*             -2.00 0.03
## BMI.Category*       -1.85 0.07
## Stress.Level        -1.33 0.09

1.6 Statistika Deskriptif per Kelompok

data_analisis %>%
  group_by(Gender, BMI.Category) %>%
  summarise(
    n = n(),
    mean_sleep_duration = mean(Sleep.Duration),
    mean_quality_sleep = mean(Quality.of.Sleep),
    mean_stress = mean(Stress.Level),
    .groups = "drop"
  )
## # A tibble: 8 × 6
##   Gender BMI.Category      n mean_sleep_duration mean_quality_sleep mean_stress
##   <fct>  <fct>         <int>               <dbl>              <dbl>       <dbl>
## 1 Female Normal           64                7.79               8.5         3.5 
## 2 Female Normal Weight    14                7.29               7.29        5.14
## 3 Female Obese             1                7.4                7           5   
## 4 Female Overweight      106                6.88               7.22        5.32
## 5 Male   Normal          131                7.20               7.25        5.92
## 6 Male   Normal Weight     7                7.43               7.71        5.29
## 7 Male   Obese             9                6.91               6.33        5.78
## 8 Male   Overweight       42                6.49               6.10        6.76

1.7 Visualisasi Data

1.7.1 Visualisasi Distribusi Sleep Duration dan Quality of Sleep

ggplot(data_analisis, aes(x = Sleep.Duration)) +
  geom_histogram(bins = 20, fill = "skyblue", color = "black") +
  theme_minimal()

ggplot(data_analisis, aes(x = Quality.of.Sleep)) +
  geom_histogram(bins = 20, fill = "Pink", color = "black") +
  theme_minimal()

1.7.2 Visualisasi Boxplot Quality of Sleep Berdasarkan BMI Category dan Gender

ggplot(data_analisis, aes(x = BMI.Category, y = Quality.of.Sleep, fill = Gender)) +
  geom_boxplot() +
  theme_minimal()

1.7.3 Visualisasi Hubungan Stress Level dan Quality of Sleep dengan Garis Tren

ggplot(data_analisis, aes(x = Stress.Level, y = Quality.of.Sleep)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

1.7.4 Visualisasi Rata-rata Quality of Sleep Berdasarkan BMI Category

rata_kelompok <- data_analisis %>%
  dplyr::group_by(BMI.Category) %>%
  dplyr::summarise(mean_quality = mean(Quality.of.Sleep))

ggplot(rata_kelompok, aes(x = BMI.Category, y = mean_quality, fill = BMI.Category)) +
  geom_col() +
  theme_minimal()

1.8 Korelasi Variabel

mat_korelasi <- cor(
  data_analisis[, c("Sleep.Duration", "Quality.of.Sleep", "Stress.Level")]
)

mat_korelasi
##                  Sleep.Duration Quality.of.Sleep Stress.Level
## Sleep.Duration         1.000000         0.883213    -0.811023
## Quality.of.Sleep       0.883213         1.000000    -0.898752
## Stress.Level          -0.811023        -0.898752     1.000000

1.8.1 Heatmap Korelasi

mat_korelasi <- cor(data_analisis[, c("Sleep.Duration", "Quality.of.Sleep", "Stress.Level")])

corrplot(mat_korelasi, method = "color", type = "upper", addCoef.col = "black")

2. Pre Processing Data

2.1 Deteksi Outlier (IQR)

Q1_sd <- quantile(data_analisis$Sleep.Duration, 0.25)
Q3_sd <- quantile(data_analisis$Sleep.Duration, 0.75)
IQR_sd <- Q3_sd - Q1_sd

Q1_qs <- quantile(data_analisis$Quality.of.Sleep, 0.25)
Q3_qs <- quantile(data_analisis$Quality.of.Sleep, 0.75)
IQR_qs <- Q3_qs - Q1_qs

data_bersih <- data_analisis %>%
  filter(
    Sleep.Duration >= (Q1_sd - 1.5 * IQR_sd),
    Sleep.Duration <= (Q3_sd + 1.5 * IQR_sd),
    Quality.of.Sleep >= (Q1_qs - 1.5 * IQR_qs),
    Quality.of.Sleep <= (Q3_qs + 1.5 * IQR_qs)
  )

dim(data_analisis)
## [1] 374   5
dim(data_bersih)
## [1] 374   5

2.2 Normalisasi Kovariat

data_bersih$Stress.Level.Z <- scale(data_bersih$Stress.Level)

3. Uji Asumsi MANOVA

3.1 Normalitas Multivariat

Y <- data_bersih[, c("Sleep.Duration", "Quality.of.Sleep")]

3.2 Homogenitas Kovarians

data_bersih$grup <- interaction(data_bersih$Gender, data_bersih$BMI.Category)

freq_grup <- table(data_bersih$grup)
freq_grup
## 
##        Female.Normal          Male.Normal Female.Normal Weight 
##                   64                  131                   14 
##   Male.Normal Weight         Female.Obese           Male.Obese 
##                    7                    1                    9 
##    Female.Overweight      Male.Overweight 
##                  106                   42
grup_valid <- names(freq_grup[freq_grup > 2])

data_boxm <- data_bersih[data_bersih$grup %in% grup_valid, ]

data_boxm$grup <- droplevels(data_boxm$grup)

Y_boxm <- data_boxm[, c("Sleep.Duration", "Quality.of.Sleep")]

biotools::boxM(Y_boxm, data_boxm$grup)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y_boxm
## Chi-Sq (approx.) = 426.54, df = 18, p-value < 2.2e-16

3.3 Uji Dependensi

cor(Y)
##                  Sleep.Duration Quality.of.Sleep
## Sleep.Duration         1.000000         0.883213
## Quality.of.Sleep       0.883213         1.000000
cortest.bartlett(cor(Y), n = nrow(Y))
## $chisq
## [1] 562.6086
## 
## $p.value
## [1] 2.275981e-124
## 
## $df
## [1] 1

4. Two-Way MANOVA

model_manova <- manova(
  cbind(Sleep.Duration, Quality.of.Sleep) ~ Gender * BMI.Category,
  data = data_bersih
)

summary(model_manova, test = "Pillai")
##                      Df   Pillai approx F num Df den Df Pr(>F)    
## Gender                1 0.190926   43.067      2    365 <2e-16 ***
## BMI.Category          3 0.266079   18.722      6    732 <2e-16 ***
## Gender:BMI.Category   3 0.050312    3.148      6    732 0.0047 ** 
## Residuals           366                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(model_manova)
##  Response Sleep.Duration :
##                      Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                1   3.490  3.4904  7.0117  0.008448 ** 
## BMI.Category          3  47.817 15.9391 32.0193 < 2.2e-16 ***
## Gender:BMI.Category   3   2.633  0.8778  1.7634  0.153759    
## Residuals           366 182.194  0.4978                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Quality.of.Sleep :
##                      Df Sum Sq Mean Sq F value    Pr(>F)    
## Gender                1  45.37  45.367 44.8040   8.2e-11 ***
## BMI.Category          3 106.40  35.468 35.0274 < 2.2e-16 ***
## Gender:BMI.Category   3  12.03   4.009  3.9591  0.008466 ** 
## Residuals           366 370.60   1.013                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5. MANCOVA

5.1 Uji Homogenitas Slope

model_slope <- lm(
  Sleep.Duration ~ Gender * BMI.Category +
    Gender:Stress.Level.Z + BMI.Category:Stress.Level.Z,
  data = data_bersih
)

Anova(model_slope, type = 3)
## Anova Table (Type III tests)
## 
## Response: Sleep.Duration
##                             Sum Sq  Df  F value    Pr(>F)    
## (Intercept)                 350.47   1 2370.164 < 2.2e-16 ***
## Gender                       11.32   1   76.583 < 2.2e-16 ***
## BMI.Category                  4.94   3   11.143 5.196e-07 ***
## Gender:BMI.Category           7.90   3   17.819 8.312e-11 ***
## Gender:Stress.Level.Z        60.53   2  204.660 < 2.2e-16 ***
## BMI.Category:Stress.Level.Z   8.31   3   18.736 2.572e-11 ***
## Residuals                    53.38 361                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.2 Hasil MANCOVA

model_mancova <- manova(
  cbind(Sleep.Duration, Quality.of.Sleep) ~ Gender * BMI.Category + Stress.Level.Z,
  data = data_bersih
)

summary(model_mancova, test = "Pillai")
##                      Df  Pillai approx F num Df den Df    Pr(>F)    
## Gender                1 0.38506   113.96      2    364 < 2.2e-16 ***
## BMI.Category          3 0.62629    55.47      6    730 < 2.2e-16 ***
## Stress.Level.Z        1 0.79789   718.52      2    364 < 2.2e-16 ***
## Gender:BMI.Category   3 0.07124     4.49      6    730 0.0001759 ***
## Residuals           365                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(model_mancova)
##  Response Sleep.Duration :
##                      Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Gender                1   3.490   3.490  20.5252 7.984e-06 ***
## BMI.Category          3  47.817  15.939  93.7297 < 2.2e-16 ***
## Stress.Level.Z        1 122.087 122.087 717.9304 < 2.2e-16 ***
## Gender:BMI.Category   3   0.670   0.223   1.3137    0.2696    
## Residuals           365  62.070   0.170                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Quality.of.Sleep :
##                      Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                1  45.367  45.367  210.46 < 2.2e-16 ***
## BMI.Category          3 106.403  35.468  164.54 < 2.2e-16 ***
## Stress.Level.Z        1 299.072 299.072 1387.40 < 2.2e-16 ***
## Gender:BMI.Category   3   4.876   1.625    7.54 6.596e-05 ***
## Residuals           365  78.680   0.216                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6. ANCOVA

6.1 Sleep Duration

ancova_sd <- lm(
  Sleep.Duration ~ Gender * BMI.Category + Stress.Level.Z,
  data = data_bersih
)

Anova(ancova_sd, type = 3)
## Anova Table (Type III tests)
## 
## Response: Sleep.Duration
##                      Sum Sq  Df    F value    Pr(>F)    
## (Intercept)         2512.52   1 14774.8054 < 2.2e-16 ***
## Gender                 3.56   1    20.9147 6.586e-06 ***
## BMI.Category           2.64   3     5.1789  0.001626 ** 
## Stress.Level.Z       120.12   1   706.3858 < 2.2e-16 ***
## Gender:BMI.Category    0.67   3     1.3137  0.269642    
## Residuals             62.07 365                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.2 Quality of Sleep

ancova_qs <- lm(
  Quality.of.Sleep ~ Gender * BMI.Category + Stress.Level.Z,
  data = data_bersih
)

Anova(ancova_qs, type = 3)
## Anova Table (Type III tests)
## 
## Response: Quality.of.Sleep
##                      Sum Sq  Df    F value    Pr(>F)    
## (Intercept)         2733.54   1 12680.9801 < 2.2e-16 ***
## Gender                 1.22   1     5.6426   0.01804 *  
## BMI.Category           1.75   3     2.7082   0.04506 *  
## Stress.Level.Z       291.92   1  1354.2290 < 2.2e-16 ***
## Gender:BMI.Category    4.88   3     7.5400 6.596e-05 ***
## Residuals             78.68 365                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7. Post-Hoc Test

emmeans(ancova_sd, pairwise ~ BMI.Category, adjust = "tukey")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  BMI.Category  emmean     SE  df lower.CL upper.CL
##  Normal          7.24 0.0329 365     7.17     7.30
##  Normal Weight   7.29 0.0955 365     7.10     7.48
##  Obese           7.16 0.2170 365     6.73     7.58
##  Overweight      6.94 0.0388 365     6.86     7.01
## 
## Results are averaged over the levels of: Gender 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                   estimate     SE  df t.ratio p.value
##  Normal - Normal Weight      -0.0526 0.1010 365  -0.522  0.9538
##  Normal - Obese               0.0825 0.2200 365   0.375  0.9820
##  Normal - Overweight          0.3028 0.0526 365   5.757 <0.0001
##  Normal Weight - Obese        0.1350 0.2370 365   0.569  0.9413
##  Normal Weight - Overweight   0.3553 0.1030 365   3.441  0.0036
##  Obese - Overweight           0.2203 0.2210 365   0.998  0.7506
## 
## Results are averaged over the levels of: Gender 
## P value adjustment: tukey method for comparing a family of 4 estimates