Analisis pada modul ini bertujuan untuk mengeksplorasi data, menguji asumsi, serta menerapkan metode Two-Way MANOVA, MANCOVA, dan ANCOVA untuk menganalisis pengaruh faktor terhadap variabel dependen dengan kovariat. Dataset yang digunakan adalah Sleep Health and Lifestyle Dataset yang memuat informasi terkait durasi tidur, kualitas tidur, tingkat stres, serta karakteristik individu.
Dataset dapat diakses melalui link berikut: https://www.kaggle.com/datasets/adilshamim8/sleep-cycle-and-productivity
library(readr)
library(ggplot2)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## ---
## biotools version 4.3
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(tidyr)
library(corrplot)
## corrplot 0.95 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data <- read_csv("Sleep_Health_and_Lifestyle_Dataset.csv")
## Rows: 374 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Gender, Occupation, BMI Category, Blood Pressure, Sleep Disorder
## dbl (8): Person ID, Age, Sleep Duration, Quality of Sleep, Physical Activity...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(data) <- make.names(names(data))
data$Gender <- as.factor(data$Gender)
data$BMI.Category <- as.factor(data$BMI.Category)
data_analisis <- data %>%
dplyr::select(
Sleep.Duration,
Quality.of.Sleep,
Gender,
BMI.Category,
Stress.Level
)
head(data_analisis)
## # A tibble: 6 × 5
## Sleep.Duration Quality.of.Sleep Gender BMI.Category Stress.Level
## <dbl> <dbl> <fct> <fct> <dbl>
## 1 6.1 6 Male Overweight 6
## 2 6.2 6 Male Normal 8
## 3 6.2 6 Male Normal 8
## 4 5.9 4 Male Obese 8
## 5 5.9 4 Male Obese 8
## 6 5.9 4 Male Obese 8
colSums(is.na(data_analisis))
## Sleep.Duration Quality.of.Sleep Gender BMI.Category
## 0 0 0 0
## Stress.Level
## 0
data_analisis <- na.omit(data_analisis)
dim(data_analisis)
## [1] 374 5
summary(data_analisis)
## Sleep.Duration Quality.of.Sleep Gender BMI.Category
## Min. :5.800 Min. :4.000 Female:185 Normal :195
## 1st Qu.:6.400 1st Qu.:6.000 Male :189 Normal Weight: 21
## Median :7.200 Median :7.000 Obese : 10
## Mean :7.132 Mean :7.313 Overweight :148
## 3rd Qu.:7.800 3rd Qu.:8.000
## Max. :8.500 Max. :9.000
## Stress.Level
## Min. :3.000
## 1st Qu.:4.000
## Median :5.000
## Mean :5.385
## 3rd Qu.:7.000
## Max. :8.000
describe(data_analisis)
## vars n mean sd median trimmed mad min max range skew
## Sleep.Duration 1 374 7.13 0.80 7.2 7.12 1.04 5.8 8.5 2.7 0.04
## Quality.of.Sleep 2 374 7.31 1.20 7.0 7.32 1.48 4.0 9.0 5.0 -0.21
## Gender* 3 374 1.51 0.50 2.0 1.51 0.00 1.0 2.0 1.0 -0.02
## BMI.Category* 4 374 2.30 1.43 1.0 2.25 0.00 1.0 4.0 3.0 0.28
## Stress.Level 5 374 5.39 1.77 5.0 5.36 2.97 3.0 8.0 5.0 0.15
## kurtosis se
## Sleep.Duration -1.29 0.04
## Quality.of.Sleep -0.77 0.06
## Gender* -2.00 0.03
## BMI.Category* -1.85 0.07
## Stress.Level -1.33 0.09
data_analisis %>%
group_by(Gender, BMI.Category) %>%
summarise(
n = n(),
mean_sleep_duration = mean(Sleep.Duration),
mean_quality_sleep = mean(Quality.of.Sleep),
mean_stress = mean(Stress.Level),
.groups = "drop"
)
## # A tibble: 8 × 6
## Gender BMI.Category n mean_sleep_duration mean_quality_sleep mean_stress
## <fct> <fct> <int> <dbl> <dbl> <dbl>
## 1 Female Normal 64 7.79 8.5 3.5
## 2 Female Normal Weight 14 7.29 7.29 5.14
## 3 Female Obese 1 7.4 7 5
## 4 Female Overweight 106 6.88 7.22 5.32
## 5 Male Normal 131 7.20 7.25 5.92
## 6 Male Normal Weight 7 7.43 7.71 5.29
## 7 Male Obese 9 6.91 6.33 5.78
## 8 Male Overweight 42 6.49 6.10 6.76
ggplot(data_analisis, aes(x = Sleep.Duration)) +
geom_histogram(bins = 20, fill = "skyblue", color = "black") +
theme_minimal()
ggplot(data_analisis, aes(x = Quality.of.Sleep)) +
geom_histogram(bins = 20, fill = "Pink", color = "black") +
theme_minimal()
ggplot(data_analisis, aes(x = BMI.Category, y = Quality.of.Sleep, fill = Gender)) +
geom_boxplot() +
theme_minimal()
ggplot(data_analisis, aes(x = Stress.Level, y = Quality.of.Sleep)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
rata_kelompok <- data_analisis %>%
dplyr::group_by(BMI.Category) %>%
dplyr::summarise(mean_quality = mean(Quality.of.Sleep))
ggplot(rata_kelompok, aes(x = BMI.Category, y = mean_quality, fill = BMI.Category)) +
geom_col() +
theme_minimal()
mat_korelasi <- cor(
data_analisis[, c("Sleep.Duration", "Quality.of.Sleep", "Stress.Level")]
)
mat_korelasi
## Sleep.Duration Quality.of.Sleep Stress.Level
## Sleep.Duration 1.000000 0.883213 -0.811023
## Quality.of.Sleep 0.883213 1.000000 -0.898752
## Stress.Level -0.811023 -0.898752 1.000000
mat_korelasi <- cor(data_analisis[, c("Sleep.Duration", "Quality.of.Sleep", "Stress.Level")])
corrplot(mat_korelasi, method = "color", type = "upper", addCoef.col = "black")
Q1_sd <- quantile(data_analisis$Sleep.Duration, 0.25)
Q3_sd <- quantile(data_analisis$Sleep.Duration, 0.75)
IQR_sd <- Q3_sd - Q1_sd
Q1_qs <- quantile(data_analisis$Quality.of.Sleep, 0.25)
Q3_qs <- quantile(data_analisis$Quality.of.Sleep, 0.75)
IQR_qs <- Q3_qs - Q1_qs
data_bersih <- data_analisis %>%
filter(
Sleep.Duration >= (Q1_sd - 1.5 * IQR_sd),
Sleep.Duration <= (Q3_sd + 1.5 * IQR_sd),
Quality.of.Sleep >= (Q1_qs - 1.5 * IQR_qs),
Quality.of.Sleep <= (Q3_qs + 1.5 * IQR_qs)
)
dim(data_analisis)
## [1] 374 5
dim(data_bersih)
## [1] 374 5
data_bersih$Stress.Level.Z <- scale(data_bersih$Stress.Level)
Y <- data_bersih[, c("Sleep.Duration", "Quality.of.Sleep")]
data_bersih$grup <- interaction(data_bersih$Gender, data_bersih$BMI.Category)
freq_grup <- table(data_bersih$grup)
freq_grup
##
## Female.Normal Male.Normal Female.Normal Weight
## 64 131 14
## Male.Normal Weight Female.Obese Male.Obese
## 7 1 9
## Female.Overweight Male.Overweight
## 106 42
grup_valid <- names(freq_grup[freq_grup > 2])
data_boxm <- data_bersih[data_bersih$grup %in% grup_valid, ]
data_boxm$grup <- droplevels(data_boxm$grup)
Y_boxm <- data_boxm[, c("Sleep.Duration", "Quality.of.Sleep")]
biotools::boxM(Y_boxm, data_boxm$grup)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y_boxm
## Chi-Sq (approx.) = 426.54, df = 18, p-value < 2.2e-16
cor(Y)
## Sleep.Duration Quality.of.Sleep
## Sleep.Duration 1.000000 0.883213
## Quality.of.Sleep 0.883213 1.000000
cortest.bartlett(cor(Y), n = nrow(Y))
## $chisq
## [1] 562.6086
##
## $p.value
## [1] 2.275981e-124
##
## $df
## [1] 1
model_manova <- manova(
cbind(Sleep.Duration, Quality.of.Sleep) ~ Gender * BMI.Category,
data = data_bersih
)
summary(model_manova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.190926 43.067 2 365 <2e-16 ***
## BMI.Category 3 0.266079 18.722 6 732 <2e-16 ***
## Gender:BMI.Category 3 0.050312 3.148 6 732 0.0047 **
## Residuals 366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(model_manova)
## Response Sleep.Duration :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 3.490 3.4904 7.0117 0.008448 **
## BMI.Category 3 47.817 15.9391 32.0193 < 2.2e-16 ***
## Gender:BMI.Category 3 2.633 0.8778 1.7634 0.153759
## Residuals 366 182.194 0.4978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Quality.of.Sleep :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 45.37 45.367 44.8040 8.2e-11 ***
## BMI.Category 3 106.40 35.468 35.0274 < 2.2e-16 ***
## Gender:BMI.Category 3 12.03 4.009 3.9591 0.008466 **
## Residuals 366 370.60 1.013
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_slope <- lm(
Sleep.Duration ~ Gender * BMI.Category +
Gender:Stress.Level.Z + BMI.Category:Stress.Level.Z,
data = data_bersih
)
Anova(model_slope, type = 3)
## Anova Table (Type III tests)
##
## Response: Sleep.Duration
## Sum Sq Df F value Pr(>F)
## (Intercept) 350.47 1 2370.164 < 2.2e-16 ***
## Gender 11.32 1 76.583 < 2.2e-16 ***
## BMI.Category 4.94 3 11.143 5.196e-07 ***
## Gender:BMI.Category 7.90 3 17.819 8.312e-11 ***
## Gender:Stress.Level.Z 60.53 2 204.660 < 2.2e-16 ***
## BMI.Category:Stress.Level.Z 8.31 3 18.736 2.572e-11 ***
## Residuals 53.38 361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_mancova <- manova(
cbind(Sleep.Duration, Quality.of.Sleep) ~ Gender * BMI.Category + Stress.Level.Z,
data = data_bersih
)
summary(model_mancova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.38506 113.96 2 364 < 2.2e-16 ***
## BMI.Category 3 0.62629 55.47 6 730 < 2.2e-16 ***
## Stress.Level.Z 1 0.79789 718.52 2 364 < 2.2e-16 ***
## Gender:BMI.Category 3 0.07124 4.49 6 730 0.0001759 ***
## Residuals 365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(model_mancova)
## Response Sleep.Duration :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 3.490 3.490 20.5252 7.984e-06 ***
## BMI.Category 3 47.817 15.939 93.7297 < 2.2e-16 ***
## Stress.Level.Z 1 122.087 122.087 717.9304 < 2.2e-16 ***
## Gender:BMI.Category 3 0.670 0.223 1.3137 0.2696
## Residuals 365 62.070 0.170
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Quality.of.Sleep :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 45.367 45.367 210.46 < 2.2e-16 ***
## BMI.Category 3 106.403 35.468 164.54 < 2.2e-16 ***
## Stress.Level.Z 1 299.072 299.072 1387.40 < 2.2e-16 ***
## Gender:BMI.Category 3 4.876 1.625 7.54 6.596e-05 ***
## Residuals 365 78.680 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ancova_sd <- lm(
Sleep.Duration ~ Gender * BMI.Category + Stress.Level.Z,
data = data_bersih
)
Anova(ancova_sd, type = 3)
## Anova Table (Type III tests)
##
## Response: Sleep.Duration
## Sum Sq Df F value Pr(>F)
## (Intercept) 2512.52 1 14774.8054 < 2.2e-16 ***
## Gender 3.56 1 20.9147 6.586e-06 ***
## BMI.Category 2.64 3 5.1789 0.001626 **
## Stress.Level.Z 120.12 1 706.3858 < 2.2e-16 ***
## Gender:BMI.Category 0.67 3 1.3137 0.269642
## Residuals 62.07 365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ancova_qs <- lm(
Quality.of.Sleep ~ Gender * BMI.Category + Stress.Level.Z,
data = data_bersih
)
Anova(ancova_qs, type = 3)
## Anova Table (Type III tests)
##
## Response: Quality.of.Sleep
## Sum Sq Df F value Pr(>F)
## (Intercept) 2733.54 1 12680.9801 < 2.2e-16 ***
## Gender 1.22 1 5.6426 0.01804 *
## BMI.Category 1.75 3 2.7082 0.04506 *
## Stress.Level.Z 291.92 1 1354.2290 < 2.2e-16 ***
## Gender:BMI.Category 4.88 3 7.5400 6.596e-05 ***
## Residuals 78.68 365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(ancova_sd, pairwise ~ BMI.Category, adjust = "tukey")
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## BMI.Category emmean SE df lower.CL upper.CL
## Normal 7.24 0.0329 365 7.17 7.30
## Normal Weight 7.29 0.0955 365 7.10 7.48
## Obese 7.16 0.2170 365 6.73 7.58
## Overweight 6.94 0.0388 365 6.86 7.01
##
## Results are averaged over the levels of: Gender
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## Normal - Normal Weight -0.0526 0.1010 365 -0.522 0.9538
## Normal - Obese 0.0825 0.2200 365 0.375 0.9820
## Normal - Overweight 0.3028 0.0526 365 5.757 <0.0001
## Normal Weight - Obese 0.1350 0.2370 365 0.569 0.9413
## Normal Weight - Overweight 0.3553 0.1030 365 3.441 0.0036
## Obese - Overweight 0.2203 0.2210 365 0.998 0.7506
##
## Results are averaged over the levels of: Gender
## P value adjustment: tukey method for comparing a family of 4 estimates