LIBRARY
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(biotools)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ---
## biotools version 4.3
library(MVN)
## Registered S3 method overwritten by 'lme4':
## method from
## na.action.merMod car
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(ggplot2)
LOAD DATA
# Read the synthetic Gen Z mental-wellness CSV; the relative path is resolved
# against the current working directory.
data <- read.csv(file = "genz_mental_wellness_synthetic_dataset.csv")
CEK STRUKTUR & MISSING VALUE
# Compact structural overview: one line per column with its type and the
# first few values.
str(object = data)
## 'data.frame': 10000 obs. of 22 variables:
## $ Age : int 24 21 25 22 24 20 24 25 22 21 ...
## $ Gender : chr "Male" "Male" "Male" "Female" ...
## $ Country : chr "Canada" "USA" "Pakistan" "Pakistan" ...
## $ Student_Working_Status : chr "Working" "Student" "Student" "Student" ...
## $ Daily_Social_Media_Hours : num 4.81 4.16 3.07 4.41 2.97 6.06 3.68 4.64 6.5 3.73 ...
## $ Screen_Time_Hours : num 6.93 7.94 7.45 7.34 5.76 8.75 6.15 6.7 9.61 5.97 ...
## $ Night_Scrolling_Frequency : num 2.61 1.85 2.96 4.51 2.36 4.6 1.68 4.24 4.33 4.26 ...
## $ Online_Gaming_Hours : num 2.07 3.58 2.85 3.37 1.77 0.61 1.65 1.66 0 0.98 ...
## $ Content_Type_Preference : chr "News" "Gaming" "Entertainment" "Educational" ...
## $ Exercise_Frequency_per_Week: num 5.41 3.41 3.4 2.19 4.93 5.51 3.61 2.21 1.41 1.94 ...
## $ Daily_Sleep_Hours : num 6.84 7.88 6.39 7.92 5.97 6.7 5.18 6.27 6.78 4.65 ...
## $ Caffeine_Intake_Cups : num 1.52 2.23 0.53 0.58 1.64 0 1.5 2.46 1.89 2.9 ...
## $ Study_Work_Hours_per_Day : num 11.42 6.98 7.79 6.61 4.81 ...
## $ Overthinking_Score : num 4.95 5.91 4.06 6.1 5.22 6.19 5.31 4.07 5.32 4.46 ...
## $ Anxiety_Score : num 4.13 3.63 5.67 4.78 4.23 4.29 5.29 4.36 5.26 4.67 ...
## $ Mood_Stability_Score : num 5.74 5.75 6.03 4.85 5.05 3.21 6.23 3.68 3.54 5.91 ...
## $ Social_Comparison_Index : num 4.67 5.38 2.41 5.86 5.54 5.99 3.07 4.16 5.15 4.63 ...
## $ Sleep_Quality_Score : num 6.27 7.37 6.48 7.27 6.34 7.55 4.79 7.18 7.17 4.61 ...
## $ Motivation_Level : num 6.13 6.27 4.82 5.17 5.72 4.88 5.51 2.8 4.51 6.06 ...
## $ Emotional_Fatigue_Score : num 6.45 3.74 6.69 5.96 2.22 6.35 5.87 6.03 5.59 5.38 ...
## $ Wellbeing_Index : num 4.28 5.23 3.72 3.97 4.63 3.44 3.65 2.82 3.27 3.85 ...
## $ Burnout_Risk : chr "Medium" "Medium" "High" "High" ...
# Preview the first six rows (six is head()'s default, spelled out here).
head(x = data, n = 6L)
## Age Gender Country Student_Working_Status Daily_Social_Media_Hours
## 1 24 Male Canada Working 4.81
## 2 21 Male USA Student 4.16
## 3 25 Male Pakistan Student 3.07
## 4 22 Female Pakistan Student 4.41
## 5 24 Male Pakistan Student 2.97
## 6 20 Male Australia Both 6.06
## Screen_Time_Hours Night_Scrolling_Frequency Online_Gaming_Hours
## 1 6.93 2.61 2.07
## 2 7.94 1.85 3.58
## 3 7.45 2.96 2.85
## 4 7.34 4.51 3.37
## 5 5.76 2.36 1.77
## 6 8.75 4.60 0.61
## Content_Type_Preference Exercise_Frequency_per_Week Daily_Sleep_Hours
## 1 News 5.41 6.84
## 2 Gaming 3.41 7.88
## 3 Entertainment 3.40 6.39
## 4 Educational 2.19 7.92
## 5 Educational 4.93 5.97
## 6 Gaming 5.51 6.70
## Caffeine_Intake_Cups Study_Work_Hours_per_Day Overthinking_Score
## 1 1.52 11.42 4.95
## 2 2.23 6.98 5.91
## 3 0.53 7.79 4.06
## 4 0.58 6.61 6.10
## 5 1.64 4.81 5.22
## 6 0.00 6.44 6.19
## Anxiety_Score Mood_Stability_Score Social_Comparison_Index
## 1 4.13 5.74 4.67
## 2 3.63 5.75 5.38
## 3 5.67 6.03 2.41
## 4 4.78 4.85 5.86
## 5 4.23 5.05 5.54
## 6 4.29 3.21 5.99
## Sleep_Quality_Score Motivation_Level Emotional_Fatigue_Score Wellbeing_Index
## 1 6.27 6.13 6.45 4.28
## 2 7.37 6.27 3.74 5.23
## 3 6.48 4.82 6.69 3.72
## 4 7.27 5.17 5.96 3.97
## 5 6.34 5.72 2.22 4.63
## 6 7.55 4.88 6.35 3.44
## Burnout_Risk
## 1 Medium
## 2 Medium
## 3 High
## 4 High
## 5 Medium
## 6 High
# Total number of missing cells: count the NAs in each column, then add the
# per-column counts together.
sum(vapply(data, function(column) sum(is.na(column)), integer(1)))
## [1] 0
# Missing-value count for each column, returned as a named numeric vector.
vapply(data, function(column) as.numeric(sum(is.na(column))), numeric(1))
## Age Gender
## 0 0
## Country Student_Working_Status
## 0 0
## Daily_Social_Media_Hours Screen_Time_Hours
## 0 0
## Night_Scrolling_Frequency Online_Gaming_Hours
## 0 0
## Content_Type_Preference Exercise_Frequency_per_Week
## 0 0
## Daily_Sleep_Hours Caffeine_Intake_Cups
## 0 0
## Study_Work_Hours_per_Day Overthinking_Score
## 0 0
## Anxiety_Score Mood_Stability_Score
## 0 0
## Social_Comparison_Index Sleep_Quality_Score
## 0 0
## Motivation_Level Emotional_Fatigue_Score
## 0 0
## Wellbeing_Index Burnout_Risk
## 0 0
TIPE DATA
# Split the columns by storage type.
# vapply() guarantees exactly one logical per column (sapply() can silently
# change its return type), and drop = FALSE keeps the result a data frame even
# when only a single column matches.
num_data <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
cat_data <- data[, vapply(data, is.character, logical(1)), drop = FALSE]

# Response variables: Y1 = Anxiety_Score, Y2 = Emotional_Fatigue_Score.
Y <- data[, c("Anxiety_Score", "Emotional_Fatigue_Score")]

# Candidate predictors: every numeric column except the two responses.
X <- num_data[
  ,
  !(names(num_data) %in% c("Anxiety_Score", "Emotional_Fatigue_Score")),
  drop = FALSE
]

# Summary statistics of the candidate predictors.
summary(X)
## Age Daily_Social_Media_Hours Screen_Time_Hours
## Min. :18.00 Min. : 0.500 Min. : 2.000
## 1st Qu.:20.00 1st Qu.: 3.010 1st Qu.: 5.810
## Median :22.00 Median : 3.990 Median : 7.000
## Mean :21.99 Mean : 4.006 Mean : 7.017
## 3rd Qu.:24.00 3rd Qu.: 4.982 3rd Qu.: 8.210
## Max. :26.00 Max. :10.000 Max. :14.000
## Night_Scrolling_Frequency Online_Gaming_Hours Exercise_Frequency_per_Week
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.188 1st Qu.:0.470 1st Qu.:1.960
## Median :2.980 Median :1.460 Median :3.010
## Mean :2.986 Mean :1.607 Mean :2.995
## 3rd Qu.:3.790 3rd Qu.:2.500 3rd Qu.:4.000
## Max. :7.000 Max. :6.000 Max. :7.000
## Daily_Sleep_Hours Caffeine_Intake_Cups Study_Work_Hours_per_Day
## Min. : 3.000 Min. :0.000 Min. : 1.000
## 1st Qu.: 5.710 1st Qu.:0.800 1st Qu.: 4.700
## Median : 6.520 Median :1.480 Median : 6.030
## Mean : 6.512 Mean :1.506 Mean : 6.031
## 3rd Qu.: 7.320 3rd Qu.:2.150 3rd Qu.: 7.340
## Max. :10.000 Max. :6.000 Max. :13.050
## Overthinking_Score Mood_Stability_Score Social_Comparison_Index
## Min. :1.000 Min. : 1.000 Min. :1.000
## 1st Qu.:4.240 1st Qu.: 4.310 1st Qu.:3.470
## Median :5.000 Median : 5.170 Median :4.400
## Mean :5.002 Mean : 5.161 Mean :4.404
## 3rd Qu.:5.760 3rd Qu.: 6.000 3rd Qu.:5.320
## Max. :9.510 Max. :10.000 Max. :9.420
## Sleep_Quality_Score Motivation_Level Wellbeing_Index
## Min. : 1.020 Min. : 1.000 Min. :1.00
## 1st Qu.: 5.550 1st Qu.: 4.200 1st Qu.:3.02
## Median : 6.530 Median : 5.080 Median :3.87
## Mean : 6.514 Mean : 5.086 Mean :3.87
## 3rd Qu.: 7.490 3rd Qu.: 5.950 3rd Qu.:4.70
## Max. :10.000 Max. :10.000 Max. :8.86
SELEKSI FITUR (FEATURE SELECTION)
# Correlation of every candidate predictor with each of the two responses
# (cor() is called with its default method).
cor_X_Y1 <- cor(x = X, y = Y[["Anxiety_Score"]])
cor_X_Y2 <- cor(x = X, y = Y[["Emotional_Fatigue_Score"]])

# One row per predictor, holding its correlation with both responses.
cor_summary <- data.frame(
  Variable = names(X),
  Cor_Y1 = cor_X_Y1,
  Cor_Y2 = cor_X_Y2
)

# Mean of the two absolute correlations, used as the ranking score.
cor_summary$Mean_Cor <- rowMeans(abs(cor_summary[, c("Cor_Y1", "Cor_Y2")]))

# Strongest predictors first.
cor_summary <- cor_summary[order(cor_summary$Mean_Cor, decreasing = TRUE), ]
cor_summary
## Variable Cor_Y1
## Wellbeing_Index Wellbeing_Index -0.775874608
## Mood_Stability_Score Mood_Stability_Score -0.618832063
## Motivation_Level Motivation_Level -0.462907598
## Overthinking_Score Overthinking_Score 0.444053681
## Daily_Sleep_Hours Daily_Sleep_Hours -0.449064771
## Sleep_Quality_Score Sleep_Quality_Score -0.366901894
## Screen_Time_Hours Screen_Time_Hours 0.099618781
## Daily_Social_Media_Hours Daily_Social_Media_Hours 0.114328011
## Social_Comparison_Index Social_Comparison_Index 0.076411227
## Exercise_Frequency_per_Week Exercise_Frequency_per_Week -0.001128178
## Night_Scrolling_Frequency Night_Scrolling_Frequency 0.197665553
## Study_Work_Hours_per_Day Study_Work_Hours_per_Day 0.015335185
## Age Age -0.005333447
## Caffeine_Intake_Cups Caffeine_Intake_Cups -0.002761609
## Online_Gaming_Hours Online_Gaming_Hours 0.001825249
## Cor_Y2 Mean_Cor
## Wellbeing_Index -0.570005761 0.672940184
## Mood_Stability_Score -0.313900930 0.466366497
## Motivation_Level -0.339556743 0.401232170
## Overthinking_Score 0.289684997 0.366869339
## Daily_Sleep_Hours -0.221697599 0.335381185
## Sleep_Quality_Score -0.187234693 0.277068294
## Screen_Time_Hours 0.435616433 0.267617607
## Daily_Social_Media_Hours 0.380408076 0.247368044
## Social_Comparison_Index 0.255886340 0.166148783
## Exercise_Frequency_per_Week -0.310736474 0.155932326
## Night_Scrolling_Frequency 0.095219835 0.146442694
## Study_Work_Hours_per_Day 0.010379539 0.012857362
## Age 0.016289882 0.010811664
## Caffeine_Intake_Cups 0.005595893 0.004178751
## Online_Gaming_Hours 0.002110346 0.001967798
# Covariates retained after feature selection.
# NOTE(review): X is not referenced again in the visible script, and the
# MANCOVA model further down lists additional covariates -- confirm which
# covariate set is intended.
X <- data[c(
  "Mood_Stability_Score",
  "Motivation_Level",
  "Overthinking_Score",
  "Daily_Sleep_Hours"
)]
ANOVA (UNIVARIAT)
# One-way ANOVA of Anxiety_Score across Gender groups.
aov1 <- aov(formula = Anxiety_Score ~ Gender, data = data)
summary(object = aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 0 0.2258 0.132 0.876
## Residuals 9997 17044 1.7049
# One-way ANOVA of Emotional_Fatigue_Score across Gender groups.
aov2 <- aov(formula = Emotional_Fatigue_Score ~ Gender, data = data)
summary(object = aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 3 1.588 0.791 0.453
## Residuals 9997 20061 2.007
MANOVA (MULTIVARIAT)
# One-way MANOVA: both responses are modelled jointly against Gender and the
# multivariate effect is reported with the Wilks statistic.
manova_model <- manova(
  formula = cbind(Anxiety_Score, Emotional_Fatigue_Score) ~ Gender,
  data = data
)
summary(object = manova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 2 0.99969 0.78376 4 19992 0.5355
## Residuals 9997
# Follow-up: one univariate ANOVA table per response from the MANOVA fit.
summary.aov(object = manova_model)
## Response Anxiety_Score :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 0.5 0.22579 0.1324 0.876
## Residuals 9997 17043.6 1.70487
##
## Response Emotional_Fatigue_Score :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 3.2 1.5877 0.7912 0.4533
## Residuals 9997 20061.2 2.0067
UJI NORMALITAS
# Shapiro-Wilk normality check on a reproducible random subsample.
# shapiro.test() accepts at most 5000 observations, so the subsample is capped
# there; min() makes the draw fall back to all rows when the data set has
# fewer rows than the cap instead of erroring inside sample().
# NOTE(review): this tests each response marginally; MVN is attached above but
# no multivariate normality test is run in the visible script.
set.seed(10)
sample_data <- data[sample(nrow(data), min(5000L, nrow(data))), ]
shapiro.test(sample_data$Anxiety_Score)
##
## Shapiro-Wilk normality test
##
## data: sample_data$Anxiety_Score
## W = 0.99938, p-value = 0.08613
# Same Shapiro-Wilk check for the second response, on the same subsample.
shapiro.test(x = sample_data$Emotional_Fatigue_Score)
##
## Shapiro-Wilk normality test
##
## data: sample_data$Emotional_Fatigue_Score
## W = 0.99942, p-value = 0.1207
UJI HOMOGENITAS
# Box's M test for equality of the response covariance matrices across groups.
# NOTE(review): the groups here are the Gender x Burnout_Risk cells, whereas
# the MANOVA/MANCOVA models in this script use Gender as the only factor --
# confirm the intended grouping.
Y <- data[c("Anxiety_Score", "Emotional_Fatigue_Score")]
boxM(Y, interaction(data[["Gender"]], data[["Burnout_Risk"]]))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 60.068, df = 24, p-value = 6.249e-05
MANCOVA (MULTIVARIAT + COVARIATE)
# MANCOVA: both responses modelled jointly against Gender plus continuous
# covariates, reported with the Wilks statistic.
# Sleep_Quality_Score was listed twice in the original formula; the repeat is
# removed here. The fit is unchanged, because a repeated formula term enters
# the model only once.
# NOTE(review): the tests appear to be sequential (Gender's Sum Sq matches the
# Gender-only ANOVA), so Gender, entered first, is tested before the
# covariates are accounted for. If a covariate-adjusted Gender effect is
# wanted, consider listing Gender last or using car::Manova() -- confirm.
mancova_model <- manova(
  cbind(Anxiety_Score, Emotional_Fatigue_Score) ~
    Gender +
    Mood_Stability_Score +
    Motivation_Level +
    Overthinking_Score +
    Daily_Sleep_Hours +
    Sleep_Quality_Score +
    Screen_Time_Hours +
    Daily_Social_Media_Hours,
  data = data
)
summary(mancova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 2 0.99959 1.0 4 19978 0.3871
## Mood_Stability_Score 1 0.51987 4612.7 2 9989 <2e-16 ***
## Motivation_Level 1 0.80708 1193.8 2 9989 <2e-16 ***
## Overthinking_Score 1 0.84663 904.8 2 9989 <2e-16 ***
## Daily_Sleep_Hours 1 0.90482 525.4 2 9989 <2e-16 ***
## Sleep_Quality_Score 1 0.99971 1.5 2 9989 0.2323
## Screen_Time_Hours 1 0.79937 1253.6 2 9989 <2e-16 ***
## Daily_Social_Media_Hours 1 0.99980 1.0 2 9989 0.3677
## Residuals 9990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up: one univariate ANOVA table per response from the MANCOVA fit.
summary.aov(object = mancova_model)
## Response Anxiety_Score :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 0.5 0.2 0.3171 0.7283
## Mood_Stability_Score 1 6527.4 6527.4 9166.5938 <2e-16 ***
## Motivation_Level 1 1467.2 1467.2 2060.4711 <2e-16 ***
## Overthinking_Score 1 1201.2 1201.2 1686.9176 <2e-16 ***
## Daily_Sleep_Hours 1 732.8 732.8 1029.0505 <2e-16 ***
## Sleep_Quality_Score 1 0.4 0.4 0.6261 0.4288
## Screen_Time_Hours 1 0.1 0.1 0.0919 0.7617
## Daily_Social_Media_Hours 1 0.6 0.6 0.7760 0.3784
## Residuals 9990 7113.8 0.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Emotional_Fatigue_Score :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 2 3.2 1.59 1.2168 0.2962
## Mood_Stability_Score 1 1976.5 1976.51 1514.8378 < 2.2e-16 ***
## Motivation_Level 1 1342.2 1342.19 1028.6797 < 2.2e-16 ***
## Overthinking_Score 1 753.1 753.13 577.2149 < 2.2e-16 ***
## Daily_Sleep_Hours 1 50.0 50.03 38.3411 6.173e-10 ***
## Sleep_Quality_Score 1 1.8 1.78 1.3648 0.2427
## Screen_Time_Hours 1 2902.2 2902.20 2224.3101 < 2.2e-16 ***
## Daily_Social_Media_Hours 1 0.7 0.74 0.5685 0.4509
## Residuals 9990 13034.6 1.30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
VISUALISASI
# ANOVA visualisation: histogram of Anxiety_Score, one panel per Gender.
ggplot(data = data) +
  geom_histogram(
    mapping = aes(x = Anxiety_Score),
    bins = 30,
    fill = "lightblue",
    color = "black"
  ) +
  labs(
    title = "Distribusi Anxiety Score berdasarkan Gender",
    x = "Anxiety Score",
    y = "Frekuensi"
  ) +
  facet_wrap(vars(Gender)) +
  theme_minimal() +
  # Remove all grid lines from the minimal theme.
  theme(panel.grid = element_blank())

# ANOVA visualisation: histogram of Emotional_Fatigue_Score, one panel per
# Gender.
ggplot(data = data) +
  geom_histogram(
    mapping = aes(x = Emotional_Fatigue_Score),
    bins = 30,
    fill = "lightcoral",
    color = "black"
  ) +
  labs(
    title = "Distribusi Emotional Fatigue Score berdasarkan Gender",
    x = "Emotional Fatigue Score",
    y = "Frekuensi"
  ) +
  facet_wrap(vars(Gender)) +
  theme_minimal() +
  # Remove all grid lines from the minimal theme.
  theme(panel.grid = element_blank())

# MANOVA visualisation.
# Stack the two responses into long form: "Variable" holds the response name
# and "Score" its value, so both can be faceted in a single plot.
data_long <- pivot_longer(
  data,
  cols = c(Anxiety_Score, Emotional_Fatigue_Score),
  names_to = "Variable",
  values_to = "Score"
)

# Grid of histograms: one row per response, one column per Gender.
ggplot(data = data_long) +
  geom_histogram(
    mapping = aes(x = Score),
    bins = 30,
    fill = "lightpink",
    color = "black"
  ) +
  labs(
    title = "Distribusi Anxiety dan Emotional Fatigue berdasarkan Gender",
    x = "Score",
    y = "Frekuensi"
  ) +
  facet_grid(rows = vars(Variable), cols = vars(Gender)) +
  theme_classic()

# MANCOVA visualisation.
# Regress each response on the covariates only (Gender is left out); the
# residuals are the part of each response not explained linearly by them.
# NOTE(review): these models use four covariates while mancova_model lists
# seven, and the plot shows per-Gender residual means rather than
# model-based adjusted means -- confirm this is the intended display.
model_adj_A <- lm(
  Anxiety_Score ~ Mood_Stability_Score + Motivation_Level +
    Overthinking_Score + Daily_Sleep_Hours,
  data = data
)
model_adj_E <- lm(
  Emotional_Fatigue_Score ~ Mood_Stability_Score + Motivation_Level +
    Overthinking_Score + Daily_Sleep_Hours,
  data = data
)

# Store the residuals back on the data frame as the covariate-adjusted scores.
data[["Adj_Anxiety"]] <- residuals(model_adj_A)
data[["Adj_Emotional"]] <- residuals(model_adj_E)

# Long format: one row per observation and adjusted response.
data_adj_long <- pivot_longer(
  data,
  cols = c(Adj_Anxiety, Adj_Emotional),
  names_to = "Variable",
  values_to = "Score"
)

# Mean and standard error of the adjusted score per Gender and response.
summary_adj <- summarise(
  group_by(data_adj_long, Gender, Variable),
  mean_score = mean(Score),
  se = sd(Score) / sqrt(n()),
  .groups = "drop"
)

# Bar chart of the group means with +/- 1 SE error bars, one panel per
# response; each panel gets its own y scale.
ggplot(
  data = summary_adj,
  mapping = aes(x = Gender, y = mean_score, fill = Gender)
) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_errorbar(
    mapping = aes(ymin = mean_score - se, ymax = mean_score + se),
    width = 0.2
  ) +
  labs(
    title = "Adjusted Mean (MANCOVA) setelah Kontrol Covariate",
    x = "Gender",
    y = "Adjusted Score"
  ) +
  facet_wrap(vars(Variable), scales = "free_y") +
  theme_minimal() +
  # Fill colour duplicates the x axis, so the legend is hidden.
  theme(legend.position = "none")
