LIBRARY

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(biotools)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ---
## biotools version 4.3
library(MVN)
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(ggplot2)

LOAD DATA

# Read the survey CSV (the path is resolved against the working directory)
data <- read.csv(file = "genz_mental_wellness_synthetic_dataset.csv")

CEK STRUKTUR & MISSING VALUE

# Print the dimensions of the data frame and the type of every column
str(data)
## 'data.frame':    10000 obs. of  22 variables:
##  $ Age                        : int  24 21 25 22 24 20 24 25 22 21 ...
##  $ Gender                     : chr  "Male" "Male" "Male" "Female" ...
##  $ Country                    : chr  "Canada" "USA" "Pakistan" "Pakistan" ...
##  $ Student_Working_Status     : chr  "Working" "Student" "Student" "Student" ...
##  $ Daily_Social_Media_Hours   : num  4.81 4.16 3.07 4.41 2.97 6.06 3.68 4.64 6.5 3.73 ...
##  $ Screen_Time_Hours          : num  6.93 7.94 7.45 7.34 5.76 8.75 6.15 6.7 9.61 5.97 ...
##  $ Night_Scrolling_Frequency  : num  2.61 1.85 2.96 4.51 2.36 4.6 1.68 4.24 4.33 4.26 ...
##  $ Online_Gaming_Hours        : num  2.07 3.58 2.85 3.37 1.77 0.61 1.65 1.66 0 0.98 ...
##  $ Content_Type_Preference    : chr  "News" "Gaming" "Entertainment" "Educational" ...
##  $ Exercise_Frequency_per_Week: num  5.41 3.41 3.4 2.19 4.93 5.51 3.61 2.21 1.41 1.94 ...
##  $ Daily_Sleep_Hours          : num  6.84 7.88 6.39 7.92 5.97 6.7 5.18 6.27 6.78 4.65 ...
##  $ Caffeine_Intake_Cups       : num  1.52 2.23 0.53 0.58 1.64 0 1.5 2.46 1.89 2.9 ...
##  $ Study_Work_Hours_per_Day   : num  11.42 6.98 7.79 6.61 4.81 ...
##  $ Overthinking_Score         : num  4.95 5.91 4.06 6.1 5.22 6.19 5.31 4.07 5.32 4.46 ...
##  $ Anxiety_Score              : num  4.13 3.63 5.67 4.78 4.23 4.29 5.29 4.36 5.26 4.67 ...
##  $ Mood_Stability_Score       : num  5.74 5.75 6.03 4.85 5.05 3.21 6.23 3.68 3.54 5.91 ...
##  $ Social_Comparison_Index    : num  4.67 5.38 2.41 5.86 5.54 5.99 3.07 4.16 5.15 4.63 ...
##  $ Sleep_Quality_Score        : num  6.27 7.37 6.48 7.27 6.34 7.55 4.79 7.18 7.17 4.61 ...
##  $ Motivation_Level           : num  6.13 6.27 4.82 5.17 5.72 4.88 5.51 2.8 4.51 6.06 ...
##  $ Emotional_Fatigue_Score    : num  6.45 3.74 6.69 5.96 2.22 6.35 5.87 6.03 5.59 5.38 ...
##  $ Wellbeing_Index            : num  4.28 5.23 3.72 3.97 4.63 3.44 3.65 2.82 3.27 3.85 ...
##  $ Burnout_Risk               : chr  "Medium" "Medium" "High" "High" ...
# Preview the first six rows
head(data)
##   Age Gender   Country Student_Working_Status Daily_Social_Media_Hours
## 1  24   Male    Canada                Working                     4.81
## 2  21   Male       USA                Student                     4.16
## 3  25   Male  Pakistan                Student                     3.07
## 4  22 Female  Pakistan                Student                     4.41
## 5  24   Male  Pakistan                Student                     2.97
## 6  20   Male Australia                   Both                     6.06
##   Screen_Time_Hours Night_Scrolling_Frequency Online_Gaming_Hours
## 1              6.93                      2.61                2.07
## 2              7.94                      1.85                3.58
## 3              7.45                      2.96                2.85
## 4              7.34                      4.51                3.37
## 5              5.76                      2.36                1.77
## 6              8.75                      4.60                0.61
##   Content_Type_Preference Exercise_Frequency_per_Week Daily_Sleep_Hours
## 1                    News                        5.41              6.84
## 2                  Gaming                        3.41              7.88
## 3           Entertainment                        3.40              6.39
## 4             Educational                        2.19              7.92
## 5             Educational                        4.93              5.97
## 6                  Gaming                        5.51              6.70
##   Caffeine_Intake_Cups Study_Work_Hours_per_Day Overthinking_Score
## 1                 1.52                    11.42               4.95
## 2                 2.23                     6.98               5.91
## 3                 0.53                     7.79               4.06
## 4                 0.58                     6.61               6.10
## 5                 1.64                     4.81               5.22
## 6                 0.00                     6.44               6.19
##   Anxiety_Score Mood_Stability_Score Social_Comparison_Index
## 1          4.13                 5.74                    4.67
## 2          3.63                 5.75                    5.38
## 3          5.67                 6.03                    2.41
## 4          4.78                 4.85                    5.86
## 5          4.23                 5.05                    5.54
## 6          4.29                 3.21                    5.99
##   Sleep_Quality_Score Motivation_Level Emotional_Fatigue_Score Wellbeing_Index
## 1                6.27             6.13                    6.45            4.28
## 2                7.37             6.27                    3.74            5.23
## 3                6.48             4.82                    6.69            3.72
## 4                7.27             5.17                    5.96            3.97
## 5                6.34             5.72                    2.22            4.63
## 6                7.55             4.88                    6.35            3.44
##   Burnout_Risk
## 1       Medium
## 2       Medium
## 3         High
## 4         High
## 5       Medium
## 6         High
# Missing values: total number of NA cells in the whole data frame
sum(is.na(data))
## [1] 0
# Missing values: number of NA cells per column
colSums(is.na(data))
##                         Age                      Gender 
##                           0                           0 
##                     Country      Student_Working_Status 
##                           0                           0 
##    Daily_Social_Media_Hours           Screen_Time_Hours 
##                           0                           0 
##   Night_Scrolling_Frequency         Online_Gaming_Hours 
##                           0                           0 
##     Content_Type_Preference Exercise_Frequency_per_Week 
##                           0                           0 
##           Daily_Sleep_Hours        Caffeine_Intake_Cups 
##                           0                           0 
##    Study_Work_Hours_per_Day          Overthinking_Score 
##                           0                           0 
##               Anxiety_Score        Mood_Stability_Score 
##                           0                           0 
##     Social_Comparison_Index         Sleep_Quality_Score 
##                           0                           0 
##            Motivation_Level     Emotional_Fatigue_Score 
##                           0                           0 
##             Wellbeing_Index                Burnout_Risk 
##                           0                           0

TIPE DATA

# Split the columns by storage type.
# vapply() always returns a logical mask (sapply() returns an empty list for
# a zero-column input), and drop = FALSE keeps the result a data frame even
# when only one column matches.
is_num_col <- vapply(data, is.numeric, logical(1))
is_chr_col <- vapply(data, is.character, logical(1))

# Numeric variables
num_data <- data[, is_num_col, drop = FALSE]

# Categorical variables (stored as character columns)
cat_data <- data[, is_chr_col, drop = FALSE]

# Response variables Y1 and Y2
response_vars <- c("Anxiety_Score", "Emotional_Fatigue_Score")
Y <- data[, response_vars, drop = FALSE]

# Candidate predictors X: every numeric column except the two responses
X <- num_data[, setdiff(names(num_data), response_vars), drop = FALSE]

# Lihat ringkasan X
summary(X)
##       Age        Daily_Social_Media_Hours Screen_Time_Hours
##  Min.   :18.00   Min.   : 0.500           Min.   : 2.000   
##  1st Qu.:20.00   1st Qu.: 3.010           1st Qu.: 5.810   
##  Median :22.00   Median : 3.990           Median : 7.000   
##  Mean   :21.99   Mean   : 4.006           Mean   : 7.017   
##  3rd Qu.:24.00   3rd Qu.: 4.982           3rd Qu.: 8.210   
##  Max.   :26.00   Max.   :10.000           Max.   :14.000   
##  Night_Scrolling_Frequency Online_Gaming_Hours Exercise_Frequency_per_Week
##  Min.   :0.000             Min.   :0.000       Min.   :0.000              
##  1st Qu.:2.188             1st Qu.:0.470       1st Qu.:1.960              
##  Median :2.980             Median :1.460       Median :3.010              
##  Mean   :2.986             Mean   :1.607       Mean   :2.995              
##  3rd Qu.:3.790             3rd Qu.:2.500       3rd Qu.:4.000              
##  Max.   :7.000             Max.   :6.000       Max.   :7.000              
##  Daily_Sleep_Hours Caffeine_Intake_Cups Study_Work_Hours_per_Day
##  Min.   : 3.000    Min.   :0.000        Min.   : 1.000          
##  1st Qu.: 5.710    1st Qu.:0.800        1st Qu.: 4.700          
##  Median : 6.520    Median :1.480        Median : 6.030          
##  Mean   : 6.512    Mean   :1.506        Mean   : 6.031          
##  3rd Qu.: 7.320    3rd Qu.:2.150        3rd Qu.: 7.340          
##  Max.   :10.000    Max.   :6.000        Max.   :13.050          
##  Overthinking_Score Mood_Stability_Score Social_Comparison_Index
##  Min.   :1.000      Min.   : 1.000       Min.   :1.000          
##  1st Qu.:4.240      1st Qu.: 4.310       1st Qu.:3.470          
##  Median :5.000      Median : 5.170       Median :4.400          
##  Mean   :5.002      Mean   : 5.161       Mean   :4.404          
##  3rd Qu.:5.760      3rd Qu.: 6.000       3rd Qu.:5.320          
##  Max.   :9.510      Max.   :10.000       Max.   :9.420          
##  Sleep_Quality_Score Motivation_Level Wellbeing_Index
##  Min.   : 1.020      Min.   : 1.000   Min.   :1.00   
##  1st Qu.: 5.550      1st Qu.: 4.200   1st Qu.:3.02   
##  Median : 6.530      Median : 5.080   Median :3.87   
##  Mean   : 6.514      Mean   : 5.086   Mean   :3.87   
##  3rd Qu.: 7.490      3rd Qu.: 5.950   3rd Qu.:4.70   
##  Max.   :10.000      Max.   :10.000   Max.   :8.86

FEATURE SELECTION

# Correlate every candidate predictor in X with each of the two responses
cor_X_Y1 <- cor(X, Y[["Anxiety_Score"]])
cor_X_Y2 <- cor(X, Y[["Emotional_Fatigue_Score"]])

# Collect both sets of correlations in one table, one row per predictor
cor_summary <- data.frame(
  Variable = names(X),
  Cor_Y1 = cor_X_Y1,
  Cor_Y2 = cor_X_Y2
)

# Score each predictor by the mean of its two absolute correlations
cor_columns <- c("Cor_Y1", "Cor_Y2")
cor_summary$Mean_Cor <- rowMeans(abs(cor_summary[, cor_columns]))

# Sort from the strongest to the weakest predictor and print the table
ranking <- order(cor_summary$Mean_Cor, decreasing = TRUE)
cor_summary <- cor_summary[ranking, ]
cor_summary
##                                                Variable       Cor_Y1
## Wellbeing_Index                         Wellbeing_Index -0.775874608
## Mood_Stability_Score               Mood_Stability_Score -0.618832063
## Motivation_Level                       Motivation_Level -0.462907598
## Overthinking_Score                   Overthinking_Score  0.444053681
## Daily_Sleep_Hours                     Daily_Sleep_Hours -0.449064771
## Sleep_Quality_Score                 Sleep_Quality_Score -0.366901894
## Screen_Time_Hours                     Screen_Time_Hours  0.099618781
## Daily_Social_Media_Hours       Daily_Social_Media_Hours  0.114328011
## Social_Comparison_Index         Social_Comparison_Index  0.076411227
## Exercise_Frequency_per_Week Exercise_Frequency_per_Week -0.001128178
## Night_Scrolling_Frequency     Night_Scrolling_Frequency  0.197665553
## Study_Work_Hours_per_Day       Study_Work_Hours_per_Day  0.015335185
## Age                                                 Age -0.005333447
## Caffeine_Intake_Cups               Caffeine_Intake_Cups -0.002761609
## Online_Gaming_Hours                 Online_Gaming_Hours  0.001825249
##                                   Cor_Y2    Mean_Cor
## Wellbeing_Index             -0.570005761 0.672940184
## Mood_Stability_Score        -0.313900930 0.466366497
## Motivation_Level            -0.339556743 0.401232170
## Overthinking_Score           0.289684997 0.366869339
## Daily_Sleep_Hours           -0.221697599 0.335381185
## Sleep_Quality_Score         -0.187234693 0.277068294
## Screen_Time_Hours            0.435616433 0.267617607
## Daily_Social_Media_Hours     0.380408076 0.247368044
## Social_Comparison_Index      0.255886340 0.166148783
## Exercise_Frequency_per_Week -0.310736474 0.155932326
## Night_Scrolling_Frequency    0.095219835 0.146442694
## Study_Work_Hours_per_Day     0.010379539 0.012857362
## Age                          0.016289882 0.010811664
## Caffeine_Intake_Cups         0.005595893 0.004178751
## Online_Gaming_Hours          0.002110346 0.001967798
# Predictors kept after feature selection
# NOTE(review): Wellbeing_Index has the highest Mean_Cor in the printed
# ranking but is not kept here -- confirm the exclusion is intentional.
selected_features <- c(
  "Mood_Stability_Score",
  "Motivation_Level",
  "Overthinking_Score",
  "Daily_Sleep_Hours"
)
X <- data[, selected_features]

ANOVA (UNIVARIAT)

# One-way ANOVA of Anxiety_Score on Gender, followed by its ANOVA table
aov1 <- aov(Anxiety_Score ~ Gender, data = data)
summary(aov1)
##               Df Sum Sq Mean Sq F value Pr(>F)
## Gender         2      0  0.2258   0.132  0.876
## Residuals   9997  17044  1.7049
# One-way ANOVA of Emotional_Fatigue_Score on Gender, followed by its table
aov2 <- aov(Emotional_Fatigue_Score ~ Gender, data = data)
summary(aov2)
##               Df Sum Sq Mean Sq F value Pr(>F)
## Gender         2      3   1.588   0.791  0.453
## Residuals   9997  20061   2.007

MANOVA (MULTIVARIAT)

# One-way MANOVA: both responses are modelled jointly on Gender
manova_model <- manova(
  cbind(Anxiety_Score, Emotional_Fatigue_Score) ~ Gender,
  data = data
)

# Multivariate test of the Gender effect using Wilks' lambda
summary(manova_model, test = "Wilks")
##             Df   Wilks approx F num Df den Df Pr(>F)
## Gender       2 0.99969  0.78376      4  19992 0.5355
## Residuals 9997
# Follow-up: a separate univariate ANOVA table for each response
summary.aov(manova_model)
##  Response Anxiety_Score :
##               Df  Sum Sq Mean Sq F value Pr(>F)
## Gender         2     0.5 0.22579  0.1324  0.876
## Residuals   9997 17043.6 1.70487               
## 
##  Response Emotional_Fatigue_Score :
##               Df  Sum Sq Mean Sq F value Pr(>F)
## Gender         2     3.2  1.5877  0.7912 0.4533
## Residuals   9997 20061.2  2.0067

UJI NORMALITAS

# Shapiro-Wilk normality check on the first response.
# shapiro.test() accepts at most 5000 observations, so the test runs on a
# reproducible random subsample. min() caps the subsample at the number of
# available rows, so this no longer errors on data with fewer than 5000 rows.
# NOTE(review): this is a univariate test on the raw response, not on model
# residuals or on the joint distribution -- confirm that is the intended
# assumption check.
set.seed(10)
shapiro_n <- min(5000L, nrow(data))
sample_data <- data[sample.int(nrow(data), shapiro_n), ]

shapiro.test(sample_data$Anxiety_Score)
## 
##  Shapiro-Wilk normality test
## 
## data:  sample_data$Anxiety_Score
## W = 0.99938, p-value = 0.08613
# Shapiro-Wilk test on the second response, using the same random subsample
shapiro.test(sample_data$Emotional_Fatigue_Score)
## 
##  Shapiro-Wilk normality test
## 
## data:  sample_data$Emotional_Fatigue_Score
## W = 0.99942, p-value = 0.1207

UJI HOMOGENITAS

# Box's M test for homogeneity of the response covariance matrices.
# NOTE(review): the groups tested here are the Gender x Burnout_Risk
# combinations, whereas the MANOVA and MANCOVA models in this file use Gender
# as the only factor -- confirm whether the grouping should be data$Gender.
Y <- data[, c("Anxiety_Score", "Emotional_Fatigue_Score")]
boxM(Y, interaction(data$Gender, data$Burnout_Risk))
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 60.068, df = 24, p-value = 6.249e-05

MANCOVA (MULTIVARIAT + COVARIATE)

# MANCOVA: both responses modelled jointly on Gender plus continuous
# covariates.
# Sleep_Quality_Score used to be listed twice in this formula. R drops
# repeated terms, so removing the duplicate leaves the fitted model unchanged.
# NOTE(review): the tests below are sequential in formula order, so Gender
# (listed first) is assessed before any covariate enters the model; its sums
# of squares equal those of the covariate-free MANOVA. If a covariate-adjusted
# Gender effect is wanted, list Gender last or use car::Manova() -- confirm.
mancova_model <- manova(
  cbind(Anxiety_Score, Emotional_Fatigue_Score) ~
    Gender +
    Mood_Stability_Score +
    Motivation_Level +
    Overthinking_Score +
    Daily_Sleep_Hours +
    Sleep_Quality_Score +
    Screen_Time_Hours +
    Daily_Social_Media_Hours,
  data = data
)

# Multivariate test of every term using Wilks' lambda
summary(mancova_model, test = "Wilks")
##                            Df   Wilks approx F num Df den Df Pr(>F)    
## Gender                      2 0.99959      1.0      4  19978 0.3871    
## Mood_Stability_Score        1 0.51987   4612.7      2   9989 <2e-16 ***
## Motivation_Level            1 0.80708   1193.8      2   9989 <2e-16 ***
## Overthinking_Score          1 0.84663    904.8      2   9989 <2e-16 ***
## Daily_Sleep_Hours           1 0.90482    525.4      2   9989 <2e-16 ***
## Sleep_Quality_Score         1 0.99971      1.5      2   9989 0.2323    
## Screen_Time_Hours           1 0.79937   1253.6      2   9989 <2e-16 ***
## Daily_Social_Media_Hours    1 0.99980      1.0      2   9989 0.3677    
## Residuals                9990                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up: a separate univariate ANOVA table for each response
summary.aov(mancova_model)
##  Response Anxiety_Score :
##                            Df Sum Sq Mean Sq   F value Pr(>F)    
## Gender                      2    0.5     0.2    0.3171 0.7283    
## Mood_Stability_Score        1 6527.4  6527.4 9166.5938 <2e-16 ***
## Motivation_Level            1 1467.2  1467.2 2060.4711 <2e-16 ***
## Overthinking_Score          1 1201.2  1201.2 1686.9176 <2e-16 ***
## Daily_Sleep_Hours           1  732.8   732.8 1029.0505 <2e-16 ***
## Sleep_Quality_Score         1    0.4     0.4    0.6261 0.4288    
## Screen_Time_Hours           1    0.1     0.1    0.0919 0.7617    
## Daily_Social_Media_Hours    1    0.6     0.6    0.7760 0.3784    
## Residuals                9990 7113.8     0.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Emotional_Fatigue_Score :
##                            Df  Sum Sq Mean Sq   F value    Pr(>F)    
## Gender                      2     3.2    1.59    1.2168    0.2962    
## Mood_Stability_Score        1  1976.5 1976.51 1514.8378 < 2.2e-16 ***
## Motivation_Level            1  1342.2 1342.19 1028.6797 < 2.2e-16 ***
## Overthinking_Score          1   753.1  753.13  577.2149 < 2.2e-16 ***
## Daily_Sleep_Hours           1    50.0   50.03   38.3411 6.173e-10 ***
## Sleep_Quality_Score         1     1.8    1.78    1.3648    0.2427    
## Screen_Time_Hours           1  2902.2 2902.20 2224.3101 < 2.2e-16 ***
## Daily_Social_Media_Hours    1     0.7    0.74    0.5685    0.4509    
## Residuals                9990 13034.6    1.30                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

VISUALISASI

# ANOVA visualisation: histogram of each response, one panel per gender.
# Both plots share a minimal theme with every grid line removed.
theme_no_grid <- theme_minimal() + theme(panel.grid = element_blank())

# Anxiety Score
ggplot(data, aes(x = Anxiety_Score)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  facet_wrap(vars(Gender)) +
  labs(
    title = "Distribusi Anxiety Score berdasarkan Gender",
    x = "Anxiety Score",
    y = "Frekuensi"
  ) +
  theme_no_grid

# Emotional Fatigue Score
ggplot(data, aes(x = Emotional_Fatigue_Score)) +
  geom_histogram(bins = 30, fill = "lightcoral", color = "black") +
  facet_wrap(vars(Gender)) +
  labs(
    title = "Distribusi Emotional Fatigue Score berdasarkan Gender",
    x = "Emotional Fatigue Score",
    y = "Frekuensi"
  ) +
  theme_no_grid

# MANOVA visualisation
# Stack the two responses into one Score column, labelled by Variable
data_long <- pivot_longer(
  data,
  cols = c(Anxiety_Score, Emotional_Fatigue_Score),
  names_to = "Variable",
  values_to = "Score"
)

# Histogram grid: one row per response, one column per gender
ggplot(data_long, aes(x = Score)) +
  geom_histogram(bins = 30, fill = "lightpink", color = "black") +
  facet_grid(rows = vars(Variable), cols = vars(Gender)) +
  labs(
    title = "Distribusi Anxiety dan Emotional Fatigue berdasarkan Gender",
    x = "Score",
    y = "Frekuensi"
  ) +
  theme_classic()

# MANCOVA visualisation
# Regress each response on four covariates (Gender is not in these models).
# NOTE(review): mancova_model in this file uses seven covariates, not these
# four -- confirm which covariate set this plot is meant to reflect.
model_adj_A <- lm(
  Anxiety_Score ~
    Mood_Stability_Score +
    Motivation_Level +
    Overthinking_Score +
    Daily_Sleep_Hours,
  data = data
)

model_adj_E <- lm(
  Emotional_Fatigue_Score ~
    Mood_Stability_Score +
    Motivation_Level +
    Overthinking_Score +
    Daily_Sleep_Hours,
  data = data
)

# Keep the residuals of both fits as new columns of data
data$Adj_Anxiety <- resid(model_adj_A)
data$Adj_Emotional <- resid(model_adj_E)

# Stack the two residual columns into long format
data_adj_long <- pivot_longer(
  data,
  cols = c(Adj_Anxiety, Adj_Emotional),
  names_to = "Variable",
  values_to = "Score"
)

# Mean residual and its standard error per gender and response
summary_adj <- summarise(
  group_by(data_adj_long, Gender, Variable),
  mean_score = mean(Score),
  se = sd(Score) / sqrt(n()),
  .groups = "drop"
)

# Bar chart of the mean residuals with +/- 1 SE error bars, one panel per
# response and a free y axis per panel
ggplot(summary_adj, aes(x = Gender, y = mean_score, fill = Gender)) +
  geom_col(width = 0.6) +
  geom_errorbar(
    aes(ymin = mean_score - se, ymax = mean_score + se),
    width = 0.2
  ) +
  facet_wrap(vars(Variable), scales = "free_y") +
  labs(
    title = "Adjusted Mean (MANCOVA) setelah Kontrol Covariate",
    x = "Gender",
    y = "Adjusted Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")