1. LOAD PACKAGES

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(biotools)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ---
## biotools version 4.3
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:car':
## 
##     logit
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(effectsize)
## 
## Attaching package: 'effectsize'
## 
## The following object is masked from 'package:psych':
## 
##     phi

2. LOAD DATASET

# Read the raw CSV (first row holds the column names) into a data frame.
# The relative path is resolved against the current working directory.
df <- read.csv(
  file = "ObesityDataSet_raw_and_data_sinthetic.csv",
  header = TRUE
)

3. MEMBACA DATASET

# Preview the first six rows to sanity-check the import.
head(df, n = 6L)
##   Age Gender Height Weight       CALC FAVC FCVC NCP SCC SMOKE CH2O
## 1  21 Female   1.62   64.0         no   no    2   3  no    no    2
## 2  21 Female   1.52   56.0  Sometimes   no    3   3 yes   yes    3
## 3  23   Male   1.80   77.0 Frequently   no    2   3  no    no    2
## 4  27   Male   1.80   87.0 Frequently   no    3   3  no    no    2
## 5  22   Male   1.78   89.8  Sometimes   no    2   1  no    no    2
## 6  29   Male   1.62   53.0  Sometimes  yes    2   3  no    no    2
##   family_history_with_overweight FAF TUE      CAEC                MTRANS
## 1                            yes   0   1 Sometimes Public_Transportation
## 2                            yes   3   0 Sometimes Public_Transportation
## 3                            yes   2   1 Sometimes Public_Transportation
## 4                             no   2   0 Sometimes               Walking
## 5                             no   0   0 Sometimes Public_Transportation
## 6                             no   0   0 Sometimes            Automobile
##            NObeyesdad
## 1       Normal_Weight
## 2       Normal_Weight
## 3       Normal_Weight
## 4  Overweight_Level_I
## 5 Overweight_Level_II
## 6       Normal_Weight

4. CEK STRUKTUR DATASET

# Show the dimensions and the storage type of every column.
str(df)
## 'data.frame':    2111 obs. of  17 variables:
##  $ Age                           : num  21 21 23 27 22 29 23 22 24 22 ...
##  $ Gender                        : chr  "Female" "Female" "Male" "Male" ...
##  $ Height                        : num  1.62 1.52 1.8 1.8 1.78 1.62 1.5 1.64 1.78 1.72 ...
##  $ Weight                        : num  64 56 77 87 89.8 53 55 53 64 68 ...
##  $ CALC                          : chr  "no" "Sometimes" "Frequently" "Frequently" ...
##  $ FAVC                          : chr  "no" "no" "no" "no" ...
##  $ FCVC                          : num  2 3 2 3 2 2 3 2 3 2 ...
##  $ NCP                           : num  3 3 3 3 1 3 3 3 3 3 ...
##  $ SCC                           : chr  "no" "yes" "no" "no" ...
##  $ SMOKE                         : chr  "no" "yes" "no" "no" ...
##  $ CH2O                          : num  2 3 2 2 2 2 2 2 2 2 ...
##  $ family_history_with_overweight: chr  "yes" "yes" "yes" "no" ...
##  $ FAF                           : num  0 3 2 2 0 0 1 3 1 1 ...
##  $ TUE                           : num  1 0 1 0 0 0 0 0 1 1 ...
##  $ CAEC                          : chr  "Sometimes" "Sometimes" "Sometimes" "Sometimes" ...
##  $ MTRANS                        : chr  "Public_Transportation" "Public_Transportation" "Public_Transportation" "Walking" ...
##  $ NObeyesdad                    : chr  "Normal_Weight" "Normal_Weight" "Normal_Weight" "Overweight_Level_I" ...

5. TABEL DESKRIPTIF

# Descriptive statistics for every column (psych package).
# Rows flagged with `*` in the output are non-numeric columns; their
# moments are computed on numeric codes and should be read with caution.
psych::describe(df)
##                                 vars    n  mean    sd median trimmed   mad
## Age                                1 2111 24.31  6.35  22.78   23.34  4.78
## Gender*                            2 2111  1.51  0.50   2.00    1.51  0.00
## Height                             3 2111  1.70  0.09   1.70    1.70  0.10
## Weight                             4 2111 86.59 26.19  83.00   85.82 32.22
## CALC*                              5 2111  3.63  0.55   4.00    3.70  0.00
## FAVC*                              6 2111  1.88  0.32   2.00    1.98  0.00
## FCVC                               7 2111  2.42  0.53   2.39    2.46  0.57
## NCP                                8 2111  2.69  0.78   3.00    2.77  0.00
## SCC*                               9 2111  1.05  0.21   1.00    1.00  0.00
## SMOKE*                            10 2111  1.02  0.14   1.00    1.00  0.00
## CH2O                              11 2111  2.01  0.61   2.00    2.01  0.67
## family_history_with_overweight*   12 2111  1.82  0.39   2.00    1.90  0.00
## FAF                               13 2111  1.01  0.85   1.00    0.94  1.19
## TUE                               14 2111  0.66  0.61   0.63    0.59  0.72
## CAEC*                             15 2111  3.67  0.78   4.00    3.87  0.00
## MTRANS*                           16 2111  3.37  1.26   4.00    3.55  0.00
## NObeyesdad*                       17 2111  4.02  1.95   4.00    4.02  2.97
##                                   min    max  range  skew kurtosis   se
## Age                             14.00  61.00  47.00  1.53     2.81 0.14
## Gender*                          1.00   2.00   1.00 -0.02    -2.00 0.01
## Height                           1.45   1.98   0.53 -0.01    -0.57 0.00
## Weight                          39.00 173.00 134.00  0.26    -0.70 0.57
## CALC*                            1.00   4.00   3.00 -1.17     0.46 0.01
## FAVC*                            1.00   2.00   1.00 -2.40     3.74 0.01
## FCVC                             1.00   3.00   2.00 -0.43    -0.64 0.01
## NCP                              1.00   4.00   3.00 -1.11     0.38 0.02
## SCC*                             1.00   2.00   1.00  4.36    17.02 0.00
## SMOKE*                           1.00   2.00   1.00  6.70    42.95 0.00
## CH2O                             1.00   3.00   2.00 -0.10    -0.88 0.01
## family_history_with_overweight*  1.00   2.00   1.00 -1.64     0.70 0.01
## FAF                              0.00   3.00   3.00  0.50    -0.62 0.02
## TUE                              0.00   2.00   2.00  0.62    -0.55 0.01
## CAEC*                            1.00   4.00   3.00 -2.13     3.06 0.02
## MTRANS*                          1.00   5.00   4.00 -1.28    -0.20 0.03
## NObeyesdad*                      1.00   7.00   6.00  0.01    -1.19 0.04

6. PILIH VARIABEL

# Keep only the two responses (Weight, Height), the two grouping
# variables, and Age; every other column is dropped.
df <- df[c(
  "Weight",
  "Height",
  "Gender",
  "family_history_with_overweight",
  "Age"
)]

7. UBAH TIPE DATA

# Grouping variables become factors.
df[c("Gender", "family_history_with_overweight")] <-
  lapply(df[c("Gender", "family_history_with_overweight")], as.factor)

# Responses and the covariate are stored as numeric (double).
df[c("Age", "Weight", "Height")] <-
  lapply(df[c("Age", "Weight", "Height")], as.numeric)

8. CEK MISSING VALUE

# Number of missing values in each retained column.
vapply(df, function(column) sum(is.na(column)), numeric(1))
##                         Weight                         Height 
##                              0                              0 
##                         Gender family_history_with_overweight 
##                              0                              0 
##                            Age 
##                              0

9. STATISTIK DESKRIPTIF

# Five-number summary plus mean for numeric columns, and level counts
# for the factor columns.
summary(df)
##      Weight           Height         Gender     family_history_with_overweight
##  Min.   : 39.00   Min.   :1.450   Female:1043   no : 385                      
##  1st Qu.: 65.47   1st Qu.:1.630   Male  :1068   yes:1726                      
##  Median : 83.00   Median :1.700                                               
##  Mean   : 86.59   Mean   :1.702                                               
##  3rd Qu.:107.43   3rd Qu.:1.768                                               
##  Max.   :173.00   Max.   :1.980                                               
##       Age       
##  Min.   :14.00  
##  1st Qu.:19.95  
##  Median :22.78  
##  Mean   :24.31  
##  3rd Qu.:26.00  
##  Max.   :61.00

10. VISUALISASI

# Boxplots of each response split by gender.
boxplot(formula = Weight ~ Gender, data = df, main = "Weight by Gender")

boxplot(formula = Height ~ Gender, data = df, main = "Height by Gender")

# Histograms of the two responses over the whole sample.
hist(x = df$Weight, main = "Histogram Weight", xlab = "Weight")

hist(x = df$Height, main = "Histogram Height", xlab = "Height")

11. Normalitas (Shapiro-Wilk)

# Shapiro-Wilk normality test on the Weight column.
# NOTE(review): this tests the pooled raw column, not within-group data or
# model residuals -- confirm this is the intended assumption check.
shapiro.test(df$Weight)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$Weight
## W = 0.9765, p-value < 2.2e-16
# Shapiro-Wilk normality test on the Height column.
# NOTE(review): this tests the pooled raw column, not within-group data or
# model residuals -- confirm this is the intended assumption check.
shapiro.test(df$Height)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$Height
## W = 0.99323, p-value = 2.772e-08

12. Homogenitas Varians (Levene Test)

# Levene's test: is the variance of Weight equal across genders?
leveneTest(Weight ~ Gender, data = df)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value    Pr(>F)    
## group    1  77.893 < 2.2e-16 ***
##       2109                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Levene's test: is the variance of Height equal across genders?
leveneTest(Height ~ Gender, data = df)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value Pr(>F)
## group    1  1.3857 0.2393
##       2109

13. Homogenitas Matriks Kovarians (Box’s M)

# Box's M test for equality of the (Weight, Height) covariance matrices
# between the Gender groups.
boxM(df[, c("Weight", "Height")], df$Gender)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  df[, c("Weight", "Height")]
## Chi-Sq (approx.) = 116.93, df = 3, p-value < 2.2e-16

14. MANOVA

# Two-response MANOVA: Weight and Height modelled jointly on Gender and
# family history of overweight (main effects only, no interaction).
# NOTE(review): the group sizes are unequal and the tests are sequential
# (Type I), so the order of the terms may affect the results -- confirm the
# ordering is intended.
manova_model <- manova(
  cbind(Weight, Height) ~ Gender + family_history_with_overweight,
  data = df
)

# Multivariate test using Wilks' lambda.
summary(manova_model, test = "Wilks")
##                                  Df   Wilks approx F num Df den Df    Pr(>F)
## Gender                            1 0.59421   719.43      2   2107 < 2.2e-16
## family_history_with_overweight    1 0.76060   331.59      2   2107 < 2.2e-16
## Residuals                      2108                                         
##                                   
## Gender                         ***
## family_history_with_overweight ***
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA fit, tested with Pillai's trace.
summary(manova_model, test = "Pillai")
##                                  Df  Pillai approx F num Df den Df    Pr(>F)
## Gender                            1 0.40579   719.43      2   2107 < 2.2e-16
## family_history_with_overweight    1 0.23940   331.59      2   2107 < 2.2e-16
## Residuals                      2108                                         
##                                   
## Gender                         ***
## family_history_with_overweight ***
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

15. MANCOVA

# MANCOVA: the MANOVA formula with Age appended as a numeric covariate.
# Age is entered last, after both factors.
mancova_model <- manova(
  cbind(Weight, Height) ~ Gender + family_history_with_overweight + Age,
  data = df
)

# Multivariate test using Wilks' lambda.
summary(mancova_model, test = "Wilks")
##                                  Df   Wilks approx F num Df den Df    Pr(>F)
## Gender                            1 0.58787   738.23      2   2106 < 2.2e-16
## family_history_with_overweight    1 0.75820   335.82      2   2106 < 2.2e-16
## Age                               1 0.95044    54.91      2   2106 < 2.2e-16
## Residuals                      2107                                         
##                                   
## Gender                         ***
## family_history_with_overweight ***
## Age                            ***
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate ANOVA table for each response (Weight, Height) from the
# MANCOVA fit.
summary.aov(mancova_model)
##  Response Weight :
##                                  Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                            1   37830   37830  75.346 < 2.2e-16 ***
## family_history_with_overweight    1  337373  337373 671.946 < 2.2e-16 ***
## Age                               1   14319   14319  28.518 1.028e-07 ***
## Residuals                      2107 1057890     502                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Height :
##                                  Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Gender                            1  7.0262  7.0262 1403.847 < 2.2e-16 ***
## family_history_with_overweight    1  0.6305  0.6305  125.965 < 2.2e-16 ***
## Age                               1  0.1670  0.1670   33.372 8.744e-09 ***
## Residuals                      2107 10.5455  0.0050                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

16. ANOVA PER VARIABEL

# Univariate ANOVA table for each response (Weight, Height) from the
# MANOVA fit.
summary.aov(manova_model)
##  Response Weight :
##                                  Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                            1   37830   37830  74.375 < 2.2e-16 ***
## family_history_with_overweight    1  337373  337373 663.287 < 2.2e-16 ***
## Residuals                      2108 1072209     509                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Height :
##                                  Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                            1  7.0262  7.0262 1382.62 < 2.2e-16 ***
## family_history_with_overweight    1  0.6305  0.6305  124.06 < 2.2e-16 ***
## Residuals                      2108 10.7125  0.0051                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate ANOVA table for each response from the MANCOVA fit.
# NOTE(review): this repeats the identical call made in the MANCOVA
# section -- confirm the duplication is intentional.
summary.aov(mancova_model)
##  Response Weight :
##                                  Df  Sum Sq Mean Sq F value    Pr(>F)    
## Gender                            1   37830   37830  75.346 < 2.2e-16 ***
## family_history_with_overweight    1  337373  337373 671.946 < 2.2e-16 ***
## Age                               1   14319   14319  28.518 1.028e-07 ***
## Residuals                      2107 1057890     502                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Height :
##                                  Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Gender                            1  7.0262  7.0262 1403.847 < 2.2e-16 ***
## family_history_with_overweight    1  0.6305  0.6305  125.965 < 2.2e-16 ***
## Age                               1  0.1670  0.1670   33.372 8.744e-09 ***
## Residuals                      2107 10.5455  0.0050                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

17. TEST TAMBAHAN MANOVA

# Pillai's trace for the MANOVA fit (repeats the Pillai summary already
# shown in the MANOVA section).
summary(manova_model, test = "Pillai")
##                                  Df  Pillai approx F num Df den Df    Pr(>F)
## Gender                            1 0.40579   719.43      2   2107 < 2.2e-16
## family_history_with_overweight    1 0.23940   331.59      2   2107 < 2.2e-16
## Residuals                      2108                                         
##                                   
## Gender                         ***
## family_history_with_overweight ***
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hotelling-Lawley trace for the MANOVA fit.
summary(manova_model, test = "Hotelling-Lawley")
##                                  Df Hotelling-Lawley approx F num Df den Df
## Gender                            1          0.68290   719.43      2   2107
## family_history_with_overweight    1          0.31475   331.59      2   2107
## Residuals                      2108                                        
##                                   Pr(>F)    
## Gender                         < 2.2e-16 ***
## family_history_with_overweight < 2.2e-16 ***
## Residuals                                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Roy's largest root for the MANOVA fit.
summary(manova_model, test = "Roy")
##                                  Df     Roy approx F num Df den Df    Pr(>F)
## Gender                            1 0.68290   719.43      2   2107 < 2.2e-16
## family_history_with_overweight    1 0.31475   331.59      2   2107 < 2.2e-16
## Residuals                      2108                                         
##                                   
## Gender                         ***
## family_history_with_overweight ***
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

18. EFFECT SIZE

# Effect size for the Weight response only: fit the univariate two-factor
# ANOVA, then report partial eta squared for each term.
# NOTE(review): Height has no matching effect-size computation -- confirm
# whether that omission is intended.
model_aov <- aov(
  Weight ~ Gender + family_history_with_overweight,
  data = df
)
eta_squared(model_aov, partial = TRUE)
## # Effect Size for ANOVA (Type I)
## 
## Parameter                      | Eta2 (partial) |       95% CI
## --------------------------------------------------------------
## Gender                         |           0.03 | [0.02, 1.00]
## family_history_with_overweight |           0.24 | [0.21, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

19. TAMPILKAN MODEL

# Auto-print the MANOVA fit: the call, sums of squares per term for each
# response, degrees of freedom, and residual standard errors.
manova_model
## Call:
##    manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight, 
##     data = df)
## 
## Terms:
##                    Gender family_history_with_overweight Residuals
## Weight            37830.2                       337373.2 1072209.1
## Height                7.0                            0.6      10.7
## Deg. of Freedom         1                              1      2108
## 
## Residual standard errors: 22.55301 0.07128706
## Estimated effects may be unbalanced
# Auto-print the MANCOVA fit: the call, sums of squares per term for each
# response, degrees of freedom, and residual standard errors.
mancova_model
## Call:
##    manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight + 
##     Age, data = df)
## 
## Terms:
##                    Gender family_history_with_overweight       Age Residuals
## Weight            37830.2                       337373.2   14318.7 1057890.5
## Height                7.0                            0.6       0.2      10.5
## Deg. of Freedom         1                              1         1      2107
## 
## Residual standard errors: 22.40723 0.07074592
## Estimated effects may be unbalanced