1. LOAD PACKAGES
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(biotools)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ---
## biotools version 4.3
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(effectsize)
##
## Attaching package: 'effectsize'
##
## The following object is masked from 'package:psych':
##
## phi
2. LOAD DATASET
# Read the obesity dataset CSV into the data frame `df`. The path is relative,
# so the file must be in the current working directory.
df <- read.csv("ObesityDataSet_raw_and_data_sinthetic.csv")
3. MEMBACA DATASET
# Print the first rows of `df` as a quick visual check of the import.
head(df)
## Age Gender Height Weight CALC FAVC FCVC NCP SCC SMOKE CH2O
## 1 21 Female 1.62 64.0 no no 2 3 no no 2
## 2 21 Female 1.52 56.0 Sometimes no 3 3 yes yes 3
## 3 23 Male 1.80 77.0 Frequently no 2 3 no no 2
## 4 27 Male 1.80 87.0 Frequently no 3 3 no no 2
## 5 22 Male 1.78 89.8 Sometimes no 2 1 no no 2
## 6 29 Male 1.62 53.0 Sometimes yes 2 3 no no 2
## family_history_with_overweight FAF TUE CAEC MTRANS
## 1 yes 0 1 Sometimes Public_Transportation
## 2 yes 3 0 Sometimes Public_Transportation
## 3 yes 2 1 Sometimes Public_Transportation
## 4 no 2 0 Sometimes Walking
## 5 no 0 0 Sometimes Public_Transportation
## 6 no 0 0 Sometimes Automobile
## NObeyesdad
## 1 Normal_Weight
## 2 Normal_Weight
## 3 Normal_Weight
## 4 Overweight_Level_I
## 5 Overweight_Level_II
## 6 Normal_Weight
4. CEK STRUKTUR DATASET
# Show the dimensions of `df` and the storage type of every column.
str(df)
## 'data.frame': 2111 obs. of 17 variables:
## $ Age : num 21 21 23 27 22 29 23 22 24 22 ...
## $ Gender : chr "Female" "Female" "Male" "Male" ...
## $ Height : num 1.62 1.52 1.8 1.8 1.78 1.62 1.5 1.64 1.78 1.72 ...
## $ Weight : num 64 56 77 87 89.8 53 55 53 64 68 ...
## $ CALC : chr "no" "Sometimes" "Frequently" "Frequently" ...
## $ FAVC : chr "no" "no" "no" "no" ...
## $ FCVC : num 2 3 2 3 2 2 3 2 3 2 ...
## $ NCP : num 3 3 3 3 1 3 3 3 3 3 ...
## $ SCC : chr "no" "yes" "no" "no" ...
## $ SMOKE : chr "no" "yes" "no" "no" ...
## $ CH2O : num 2 3 2 2 2 2 2 2 2 2 ...
## $ family_history_with_overweight: chr "yes" "yes" "yes" "no" ...
## $ FAF : num 0 3 2 2 0 0 1 3 1 1 ...
## $ TUE : num 1 0 1 0 0 0 0 0 1 1 ...
## $ CAEC : chr "Sometimes" "Sometimes" "Sometimes" "Sometimes" ...
## $ MTRANS : chr "Public_Transportation" "Public_Transportation" "Public_Transportation" "Walking" ...
## $ NObeyesdad : chr "Normal_Weight" "Normal_Weight" "Normal_Weight" "Overweight_Level_I" ...
5. TABEL DESKRIPTIF
# Descriptive statistics for every column of the full dataset.
# NOTE(review): rows marked with `*` in the output (Gender, CALC, FAVC, ...) are
# the character columns; they are summarised as numeric category codes, so
# their mean/sd/skew/kurtosis describe arbitrary codes and should not be
# interpreted. Frequency tables are the appropriate summary for those columns.
describe(df)
## vars n mean sd median trimmed mad
## Age 1 2111 24.31 6.35 22.78 23.34 4.78
## Gender* 2 2111 1.51 0.50 2.00 1.51 0.00
## Height 3 2111 1.70 0.09 1.70 1.70 0.10
## Weight 4 2111 86.59 26.19 83.00 85.82 32.22
## CALC* 5 2111 3.63 0.55 4.00 3.70 0.00
## FAVC* 6 2111 1.88 0.32 2.00 1.98 0.00
## FCVC 7 2111 2.42 0.53 2.39 2.46 0.57
## NCP 8 2111 2.69 0.78 3.00 2.77 0.00
## SCC* 9 2111 1.05 0.21 1.00 1.00 0.00
## SMOKE* 10 2111 1.02 0.14 1.00 1.00 0.00
## CH2O 11 2111 2.01 0.61 2.00 2.01 0.67
## family_history_with_overweight* 12 2111 1.82 0.39 2.00 1.90 0.00
## FAF 13 2111 1.01 0.85 1.00 0.94 1.19
## TUE 14 2111 0.66 0.61 0.63 0.59 0.72
## CAEC* 15 2111 3.67 0.78 4.00 3.87 0.00
## MTRANS* 16 2111 3.37 1.26 4.00 3.55 0.00
## NObeyesdad* 17 2111 4.02 1.95 4.00 4.02 2.97
## min max range skew kurtosis se
## Age 14.00 61.00 47.00 1.53 2.81 0.14
## Gender* 1.00 2.00 1.00 -0.02 -2.00 0.01
## Height 1.45 1.98 0.53 -0.01 -0.57 0.00
## Weight 39.00 173.00 134.00 0.26 -0.70 0.57
## CALC* 1.00 4.00 3.00 -1.17 0.46 0.01
## FAVC* 1.00 2.00 1.00 -2.40 3.74 0.01
## FCVC 1.00 3.00 2.00 -0.43 -0.64 0.01
## NCP 1.00 4.00 3.00 -1.11 0.38 0.02
## SCC* 1.00 2.00 1.00 4.36 17.02 0.00
## SMOKE* 1.00 2.00 1.00 6.70 42.95 0.00
## CH2O 1.00 3.00 2.00 -0.10 -0.88 0.01
## family_history_with_overweight* 1.00 2.00 1.00 -1.64 0.70 0.01
## FAF 0.00 3.00 3.00 0.50 -0.62 0.02
## TUE 0.00 2.00 2.00 0.62 -0.55 0.01
## CAEC* 1.00 4.00 3.00 -2.13 3.06 0.02
## MTRANS* 1.00 5.00 4.00 -1.28 -0.20 0.03
## NObeyesdad* 1.00 7.00 6.00 0.01 -1.19 0.04
6. PILIH VARIABEL
# Keep only the columns used in the analysis: the two responses (Weight,
# Height), the two grouping variables, and the Age covariate.
df <- df[c(
  "Weight",
  "Height",
  "Gender",
  "family_history_with_overweight",
  "Age"
)]
7. UBAH TIPE DATA
# Store the two grouping variables as factors.
df[c("Gender", "family_history_with_overweight")] <-
  lapply(df[c("Gender", "family_history_with_overweight")], as.factor)
# Make sure the covariate and both responses are stored as doubles.
df[c("Age", "Weight", "Height")] <-
  lapply(df[c("Age", "Weight", "Height")], as.numeric)
8. CEK MISSING VALUE
# Number of missing values (NA) in each retained column.
colSums(is.na(df))
## Weight Height
## 0 0
## Gender family_history_with_overweight
## 0 0
## Age
## 0
9. DESKRIPTIF STATISTIK
# Per-column summary of the analysis subset: quantiles and mean for the numeric
# columns, level counts for the two factors.
summary(df)
## Weight Height Gender family_history_with_overweight
## Min. : 39.00 Min. :1.450 Female:1043 no : 385
## 1st Qu.: 65.47 1st Qu.:1.630 Male :1068 yes:1726
## Median : 83.00 Median :1.700
## Mean : 86.59 Mean :1.702
## 3rd Qu.:107.43 3rd Qu.:1.768
## Max. :173.00 Max. :1.980
## Age
## Min. :14.00
## 1st Qu.:19.95
## Median :22.78
## Mean :24.31
## 3rd Qu.:26.00
## Max. :61.00
10. VISUALISASI
# Boxplots of each response split by gender.
boxplot(Weight ~ Gender, data = df, main = "Weight by Gender")

boxplot(Height ~ Gender, data = df, main = "Height by Gender")

# Histograms of each response over the whole sample.
hist(df[["Weight"]], main = "Histogram Weight", xlab = "Weight")

hist(df[["Height"]], main = "Histogram Height", xlab = "Height")

11. Normalitas (Shapiro-Wilk)
# Shapiro-Wilk normality test on Weight, pooled over all observations.
# NOTE(review): this checks the raw pooled variable, not the model residuals or
# the within-group distributions - confirm this is the intended check for the
# MANOVA normality assumption.
shapiro.test(df$Weight)
##
## Shapiro-Wilk normality test
##
## data: df$Weight
## W = 0.9765, p-value < 2.2e-16
# Shapiro-Wilk normality test on Height, pooled over all observations.
# NOTE(review): this checks the raw pooled variable, not the model residuals or
# the within-group distributions - confirm this is the intended check for the
# MANOVA normality assumption.
shapiro.test(df$Height)
##
## Shapiro-Wilk normality test
##
## data: df$Height
## W = 0.99323, p-value = 2.772e-08
12. Homogenitas Varians (Levene Test)
# Levene's test for equal variance of Weight between the Gender groups.
# NOTE(review): only Gender is checked; the models fitted later also use
# family_history_with_overweight as a grouping factor.
leveneTest(Weight ~ Gender, data=df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 77.893 < 2.2e-16 ***
## 2109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Levene's test for equal variance of Height between the Gender groups.
# NOTE(review): only Gender is checked; the models fitted later also use
# family_history_with_overweight as a grouping factor.
leveneTest(Height ~ Gender, data=df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.3857 0.2393
## 2109
13. Homogenitas Matriks Kovarians (Box’s M)
# Box's M test: are the Weight/Height covariance matrices equal across the
# Gender groups?
# NOTE(review): groups are defined by Gender only; the models fitted later also
# use family_history_with_overweight as a grouping factor.
boxM(df[, c("Weight", "Height")], df$Gender)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, c("Weight", "Height")]
## Chi-Sq (approx.) = 116.93, df = 3, p-value < 2.2e-16
14. MANOVA
# Two-response MANOVA: Weight and Height modelled jointly on Gender and family
# history of overweight (main effects only, no interaction term).
# NOTE(review): the tests are sequential - each term is adjusted only for the
# terms listed before it - and the printed model reports "Estimated effects may
# be unbalanced", so the order of terms in the formula affects the results.
manova_model <- manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight, data=df)
# Multivariate test for each term using Wilks' lambda.
summary(manova_model, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 1 0.59421 719.43 2 2107 < 2.2e-16
## family_history_with_overweight 1 0.76060 331.59 2 2107 < 2.2e-16
## Residuals 2108
##
## Gender ***
## family_history_with_overweight ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA, tested with Pillai's trace instead of Wilks' lambda.
summary(manova_model, test="Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.40579 719.43 2 2107 < 2.2e-16
## family_history_with_overweight 1 0.23940 331.59 2 2107 < 2.2e-16
## Residuals 2108
##
## Gender ***
## family_history_with_overweight ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
15. MANCOVA
# MANCOVA: the same two responses and factors, with Age added as a numeric
# covariate.
# NOTE(review): Age is entered last and the tests are sequential, so Age is
# adjusted for the factors but the Gender and family-history tests are NOT
# adjusted for Age (their sums of squares are identical to those of the model
# without Age). If covariate-adjusted factor effects are intended, list Age
# first in the formula or use Type II tests, e.g. car::Manova(mancova_model).
mancova_model <- manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight + Age, data=df)
# Multivariate test for each term using Wilks' lambda.
summary(mancova_model, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 1 0.58787 738.23 2 2106 < 2.2e-16
## family_history_with_overweight 1 0.75820 335.82 2 2106 < 2.2e-16
## Age 1 0.95044 54.91 2 2106 < 2.2e-16
## Residuals 2107
##
## Gender ***
## family_history_with_overweight ***
## Age ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate follow-up: one ANOVA table per response (Weight, Height) for the
# MANCOVA model.
summary.aov(mancova_model)
## Response Weight :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 37830 37830 75.346 < 2.2e-16 ***
## family_history_with_overweight 1 337373 337373 671.946 < 2.2e-16 ***
## Age 1 14319 14319 28.518 1.028e-07 ***
## Residuals 2107 1057890 502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Height :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 7.0262 7.0262 1403.847 < 2.2e-16 ***
## family_history_with_overweight 1 0.6305 0.6305 125.965 < 2.2e-16 ***
## Age 1 0.1670 0.1670 33.372 8.744e-09 ***
## Residuals 2107 10.5455 0.0050
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
16. ANOVA PER VARIABEL
# Univariate follow-up: one ANOVA table per response (Weight, Height) for the
# MANOVA model without the Age covariate.
summary.aov(manova_model)
## Response Weight :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 37830 37830 74.375 < 2.2e-16 ***
## family_history_with_overweight 1 337373 337373 663.287 < 2.2e-16 ***
## Residuals 2108 1072209 509
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Height :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 7.0262 7.0262 1382.62 < 2.2e-16 ***
## family_history_with_overweight 1 0.6305 0.6305 124.06 < 2.2e-16 ***
## Residuals 2108 10.7125 0.0051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate ANOVA tables for the MANCOVA model.
# NOTE(review): this is the same call as the one at the end of section 15 and
# prints identical output; one of the two can be dropped.
summary.aov(mancova_model)
## Response Weight :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 37830 37830 75.346 < 2.2e-16 ***
## family_history_with_overweight 1 337373 337373 671.946 < 2.2e-16 ***
## Age 1 14319 14319 28.518 1.028e-07 ***
## Residuals 2107 1057890 502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Height :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 7.0262 7.0262 1403.847 < 2.2e-16 ***
## family_history_with_overweight 1 0.6305 0.6305 125.965 < 2.2e-16 ***
## Age 1 0.1670 0.1670 33.372 8.744e-09 ***
## Residuals 2107 10.5455 0.0050
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
17. TEST TAMBAHAN MANOVA
# Pillai's trace for the MANOVA model.
# NOTE(review): this is the same call as the Pillai test in section 14 and
# prints identical output; one of the two can be dropped.
summary(manova_model, test="Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.40579 719.43 2 2107 < 2.2e-16
## family_history_with_overweight 1 0.23940 331.59 2 2107 < 2.2e-16
## Residuals 2108
##
## Gender ***
## family_history_with_overweight ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hotelling-Lawley trace for the MANOVA model.
# Every term has 1 degree of freedom, and the approximate F values printed here
# are the same as for Wilks and Pillai, so this statistic adds no new evidence.
summary(manova_model, test="Hotelling-Lawley")
## Df Hotelling-Lawley approx F num Df den Df
## Gender 1 0.68290 719.43 2 2107
## family_history_with_overweight 1 0.31475 331.59 2 2107
## Residuals 2108
## Pr(>F)
## Gender < 2.2e-16 ***
## family_history_with_overweight < 2.2e-16 ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Roy's largest root for the MANOVA model.
# Every term has 1 degree of freedom, and the approximate F values printed here
# are the same as for Wilks and Pillai, so this statistic adds no new evidence.
summary(manova_model, test="Roy")
## Df Roy approx F num Df den Df Pr(>F)
## Gender 1 0.68290 719.43 2 2107 < 2.2e-16
## family_history_with_overweight 1 0.31475 331.59 2 2107 < 2.2e-16
## Residuals 2108
##
## Gender ***
## family_history_with_overweight ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
18. EFFECT SIZE
# Univariate ANOVA on Weight only, fitted so effect sizes can be computed.
model_aov <- aov(Weight ~ Gender + family_history_with_overweight, data=df)
# Effect size per term; the output labels it "Eta2 (partial)" for a Type I
# ANOVA.
# NOTE(review): Height and the multivariate effects get no effect size here.
eta_squared(model_aov)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## --------------------------------------------------------------
## Gender | 0.03 | [0.02, 1.00]
## family_history_with_overweight | 0.24 | [0.21, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
19. TAMPILKAN MODEL
# Print the fitted MANOVA object: call, per-term sums of squares for each
# response, degrees of freedom and residual standard errors.
manova_model
## Call:
## manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight,
## data = df)
##
## Terms:
## Gender family_history_with_overweight Residuals
## Weight 37830.2 337373.2 1072209.1
## Height 7.0 0.6 10.7
## Deg. of Freedom 1 1 2108
##
## Residual standard errors: 22.55301 0.07128706
## Estimated effects may be unbalanced
# Print the fitted MANCOVA object: call, per-term sums of squares for each
# response, degrees of freedom and residual standard errors.
mancova_model
## Call:
## manova(cbind(Weight, Height) ~ Gender + family_history_with_overweight +
## Age, data = df)
##
## Terms:
## Gender family_history_with_overweight Age Residuals
## Weight 37830.2 337373.2 14318.7 1057890.5
## Height 7.0 0.6 0.2 10.5
## Deg. of Freedom 1 1 1 2107
##
## Residual standard errors: 22.40723 0.07074592
## Estimated effects may be unbalanced