Library
library(tidyverse)
library(MVN)
library(heplots)
library(car)
library(psych)
library(corrplot)
library(GGally)
Persiapan Dataset
df <- read.csv("life_expectancy_for_modul 2.csv")
head(df)
Status GDP_category Schooling_category Income_composition_of_resources
1 Developing Low Low 0.479
2 Developing Low Low 0.476
3 Developing Low Low 0.470
4 Developing Low Low 0.463
5 Developing Low Low 0.454
6 Developing Low Low 0.448
BMI HIV.AIDS Life_expectancy Adult_Mortality
1 19.1 0.09531018 65.0 263
2 18.6 0.09531018 59.9 271
3 18.1 0.09531018 59.9 268
4 17.6 0.09531018 59.5 272
5 17.2 0.09531018 59.2 275
6 16.7 0.09531018 58.8 279
data_clean <- select(df, Status, GDP_category, Schooling_category,
Income_composition_of_resources, BMI, HIV_AIDS,
Life_expectancy, Adult_Mortality)
data_clean <- drop_na(data_clean)
data_clean <- mutate(data_clean, across(c(Status, GDP_category, Schooling_category), as.factor))
str(data_clean)
'data.frame': 1649 obs. of 8 variables:
$ Status : Factor w/ 2 levels "Developed","Developing": 2 2 2 2 2 2 2 2 2 2 ...
$ GDP_category : Factor w/ 3 levels "High","Low","Medium": 2 2 2 2 2 2 2 2 2 2 ...
$ Schooling_category : Factor w/ 3 levels "High","Low","Medium": 2 2 2 2 2 2 2 2 2 2 ...
$ Income_composition_of_resources: num 0.479 0.476 0.47 0.463 0.454 0.448 0.434 0.433 0.415 0.405 ...
$ BMI : num 19.1 18.6 18.1 17.6 17.2 16.7 16.2 15.7 15.2 14.7 ...
$ HIV_AIDS : num 0.0953 0.0953 0.0953 0.0953 0.0953 ...
$ Life_expectancy : num 65 59.9 59.9 59.5 59.2 58.8 58.6 58.1 57.5 57.3 ...
$ Adult_Mortality : int 263 271 268 272 275 279 281 287 295 295 ...
Exploratory Data
Analyst (EDA)
Statistika
Deskriptif
Status GDP_category Schooling_category
Developed : 242 High :550 High :547
Developing:1407 Low :550 Low :566
Medium:549 Medium:536
Income_composition_of_resources BMI HIV_AIDS
Min. :0.0000 Min. : 2.00 Min. :0.09531
1st Qu.:0.5090 1st Qu.:19.50 1st Qu.:0.09531
Median :0.6730 Median :43.70 Median :0.09531
Mean :0.6316 Mean :38.13 Mean :0.51807
3rd Qu.:0.7510 3rd Qu.:55.80 3rd Qu.:0.53063
Max. :0.9360 Max. :77.10 Max. :3.94352
Life_expectancy Adult_Mortality
Min. :44.0 Min. : 1.0
1st Qu.:64.4 1st Qu.: 77.0
Median :71.7 Median :148.0
Mean :69.3 Mean :168.2
3rd Qu.:75.0 3rd Qu.:227.0
Max. :89.0 Max. :723.0
Visualisasi
Distribusi Data


Visualisasi Matriks
Korelasi

Pengujian Prasyarat
MANOVA dan MANCOVA
Uji Mardia’s
Test
dep_vars <- data_clean %>% select(Life_expectancy, Adult_Mortality)
mardia_test <- MVN::mvn(data = dep_vars)
mardia_test[1:3]
$multivariate_normality
Test Statistic p.value Method MVN
1 Henze-Zirkler 78.166 <0.001 asymptotic ✗ Not normal
$univariate_normality
Test Variable Statistic p.value Normality
1 Anderson-Darling Life_expectancy 21.732 <0.001 ✗ Not normal
2 Anderson-Darling Adult_Mortality 28.092 <0.001 ✗ Not normal
$descriptives
Variable n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
1 Life_expectancy 1649 69.302 8.797 71.7 44 89 64.4 75 -0.628 3.037
2 Adult_Mortality 1649 168.215 125.310 148.0 1 723 77.0 227 1.275 5.390
Uji Box’s M Test
group_interaction <- interaction(data_clean$Status,
data_clean$GDP_category,
data_clean$Schooling_category)
box_result <- boxM(Y = dep_vars, group = group_interaction)
box_result
Box's M-test for Homogeneity of Covariance Matrices
data: dep_vars by group_interaction
Groups: 14 of 15 (1 excluded due to singular covariance matrices)
Chi-Sq (approx.) = 629.3085, df = 39, p-value = < 2.2e-16
Uji Bartlett’s
Test
cor_dep <- cor(dep_vars)
bartlett_result <- cortest.bartlett(cor_dep, n = nrow(dep_vars))
bartlett_result
$chisq
[1] 1120.126
$p.value
[1] 1.395295e-245
$df
[1] 1
Multivariate Analysis
of Variance (MANOVA)
model_manova <- manova(
cbind(Life_expectancy, Adult_Mortality) ~
GDP_category + Schooling_category + Status,
data = data_clean
)
summary(model_manova)
Df Pillai approx F num Df den Df Pr(>F)
GDP_category 2 0.35508 177.335 4 3286 < 2.2e-16 ***
Schooling_category 2 0.36783 185.136 4 3286 < 2.2e-16 ***
Status 1 0.03716 31.687 2 1642 3.143e-14 ***
Residuals 1643
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1