Analisis dilakukan menggunakan MANCOVA, MANOVA, ANCOVA. Dengan
Calories Burned dan Average BPM sebagai variabel dependen, Workout Type
sebagai variabel independen, serta Session Duration sebagai kovariat.
Kovariat ini digunakan untuk mengontrol pengaruh durasi latihan sehingga
perbedaan antar jenis workout dapat dianalisis secara lebih akurat.
Library yang diperlukan
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(heplots)
## Warning: package 'heplots' was built under R version 4.5.3
## Loading required package: broom
library(ggpubr)
library(knitr)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:MVN':
##
## mardia
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(purrr)
Data Uploading
df <- read.csv("C:/Users/WIN 10/Downloads/spotify/gym_members_exercise_tracking.csv",
stringsAsFactors = FALSE)
“Untuk mengetahui pengaruh jenis workout terhadap performa fisik
dengan mengontrol usia.”
Data Cleaning
cat("Jumlah baris x kolom:", dim(df), "\n")
## Jumlah baris x kolom: 973 15
print(names(df))
## [1] "Age" "Gender"
## [3] "Weight..kg." "Height..m."
## [5] "Max_BPM" "Avg_BPM"
## [7] "Resting_BPM" "Session_Duration..hours."
## [9] "Calories_Burned" "Workout_Type"
## [11] "Fat_Percentage" "Water_Intake..liters."
## [13] "Workout_Frequency..days.week." "Experience_Level"
## [15] "BMI"
head(df)
## Age Gender Weight..kg. Height..m. Max_BPM Avg_BPM Resting_BPM
## 1 56 Male 88.3 1.71 180 157 60
## 2 46 Female 74.9 1.53 179 151 66
## 3 32 Female 68.1 1.66 167 122 54
## 4 25 Male 53.2 1.70 190 164 56
## 5 38 Male 46.1 1.79 188 158 68
## 6 56 Female 58.0 1.68 168 156 74
## Session_Duration..hours. Calories_Burned Workout_Type Fat_Percentage
## 1 1.69 1313 Yoga 12.6
## 2 1.30 883 HIIT 33.9
## 3 1.11 677 Cardio 33.4
## 4 0.59 532 Strength 28.8
## 5 0.64 556 Strength 29.2
## 6 1.59 1116 HIIT 15.5
## Water_Intake..liters. Workout_Frequency..days.week. Experience_Level BMI
## 1 3.5 4 3 30.20
## 2 2.1 4 2 32.00
## 3 2.3 4 2 24.71
## 4 2.1 3 1 18.41
## 5 2.8 3 1 14.39
## 6 2.7 5 3 20.55
Statistik Deskriptif
summary(df[, c("Calories_Burned", "Avg_BPM",
"Workout_Type", "Session_Duration..hours.", "Age")])
## Calories_Burned Avg_BPM Workout_Type Session_Duration..hours.
## Min. : 303.0 Min. :120.0 Length:973 Min. :0.500
## 1st Qu.: 720.0 1st Qu.:131.0 Class :character 1st Qu.:1.040
## Median : 893.0 Median :143.0 Mode :character Median :1.260
## Mean : 905.4 Mean :143.8 Mean :1.256
## 3rd Qu.:1076.0 3rd Qu.:156.0 3rd Qu.:1.460
## Max. :1783.0 Max. :169.0 Max. :2.000
## Age
## Min. :18.00
## 1st Qu.:28.00
## Median :40.00
## Mean :38.68
## 3rd Qu.:49.00
## Max. :59.00
df_clean <- df %>%
select(
Workout_Type,
Calories_Burned,
Avg_BPM,
Session_Duration = Session_Duration..hours.,
Age
)
cat("Dimensi data setelah seleksi variabel:", dim(df_clean), "\n")
## Dimensi data setelah seleksi variabel: 973 5
cat("\nNama kolom baru:\n")
##
## Nama kolom baru:
print(names(df_clean))
## [1] "Workout_Type" "Calories_Burned" "Avg_BPM" "Session_Duration"
## [5] "Age"
cat("\nPreview:\n")
##
## Preview:
head(df_clean)
## Workout_Type Calories_Burned Avg_BPM Session_Duration Age
## 1 Yoga 1313 157 1.69 56
## 2 HIIT 883 151 1.30 46
## 3 Cardio 677 122 1.11 32
## 4 Strength 532 164 0.59 25
## 5 Strength 556 158 0.64 38
## 6 HIIT 1116 156 1.59 56
cat("Missing values per kolom:\n")
## Missing values per kolom:
print(colSums(is.na(df_clean)))
## Workout_Type Calories_Burned Avg_BPM Session_Duration
## 0 0 0 0
## Age
## 0
cat("\nTotal missing values:", sum(is.na(df_clean)), "\n")
##
## Total missing values: 0
cat("\nPersentase missing (%):\n")
##
## Persentase missing (%):
print(round(colMeans(is.na(df_clean)) * 100, 2))
## Workout_Type Calories_Burned Avg_BPM Session_Duration
## 0 0 0 0
## Age
## 0
Visualisasi Data
deskriptif_rapi <- df_clean %>%
group_by(Workout_Type) %>%
summarise(
n = n(),
Calories_Burned = paste0(round(mean(Calories_Burned), 2),
" (", round(sd(Calories_Burned), 2), ")"),
Avg_BPM = paste0(round(mean(Avg_BPM), 2),
" (", round(sd(Avg_BPM), 2), ")"),
Session_Duration = paste0(round(mean(Session_Duration), 2),
" (", round(sd(Session_Duration), 2), ")"),
Age = paste0(round(mean(Age), 2),
" (", round(sd(Age), 2), ")"),
.groups = "drop"
)
cat("\nStatistik deskriptif format Mean (SD):\n")
##
## Statistik deskriptif format Mean (SD):
print(deskriptif_rapi)
## # A tibble: 4 × 6
## Workout_Type n Calories_Burned Avg_BPM Session_Duration Age
## <fct> <int> <chr> <chr> <chr> <chr>
## 1 Cardio 255 884.51 (270.2) 143.89 (13.97) 1.22 (0.34) 37.67 (12.…
## 2 HIIT 221 925.81 (274.49) 143.52 (14.28) 1.29 (0.33) 38.95 (12.…
## 3 Strength 258 910.7 (270.26) 144.31 (15.08) 1.26 (0.34) 38.95 (12.…
## 4 Yoga 239 903.19 (276.14) 143.27 (14.06) 1.26 (0.36) 39.23 (12.…
p1 <- ggplot(df_clean, aes(x = Workout_Type,
y = Calories_Burned,
fill = Workout_Type)) +
geom_boxplot(alpha = 0.7) +
labs(title = "Calories Burned per Workout Type",
x = "Workout Type", y = "Calories Burned") +
theme_minimal() +
theme(legend.position = "none")
ggarrange(p1)

p2 <- ggplot(df_clean, aes(x = Workout_Type,
y = Avg_BPM,
fill = Workout_Type)) +
geom_boxplot(alpha = 0.7) +
labs(title = "Avg BPM per Workout Type",
x = "Workout Type", y = "Avg BPM") +
theme_minimal() +
theme(legend.position = "none")
ggarrange(p2)

p3 <- df_clean %>%
count(Workout_Type)
ggplot(p3, aes(x = "", y = n, fill = Workout_Type)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y") +
labs(title = "Distribusi Workout Type",
fill = "Workout Type") +
theme_void()

DVS <- c("Calories_Burned", "Avg_BPM")
COV <- "Session_Duration"
IV <- "Workout_Type"
Uji Asumsi MANOVA
Normalitas Multivariat
mardia_result <- psych::mardia(df_clean[, DVS], plot = FALSE)
p_skew <- mardia_result$p.skew
z_kurt <- mardia_result$kurtosis
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))
tibble(
Komponen = c("Mardia Skewness", "Mardia Kurtosis"),
Statistik = c(round(mardia_result$b1p, 4), round(mardia_result$b2p, 4)),
`Z / Chi²` = c("—", round(z_kurt, 4)),
`p-value` = c(round(p_skew, 4), round(p_kurt, 4))
) %>%
kable(caption = "Uji Normalitas Multivariat (Mardia) - MANOVA") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Normalitas Multivariat (Mardia) - MANOVA
|
Komponen
|
Statistik
|
Z / Chi²
|
p-value
|
|
Mardia Skewness
|
0.1127
|
—
|
0.0011
|
|
Mardia Kurtosis
|
6.6332
|
-5.3294
|
0.0000
|
Box’s M dan Lavene
print(bm <- boxM(df[, DVS], df[[IV]]))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, DVS] by df[[IV]]
## Chi-Sq (approx.) = 5.4785, df = 9, p-value = 0.7908
levene1 <- leveneTest(Calories_Burned ~ Workout_Type, data = df_clean)
levene2 <- leveneTest(Avg_BPM ~ Workout_Type, data = df_clean)
hasil_levene <- data.frame(
Variabel = c("Calories_Burned", "Avg_BPM"),
F_value = c(levene1$`F value`[1], levene2$`F value`[1]),
p_value = c(levene1$`Pr(>F)`[1], levene2$`Pr(>F)`[1])
)
hasil_levene %>%
kable(caption = "Hasil Levene’s Test") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Hasil Levene’s Test
|
Variabel
|
F_value
|
p_value
|
|
Calories_Burned
|
0.3502329
|
0.7889936
|
|
Avg_BPM
|
1.9458120
|
0.1205505
|
Korelasi Variabel Dependan dan Deteksi Outlier
cor_matrix <- cor(df_clean[, DVS])
cor_matrix %>%
kable(caption = "Matriks Korelasi Antar Variabel Dependen") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Matriks Korelasi Antar Variabel Dependen
|
|
Calories_Burned
|
Avg_BPM
|
|
Calories_Burned
|
1.0000000
|
0.3396587
|
|
Avg_BPM
|
0.3396587
|
1.0000000
|
data_manova <- df_clean[, DVS]
center <- colMeans(data_manova)
cov_mat <- cov(data_manova)
md <- mahalanobis(data_manova, center, cov_mat)
df_md <- df_clean %>%
mutate(Mahalanobis_D = md)
cutoff <- qchisq(0.999, df = length(DVS))
outliers <- df_md %>%
filter(Mahalanobis_D > cutoff)
data.frame(
Keterangan = c("Jumlah data", "Jumlah outlier", "Cutoff (Chi-square 0.001)"),
Nilai = c(nrow(df_md), nrow(outliers), round(cutoff, 4))
) %>%
kable(caption = "Deteksi Outlier Multivariat (Mahalanobis Distance)") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Deteksi Outlier Multivariat (Mahalanobis Distance)
|
Keterangan
|
Nilai
|
|
Jumlah data
|
973.0000
|
|
Jumlah outlier
|
0.0000
|
|
Cutoff (Chi-square 0.001)
|
13.8155
|
MANOVA
model_manova <- manova(cbind(Calories_Burned, Avg_BPM) ~ Workout_Type, data = df_clean)
hasil <- summary(model_manova, test = "Pillai")
p_value <- hasil$stats[1, "Pr(>F)"]
F_value <- hasil$stats[1, "approx F"]
tibble(
Uji = "Pillai's Trace",
`F value` = round(F_value, 4),
`p-value` = round(p_value, 4)
) %>%
kable(caption = "Hasil Uji MANOVA") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Hasil Uji MANOVA
|
Uji
|
F value
|
p-value
|
|
Pillai’s Trace
|
0.7005
|
0.6492
|
Uji Asumsi MANCOVA
Normalitas Multivariat
par(mfrow = c(1, 2))
qqnorm(df_clean$Calories_Burned, main = "Q-Q Plot: Calories Burned")
qqline(df_clean$Calories_Burned, col = "#c0392b", lwd = 2)
qqnorm(df_clean$Avg_BPM, main = "Q-Q Plot: Rata-rata BPM")
qqline(df_clean$Avg_BPM, col = "#2e86c1", lwd = 2)

sw_results <- map_df(DVS, function(v) {
sw <- shapiro.test(df[[v]])
tibble(
Variabel = v,
W = round(sw$statistic, 5),
`p-value` = round(sw$p.value, 5),
Status = ifelse(sw$p.value >= 0.05, "Normal", "Tidak Normal")
)
})
kable(sw_results, caption = "Shapiro-Wilk Test per Variabel Dependen") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Shapiro-Wilk Test per Variabel Dependen
|
Variabel
|
W
|
p-value
|
Status
|
|
Calories_Burned
|
0.99176
|
3e-05
|
Tidak Normal
|
|
Avg_BPM
|
0.95325
|
0e+00
|
Tidak Normal
|
Box’s M
print(box_m <- boxM(df_clean[, DVS], df_clean[[IV]]))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df_clean[, DVS] by df_clean[[IV]]
## Chi-Sq (approx.) = 5.4785, df = 9, p-value = 0.7908
Linearitas Kovariat
model_lin1 <- lm(Calories_Burned ~ Session_Duration, data = df_clean)
model_quad1 <- lm(Calories_Burned ~ Session_Duration + I(Session_Duration^2), data = df_clean)
model_lin2 <- lm(Avg_BPM ~ Session_Duration, data = df_clean)
model_quad2 <- lm(Avg_BPM ~ Session_Duration + I(Session_Duration^2), data = df_clean)
anova1 <- anova(model_lin1, model_quad1)
anova2 <- anova(model_lin2, model_quad2)
tibble(
Variabel = c("Calories_Burned", "Avg_BPM"),
p_value = c(anova1$`Pr(>F)`[2], anova2$`Pr(>F)`[2])
) %>%
kable(caption = "Uji Linearitas Covariate (Session Duration)") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Linearitas Covariate (Session Duration)
|
Variabel
|
p_value
|
|
Calories_Burned
|
0.5586277
|
|
Avg_BPM
|
0.2185855
|
df_scatter <- df_clean %>%
pivot_longer(cols = all_of(DVS), names_to = "DV", values_to = "Nilai_DV")
ggplot(df_scatter, aes(x = Session_Duration, y = Nilai_DV,
color = Workout_Type)) +
geom_point(alpha = 0.4, size = 1.8) +
geom_smooth(method = "lm", se = TRUE, aes(group = 1),
color = "#2c3e50", linewidth = 1, linetype = "dashed") +
facet_wrap(~DV, scales = "free_y", ncol = 2) +
scale_color_manual(values = c(
"Cardio" = "#2e86c1",
"HIIT" = "#c0392b",
"Strength" = "#1e8449",
"Yoga" = "#d35400"
)) +
labs(title = "Linearitas: Session_Duration vs Setiap DV",
subtitle = "Garis regresi linear keseluruhan ditampilkan",
x = "Session Duration — hours (Covariate)",
y = "Nilai DV",
color = "Workout Type") +
theme_minimal() +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'

Independensi antar covariat dari faktor
anova_cov <- aov(Session_Duration ~ Workout_Type, data = df_clean)
anova_res <- summary(anova_cov)[[1]]
tibble(
Sumber = rownames(anova_res)[1],
`F value` = round(anova_res$`F value`[1], 4),
`p-value` = round(anova_res$`Pr(>F)`[1], 4)
) %>%
kable(caption = "Uji Independensi Covariate dan Faktor") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Independensi Covariate dan Faktor
|
Sumber
|
F value
|
p-value
|
|
Workout_Type
|
1.5827
|
0.1919
|
MANCOVA
DV_matrix <- cbind(df_clean$Calories_Burned, df_clean$Avg_BPM)
model_mancova <- lm(DV_matrix ~ Workout_Type + Session_Duration,
data = df_clean)
summary(model_mancova, test = "Pillai")
## Response Y1 :
##
## Call:
## lm(formula = Y1 ~ Workout_Type + Session_Duration, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -343.14 -74.07 -5.38 70.61 397.41
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.137 14.901 0.211 0.833
## Workout_TypeHIIT -7.094 10.528 -0.674 0.501
## Workout_TypeStrength -2.767 10.101 -0.274 0.784
## Workout_TypeYoga -12.220 10.300 -1.186 0.236
## Session_Duration 722.393 10.712 67.435 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 114.3 on 968 degrees of freedom
## Multiple R-squared: 0.825, Adjusted R-squared: 0.8243
## F-statistic: 1141 on 4 and 968 DF, p-value: < 2.2e-16
##
##
## Response Y2 :
##
## Call:
## lm(formula = Y2 ~ Workout_Type + Session_Duration, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.5377 -12.3768 -0.8321 12.5500 26.2658
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 143.0367 1.8733 76.357 <2e-16 ***
## Workout_TypeHIIT -0.4122 1.3235 -0.311 0.756
## Workout_TypeStrength 0.3957 1.2698 0.312 0.755
## Workout_TypeYoga -0.6523 1.2948 -0.504 0.615
## Session_Duration 0.6995 1.3467 0.519 0.604
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.37 on 968 degrees of freedom
## Multiple R-squared: 0.001046, Adjusted R-squared: -0.003082
## F-statistic: 0.2535 on 4 and 968 DF, p-value: 0.9076
Uji Asumsi ANCOVA
Residual
model1 <- lm(Calories_Burned ~ Workout_Type + Session_Duration, data = df_clean)
model2 <- lm(Avg_BPM ~ Workout_Type + Session_Duration, data = df_clean)
res1 <- residuals(model1)
res2 <- residuals(model2)
shapiro1 <- shapiro.test(res1)
shapiro2 <- shapiro.test(res2)
tibble(
Variabel = c("Calories_Burned", "Avg_BPM"),
Statistik = c(round(shapiro1$statistic, 4), round(shapiro2$statistic, 4)),
`p-value` = c(round(shapiro1$p.value, 4), round(shapiro2$p.value, 4))
) %>%
kable(caption = "Uji Normalitas Residual (Shapiro-Wilk) - ANCOVA") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Normalitas Residual (Shapiro-Wilk) - ANCOVA
|
Variabel
|
Statistik
|
p-value
|
|
Calories_Burned
|
0.9953
|
0.0045
|
|
Avg_BPM
|
0.9555
|
0.0000
|
Homogenitas
model1 <- lm(Calories_Burned ~ Workout_Type + Session_Duration, data = df_clean)
model2 <- lm(Avg_BPM ~ Workout_Type + Session_Duration, data = df_clean)
levene1 <- leveneTest(residuals(model1) ~ df_clean$Workout_Type)
levene2 <- leveneTest(residuals(model2) ~ df_clean$Workout_Type)
tibble(
Variabel = c("Calories_Burned", "Avg_BPM"),
`F value` = c(levene1$`F value`[1], levene2$`F value`[1]),
`p-value` = c(levene1$`Pr(>F)`[1], levene2$`Pr(>F)`[1])
) %>%
kable(caption = "Uji Homogenitas Varians (Levene) - ANCOVA") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Homogenitas Varians (Levene) - ANCOVA
|
Variabel
|
F value
|
p-value
|
|
Calories_Burned
|
2.084828
|
0.1006057
|
|
Avg_BPM
|
2.026308
|
0.1085822
|
Linearitas
lin1 <- lm(Calories_Burned ~ Session_Duration, data = df_clean)
quad1 <- lm(Calories_Burned ~ Session_Duration + I(Session_Duration^2), data = df_clean)
lin2 <- lm(Avg_BPM ~ Session_Duration, data = df_clean)
quad2 <- lm(Avg_BPM ~ Session_Duration + I(Session_Duration^2), data = df_clean)
anova1 <- anova(lin1, quad1)
anova2 <- anova(lin2, quad2)
tibble(
Variabel = c("Calories_Burned", "Avg_BPM"),
`p-value` = c(anova1$`Pr(>F)`[2], anova2$`Pr(>F)`[2])
) %>%
kable(caption = "Uji Linearitas (ANCOVA)") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Linearitas (ANCOVA)
|
Variabel
|
p-value
|
|
Calories_Burned
|
0.5586277
|
|
Avg_BPM
|
0.2185855
|
Homogenitas Slope
model1 <- lm(Calories_Burned ~ Workout_Type * Session_Duration, data = df_clean)
model2 <- lm(Avg_BPM ~ Workout_Type * Session_Duration, data = df_clean)
anova1 <- anova(model1)
anova2 <- anova(model2)
tibble(
Variabel = c("Calories_Burned", "Avg_BPM"),
`p-value` = c(
anova1$`Pr(>F)`[which(rownames(anova1) == "Workout_Type:Session_Duration")],
anova2$`Pr(>F)`[which(rownames(anova2) == "Workout_Type:Session_Duration")]
)
) %>%
kable(caption = "Uji Homogeneity of Regression Slopes (ANCOVA)") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Uji Homogeneity of Regression Slopes (ANCOVA)
|
Variabel
|
p-value
|
|
Calories_Burned
|
0.1986213
|
|
Avg_BPM
|
0.1250827
|