library(psych)
# Read the gym-member exercise tracking records (path is relative to the
# working directory) and print the first rows as a sanity check of the import.
data <- read.csv(file = "gym_members_exercise_tracking.csv")
head(x = data)
## Age Gender Weight..kg. Height..m. Max_BPM Avg_BPM Resting_BPM
## 1 56 Male 88.3 1.71 180 157 60
## 2 46 Female 74.9 1.53 179 151 66
## 3 32 Female 68.1 1.66 167 122 54
## 4 25 Male 53.2 1.70 190 164 56
## 5 38 Male 46.1 1.79 188 158 68
## 6 56 Female 58.0 1.68 168 156 74
## Session_Duration..hours. Calories_Burned Workout_Type Fat_Percentage
## 1 1.69 1313 Yoga 12.6
## 2 1.30 883 HIIT 33.9
## 3 1.11 677 Cardio 33.4
## 4 0.59 532 Strength 28.8
## 5 0.64 556 Strength 29.2
## 6 1.59 1116 HIIT 15.5
## Water_Intake..liters. Workout_Frequency..days.week. Experience_Level BMI
## 1 3.5 4 3 30.20
## 2 2.1 4 2 32.00
## 3 2.3 4 2 24.71
## 4 2.1 3 1 18.41
## 5 2.8 3 1 14.39
## 6 2.7 5 3 20.55
# Convert the two categorical columns to factors in a single assignment.
data[c("Gender", "Workout_Type")] <- lapply(
  data[c("Gender", "Workout_Type")],
  as.factor
)

# Descriptive statistics for every column; the factor columns are summarised
# on their integer codes and are marked with "*" in the printed table.
describe(data)
## vars n mean sd median trimmed mad
## Age 1 973 38.68 12.18 40.00 38.79 14.83
## Gender* 2 973 1.53 0.50 2.00 1.53 0.00
## Weight..kg. 3 973 73.85 21.21 70.00 71.85 19.87
## Height..m. 4 973 1.72 0.13 1.71 1.72 0.13
## Max_BPM 5 973 179.88 11.53 180.00 179.94 14.83
## Avg_BPM 6 973 143.77 14.35 143.00 143.62 17.79
## Resting_BPM 7 973 62.22 7.33 62.00 62.29 8.90
## Session_Duration..hours. 8 973 1.26 0.34 1.26 1.26 0.30
## Calories_Burned 9 973 905.42 272.64 893.00 898.76 263.90
## Workout_Type* 10 973 2.49 1.13 3.00 2.49 1.48
## Fat_Percentage 11 973 24.98 6.26 26.20 25.46 5.49
## Water_Intake..liters. 12 973 2.63 0.60 2.60 2.63 0.74
## Workout_Frequency..days.week. 13 973 3.32 0.91 3.00 3.28 1.48
## Experience_Level 14 973 1.81 0.74 2.00 1.76 1.48
## BMI 15 973 24.91 6.66 24.16 24.43 6.33
## min max range skew kurtosis se
## Age 18.00 59.00 41.00 -0.08 -1.22 0.39
## Gender* 1.00 2.00 1.00 -0.10 -1.99 0.02
## Weight..kg. 40.00 129.90 89.90 0.77 -0.04 0.68
## Height..m. 1.50 2.00 0.50 0.34 -0.73 0.00
## Max_BPM 160.00 199.00 39.00 -0.04 -1.19 0.37
## Avg_BPM 120.00 169.00 49.00 0.09 -1.20 0.46
## Resting_BPM 50.00 74.00 24.00 -0.07 -1.19 0.23
## Session_Duration..hours. 0.50 2.00 1.50 0.03 -0.36 0.01
## Calories_Burned 303.00 1783.00 1480.00 0.28 -0.07 8.74
## Workout_Type* 1.00 4.00 3.00 -0.02 -1.38 0.04
## Fat_Percentage 10.00 35.00 25.00 -0.63 -0.35 0.20
## Water_Intake..liters. 1.50 3.70 2.20 0.07 -1.03 0.02
## Workout_Frequency..days.week. 2.00 5.00 3.00 0.15 -0.81 0.03
## Experience_Level 1.00 3.00 2.00 0.32 -1.13 0.02
## BMI 12.32 49.84 37.52 0.76 0.73 0.21
# Exploratory plots for the three outcome measures analysed later in the
# script (Calories_Burned, Avg_BPM, Resting_BPM).

# Marginal distribution of each outcome.
hist(data$Calories_Burned, main="Histogram Calories Burned")
hist(data$Avg_BPM, main="Histogram Avg BPM")
hist(data$Resting_BPM, main="Histogram Resting BPM")
# Each outcome split by Gender.
boxplot(Calories_Burned ~ Gender, data=data)
boxplot(Avg_BPM ~ Gender, data=data)
boxplot(Resting_BPM ~ Gender, data=data)
# Each outcome split by Workout_Type.
boxplot(Calories_Burned ~ Workout_Type, data=data)
boxplot(Avg_BPM ~ Workout_Type, data=data)
boxplot(Resting_BPM ~ Workout_Type, data=data)
# Number of missing values in each column.
colSums(is.na(data))
## Age Gender
## 0 0
## Weight..kg. Height..m.
## 0 0
## Max_BPM Avg_BPM
## 0 0
## Resting_BPM Session_Duration..hours.
## 0 0
## Calories_Burned Workout_Type
## 0 0
## Fat_Percentage Water_Intake..liters.
## 0 0
## Workout_Frequency..days.week. Experience_Level
## 0 0
## BMI
## 0
# Drop any row containing a missing value before the numeric screening below.
data <- na.omit(data)

# Univariate outlier screen: standardise every numeric column and count, per
# column, the observations lying more than 3 standard deviations from the
# column mean. Each statement is on its own line; fused onto a single line
# without separators they do not parse.
# drop = FALSE keeps a data frame even if only one numeric column exists.
num_var <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
z_scores <- scale(num_var)
outlier_per_var <- colSums(abs(z_scores) > 3)
outlier_per_var
# Partition the columns by storage type: numeric columns and factor columns.
num_data <- data[, vapply(data, is.numeric, logical(1))]
cat_data <- data[, vapply(data, is.factor, logical(1))]

# The three outcome measures are kept apart from the numeric predictors.
Y_num <- data[, c("Calories_Burned", "Avg_BPM", "Resting_BPM")]
num_data <- num_data[, setdiff(colnames(num_data), colnames(Y_num))]

# Pearson correlations among the outcomes (listed first) and all remaining
# numeric predictors.
cor_matrix <- cor(cbind(Y_num, num_data))
cor_matrix
## Calories_Burned Avg_BPM Resting_BPM
## Calories_Burned 1.000000000 0.3396586672 0.016517951
## Avg_BPM 0.339658667 1.0000000000 0.059635502
## Resting_BPM 0.016517951 0.0596355022 1.000000000
## Age -0.154678760 0.0359691433 0.004353714
## Weight..kg. 0.095443473 0.0097174780 -0.032138091
## Height..m. 0.086348051 -0.0147762881 -0.005089864
## Max_BPM 0.002090016 -0.0397514432 0.036647481
## Session_Duration..hours. 0.908140376 0.0160144382 -0.016648808
## Fat_Percentage -0.597615248 -0.0073016551 -0.016834389
## Water_Intake..liters. 0.356930683 -0.0029106374 0.007725998
## Workout_Frequency..days.week. 0.576150125 -0.0106807977 -0.007966891
## Experience_Level 0.694129448 -0.0008881572 0.001757585
## BMI 0.059760826 0.0216054995 -0.032542632
## Age Weight..kg. Height..m.
## Calories_Burned -0.154678760 0.095443473 0.086348051
## Avg_BPM 0.035969143 0.009717478 -0.014776288
## Resting_BPM 0.004353714 -0.032138091 -0.005089864
## Age 1.000000000 -0.036339635 -0.027837495
## Weight..kg. -0.036339635 1.000000000 0.365321203
## Height..m. -0.027837495 0.365321203 1.000000000
## Max_BPM -0.017072597 0.057061130 -0.017659884
## Session_Duration..hours. -0.019911904 -0.013665561 -0.010205897
## Fat_Percentage 0.002370051 -0.225511640 -0.235520936
## Water_Intake..liters. 0.041528359 0.394275710 0.393532902
## Workout_Frequency..days.week. 0.008055163 -0.011769328 -0.011269883
## Experience_Level -0.018675927 0.003378528 -0.010266611
## BMI -0.013691370 0.853157690 -0.159468750
## Max_BPM Session_Duration..hours.
## Calories_Burned 0.0020900159 0.908140376
## Avg_BPM -0.0397514432 0.016014438
## Resting_BPM 0.0366474807 -0.016648808
## Age -0.0170725970 -0.019911904
## Weight..kg. 0.0570611305 -0.013665561
## Height..m. -0.0176598843 -0.010205897
## Max_BPM 1.0000000000 0.010050981
## Session_Duration..hours. 0.0100509814 1.000000000
## Fat_Percentage -0.0090557315 -0.581519771
## Water_Intake..liters. 0.0316206428 0.283410977
## Workout_Frequency..days.week. -0.0290990657 0.644140366
## Experience_Level 0.0005448337 0.764768119
## BMI 0.0671052310 -0.006492647
## Fat_Percentage Water_Intake..liters.
## Calories_Burned -0.597615248 0.356930683
## Avg_BPM -0.007301655 -0.002910637
## Resting_BPM -0.016834389 0.007725998
## Age 0.002370051 0.041528359
## Weight..kg. -0.225511640 0.394275710
## Height..m. -0.235520936 0.393532902
## Max_BPM -0.009055731 0.031620643
## Session_Duration..hours. -0.581519771 0.283410977
## Fat_Percentage 1.000000000 -0.588682834
## Water_Intake..liters. -0.588682834 1.000000000
## Workout_Frequency..days.week. -0.537059548 0.238562571
## Experience_Level -0.654362613 0.304103549
## BMI -0.119257760 0.213696572
## Workout_Frequency..days.week. Experience_Level
## Calories_Burned 0.576150125 0.6941294479
## Avg_BPM -0.010680798 -0.0008881572
## Resting_BPM -0.007966891 0.0017575852
## Age 0.008055163 -0.0186759269
## Weight..kg. -0.011769328 0.0033785279
## Height..m. -0.011269883 -0.0102666112
## Max_BPM -0.029099066 0.0005448337
## Session_Duration..hours. 0.644140366 0.7647681189
## Fat_Percentage -0.537059548 -0.6543626129
## Water_Intake..liters. 0.238562571 0.3041035494
## Workout_Frequency..days.week. 1.000000000 0.8370787094
## Experience_Level 0.837078709 1.0000000000
## BMI 0.001644974 0.0160310726
## BMI
## Calories_Burned 0.059760826
## Avg_BPM 0.021605500
## Resting_BPM -0.032542632
## Age -0.013691370
## Weight..kg. 0.853157690
## Height..m. -0.159468750
## Max_BPM 0.067105231
## Session_Duration..hours. -0.006492647
## Fat_Percentage -0.119257760
## Water_Intake..liters. 0.213696572
## Workout_Frequency..days.week. 0.001644974
## Experience_Level 0.016031073
## BMI 1.000000000
library(corrplot)
## corrplot 0.95 loaded
# Colour-coded heat map of the full correlation matrix; the tick labels are
# shrunk via tl.cex.
corrplot(cor_matrix, method = "color", tl.cex = 0.4)

# Rank the numeric predictors by their mean absolute correlation with the
# three outcomes (rows of cor_target are outcomes, columns are predictors).
cor_target <- cor(x = Y_num, y = num_data)
mean_cor <- apply(X = abs(cor_target), MARGIN = 2, FUN = mean)
sorted_var <- sort(mean_cor, decreasing = TRUE)
sorted_var
## Session_Duration..hours. Experience_Level
## 0.31360121 0.23225840
## Fat_Percentage Workout_Frequency..days.week.
## 0.20725043 0.19826594
## Water_Intake..liters. Age
## 0.12252244 0.06500054
## Weight..kg. BMI
## 0.04576635 0.03796965
## Height..m. Max_BPM
## 0.03540473 0.02616298
# Keep the names of the five highest-ranked predictors from sorted_var.
selected_vars <- names(sorted_var[seq_len(5)])
selected_vars
## [1] "Session_Duration..hours." "Experience_Level"
## [3] "Fat_Percentage" "Workout_Frequency..days.week."
## [5] "Water_Intake..liters."
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Multicollinearity check: regress Calories_Burned on the two factors, Age and
# the five selected numeric predictors, then report variance inflation
# factors. Because the model contains multi-level factors, vif() prints the
# generalised form (GVIF) together with its Df-adjusted value.
model_vif <- lm(Calories_Burned ~ Gender + Workout_Type + Age +
Session_Duration..hours. +
Experience_Level +
Fat_Percentage +
Workout_Frequency..days.week. +
Water_Intake..liters.,
data = data)
vif(model_vif)
## GVIF Df GVIF^(1/(2*Df))
## Gender 2.169766 1 1.473013
## Workout_Type 1.023362 3 1.003856
## Age 1.008043 1 1.004013
## Session_Duration..hours. 2.553041 1 1.597824
## Experience_Level 5.394541 1 2.322615
## Fat_Percentage 2.757374 1 1.660534
## Workout_Frequency..days.week. 3.364881 1 1.834361
## Water_Intake..liters. 2.364291 1 1.537625
# Modelling frame: the three outcomes, the two grouping factors, Age, and the
# five selected numeric predictors.
final_data <- data[c(
  "Calories_Burned", "Avg_BPM", "Resting_BPM",
  "Gender", "Workout_Type", "Age",
  selected_vars
)]

# Response matrix for the multivariate models. The columns are deliberately
# left unnamed, so downstream output refers to them by position:
# 1 = Calories_Burned, 2 = Avg_BPM, 3 = Resting_BPM.
Y <- cbind(
  final_data[["Calories_Burned"]],
  final_data[["Avg_BPM"]],
  final_data[["Resting_BPM"]]
)

# Shapiro-Wilk normality test on the first outcome.
shapiro.test(final_data$Calories_Burned)
##
## Shapiro-Wilk normality test
##
## data: final_data$Calories_Burned
## W = 0.99176, p-value = 2.982e-05
# Shapiro-Wilk normality test on Avg_BPM.
shapiro.test(final_data$Avg_BPM)
##
## Shapiro-Wilk normality test
##
## data: final_data$Avg_BPM
## W = 0.95325, p-value < 2.2e-16
# Shapiro-Wilk normality test on Resting_BPM.
shapiro.test(final_data$Resting_BPM)
##
## Shapiro-Wilk normality test
##
## data: final_data$Resting_BPM
## W = 0.94991, p-value < 2.2e-16
library(car)
# Levene's test: is the variance of Calories_Burned equal across Gender?
leveneTest(Calories_Burned ~ Gender, data=final_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 9.4328 0.002191 **
## 971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Levene's test: is the variance of Avg_BPM equal across Gender?
leveneTest(Avg_BPM ~ Gender, data=final_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0139 0.9062
## 971
# Levene's test: is the variance of Resting_BPM equal across Gender?
leveneTest(Resting_BPM ~ Gender, data=final_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3651 0.5458
## 971
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## ---
## biotools version 4.3
# Box's M test for equal covariance matrices of the three responses across
# groups; interaction() forms one group per Gender x Workout_Type combination.
boxM(Y, interaction(final_data$Gender, final_data$Workout_Type))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 63.689, df = 42, p-value = 0.017
# Pairwise Pearson correlations among the three outcome variables.
cor(final_data[c("Calories_Burned", "Avg_BPM", "Resting_BPM")])
## Calories_Burned Avg_BPM Resting_BPM
## Calories_Burned 1.00000000 0.3396587 0.01651795
## Avg_BPM 0.33965867 1.0000000 0.05963550
## Resting_BPM 0.01651795 0.0596355 1.00000000
# MANOVA of the three responses in Y on Gender, Workout_Type and the five
# selected numeric predictors. Age is not part of this model; it is added in
# model_mancova later in the script.
# NOTE(review): terms appear to be tested sequentially in the order written,
# so the results depend on term order — confirm before interpreting.
model_manova <- manova(Y ~ Gender + Workout_Type +
Session_Duration..hours. +
Experience_Level +
Fat_Percentage +
Workout_Frequency..days.week. +
Water_Intake..liters.,
data=final_data)
# Multivariate tests using Wilks' lambda.
summary(model_manova, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 1 0.66883 158.6 3 961 < 2.2e-16 ***
## Workout_Type 3 0.93361 7.4 9 2339 8.718e-11 ***
## Session_Duration..hours. 1 0.05103 5956.4 3 961 < 2.2e-16 ***
## Experience_Level 1 0.99870 0.4 3 961 0.7415
## Fat_Percentage 1 0.99885 0.4 3 961 0.7767
## Workout_Frequency..days.week. 1 0.99910 0.3 3 961 0.8326
## Water_Intake..liters. 1 0.99758 0.8 3 961 0.5066
## Residuals 963
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA model, tested with Pillai's trace instead of Wilks' lambda.
summary(model_manova, test="Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.33117 158.6 3 961 < 2.2e-16 ***
## Workout_Type 3 0.06651 7.3 9 2889 1.546e-10 ***
## Session_Duration..hours. 1 0.94897 5956.4 3 961 < 2.2e-16 ***
## Experience_Level 1 0.00130 0.4 3 961 0.7415
## Fat_Percentage 1 0.00115 0.4 3 961 0.7767
## Workout_Frequency..days.week. 1 0.00090 0.3 3 961 0.8326
## Water_Intake..liters. 1 0.00242 0.8 3 961 0.5066
## Residuals 963
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate ANOVA table for each column of Y. Y has no column names, so the
# tables are labelled by position: Response 1 = Calories_Burned,
# Response 2 = Avg_BPM, Response 3 = Resting_BPM.
summary.aov(model_manova)
## Response 1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 1639714 1639714 147.4401 < 2.2e-16 ***
## Workout_Type 3 207214 69071 6.2108 0.0003528 ***
## Session_Duration..hours. 1 59668043 59668043 5365.2400 < 2.2e-16 ***
## Experience_Level 1 571 571 0.0513 0.8208158
## Fat_Percentage 1 2855 2855 0.2568 0.6124762
## Workout_Frequency..days.week. 1 6690 6690 0.6015 0.4381823
## Water_Intake..liters. 1 17233 17233 1.5496 0.2134993
## Residuals 963 10709740 11121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response 2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 19 18.621 0.0899 0.7644
## Workout_Type 3 156 51.983 0.2508 0.8608
## Session_Duration..hours. 1 57 56.612 0.2732 0.6013
## Experience_Level 1 77 76.719 0.3702 0.5430
## Fat_Percentage 1 1 1.051 0.0051 0.9433
## Workout_Frequency..days.week. 1 75 75.040 0.3621 0.5475
## Water_Intake..liters. 1 66 65.516 0.3161 0.5741
## Residuals 963 199571 207.238
##
## Response 3 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 10 10.318 0.1913 0.6620
## Workout_Type 3 125 41.566 0.7705 0.5106
## Session_Duration..hours. 1 18 17.851 0.3309 0.5653
## Experience_Level 1 32 31.677 0.5872 0.4437
## Fat_Percentage 1 26 25.867 0.4795 0.4888
## Workout_Frequency..days.week. 1 17 17.493 0.3243 0.5692
## Water_Intake..liters. 1 2 1.698 0.0315 0.8592
## Residuals 963 51953 53.949
# Scatter plots of each response against the covariate Age.
plot(final_data$Age, final_data$Calories_Burned)
plot(final_data$Age, final_data$Avg_BPM)
plot(final_data$Age, final_data$Resting_BPM)
## Homogeneity of Slopes
# For each response, fit Age together with its interactions with both
# factors; the Age:Gender and Age:Workout_Type rows of the ANOVA table test
# whether the Age slope differs between groups.
model_cb <- lm(Calories_Burned ~ Age*Gender + Age*Workout_Type, data=final_data)
anova(model_cb)
## Analysis of Variance Table
##
## Response: Calories_Burned
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 1728668 1728668 24.3207 9.602e-07 ***
## Gender 1 1733777 1733777 24.3926 9.259e-07 ***
## Workout_Type 3 254401 84800 1.1931 0.3112
## Age:Gender 1 57761 57761 0.8126 0.3676
## Age:Workout_Type 3 29322 9774 0.1375 0.9376
## Residuals 963 68448133 71078
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity-of-slopes check for Avg_BPM: the Age:Gender and
# Age:Workout_Type rows test whether the Age slope differs between groups.
model_avg <- lm(Avg_BPM ~ Age*Gender + Age*Workout_Type, data=final_data)
anova(model_avg)
## Analysis of Variance Table
##
## Response: Avg_BPM
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 259 258.782 1.2523 0.2634
## Gender 1 15 15.050 0.0728 0.7873
## Workout_Type 3 163 54.280 0.2627 0.8523
## Age:Gender 1 4 3.592 0.0174 0.8951
## Age:Workout_Type 3 583 194.259 0.9401 0.4206
## Residuals 963 198997 206.643
# Homogeneity-of-slopes check for Resting_BPM: the Age:Gender and
# Age:Workout_Type rows test whether the Age slope differs between groups.
model_rest <- lm(Resting_BPM ~ Age*Gender + Age*Workout_Type, data=final_data)
anova(model_rest)
## Analysis of Variance Table
##
## Response: Resting_BPM
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 1 0.989 0.0184 0.8923
## Gender 1 10 10.153 0.1884 0.6643
## Workout_Type 3 124 41.498 0.7700 0.5109
## Age:Gender 1 77 76.510 1.4197 0.2337
## Age:Workout_Type 3 74 24.727 0.4588 0.7111
## Residuals 963 51896 53.890
# MANCOVA: the same three responses in Y and the same predictors as
# model_manova, with Age added as a covariate after the two factors.
# NOTE(review): terms appear to be tested sequentially in the order written,
# so the results depend on term order — confirm before interpreting.
model_mancova <- manova(Y ~ Gender + Workout_Type + Age +
Session_Duration..hours. +
Experience_Level +
Fat_Percentage +
Workout_Frequency..days.week. +
Water_Intake..liters.,
data=final_data)
# Multivariate tests using Wilks' lambda.
summary(model_mancova, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 1 0.48943 333.8 3 960.0 <2e-16 ***
## Workout_Type 3 0.87209 15.0 9 2336.5 <2e-16 ***
## Age 1 0.41054 459.5 3 960.0 <2e-16 ***
## Session_Duration..hours. 1 0.02507 12445.2 3 960.0 <2e-16 ***
## Experience_Level 1 0.99869 0.4 3 960.0 0.7381
## Fat_Percentage 1 0.99812 0.6 3 960.0 0.6138
## Workout_Frequency..days.week. 1 0.99864 0.4 3 960.0 0.7267
## Water_Intake..liters. 1 0.99936 0.2 3 960.0 0.8935
## Residuals 962
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANCOVA model, tested with Pillai's trace instead of Wilks' lambda.
summary(model_mancova, test="Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Gender 1 0.51057 333.8 3 960 <2e-16 ***
## Workout_Type 3 0.12814 14.3 9 2886 <2e-16 ***
## Age 1 0.58946 459.5 3 960 <2e-16 ***
## Session_Duration..hours. 1 0.97493 12445.2 3 960 <2e-16 ***
## Experience_Level 1 0.00131 0.4 3 960 0.7381
## Fat_Percentage 1 0.00188 0.6 3 960 0.6138
## Workout_Frequency..days.week. 1 0.00136 0.4 3 960 0.7267
## Water_Intake..liters. 1 0.00064 0.2 3 960 0.8935
## Residuals 962
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate ANOVA table for each column of Y under the MANCOVA model. Y has
# no column names, so the tables are labelled by position:
# Response 1 = Calories_Burned, Response 2 = Avg_BPM, Response 3 = Resting_BPM.
summary.aov(model_mancova)
## Response 1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 1639714 1639714 169.5278 < 2.2e-16 ***
## Workout_Type 3 207214 69071 7.1412 9.569e-05 ***
## Age 1 1869917 1869917 193.3282 < 2.2e-16 ***
## Session_Duration..hours. 1 59220231 59220231 6122.6997 < 2.2e-16 ***
## Experience_Level 1 1013 1013 0.1047 0.7463
## Fat_Percentage 1 2899 2899 0.2997 0.5842
## Workout_Frequency..days.week. 1 872 872 0.0901 0.7641
## Water_Intake..liters. 1 5505 5505 0.5691 0.4508
## Residuals 962 9304696 9672
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response 2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 19 18.621 0.0899 0.7644
## Workout_Type 3 156 51.983 0.2509 0.8607
## Age 1 262 262.101 1.2653 0.2609
## Session_Duration..hours. 1 62 62.306 0.3008 0.5835
## Experience_Level 1 75 74.827 0.3612 0.5480
## Fat_Percentage 1 1 1.039 0.0050 0.9435
## Workout_Frequency..days.week. 1 88 88.106 0.4253 0.5144
## Water_Intake..liters. 1 79 79.329 0.3830 0.5362
## Residuals 962 199278 207.149
##
## Response 3 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 10 10.318 0.1911 0.6621
## Workout_Type 3 125 41.566 0.7697 0.5111
## Age 1 1 0.618 0.0114 0.9148
## Session_Duration..hours. 1 18 17.709 0.3279 0.5670
## Experience_Level 1 32 31.730 0.5875 0.4436
## Fat_Percentage 1 26 25.865 0.4789 0.4891
## Workout_Frequency..days.week. 1 18 17.795 0.3295 0.5661
## Water_Intake..liters. 1 2 1.818 0.0337 0.8544
## Residuals 962 51952 54.004