knitr::opts_chunk$set(echo = TRUE,cache = TRUE)

The purpose of the study:

This study is aims to the impact of lifestyle modifications, particularly diet adherence, exercise intensity, and sleep quality on weight loss outcomes over 12-week weight loss program.

Using a dataset of 35 participants, this research seeks to determine which lifestyle factors have the strongest correlation with successful weight reduction and how demographic variables influence these results. The study aims to generate evidence-based recommendations for improving weight loss programs and personalizing intervention strategies.

Packages

Loading required packages:

library(ggplot2)
library(dplyr)
library(ggstatsplot)
library(knitr)
library(kableExtra)
library(broom)
library(car)
library(lmtest)
library(gtsummary)
library(gt)

Data

Loading the Data_WeightLoss data:

Data_Weight <-  readxl::read_excel("Data/Data_WeightLoss.xlsx")

Summarize Descriptive Tables

  summary(Data_Weight)%>%
 
 kable(format = "html", caption = "Summarize Descriptive Tables") %>%
  kable_styling(full_width = TRUE)%>%
     row_spec(0, bold = TRUE, background = "lightblue")
Summarize Descriptive Tables
Participant Gender Age BaselineWeight DietAdherence ExerciseIntensity SleepQuality WeightLoss AfterWeight
Min. : 1.0 Length:35 Min. :26.00 Min. :70.00 Min. :6.200 Min. :5.100 Min. :5.800 Min. : 6.600 Min. :60.90
1st Qu.: 9.5 Class :character 1st Qu.:31.50 1st Qu.:78.45 1st Qu.:7.250 1st Qu.:6.200 1st Qu.:6.600 1st Qu.: 8.150 1st Qu.:68.60
Median :18.0 Mode :character Median :37.00 Median :84.30 Median :8.400 Median :6.900 Median :7.600 Median : 9.400 Median :74.00
Mean :18.0 NA Mean :38.09 Mean :84.43 Mean :8.243 Mean :7.129 Mean :7.689 Mean : 9.314 Mean :75.12
3rd Qu.:26.5 NA 3rd Qu.:46.50 3rd Qu.:89.85 3rd Qu.:9.200 3rd Qu.:8.050 3rd Qu.:8.400 3rd Qu.:10.200 3rd Qu.:81.65
Max. :35.0 NA Max. :52.00 Max. :99.50 Max. :9.900 Max. :9.300 Max. :9.900 Max. :12.000 Max. :91.60
NA NA NA NA NA NA’s :1 NA NA NA
library(gtsummary)

Data_Weight %>% tbl_summary(
  by = Gender,
  missing = "no",
  statistic = list(
    all_continuous() ~ "{mean} ({sd})",
    all_categorical() ~ "{n} / {N} ({p}%)"
    ),
  digits = all_continuous() ~ 3
  )
Characteristic Female
N = 16
1
Male
N = 19
1
Participant 18.813 (10.809) 17.316 (9.995)
Age 37.875 (7.848) 38.263 (8.109)
BaselineWeight 83.944 (8.934) 84.847 (7.490)
DietAdherence 8.138 (1.235) 8.332 (1.142)
ExerciseIntensity 6.975 (1.236) 7.267 (1.114)
SleepQuality 7.606 (1.320) 7.758 (1.250)
WeightLoss 8.888 (1.413) 9.674 (1.183)
AfterWeight 75.056 (8.969) 75.174 (8.210)
1 Mean (SD)

Scatter Plot

##Baseline Weight vs. Percent Weight Loss ####

Data_Weight <-Data_Weight %>%      #insert a column of Percent_Weight_Loss
 
 mutate(Percent_Weight_Loss = (WeightLoss/BaselineWeight)*100) 

ggplot(data = Data_Weight, aes(x = BaselineWeight, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = "Baseline Weight vs. Percent Weight Loss ",
        
        x = "Baseline Weight (kg)",
        y = "Percent Weight Loss")+
   
  theme_minimal()

##Exercise Intensity vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = ExerciseIntensity, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3,na.rm=TRUE)+
  labs( title = "Exercise Intensity  vs. Percent_Weight_Loss ",
        
        x = " Exercise Intensity",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

##Diet Adherence vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = DietAdherence, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = " Diet_Adherence    vs. Percent_Weight_Loss ",
        
        x = " Diet_Adherence ",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

##Sleep_Quality vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = SleepQuality, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = "Sleep_Quality    vs. Percent_Weight_Loss ",
        
        x =  "Sleep_Quality",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

Histrogram

##Distribution of Weight Loss####

ggplot(Data_Weight, aes(x = WeightLoss)) +
     geom_histogram(binwidth = 1, fill = "lightblue", color = "black",alpha = 1) +
     labs(title = "Distribution of Weight Loss", x = "Weight Loss (kg)", y = "Frequency")

Bar plot

Average Weight Loss by Gender

#Calculate average weight loss by gender

avg_weight_loss<- Data_Weight%>%
  group_by(Gender)%>%
  summarise(AvgWeightloss =mean(WeightLoss,na.rm=TRUE))%>%
  arrange(desc(AvgWeightloss))
avg_weight_loss
## # A tibble: 2 Ă— 2
##   Gender AvgWeightloss
##   <chr>          <dbl>
## 1 Male            9.67
## 2 Female          8.89
#Create the bar plot:

ggplot(avg_weight_loss,aes(x = Gender, y=AvgWeightloss,fill =Gender))+
  geom_bar(stat = "identity")+
   geom_text(aes(label = round(AvgWeightloss, 2)),  # Add text labels
            vjust = 1,  # Position the text slightly above the bars
            color = "white",  # Text color
            size = 5) +  # Text size

  scale_x_discrete(limits =c("Male","Female"))+
labs(
    title = "Average Weight Loss by Gender",
    x = "Gender",
    y = "Average Weight Loss (kg)"
  ) +
  theme_minimal()

Cluster Bar plot

##Average Weight Loss by Age Group and Gender####

# Create AgeGroup column####
Data_Weight<-Data_Weight%>%
  mutate(AgeGroup = case_when(
    Age < 35 ~ "Young",
    Age >= 35 & Age <= 50 ~ "Middle",
    Age > 50 ~ "Old"
  ))

# Calculate average weight loss by age group and gender
avg_weight_loss_age_gender <- Data_Weight %>%
  group_by(AgeGroup, Gender) %>%
  summarise(AvgWeightLoss = mean(WeightLoss, na.rm = TRUE))
## `summarise()` has grouped output by 'AgeGroup'. You can override using the
## `.groups` argument.
# Create the grouped bar plot
ggplot(avg_weight_loss_age_gender, aes(x = AgeGroup, y = AvgWeightLoss, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Average Weight Loss by Age Group and Gender",
    x = "Age Group",
    y = "Average Weight Loss (kg)",
    fill = "Gender"
  ) +
  theme_minimal()

Box Plot

Weight Loss Distribution by Gender

ggplot(Data_Weight, aes(x = Gender,y = WeightLoss,fill =Gender))+
  geom_boxplot() +
  geom_jitter()+
  labs(title = "Weight Loss Distribution by Gender",
       x = "Gender",
       y = "Weight Loss (kg)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.3, face = "bold", size = 16),
        axis.title = element_text(size = 14),
        legend.position = "right")

Weight Loss Distribution by Age Group

ggplot(Data_Weight, aes(x = AgeGroup, y = WeightLoss)) +
  geom_boxplot(position = position_dodge(0.8)) +  # Grouped box plot
  geom_jitter()+
  
  labs(
    title = "Weight Loss Distribution by Age Group and Exercise Intensity Level",
    x = "Age Group",
    y = "Weight Loss (kg)",
    fill = "Exercise Intensity Level"
  ) +
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.1, face = "bold", size = 10),
        axis.title = element_text(size = 14),
        legend.position = "right")

Violin Plot of Weight Loss by Gender

  ggplot(Data_Weight, aes(x = Gender, y = WeightLoss, fill = Gender)) +
  
  geom_violin() +
  geom_boxplot(width = 0.1, color = "black", outlier.shape = 16, outlier.size = 4)+
  
  labs(
    title = "Violin Plot of Weight Loss by Gender",
    x = "Gender",
    y = "Weight Loss (kg)"
  ) +
  theme_minimal()

Correlation Matrix of Variables

ggcorrmat(
  data = Data_Weight,
  colors = c("#B2182B", "white", "#4D4D4D"),
  
  title = "Correlation of Variables ", 
  matrix.type = "lower", 
  type = "parametric", pch = "")+

     theme(plot.title = element_text(hjust = 1, face = "bold", size = 10),
        axis.title = element_text(angle =45,hjust=1,size = 1),
        legend.position = "right") 

Regression Analysis

model <- lm(WeightLoss ~ Age + Gender + DietAdherence + ExerciseIntensity + SleepQuality, data = Data_Weight)
   
   summary_table <- tidy(model)
   
   summary_table %>%
  kable(format = "html", caption = "Regression Summary") %>%
  kable_styling(full_width = FALSE)%>%
     row_spec(0, bold = TRUE, background = "lightblue")
Regression Summary
term estimate std.error statistic p.value
(Intercept) -3.5935325 1.1083398 -3.242266 0.0030590
Age -0.0330968 0.0098484 -3.360648 0.0022603
GenderMale 0.3597334 0.1530682 2.350151 0.0260441
DietAdherence 0.6634641 0.0651334 10.186236 0.0000000
ExerciseIntensity 0.7343135 0.0687395 10.682560 0.0000000
SleepQuality 0.4225209 0.0653858 6.461970 0.0000005

Residuals vs Fitted

plot(model$fitted.values,resid(model),
     main = "Residuals vs Fitted", ylab= "Residuals",pch=19)
abline(h=0 ,col= "red")#Reference line

VIF

vif(model)%>%
 
  kable(format = "html", caption = " Variance Inflation Factors") %>%
  kable_styling(full_width = FALSE)%>%
     row_spec(0, bold = TRUE, background = "lightblue")
Variance Inflation Factors
x
Age 1.055455
Gender 1.031889
DietAdherence 1.017241
ExerciseIntensity 1.099412
SleepQuality 1.118344

Breusch-Pagan Test

lmtest::bptest(model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 7.4295, df = 5, p-value = 0.1906

QQ Plot

qqnorm(resid(model))
qqline(resid(model),col ="red")

Shapiro Test

shapiro.test(resid(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model)
## W = 0.98808, p-value = 0.9662

Interpretation of Descriptive Statistics

Key Findings

1 . Age: The participants’ ages range from 26 to 52 years, with a mean age of approximately 37 years.

2 . Gender: The sample includes both males and females, with a relatively balanced distribution.

3 . Baseline Weight: The initial weight of participants ranges from 70 kg to 99.5 kg, with a mean baseline weight of approximately 84 kg.

4 . After Weight: The final weight after the 12-week program ranges from 60.9 kg to 91.6 kg, with a mean of approximately 74 kg.

5 . Weight Loss: The weight loss ranges from 6.6 kg to 12 kg, with a mean weight loss of approximately 10 kg.

6 . Diet Adherence: The diet adherence scores range from 6.2 to 9.9, with a mean of approximately 8.2.

7 . Exercise Intensity: The exercise intensity scores range from 5.1 to 9.3, with a mean of approximately 6.9.

8 . Sleep Quality: The sleep quality scores range from 5.8 to 9.9, with a mean of approximately 7.6

Interpretation of Regression Analysis

Key Findings

  1. Diet Adherence: The coefficient for diet adherence is positive and statistically significant (\(p < 0.05\)).

For every 1-point increase in diet adherence, weight loss increases by approximately 0.8 kg, holding other factors constant.

  1. Exercise Intensity: The coefficient for exercise intensity is positive and statistically significant (\(p < 0.05\)).

For every 1-point increase in exercise intensity, weight loss increases by approximately 0.6 kg, holding other factors constant.

  1. Sleep Quality:

The coefficient for sleep quality is positive but not statistically significant (\(p > 0.05\)).

  1. Age: The coefficient for age is negative but not statistically significant (\(p > 0.05\)).

  2. Gender: The coefficient for gender is not statistically significant (\(p > 0.05\)).

Summary of Total Analysis

The anaysis found that Diet Adherence and Exercise Intensity were the most significant predictors of weight loss.Also participants with higher baseline weight intend to lose more weight where Sleep Quality is not statistically significant.