The purpose of the study:

This study is aims to the impact of lifestyle modifications, particularly diet adherence, exercise intensity, and sleep quality on weight loss outcomes over 12-week weight loss program.

Using a dataset of 35 participants, this research seeks to determine which lifestyle factors have the strongest correlation with successful weight reduction and how demographic variables influence these results. The study aims to generate evidence-based recommendations for improving weight loss programs and personalizing intervention strategies.

Packages

Loading required packages:

library(ggplot2)
library(dplyr)
library(ggstatsplot)
library(knitr)
library(kableExtra)
library(broom)
library(car)
library(lmtest)
library(gtsummary)
library(gt)

Data

Loading the Data_WeightLoss data:

Data_Weight <-  readxl::read_excel("Data/Data_WeightLoss.xlsx")

Summarize Descriptive Tables

  summary(Data_Weight)
##   Participant      Gender               Age        BaselineWeight 
##  Min.   : 1.0   Length:35          Min.   :26.00   Min.   :70.00  
##  1st Qu.: 9.5   Class :character   1st Qu.:31.50   1st Qu.:78.45  
##  Median :18.0   Mode  :character   Median :37.00   Median :84.30  
##  Mean   :18.0                      Mean   :38.09   Mean   :84.43  
##  3rd Qu.:26.5                      3rd Qu.:46.50   3rd Qu.:89.85  
##  Max.   :35.0                      Max.   :52.00   Max.   :99.50  
##                                                                   
##  DietAdherence   ExerciseIntensity  SleepQuality     WeightLoss    
##  Min.   :6.200   Min.   :5.100     Min.   :5.800   Min.   : 6.600  
##  1st Qu.:7.250   1st Qu.:6.200     1st Qu.:6.600   1st Qu.: 8.150  
##  Median :8.400   Median :6.900     Median :7.600   Median : 9.400  
##  Mean   :8.243   Mean   :7.129     Mean   :7.689   Mean   : 9.314  
##  3rd Qu.:9.200   3rd Qu.:8.050     3rd Qu.:8.400   3rd Qu.:10.200  
##  Max.   :9.900   Max.   :9.300     Max.   :9.900   Max.   :12.000  
##                  NA's   :1                                         
##   AfterWeight   
##  Min.   :60.90  
##  1st Qu.:68.60  
##  Median :74.00  
##  Mean   :75.12  
##  3rd Qu.:81.65  
##  Max.   :91.60  
## 
library(gtsummary)

Data_Weight %>% tbl_summary(
  by = Gender,
  missing = "no",
  statistic = list(
    all_continuous() ~ "{mean} ({sd})",
    all_categorical() ~ "{n} / {N} ({p}%)"
    ),
  digits = all_continuous() ~ 3
  )
Characteristic Female
N = 16
1
Male
N = 19
1
Participant 18.813 (10.809) 17.316 (9.995)
Age 37.875 (7.848) 38.263 (8.109)
BaselineWeight 83.944 (8.934) 84.847 (7.490)
DietAdherence 8.138 (1.235) 8.332 (1.142)
ExerciseIntensity 6.975 (1.236) 7.267 (1.114)
SleepQuality 7.606 (1.320) 7.758 (1.250)
WeightLoss 8.888 (1.413) 9.674 (1.183)
AfterWeight 75.056 (8.969) 75.174 (8.210)
1 Mean (SD)

Scatter Plot

##Baseline Weight vs. Percent Weight Loss ####

Data_Weight <-Data_Weight %>%      #insert a column of Percent_Weight_Loss
 
 mutate(Percent_Weight_Loss = (WeightLoss/BaselineWeight)*100) 

ggplot(data = Data_Weight, aes(x = BaselineWeight, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = "Baseline Weight vs. Percent Weight Loss ",
        
        x = "Baseline Weight (kg)",
        y = "Percent Weight Loss")+
   
  theme_minimal()

##Exercise Intensity vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = ExerciseIntensity, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3,na.rm=TRUE)+
  labs( title = "Exercise Intensity  vs. Percent_Weight_Loss ",
        
        x = " Exercise Intensity",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

##Diet Adherence vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = DietAdherence, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = " Diet_Adherence    vs. Percent_Weight_Loss ",
        
        x = " Diet_Adherence ",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

##Sleep_Quality vs. Percent_Weight_Loss####

ggplot(data = Data_Weight, aes(x = SleepQuality, y = Percent_Weight_Loss)) +
  geom_point(shape = 16,color= "blue",size =3)+
  labs( title = "Sleep_Quality    vs. Percent_Weight_Loss ",
        
        x =  "Sleep_Quality",
        y = "Percent_Weight_Loss")+
   
  theme_minimal()

Histrogram

##Distribution of Weight Loss####

ggplot(Data_Weight, aes(x = WeightLoss)) +
     geom_histogram(binwidth = 1, fill = "lightblue", color = "black",alpha = 1) +
     labs(title = "Distribution of Weight Loss", x = "Weight Loss (kg)", y = "Frequency")

Bar plot

Average Weight Loss by Gender

#Calculate average weight loss by gender

avg_weight_loss<- Data_Weight%>%
  group_by(Gender)%>%
  summarise(AvgWeightloss =mean(WeightLoss,na.rm=TRUE))%>%
  arrange(desc(AvgWeightloss))
avg_weight_loss
## # A tibble: 2 × 2
##   Gender AvgWeightloss
##   <chr>          <dbl>
## 1 Male            9.67
## 2 Female          8.89
#Create the bar plot:

ggplot(avg_weight_loss,aes(x = Gender, y=AvgWeightloss,fill =Gender))+
  geom_bar(stat = "identity")+
   geom_text(aes(label = round(AvgWeightloss, 2)),  # Add text labels
            vjust = 1,  # Position the text slightly above the bars
            color = "white",  # Text color
            size = 5) +  # Text size

  scale_x_discrete(limits =c("Male","Female"))+
labs(
    title = "Average Weight Loss by Gender",
    x = "Gender",
    y = "Average Weight Loss (kg)"
  ) +
  theme_minimal()

Cluster Bar plot

##Average Weight Loss by Age Group and Gender####

# Create AgeGroup column####
Data_Weight<-Data_Weight%>%
  mutate(AgeGroup = case_when(
    Age < 35 ~ "Young",
    Age >= 35 & Age <= 50 ~ "Middle",
    Age > 50 ~ "Old"
  ))

# Calculate average weight loss by age group and gender
avg_weight_loss_age_gender <- Data_Weight %>%
  group_by(AgeGroup, Gender) %>%
  summarise(AvgWeightLoss = mean(WeightLoss, na.rm = TRUE))
## `summarise()` has grouped output by 'AgeGroup'. You can override using the
## `.groups` argument.
# Create the grouped bar plot
ggplot(avg_weight_loss_age_gender, aes(x = AgeGroup, y = AvgWeightLoss, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Average Weight Loss by Age Group and Gender",
    x = "Age Group",
    y = "Average Weight Loss (kg)",
    fill = "Gender"
  ) +
  theme_minimal()

Box Plot

Weight Loss Distribution by Gender

ggplot(Data_Weight, aes(x = Gender,y = WeightLoss,fill =Gender))+
  geom_boxplot() +
  geom_jitter()+
  labs(title = "Weight Loss Distribution by Gender",
       x = "Gender",
       y = "Weight Loss (kg)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.3, face = "bold", size = 16),
        axis.title = element_text(size = 14),
        legend.position = "right")

Weight Loss Distribution by Age Group

ggplot(Data_Weight, aes(x = AgeGroup, y = WeightLoss)) +
  geom_boxplot(position = position_dodge(0.8)) +  # Grouped box plot
  geom_jitter()+
  
  labs(
    title = "Weight Loss Distribution by Age Group and Exercise Intensity Level",
    x = "Age Group",
    y = "Weight Loss (kg)",
    fill = "Exercise Intensity Level"
  ) +
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.1, face = "bold", size = 10),
        axis.title = element_text(size = 14),
        legend.position = "right")

Violin Plot of Weight Loss by Gender

  ggplot(Data_Weight, aes(x = Gender, y = WeightLoss, fill = Gender)) +
  
  geom_violin() +
  geom_boxplot(width = 0.1, color = "black", outlier.shape = 16, outlier.size = 4)+
  
  labs(
    title = "Violin Plot of Weight Loss by Gender",
    x = "Gender",
    y = "Weight Loss (kg)"
  ) +
  theme_minimal()

Correlation Matrix of Variables

ggcorrmat(
  data = Data_Weight,
  colors = c("#B2182B", "white", "#4D4D4D"),
  
  title = "Correlation of Variables ", 
  matrix.type = "lower", 
  type = "parametric", pch = "")+

     theme(plot.title = element_text(hjust = 1, face = "bold", size = 10),
        axis.title = element_text(angle =45,hjust=1,size = 1),
        legend.position = "right") 

Regression Analysis

model <- lm(WeightLoss ~ Age + Gender + DietAdherence + ExerciseIntensity + SleepQuality, data = Data_Weight)
   
   summary_table <- tidy(model)
   
   summary_table %>%
  kable(format = "html", caption = "Regression Summary") %>%
  kable_styling(full_width = FALSE)%>%
     row_spec(0, bold = TRUE, background = "lightblue")
Regression Summary
term estimate std.error statistic p.value
(Intercept) -3.5935325 1.1083398 -3.242266 0.0030590
Age -0.0330968 0.0098484 -3.360648 0.0022603
GenderMale 0.3597334 0.1530682 2.350151 0.0260441
DietAdherence 0.6634641 0.0651334 10.186236 0.0000000
ExerciseIntensity 0.7343135 0.0687395 10.682560 0.0000000
SleepQuality 0.4225209 0.0653858 6.461970 0.0000005

Residuals vs Fitted

plot(model$fitted.values,resid(model),
     main = "Residuals vs Fitted", ylab= "Residuals",pch=19)
abline(h=0 ,col= "red")#Reference line

VIF

vif(model)%>%
 
  kable(format = "html", caption = " Variance Inflation Factors") %>%
  kable_styling(full_width = FALSE)%>%
     row_spec(0, bold = TRUE, background = "lightblue")
Variance Inflation Factors
x
Age 1.055455
Gender 1.031889
DietAdherence 1.017241
ExerciseIntensity 1.099412
SleepQuality 1.118344

Breusch-Pagan Test

lmtest::bptest(model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 7.4295, df = 5, p-value = 0.1906

QQ Plot

qqnorm(resid(model))
qqline(resid(model),col ="red")

Shapiro Test

shapiro.test(resid(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model)
## W = 0.98808, p-value = 0.9662