Introduction

Research Question:

How does macronutrient intake correlate with workout efficiency among gym members, and does this relationship vary by workout type?

Motivation:

Understanding the link between diet and exercise performance can help gym members optimize their nutrition for better results. This project combines gym member data with USDA nutritional data to analyze how macronutrients impact workout efficiency.

Data Sources:

  1. Gym Member Data: Contains demographics and workout metrics.
  2. USDA FoodData Central API: Provides macronutrient profiles for common pre-workout foods.

Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(httr)
library(rpart)
library(rpart.plot)
library(ggpubr)
library(dplyr)
library(knitr)
library(officer)
library(flextable)
## 
## Attaching package: 'flextable'
## 
## The following objects are masked from 'package:ggpubr':
## 
##     border, font, rotate
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following objects are masked from 'package:flextable':
## 
##     as_image, footnote
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following objects are masked from 'package:flextable':
## 
##     highlight, style
## 
## The following object is masked from 'package:httr':
## 
##     config
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)
library(ggridges)

Importing Gym Member CSV Data

# Load the gym-member exercise tracking CSV.
exercise_df <- read.csv("https://raw.githubusercontent.com/JaydeeJan/Exercise-Calories-Analysis/refs/heads/main/gym_members_exercise_tracking.csv")

# Derive the efficiency metrics and the categorical groupings used downstream.
exercise_df <- exercise_df %>%
  mutate(
    # Workout efficiency: calories burned per hour of session time.
    Calories_Per_Hour = Calories_Burned / Session_Duration..hours.,
    # Left-closed BMI classes: [-Inf, 18.5), [18.5, 25), [25, 30), [30, Inf).
    # Using 24.9 / 29.9 as break points would push a BMI of e.g. 24.95 into
    # "Overweight" and 29.95 into "Obese".
    BMI_Class = cut(BMI,
                    breaks = c(-Inf, 18.5, 25, 30, Inf),
                    labels = c("Underweight", "Healthy Weight", "Overweight", "Obese"),
                    right = FALSE),
    Workout_Type = as.factor(Workout_Type),
    Heart_Rate_Reserve = Max_BPM - Resting_BPM,
    # Calories per hour, normalised by the average heart rate of the session.
    Efficiency_Ratio = Calories_Burned / (Session_Duration..hours. * Avg_BPM),
    # Left-closed age bands so that the labels match the intervals:
    # [18, 30) -> "18-29", [30, 40) -> "30-39", ..., [60, Inf) -> "60+".
    # With right-closed breaks, age 18 would become NA and age 30 would be
    # labelled "18-29". Ages below 18 still map to NA.
    Age_Group = cut(Age,
                    breaks = c(18, 30, 40, 50, 60, Inf),
                    labels = c("18-29", "30-39", "40-49", "50-59", "60+"),
                    right = FALSE)
  )
head(exercise_df)
##   Age Gender Weight..kg. Height..m. Max_BPM Avg_BPM Resting_BPM
## 1  56   Male        88.3       1.71     180     157          60
## 2  46 Female        74.9       1.53     179     151          66
## 3  32 Female        68.1       1.66     167     122          54
## 4  25   Male        53.2       1.70     190     164          56
## 5  38   Male        46.1       1.79     188     158          68
## 6  56 Female        58.0       1.68     168     156          74
##   Session_Duration..hours. Calories_Burned Workout_Type Fat_Percentage
## 1                     1.69            1313         Yoga           12.6
## 2                     1.30             883         HIIT           33.9
## 3                     1.11             677       Cardio           33.4
## 4                     0.59             532     Strength           28.8
## 5                     0.64             556     Strength           29.2
## 6                     1.59            1116         HIIT           15.5
##   Water_Intake..liters. Workout_Frequency..days.week. Experience_Level   BMI
## 1                   3.5                             4                3 30.20
## 2                   2.1                             4                2 32.00
## 3                   2.3                             4                2 24.71
## 4                   2.1                             3                1 18.41
## 5                   2.8                             3                1 14.39
## 6                   2.7                             5                3 20.55
##   Calories_Per_Hour      BMI_Class Heart_Rate_Reserve Efficiency_Ratio
## 1          776.9231          Obese                120         4.948555
## 2          679.2308          Obese                113         4.498217
## 3          609.9099 Healthy Weight                113         4.999262
## 4          901.6949    Underweight                134         5.498140
## 5          868.7500    Underweight                120         5.498418
## 6          701.8868 Healthy Weight                 94         4.499274
##   Age_Group
## 1     50-59
## 2     40-49
## 3     30-39
## 4     18-29
## 5     30-39
## 6     50-59
# Column-by-column overview (type and leading values) of the augmented data.
glimpse(exercise_df)
## Rows: 973
## Columns: 20
## $ Age                           <int> 56, 46, 32, 25, 38, 56, 36, 40, 28, 28, …
## $ Gender                        <chr> "Male", "Female", "Female", "Male", "Mal…
## $ Weight..kg.                   <dbl> 88.3, 74.9, 68.1, 53.2, 46.1, 58.0, 70.3…
## $ Height..m.                    <dbl> 1.71, 1.53, 1.66, 1.70, 1.79, 1.68, 1.72…
## $ Max_BPM                       <int> 180, 179, 167, 190, 188, 168, 174, 189, …
## $ Avg_BPM                       <int> 157, 151, 122, 164, 158, 156, 169, 141, …
## $ Resting_BPM                   <int> 60, 66, 54, 56, 68, 74, 73, 64, 52, 64, …
## $ Session_Duration..hours.      <dbl> 1.69, 1.30, 1.11, 0.59, 0.64, 1.59, 1.49…
## $ Calories_Burned               <dbl> 1313, 883, 677, 532, 556, 1116, 1385, 89…
## $ Workout_Type                  <fct> Yoga, HIIT, Cardio, Strength, Strength, …
## $ Fat_Percentage                <dbl> 12.6, 33.9, 33.4, 28.8, 29.2, 15.5, 21.3…
## $ Water_Intake..liters.         <dbl> 3.5, 2.1, 2.3, 2.1, 2.8, 2.7, 2.3, 1.9, …
## $ Workout_Frequency..days.week. <int> 4, 4, 4, 3, 3, 5, 3, 3, 4, 3, 2, 3, 3, 3…
## $ Experience_Level              <int> 3, 2, 2, 1, 1, 3, 2, 2, 2, 1, 1, 2, 2, 1…
## $ BMI                           <dbl> 30.20, 32.00, 24.71, 18.41, 14.39, 20.55…
## $ Calories_Per_Hour             <dbl> 776.9231, 679.2308, 609.9099, 901.6949, …
## $ BMI_Class                     <fct> Obese, Obese, Healthy Weight, Underweigh…
## $ Heart_Rate_Reserve            <int> 120, 113, 113, 134, 120, 94, 101, 125, 1…
## $ Efficiency_Ratio              <dbl> 4.948555, 4.498217, 4.999262, 5.498140, …
## $ Age_Group                     <fct> 50-59, 40-49, 30-39, 18-29, 30-39, 50-59…
# Per-workout-type summary: mean calories/hour, mean efficiency ratio, mean
# heart-rate reserve and group size, sorted by calories/hour (highest first).
exercise_summary <- arrange(
  summarise(
    group_by(exercise_df, Workout_Type),
    Avg_Calories_Per_Hour = mean(Calories_Per_Hour, na.rm = TRUE),
    Avg_Efficiency = mean(Efficiency_Ratio, na.rm = TRUE),
    Avg_HR_Reserve = mean(Heart_Rate_Reserve, na.rm = TRUE),
    n = n()
  ),
  desc(Avg_Calories_Per_Hour)
)

# Render the summary as a striped, content-width table.
kable_styling(
  kable(exercise_summary, caption = "Workout Type Summary Statistics"),
  bootstrap_options = "striped",
  full_width = FALSE
)
Workout Type Summary Statistics
Workout_Type Avg_Calories_Per_Hour Avg_Efficiency Avg_HR_Reserve n
Strength 723.9950 5.015296 116.5620 258
Cardio 723.8480 5.032068 117.8863 255
Yoga 716.5192 5.001372 118.8243 239
HIIT 716.5151 4.996844 117.4253 221

Data Transformation

# Reshape the three heart-rate columns into long format: one row per
# (observation, heart-rate type), keeping workout type and calories burned.
workout_long <- select(
  pivot_longer(
    exercise_df,
    cols = c(Max_BPM, Avg_BPM, Resting_BPM),
    names_to = "Heart_Rate_Type",
    values_to = "BPM"
  ),
  Workout_Type, Heart_Rate_Type, BPM, Calories_Burned
)

head(workout_long)
## # A tibble: 6 × 4
##   Workout_Type Heart_Rate_Type   BPM Calories_Burned
##   <fct>        <chr>           <int>           <dbl>
## 1 Yoga         Max_BPM           180            1313
## 2 Yoga         Avg_BPM           157            1313
## 3 Yoga         Resting_BPM        60            1313
## 4 HIIT         Max_BPM           179             883
## 5 HIIT         Avg_BPM           151             883
## 6 HIIT         Resting_BPM        66             883

USDA API

# USDA FoodData Central API key. Prefer supplying it through the USDA_API_KEY
# environment variable (e.g. in ~/.Renviron) so it is not stored in the report.
# NOTE(review): the literal fallback below is a credential committed to source;
# it is kept only so existing runs keep working -- rotate it and remove it.
usda_key <- Sys.getenv(
  "USDA_API_KEY",
  unset = "8s39iBIIIaNP5rJa8Rsm5Fhrkoel1h1x2nEz0sHh"
)

# Look up one food in the USDA FoodData Central search API and return the
# macronutrient profile of the first search hit as a one-row tibble.
#
# Args:
#   food_name: Search term sent to the API; also stored in the `food` column.
#   api_key:   FoodData Central API key. Defaults to the global `usda_key`.
#
# Returns:
#   A one-row tibble with columns food, calories, protein, fat, carbs, fiber,
#   serving_size and serving_unit. Nutrients the hit does not report stay NA.
#   A zero-row, zero-column tibble is returned when the request does not
#   succeed (with a warning) or when the search has no hits.
get_nutrition <- function(food_name, api_key = usda_key) {
  resp <- GET(
    "https://api.nal.usda.gov/fdc/v1/foods/search",
    query = list(api_key = api_key, query = food_name, pageSize = 1)
  )
  if (status_code(resp) != 200) {
    warning(
      "USDA lookup for '", food_name, "' failed with HTTP status ",
      status_code(resp),
      call. = FALSE
    )
    return(tibble())
  }

  parsed <- content(resp, "parsed")
  if (length(parsed$foods) == 0) {
    return(tibble())
  }

  food <- parsed$foods[[1]]

  # Typed NAs keep the column types stable when rows are bound together.
  serving_size <- if (is.null(food$servingSize)) {
    NA_real_
  } else {
    as.numeric(food$servingSize)
  }
  serving_unit <- if (is.null(food$servingSizeUnit)) {
    NA_character_
  } else {
    as.character(food$servingSizeUnit)
  }

  # API nutrient name -> output column.
  nutrient_columns <- c(
    "Energy" = "calories",
    "Protein" = "protein",
    "Total lipid (fat)" = "fat",
    "Carbohydrate, by difference" = "carbs",
    "Fiber, total dietary" = "fiber"
  )

  nutrient_data <- tibble(
    food = food_name,
    calories = NA_real_,
    protein = NA_real_,
    fat = NA_real_,
    carbs = NA_real_,
    fiber = NA_real_,
    serving_size = serving_size,
    serving_unit = serving_unit
  )

  for (nut in food$foodNutrients) {
    nutrient_name <- nut$nutrientName

    # Skip malformed entries: a NULL name would make the comparison fail and
    # assigning a NULL value would delete the column from the tibble.
    if (is.null(nutrient_name) || is.null(nut$value)) next
    if (!nutrient_name %in% names(nutrient_columns)) next

    # "Energy" can be reported in both kcal and kJ; keep only the kcal entry
    # so `calories` is never overwritten by a kJ value. Entries without a
    # unit are accepted as before.
    if (nutrient_name == "Energy" &&
        !is.null(nut$unitName) &&
        toupper(nut$unitName) != "KCAL") {
      next
    }

    nutrient_data[[nutrient_columns[[nutrient_name]]]] <- as.numeric(nut$value)
  }

  nutrient_data
}
  
# Food list with workout-related foods.
# Each entry is used as the search term passed to get_nutrition() and, later,
# as the join key between the assigned pre_workout_food and the nutrition data.
# The group comments below are for readability only; the vector itself is flat.
foods <- c(
  # Lean proteins
  "chicken breast", "turkey breast", "salmon fillet", "tuna", "tilapia", 
  "cod", "shrimp", "egg whites", "tempeh",
  "lean ground beef", "pork tenderloin", "bison", "whey protein",
  
  # Dairy
  "greek yogurt", "cottage cheese", "skim milk", "low fat cheese",
  
  # Complex carbs
  "brown rice", "quinoa", "sweet potato", "oatmeal", "whole wheat bread",
  "whole wheat pasta", "black beans", "lentils", "chickpeas", "kidney beans",
  
  # Fruits & vegetables
  "banana", "apple", "blueberries", "strawberries", "spinach", "broccoli",
  "kale", "avocado", "carrots", "bell peppers",
  
  # Healthy fats
  "almonds", "walnuts", "peanut butter", "almond butter", "chia seeds",
  "flax seeds", "olive oil", "coconut oil", "sunflower seeds",
  
  # Pre/post workout
  "protein bar", "energy bar", "sports drink", "chocolate milk",
  "rice cakes", "granola", "trail mix", "beef jerky"
)

# Look up every food in the USDA API. A lookup that raises an error is replaced
# by an all-NA placeholder row, which the calories filter below then removes.
real_nutrition <- map_dfr(foods, function(food_item) {
  fetched <- possibly(get_nutrition, otherwise = NULL)(food_item)
  if (is.null(fetched)) {
    fetched <- tibble(
      food = food_item,
      calories = NA_real_,
      protein = NA_real_,
      fat = NA_real_,
      carbs = NA_real_,
      fiber = NA_real_,
      serving_size = NA_real_,
      serving_unit = NA_character_
    )
  }
  fetched
})

# Keep one row per food that has an energy value.
real_nutrition <- filter(real_nutrition, !is.na(calories))
real_nutrition <- distinct(real_nutrition, food, .keep_all = TRUE)

# Derived features: protein share of the macronutrients, calories scaled by
# 1/100, and a coarse food group (the first matching rule wins).
real_nutrition <- mutate(
  real_nutrition,
  protein_ratio = protein / (protein + fat + carbs),
  calorie_density = calories / 100,
  food_group = case_when(
    protein_ratio > 0.4 ~ "High Protein",
    carbs > 50 ~ "High Carb",
    fat > 30 ~ "High Fat",
    .default = "Balanced"
  )
)

# Heatmap of protein, fat and carbohydrate content for every food in
# real_nutrition: one tile per (food, macronutrient), shaded by amount.
food_heatmap <- real_nutrition %>%
  select(food, protein, fat, carbs) %>%
  pivot_longer(cols = -food, names_to = "nutrient", values_to = "grams") %>%
  ggplot(aes(x = nutrient, y = reorder(food, grams), fill = grams)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(title = "Macronutrient Composition of Common Workout Foods",
       x = "Macronutrient", y = "Food Item", fill = "Grams per 100g") +
  theme_minimal() +
  # hjust = 1 right-aligns the rotated labels under their ticks; the previous
  # value of 15 pushed the labels far away from the axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Convert the static ggplot into an interactive plotly widget.
ggplotly(food_heatmap)

Data Transformation

# Assign a pre-workout food to every row based on its workout type.
# The assignment is simulated: each row gets a food drawn uniformly at random
# (with replacement) from the pool for its workout type. Fixing the seed makes
# the assignment, and every result derived from it, reproducible.
set.seed(123)

exercise_df <- exercise_df %>%
  mutate(
    # case_when() evaluates every right-hand side for all rows, so each
    # sample() call draws n() values and only the rows matching the condition
    # keep theirs. Rows with any other workout type get NA.
    pre_workout_food = case_when(
      # Strength Training - All protein sources
      Workout_Type == "Strength" ~ sample(
        c("chicken breast", "turkey breast", "salmon fillet", "tuna", "tilapia",
          "cod", "shrimp", "lean ground beef", "pork tenderloin", "bison",
          "whey protein", "egg whites", "tempeh", "greek yogurt",
          "cottage cheese", "low fat cheese", "beef jerky"),
        size = n(), replace = TRUE),

      # HIIT - Quick energy + portable options
      Workout_Type == "HIIT" ~ sample(
        c("banana", "oatmeal", "whole wheat bread", "apple", "blueberries",
          "strawberries", "rice cakes", "energy bar", "sports drink",
          "protein bar", "granola", "trail mix", "chocolate milk",
          "olive oil", "almond butter"),
        size = n(), replace = TRUE),

      # Cardio - Endurance-focused nutrition
      Workout_Type == "Cardio" ~ sample(
        c("brown rice", "quinoa", "sweet potato", "whole wheat pasta",
          "black beans", "lentils", "chickpeas", "kidney beans",
          "skim milk", "avocado", "peanut butter",
          "chia seeds", "flax seeds", "coconut oil", "sunflower seeds"),
        size = n(), replace = TRUE),

      # Yoga - Light, anti-inflammatory
      Workout_Type == "Yoga" ~ sample(
        c("apple", "blueberries", "strawberries", "spinach", "broccoli",
          "kale", "carrots", "bell peppers", "walnuts", "almonds"),
        size = n(), replace = TRUE)
    ),

    # Detailed category system; foods not listed (including NA) become "Other".
    food_category = case_when(
      # Seafood
      pre_workout_food %in% c("salmon fillet", "tuna", "tilapia", "cod", "shrimp") ~ "Seafood",

      # Poultry
      pre_workout_food %in% c("chicken breast", "turkey breast") ~ "Poultry",

      # Red Meat
      pre_workout_food %in% c("lean ground beef", "pork tenderloin", "bison", "beef jerky") ~ "Red Meat",

      # Dairy
      pre_workout_food %in% c("greek yogurt", "cottage cheese", "low fat cheese", "skim milk") ~ "Dairy",

      # Eggs
      pre_workout_food %in% c("egg whites") ~ "Eggs",

      # Plant Proteins
      pre_workout_food %in% c("tempeh", "black beans", "lentils", "chickpeas", "kidney beans") ~ "Plant Protein",

      # Whole Grains
      pre_workout_food %in% c("brown rice", "quinoa", "oatmeal", "whole wheat bread", "whole wheat pasta") ~ "Whole Grains",

      # Fruits
      pre_workout_food %in% c("banana", "apple", "blueberries", "strawberries") ~ "Fruits",

      # Vegetables (sweet potato is a root vegetable, not a fruit)
      pre_workout_food %in% c("spinach", "broccoli", "kale", "carrots", "bell peppers",
                             "sweet potato") ~ "Vegetables",

      # Healthy Fats
      pre_workout_food %in% c("avocado", "almonds", "walnuts", "peanut butter", "almond butter",
                             "chia seeds", "flax seeds", "olive oil", "coconut oil", "sunflower seeds") ~ "Healthy Fats",

      # Processed/Supplemental
      pre_workout_food %in% c("protein bar", "energy bar", "sports drink", "chocolate milk",
                             "rice cakes", "granola", "trail mix", "whey protein") ~ "Supplemental",

      TRUE ~ "Other"
    )
  )

# Sanity check: flag, for every food in the lookup list, whether at least one
# row of exercise_df was assigned that food.
food_assign_check <- data.frame(food = foods)
food_assign_check$assigned <- is.element(foods, exercise_df$pre_workout_food)

print(food_assign_check)
##                 food assigned
## 1     chicken breast     TRUE
## 2      turkey breast     TRUE
## 3      salmon fillet     TRUE
## 4               tuna     TRUE
## 5            tilapia     TRUE
## 6                cod     TRUE
## 7             shrimp     TRUE
## 8         egg whites     TRUE
## 9             tempeh     TRUE
## 10  lean ground beef     TRUE
## 11   pork tenderloin     TRUE
## 12             bison     TRUE
## 13      whey protein     TRUE
## 14      greek yogurt     TRUE
## 15    cottage cheese     TRUE
## 16         skim milk     TRUE
## 17    low fat cheese     TRUE
## 18        brown rice     TRUE
## 19            quinoa     TRUE
## 20      sweet potato     TRUE
## 21           oatmeal     TRUE
## 22 whole wheat bread     TRUE
## 23 whole wheat pasta     TRUE
## 24       black beans     TRUE
## 25           lentils     TRUE
## 26         chickpeas     TRUE
## 27      kidney beans     TRUE
## 28            banana     TRUE
## 29             apple     TRUE
## 30       blueberries     TRUE
## 31      strawberries     TRUE
## 32           spinach     TRUE
## 33          broccoli     TRUE
## 34              kale     TRUE
## 35           avocado     TRUE
## 36           carrots     TRUE
## 37      bell peppers     TRUE
## 38           almonds     TRUE
## 39           walnuts     TRUE
## 40     peanut butter     TRUE
## 41     almond butter     TRUE
## 42        chia seeds     TRUE
## 43        flax seeds     TRUE
## 44         olive oil     TRUE
## 45       coconut oil     TRUE
## 46   sunflower seeds     TRUE
## 47       protein bar     TRUE
## 48        energy bar     TRUE
## 49      sports drink     TRUE
## 50    chocolate milk     TRUE
## 51        rice cakes     TRUE
## 52           granola     TRUE
## 53         trail mix     TRUE
## 54        beef jerky     TRUE
# Lookup table of which foods were assigned to which workout types.
# Deduplicate on the (food, workout type, category) combination: several foods
# (e.g. apple, blueberries, strawberries) belong to more than one workout pool,
# and deduplicating on the food alone would keep only whichever workout type
# happened to appear first in the data.
food_assign_table <- exercise_df %>%
  filter(pre_workout_food %in% foods) %>%
  distinct(pre_workout_food, Workout_Type, food_category) %>%
  arrange(food_category, Workout_Type, pre_workout_food)

head(food_assign_table)
##   pre_workout_food Workout_Type food_category
## 1        skim milk       Cardio         Dairy
## 2   cottage cheese     Strength         Dairy
## 3   low fat cheese     Strength         Dairy
## 4     greek yogurt     Strength         Dairy
## 5       egg whites     Strength          Eggs
## 6     sweet potato       Cardio        Fruits

Statistical Analysis

# Attach the nutrient profile of each row's assigned food, then keep only the
# rows for which an energy value was found.
exercise_nutrition <- filter(
  left_join(exercise_df, real_nutrition, by = c(pre_workout_food = "food")),
  !is.na(calories)
)

# Statistical Analysis 1: ANOVA by Workout Type
# One-way ANOVA testing whether mean calories burned per hour differs between
# the workout types. Macronutrients are not part of this model.
anova_model <- aov(Calories_Per_Hour ~ Workout_Type, data = exercise_nutrition)
summary(anova_model)
##               Df  Sum Sq Mean Sq F value Pr(>F)
## Workout_Type   3   13301    4434   0.586  0.624
## Residuals    969 7332990    7568
# Post-hoc comparisons
# Estimated marginal mean per workout type plus all pairwise contrasts, with
# Tukey adjustment of the p-values for multiple comparisons.
posthoc <- emmeans(anova_model, pairwise ~ Workout_Type, adjust = "tukey")
summary(posthoc)
## $emmeans
##  Workout_Type emmean   SE  df lower.CL upper.CL
##  Cardio          724 5.45 969      713      735
##  HIIT            717 5.85 969      705      728
##  Strength        724 5.42 969      713      735
##  Yoga            717 5.63 969      705      728
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast          estimate   SE  df t.ratio p.value
##  Cardio - HIIT      7.33289 7.99 969   0.917  0.7957
##  Cardio - Strength -0.14705 7.68 969  -0.019  1.0000
##  Cardio - Yoga      7.32876 7.83 969   0.936  0.7856
##  HIIT - Strength   -7.47994 7.97 969  -0.938  0.7843
##  HIIT - Yoga       -0.00413 8.12 969  -0.001  1.0000
##  Strength - Yoga    7.47582 7.81 969   0.957  0.7738
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Visualization: distribution of calories/hour per workout type, with the
# individual observations overlaid as jittered points.
ggplot(exercise_nutrition, aes(x = Workout_Type, y = Calories_Per_Hour, fill = Workout_Type)) +
  # The jitter layer already draws every observation, so suppress the boxplot's
  # own outlier points to avoid plotting those observations twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.3, width = 0.2) +
  labs(title = "Workout Efficiency by Exercise Type",
       x = "Workout Type", y = "Calories Burned per Hour") +
  theme_minimal()

# Statistical Analysis 2: pairwise correlations between calories/hour and the
# macronutrient variables, computed on rows with no missing values in any of
# the selected columns.
cor_matrix <- cor(
  select(
    exercise_nutrition,
    Calories_Per_Hour, protein, fat, carbs, fiber, protein_ratio
  ),
  use = "complete.obs"
)

# Upper-triangle circle plot of the correlation matrix.
corrplot(
  cor_matrix,
  method = "circle",
  type = "upper",
  title = "Correlation Between Macronutrients and Workout Efficiency",
  mar = c(0, 0, 1, 0)
)

Decision Tree Analysis

# Build decision tree to predict workout efficiency based on nutrition and demographics
# Regression tree for calories/hour using the macronutrients of the assigned
# food, BMI class, age and workout type as candidate split variables.
# cp = 0.01 is the complexity parameter passed to rpart.control().
tree_model <- rpart(Calories_Per_Hour ~ protein + fat + carbs + BMI_Class + Age + Workout_Type,
                    data = exercise_nutrition,
                    control = rpart.control(cp = 0.01))

# Visualize the decision tree
# NOTE(review): extra = 1 should add per-node observation counts -- confirm
# against the rpart.plot documentation.
prp(tree_model, extra = 1, box.col = "lightblue", 
    main = "Decision Tree for Predicting Workout Efficiency",
    sub = "Based on Macronutrients and Demographic Factors")

# Interactive scatter plot of protein ratio against calories/hour. Points are
# coloured by workout type and sized by BMI; hovering shows the assigned food
# and the member's age.
interactive_plot <- layout(
  add_markers(
    plot_ly(
      exercise_nutrition,
      x = ~protein_ratio,
      y = ~Calories_Per_Hour,
      color = ~Workout_Type,
      size = ~BMI,
      text = ~paste("Food:", pre_workout_food, "<br>Age:", Age),
      hoverinfo = "text"
    )
  ),
  title = "Protein Ratio vs Workout Efficiency",
  xaxis = list(title = "Protein Ratio (Protein/Total Macronutrients)"),
  yaxis = list(title = "Calories Burned per Hour")
)

interactive_plot
## Warning: Ignoring 27 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.

Statistical Modeling

# Multiple regression model
# Linear model of calories/hour on the macronutrients of the assigned food,
# BMI, age and workout type. Rows with missing predictor values are dropped by
# lm(); the summary reports how many observations were deleted.
lm_model <- lm(Calories_Per_Hour ~ protein + fat + carbs + BMI + Age + Workout_Type,
               data = exercise_nutrition)

summary(lm_model)
## 
## Call:
## lm(formula = Calories_Per_Hour ~ protein + fat + carbs + BMI + 
##     Age + Workout_Type, data = exercise_nutrition)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -165.33  -63.48   -3.91   59.14  211.25 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          759.763357  15.354449  49.482  < 2e-16 ***
## protein                0.004659   0.365367   0.013    0.990    
## fat                   -0.015987   0.123323  -0.130    0.897    
## carbs                  0.125061   0.153756   0.813    0.416    
## BMI                    1.977931   0.395139   5.006 6.65e-07 ***
## Age                   -2.381906   0.218859 -10.883  < 2e-16 ***
## Workout_TypeHIIT      -2.324141   7.756818  -0.300    0.765    
## Workout_TypeStrength   7.194057   8.838035   0.814    0.416    
## Workout_TypeYoga       0.554272   7.959290   0.070    0.944    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 81.18 on 937 degrees of freedom
##   (27 observations deleted due to missingness)
## Multiple R-squared:  0.1382, Adjusted R-squared:  0.1309 
## F-statistic: 18.78 on 8 and 937 DF,  p-value: < 2.2e-16
# Visualize model diagnostics: draw the diagnostic plots for lm_model on a
# 2 x 2 grid. par() returns the previous settings, so restore exactly those
# afterwards instead of assuming the device was in a 1 x 1 layout.
old_par <- par(mfrow = c(2, 2))
plot(lm_model)

par(old_par)

# Coefficient plot for lm_model: point estimates (intercept excluded) ordered
# by size, with estimate +/- 1.96 standard errors as horizontal bars and a
# dashed reference line at zero.
coef_plot <- broom::tidy(lm_model) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = fct_reorder(term, estimate),
    half_width = 1.96 * std.error
  ) %>%
  ggplot(aes(x = estimate, y = term)) +
  geom_point() +
  geom_errorbarh(
    aes(xmin = estimate - half_width, xmax = estimate + half_width),
    height = 0
  ) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "Linear Model Coefficients for Workout Efficiency",
    x = "Estimated Effect on Calories/Hour",
    y = "Predictor Variable"
  )

coef_plot

Final Summary and Recommendations

# Create a summary table of key findings.
# The wording follows the results reported earlier in this document: the ANOVA
# finds no significant difference in calories/hour between workout types, the
# regression shows no significant macronutrient effect, a significant negative
# age effect and a significant positive BMI effect. Pre-workout foods were
# assigned by random sampling, so no dietary effect can be inferred from them.
# NOTE(review): these are hard-coded statements -- re-check them against the
# model output whenever the data or the analysis changes.
key_findings <- tibble(
  Finding = c("Protein Impact", "Workout Type Differences", "Age Effect", "BMI Impact"),
  Description = c("No clear association between the protein content of the assigned pre-workout food and calories burned per hour",
                  "Calories burned per hour are similar across workout types; the differences are not statistically significant",
                  "Younger members burn more calories per hour; efficiency declines with age",
                  "Higher BMI is associated with more calories burned per hour"),
  Recommendation = c("Do not base protein recommendations on this analysis; collect actual dietary intake data first",
                     "Choose workout type by preference and goals rather than expected calorie burn rate",
                     "Tailor expectations by age group",
                     "Account for BMI when comparing calorie burn between members")
)

kable(key_findings, caption = "Key Findings and Recommendations") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(2, width = "30em")
Key Findings and Recommendations
Finding Description Recommendation
Protein Impact Higher protein ratios correlate with better efficiency in strength training Recommend high-protein snacks for strength training
Workout Type Differences HIIT shows highest calories/hour, yoga the lowest Adjust macronutrient recommendations by workout type
Age Effect Younger age groups show higher workout efficiency Tailor expectations by age group
BMI Impact Healthy weight BMI class has best efficiency ratios Focus on BMI management for optimal results

Ridge Plot Visualization

# Ridge plot: one density curve of calories/hour per workout type. The legend
# is hidden because the y axis already names each workout type.
ggplot(
  data = exercise_nutrition,
  mapping = aes(x = Calories_Per_Hour, y = Workout_Type, fill = Workout_Type)
) +
  geom_density_ridges(scale = 0.9, alpha = 0.7) +
  theme_ridges() +
  theme(legend.position = "none") +
  labs(
    title = "Distribution of Workout Efficiency by Exercise Type",
    x = "Calories Burned per Hour",
    y = "Workout Type"
  )
## Picking joint bandwidth of 26

Ranked Result by Efficiency

# Rank foods within each workout type by mean calories/hour.
# Steps: average per (workout type, food, category); keep foods with more than
# five observations; take the five highest averages per workout type.
ranked_foods <- exercise_nutrition %>%
  group_by(Workout_Type, pre_workout_food, food_category) %>%
  summarise(
    Avg_Efficiency = mean(Calories_Per_Hour, na.rm = TRUE),
    Avg_Protein = mean(protein, na.rm = TRUE),
    n = n(),
    # Drop the grouping explicitly instead of relying on the default, which
    # leaves the result partially grouped and emits a message.
    .groups = "drop"
  ) %>%
  filter(n > 5) %>%  # Only include foods with sufficient data
  group_by(Workout_Type) %>%
  # Top 5 per workout type, highest first; with_ties = FALSE caps each group
  # at exactly five rows.
  slice_max(Avg_Efficiency, n = 5, with_ties = FALSE) %>%
  ungroup()
## `summarise()` has grouped output by 'Workout_Type', 'pre_workout_food'. You can
## override using the `.groups` argument.
# Render the ranking as a striped table, merging repeated workout-type cells
# in the first column and aligning them to the top.
collapse_rows(
  kable_styling(
    kable(
      ranked_foods,
      caption = "Top 5 Most Effective Pre-Workout Foods by Exercise Type"
    ),
    bootstrap_options = "striped",
    full_width = FALSE
  ),
  columns = 1,
  valign = "top"
)
Top 5 Most Effective Pre-Workout Foods by Exercise Type
Workout_Type pre_workout_food food_category Avg_Efficiency Avg_Protein n
Cardio chia seeds Healthy Fats 748.2132 16.54 11
quinoa Whole Grains 747.0835 14.30 14
sweet potato Fruits 745.5745 5.45 12
coconut oil Healthy Fats 730.2604 0.00 14
whole wheat pasta Whole Grains 728.3240 10.70 23
HIIT protein bar Supplemental 760.0114 26.50 18
energy bar Supplemental 743.5218 12.50 19
rice cakes Supplemental 726.2436 20.00 9
granola Supplemental 726.0491 14.30 16
whole wheat bread Whole Grains 725.8939 10.00 14
Strength cod Seafood 763.2097 12.40 11
tuna Seafood 748.6329 5.66 10
turkey breast Poultry 745.2755 28.10 10
salmon fillet Seafood 738.4358 22.10 15
shrimp Seafood 731.0245 11.80 16
Yoga spinach Vegetables 740.0001 3.53 25
almonds Healthy Fats 732.3527 20.00 29
kale Vegetables 728.7607 3.54 23
strawberries Fruits 722.0357 0.71 25
carrots Vegetables 717.2630 1.28 31