library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(trelliscopejs)
## This package is no longer maintained. Please use the 'trelliscope' package instead (see https://github.com/trelliscope/).
# Load the diet-and-exercise data set from the working directory and print a
# compact overview of its columns and their types.
LifeData <- utils::read.csv(file = "Final_data.csv")
utils::str(LifeData)
## 'data.frame':    20000 obs. of  54 variables:
##  $ Age                           : num  34.9 23.4 33.2 38.7 45.1 ...
##  $ Gender                        : chr  "Male" "Female" "Female" "Female" ...
##  $ Weight..kg.                   : num  65.3 56.4 59 93.8 52.4 ...
##  $ Height..m.                    : num  1.62 1.55 1.67 1.7 1.88 1.84 1.78 1.63 1.79 1.6 ...
##  $ Max_BPM                       : num  189 179 175 191 194 ...
##  $ Avg_BPM                       : num  158 132 124 155 153 ...
##  $ Resting_BPM                   : num  69 73.2 55 50.1 70.8 ...
##  $ Session_Duration..hours.      : num  1 1.37 0.91 1.1 1.08 0.69 1.67 1.01 1.76 1.17 ...
##  $ Calories_Burned               : num  1081 1810 802 1451 1166 ...
##  $ Workout_Type                  : chr  "Strength" "HIIT" "Cardio" "HIIT" ...
##  $ Fat_Percentage                : num  26.8 27.7 24.3 32.8 17.3 ...
##  $ Water_Intake..liters.         : num  1.5 1.9 1.88 2.5 2.91 2.91 2.71 2.88 3.49 2.49 ...
##  $ Workout_Frequency..days.week. : num  3.99 4 2.99 3.99 4 3.02 4.96 3.97 4.01 2 ...
##  $ Experience_Level              : num  2.01 2.01 1.02 1.99 2 1 3 2.01 3.02 1 ...
##  $ BMI                           : num  24.9 23.5 21.1 32.5 14.8 ...
##  $ Daily.meals.frequency         : num  2.99 3.01 1.99 3 3 2.99 2.02 2.99 3 3.01 ...
##  $ Physical.exercise             : num  0.01 0.97 -0.02 0.04 3 -0.04 -0.03 0 0.02 0.02 ...
##  $ Carbs                         : num  268 214 246 203 333 ...
##  $ Proteins                      : num  106 85.4 98.1 80.8 133.1 ...
##  $ Fats                          : num  71.6 57 65.5 54.6 88.4 ...
##  $ Calories                      : num  1806 1577 1608 2657 1470 ...
##  $ meal_name                     : chr  "Other" "Other" "Other" "Other" ...
##  $ meal_type                     : chr  "Lunch" "Lunch" "Breakfast" "Lunch" ...
##  $ diet_type                     : chr  "Vegan" "Vegetarian" "Paleo" "Paleo" ...
##  $ sugar_g                       : num  31.77 12.34 42.81 9.34 23.78 ...
##  $ sodium_mg                     : num  1730 693 2142 123 1935 ...
##  $ cholesterol_mg                : num  285.1 300.6 215.4 9.7 116.9 ...
##  $ serving_size_g                : num  120.5 109.2 399.4 314.3 99.2 ...
##  $ cooking_method                : chr  "Grilled" "Fried" "Boiled" "Fried" ...
##  $ prep_time_min                 : num  16.2 16.5 54.4 27.7 34.2 ...
##  $ cook_time_min                 : num  110.79 12.01 6.09 103.72 46.55 ...
##  $ rating                        : num  1.31 1.92 4.7 4.85 3.07 3.38 3.81 3.16 2.81 1.6 ...
##  $ Name.of.Exercise              : chr  "Decline Push-ups" "Bear Crawls" "Dips" "Mountain Climbers" ...
##  $ Sets                          : num  4.99 4.01 5 4.01 4.99 4 5.01 4.97 3.99 4.02 ...
##  $ Reps                          : num  20.9 16.1 21.9 16.9 15 ...
##  $ Benefit                       : chr  "Improves shoulder health and posture" "Strengthens lower abs" "Builds chest strength" "Improves coordination and cardiovascular health" ...
##  $ Burns.Calories..per.30.min.   : num  343 357 360 352 329 ...
##  $ Target.Muscle.Group           : chr  "Shoulders, Triceps" "Back, Core, Shoulders" "Quadriceps, Glutes" "Biceps, Forearms" ...
##  $ Equipment.Needed              : chr  "Cable Machine" "Step or Box" "Step or Box" "Parallel Bars or Chair" ...
##  $ Difficulty.Level              : chr  "Advanced" "Intermediate" "Intermediate" "Advanced" ...
##  $ Body.Part                     : chr  "Legs" "Chest" "Arms" "Shoulders" ...
##  $ Type.of.Muscle                : chr  "Lats" "Lats" "Grip Strength" "Upper" ...
##  $ Workout                       : chr  "Dumbbell flyes" "Lateral raises" "Standing calf raises" "Incline dumbbell flyes" ...
##  $ BMI_calc                      : num  24.9 23.5 21.1 32.4 14.8 ...
##  $ cal_from_macros               : num  2140 1712 1966 1627 2659 ...
##  $ pct_carbs                     : num  0.5 0.501 0.501 0.5 0.501 ...
##  $ protein_per_kg                : num  1.625 1.514 1.663 0.862 2.538 ...
##  $ pct_HRR                       : num  0.741 0.551 0.575 0.744 0.668 ...
##  $ pct_maxHR                     : num  0.836 0.734 0.708 0.811 0.79 ...
##  $ cal_balance                   : num  725 -233 806 1206 304 ...
##  $ lean_mass_kg                  : num  47.8 40.8 44.6 63 43.3 ...
##  $ expected_burn                 : num  685 979 655 774 711 ...
##  $ Burns.Calories..per.30.min._bc: num  7.26e+19 1.02e+20 1.08e+20 8.99e+19 5.26e+19 ...
##  $ Burns_Calories_Bin            : chr  "Medium" "High" "High" "High" ...
# Open the data viewer only in interactive sessions: View() needs a GUI data
# viewer and serves no purpose when the script is knitted or run in batch mode.
if (interactive()) {
  View(LifeData)
}
# Variables of interest: BMI, diet type, and age.
# Goal: see whether following a specific diet changes BMI, and whether age is
# an influencing factor.



# Keep only the three columns under study and add the age rounded to whole
# years as `rAge`.
sublife <- LifeData[, c("BMI", "diet_type", "Age")]
sublife$rAge <- round(sublife$Age)

# Overall summary (min, quartiles, median, mean, max) of BMI.
summary(sublife[["BMI"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.04   20.10   24.12   24.92   28.56   50.23
# Keep only the rows where both BMI and diet type are recorded.
ld <- filter(sublife, !is.na(BMI), !is.na(diet_type))


# Five-number summary, mean, and standard deviation of BMI for each diet type.
# Computed on `ld` (rows with a recorded BMI and diet type) so that a missing
# diet type cannot form its own group; na.rm = TRUE is kept as a safeguard.
sums <- ld %>%
  group_by(diet_type) %>%
  summarise(
    Min = min(BMI, na.rm = TRUE),
    Q1 = quantile(BMI, 0.25, na.rm = TRUE),
    Median = median(BMI, na.rm = TRUE),
    Q3 = quantile(BMI, 0.75, na.rm = TRUE),
    Max = max(BMI, na.rm = TRUE),
    Mean = mean(BMI, na.rm = TRUE),
    sd = sd(BMI, na.rm = TRUE)
  )
# Histogram of BMI with one trelliscope panel per diet type.
# Uses `ld` (rows with a recorded BMI and diet type) rather than the unfiltered
# subset, and sets an explicit bin width of one BMI unit instead of relying on
# the default of 30 bins that ggplot2 warns about.
ld %>%
  ggplot(aes(x = BMI)) +
  geom_histogram(binwidth = 1, fill = "steelblue") +
  facet_trelliscope(~ diet_type,
                    nrow = 1, ncol = 1,
                    name = "BMI Distribution by Diet Type",
                    path = ".") +
  labs(title = "BMI Distribution Across Diet Types",
       x = "BMI",
       y = "Counts")
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## using data from the first layer
## 
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

Description

The data set used for this analysis was sourced from Kaggle. It is a diet and exercise data set that provides comprehensive information on individuals’ fitness, nutrition, and exercise habits, including physical measures, workout performance metrics, and dietary details. The data set has 20,000 observations and 54 variables of varying data types and structures. Of these variables, body mass index (BMI) and diet type were examined to understand whether an individual’s diet had an effect on the overall distribution of BMI among the observed population. The standard deviation was also used to understand the spread of these distributions and whether the diet type had a real effect.

Initial analysis from the five-number summaries showed that there was no extreme skew among the observed variables and that the spread was relatively low as well, indicating that the BMI distribution for each diet type should be roughly symmetric. The primary variable graphed was BMI. This variable was chosen because BMI is a standard measure for assessing body composition and is widely used to categorize population health risks. Examining its distribution gives an initial understanding of the overall physical health profile of the data set’s subjects. The motivation for investigating BMI is to see whether its distribution is sensitive to different lifestyle choices.

The primary variable chosen to facet the plots was diet type. The purpose of this faceting was to directly compare the distribution of BMI across distinct dietary groups. This comparison aims to investigate whether a specific diet might lead to a measurable shift in body weight, potentially identifying which diets correlate with a lower BMI. The graphs show that all of the diets have a very similar distribution, with Keto having the largest number of unusually high BMI measures. However, no one particular diet leads to a lower BMI, as all of the distributions have a very similar spread.

A key challenge involved determining the best way to visualize this relationship while still being able to glean meaningful results. Age was initially included as well, to see whether it affected BMI across different diets, but because of the large sample size a scatter plot was not going to work. The approach then switched to side-by-side boxplots, but there was still an issue of interpretability, which further suggested that age may not be helpful when examining this relationship.

For further exploration, the reader could investigate a Balance Ratio, calculated as the difference between daily calorie intake and calories burned, relative to an individual’s BMI. This measure directly quantifies the daily energy surplus or deficit relative to an individual’s BMI. It would give insight into which diet naturally results in a sustained or larger calorie deficit, creating the most favorable conditions for healthy weight management.