import os

# Tell Qt where the Anaconda platform plugins live by exporting the
# plugin-path variable for this process.
os.environ.update(
    {'QT_QPA_PLATFORM_PLUGIN_PATH': 'C:/ProgramData/Anaconda3/Library/plugins/platform'}
)

Analysis of Lifestyle Data

This data set covers 20,000 people and their varying lifestyles. It includes gender, age, height, BMI, the workouts completed, difficulty level, several heart-rate measurements, and more. Below are five visualizations, each looking at a different key piece of the data set.

First Visualization - Bar Chart

This bar chart shows the average calories burned for each type of workout completed by the participants. The workout types are strength training, HIIT, cardio, and yoga. HIIT had the highest average calories burned, which makes sense because it is the highest-intensity workout and involves minimal rest. Yoga had the lowest average calories burned because it focuses more on slower movements and stretching.

# --- Setup: libraries, data set, and global plot theme ---
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

# Load the lifestyle data once; every chart below reads from `df`.
path = "U:/Lifestyle Data.csv"
df = pd.read_csv(path)
sns.set(style="whitegrid")


def _format_thousands(value, _position):
    """Render a tick value as a whole number with comma separators."""
    return f'{int(value):,}'


# Sample the tab10 colormap at evenly spaced positions, one per workout type
workout_type_count = df['Workout_Type'].nunique()
colour_positions = np.linspace(0, 1, workout_type_count)
unique_colors = plt.cm.tab10(colour_positions).tolist()

# Open a 10x6 figure for the chart
plt.figure(figsize=(10, 6))

# Vertical bars: mean calories burned per workout type, without error bars.
# `hue` repeats the x variable so that each bar receives its own palette colour.
bar_options = dict(
    x='Workout_Type',
    y='Calories_Burned',
    hue='Workout_Type',
    estimator='mean',
    errorbar=None,
    palette=unique_colors,
)
sns.barplot(data=df, **bar_options)

# Axis labels and title
plt.xlabel("Workout Type", fontsize=12)
plt.ylabel("Average Calories Burned", fontsize=12)
plt.title("Average Calories Burned per Workout Type", fontsize=16, weight='bold')

# Category labels stay horizontal and centred under their bars
plt.xticks(rotation=0, ha='center')

# Show y-axis values with thousands separators
y_axis = plt.gca().yaxis
y_axis.set_major_formatter(ticker.FuncFormatter(_format_thousands))

plt.tight_layout()

Second Visualization - Grouped Bar Chart

This grouped bar chart shows the average resting heart rate, by gender and workout type, among the 10 lowest and 10 highest resting heart rates for each gender. I wanted to focus on the outliers of the data set: those who are in very good shape (lower resting heart rate) and those who are not in as good shape (higher resting heart rate). Males had a lower heart rate in HIIT workouts, while females had a lower heart rate in yoga, which makes sense since more females traditionally do yoga compared to males.

# Normalise the Gender labels: title-case first, then trim surrounding whitespace
gender_title_case = df['Gender'].str.title()
df['Gender'] = gender_title_case.str.strip()

# For each gender keep the 10 rows with the lowest and the 10 rows with the
# highest resting BPM (head(10) after sorting in each direction).
bpm_ascending = df.sort_values(by='Resting_BPM')
bpm_descending = df.sort_values(by='Resting_BPM', ascending=False)
lowest_10 = bpm_ascending.groupby('Gender').head(10)
highest_10 = bpm_descending.groupby('Gender').head(10)

# Stack both extremes into a single frame for plotting
combined = pd.concat([lowest_10, highest_10])

# Open a 12x7 figure
plt.figure(figsize=(12, 7))

# Side-by-side bars per workout type, one bar per gender, showing the mean
# resting BPM of the selected rows.
bar_appearance = dict(
    errorbar=None,
    estimator='mean',
    dodge=True,
    edgecolor='black',
    alpha=0.9,
)
sns.barplot(
    data=combined,
    x='Workout_Type',
    y='Resting_BPM',
    hue='Gender',
    **bar_appearance,
)

# Title and axis labels
plt.xlabel("Workout Type", fontsize=12)
plt.ylabel("Average Resting BPM", fontsize=12)
plt.title(
    "Lowest 10 and Highest 10 Resting Heartrates by Gender and Workout Type",
    fontsize=16,
    weight='bold',
)

# Restrict the y-axis to the 40-80 BPM window
plt.ylim(bottom=40, top=80)

# Legend and horizontal, centred category labels
plt.legend(title="Gender", title_fontsize=12, fontsize=10, loc='upper right')
plt.xticks(rotation=0, ha='center')

plt.tight_layout()

Third Visualization - Donut Chart

This donut chart shows the average breakdown of macronutrients eaten per day by the members of the data set, along with the average calories eaten per day. Each macro percentage falls within the ideal daily range for adults. The macro percentages could be adjusted to fit the different lifestyles or fitness goals of different individuals.

# Column means for the three macronutrients; the pie's autopct turns their
# relative shares into percentages.
macro_columns = ['Carbs', 'Proteins', 'Fats']
macro_avgs = df[macro_columns].mean()
labels = macro_avgs.index
values = macro_avgs.values

# Mean of the Calories column, shown in the middle of the donut
avg_calories = df['Calories'].mean()

# One 7x7 figure with a single axes
fig, ax = plt.subplots(figsize=(7, 7))
colors = ['#ff9999', '#66b3ff', '#99ff99']

# Draw a regular pie first; the hole is added afterwards
label_style = {'color': "black", 'fontsize': 12}
wedge_style = {'linewidth': 2, 'edgecolor': 'white'}
wedges, texts, autotexts = ax.pie(
    values,
    labels=labels,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
    textprops=label_style,
    wedgeprops=wedge_style,
)

# A white disc over the centre turns the pie into a donut
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax.add_artist(centre_circle)

# Chart title
ax.set_title("Macronutrient Distribution per Meal", fontsize=16, weight='bold')

# Average calories, centred inside the hole
centre_caption = f'{avg_calories:,.0f}\nAvg Calories'
ax.text(0, 0, centre_caption, ha='center', va='center', fontsize=14, weight='bold')

# Equal axis scaling keeps the donut circular
ax.axis('equal')

fig.tight_layout()

Fourth Visualization - Line Plot

This line plot shows the average resting heart rate for each gender across age-group buckets. It makes sense that as age rises, so does the resting heart rate. It begins low in the <20 bucket and then continues to rise into the 40-50 bucket. It becomes harder to maintain good physical condition as you get older and have to begin working full time.

# Normalise the Gender labels: title-case first, then trim surrounding whitespace
gender_title_case = df['Gender'].str.title()
df['Gender'] = gender_title_case.str.strip()

# Age buckets. Bins are closed on the right, so an age of 19 lands in '<20'
# and an age of 30 lands in '20-30'.
bins = [0, 19, 30, 40, 50, 60, np.inf]
labels = ['<20', '20-30', '30-40', '40-50', '50-60', '60+']
df['Age_Group'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True)

# Mean resting BPM for every (age group, gender) pair, ordered by age group.
# observed=False keeps age-group categories that have no rows.
grouped_by_age_gender = df.groupby(
    ['Age_Group', 'Gender'], as_index=False, observed=False
)
mean_resting_bpm = grouped_by_age_gender['Resting_BPM'].mean()
bpm_age_group = mean_resting_bpm.sort_values(by='Age_Group')

# Open a 10x6 figure
plt.figure(figsize=(10, 6))

# One line per gender with a marker at each age group
line_appearance = dict(marker='o', linewidth=2.5)
sns.lineplot(
    data=bpm_age_group,
    x='Age_Group',
    y='Resting_BPM',
    hue='Gender',
    **line_appearance,
)

# Labels, title, and legend
plt.xlabel("Age Group", fontsize=12)
plt.ylabel("Average Resting BPM", fontsize=12)
plt.title("Average Resting Heartrate by Age Group and Gender", fontsize=16, weight='bold')
plt.legend(title="Gender", title_fontsize=12, fontsize=10, loc='best')
plt.tight_layout()

Fifth Visualization - Nested Pie Chart

This nested pie chart shows the breakdown of strength training workouts by difficulty, which is roughly a third each for beginner, intermediate, and advanced. It then shows the breakdown of body part focus within each difficulty level. There is a fairly even spread across all body parts.

# Inner ring: total session duration by difficulty level
inner_data = df.groupby("Difficulty Level")["Session_Duration (hours)"].sum()

# Outer ring: total session duration by body part within each difficulty level.
# Both groupbys sort by difficulty level, so the outer wedges line up with
# their inner wedge.
outer_data = df.groupby(["Difficulty Level", "Body Part"])["Session_Duration (hours)"].sum()

# Calculate % for inner ring (share of total)
inner_pct = (inner_data / inner_data.sum()) * 100
inner_labels = [f"{level}\n{pct:.1f}%" for level, pct in zip(inner_data.index, inner_pct)]

# Calculate % for outer ring (share within its own difficulty level)
outer_labels = [
    f"{part}\n{(duration / inner_data.loc[level]) * 100:.1f}%"
    for (level, part), duration in outer_data.items()
]

# --- Colors setup ---
# One base color per difficulty level, sampled from tab10
base_colors = plt.get_cmap("tab10")(np.linspace(0, 1, len(inner_data)))
outer_colors = []

# Assign lighter versions of each base color to that level's outer wedges.
# The shades are a deterministic blend toward white (25%-65%), so the chart
# looks the same on every run and each shade keeps the hue of its base color.
for base, level in zip(base_colors, inner_data.index):
    # Number of body parts recorded for this difficulty level
    part_count = len(outer_data.loc[level])
    for blend in np.linspace(0.25, 0.65, part_count):
        # Lighten only the RGB channels; keep the base alpha
        rgb = base[:3] + (1.0 - base[:3]) * blend
        outer_colors.append(tuple(float(channel) for channel in rgb) + (float(base[3]),))

# --- Plot nested donut ---
fig, ax = plt.subplots(figsize=(10, 8))

# Inner ring (difficulty levels)
wedges, texts = ax.pie(
    inner_data,
    radius=1.0,
    labels=inner_labels,
    labeldistance=0.6,
    colors=base_colors,
    wedgeprops=dict(width=0.3, edgecolor='white'),
    textprops={'fontsize': 11, 'weight': 'bold'}
)

# Outer ring (body parts within difficulty levels)
ax.pie(
    outer_data,
    radius=1.3,
    labels=outer_labels,
    labeldistance=1.05,
    colors=outer_colors,
    wedgeprops=dict(width=0.3, edgecolor='white'),
    textprops={'fontsize': 9}
)

# --- Add legend for difficulty levels ---
# Anchored outside the axes so it does not overlap the outer-ring labels
ax.legend(
    wedges,
    inner_data.index,
    title="Difficulty Level",
    loc="center left",
    bbox_to_anchor=(1.15, 1),
    fontsize=10
)

# --- Title and formatting ---
ax.set(aspect="equal")
plt.title("Workout Focus by Difficulty Level", fontsize=16, weight='bold', pad=25, y=1.08)
plt.tight_layout()