---
title: "EDA_HEALTH_SLEEP_STATISTICS"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: flatly
    social: menu
    source_code: embed
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---
```{r setup, include=FALSE}
library(flexdashboard)
library(readr)
library(ggplot2)
library(dplyr)
library(reshape2)
library(lubridate)
```
## Dataset Description {.tabset}
### view of the dataset
```{r}
# Read the CSV into `df`; the later chunks in this document reuse this object.
df <- read_csv(file = "Health_Sleep_Statistics.csv")
# Preview both ends of the table (six rows each, the default for head/tail).
head(df, n = 6L)
tail(df, n = 6L)
```
### About the dataset
```{r}
# Compact overview of the data frame: column names, types and leading values.
df %>% str()
```
### summary of the dataset
```{r}
# Per-column summary of `df` (summary() reports on every column it is given,
# not only the numeric ones).
df %>% summary()
```
### Head of DataSet
```{r}
# Show the leading rows of the dataset.
df %>% head()
```
### Checking for missing values
```{r}
# Number of NA entries in each column.
df %>%
  is.na() %>%
  colSums()
```
## Univariate Analysis {.tabset}
### Histogram for Age
```{r}
# Mean and median of Age, computed once and shared by both plots below.
# na.rm = TRUE keeps the reference lines defined if any Age value is missing.
mean_age <- mean(df$Age, na.rm = TRUE)
median_age <- median(df$Age, na.rm = TRUE)

# Plot 1: count histogram (bins 5 wide) with the mean (dashed red) and median
# (solid green) marked. The intercepts are constants, so they are passed
# outside aes(); mapped inside aes() they are evaluated once per data row.
ggplot(df, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill = "cyan", color = "black") +
  geom_vline(xintercept = mean_age, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_age, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(title = "Distribution of Age with Mean and Median", x = "Age", y = "Count")

# Plot 2: the same bins on the density scale, so the kernel density curve
# (blue) shares the y-axis with the bars.
ggplot(df, aes(x = Age)) +
  geom_histogram(aes(y = ..density..), binwidth = 5, fill = "cyan", color = "black") +
  geom_density(color = "blue", size = 1) +
  geom_vline(xintercept = mean_age, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_age, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(
    title = "Distribution of Age with Mean, Median, and Density Curve",
    x = "Age", y = "Density"
  )
```
### Histogram for Sleep Quality
```{r}
# Mean and median of Sleep Quality, computed once and shared by both plots.
# na.rm = TRUE keeps the reference lines defined if any score is missing.
mean_sleep_quality <- mean(df$`Sleep Quality`, na.rm = TRUE)
median_sleep_quality <- median(df$`Sleep Quality`, na.rm = TRUE)

# Plot 1: one bar per distinct score, with mean (dashed red) and median
# (solid green) marked. Constant intercepts are passed outside aes().
ggplot(df, aes(x = `Sleep Quality`)) +
  geom_bar(fill = "green", color = "black") +
  geom_vline(xintercept = mean_sleep_quality, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_sleep_quality, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(title = "Distribution of Sleep Quality with Mean and Median", x = "Sleep Quality", y = "Count")

# Plot 2: count histogram with a density curve overlaid on the count scale.
# A density integrates to 1, so it is comparable to bin counts only after
# multiplying by n * binwidth. stat_density's `count` is already density * n,
# which leaves the bin width as the remaining factor. (Scaling by the full
# data range instead of the bin width inflates the curve by about the number
# of bins.) The bin width is made explicit so bars and curve use the same one.
sleep_binwidth <- diff(range(df$`Sleep Quality`, na.rm = TRUE)) / 30
if (sleep_binwidth <= 0) {
  # All scores identical: fall back to a unit-wide bin so the plot still draws.
  sleep_binwidth <- 1
}
ggplot(df, aes(x = `Sleep Quality`)) +
  geom_histogram(binwidth = sleep_binwidth, fill = "green", color = "black") +
  geom_vline(xintercept = mean_sleep_quality, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_sleep_quality, color = "green", linetype = "solid", size = 1) +
  geom_density(aes(y = ..count.. * sleep_binwidth), color = "blue", size = 1) +
  theme_minimal() +
  labs(
    title = "Distribution of Sleep Quality with Mean, Median, and Density Curve",
    x = "Sleep Quality", y = "Count"
  )
```
### Histogram for Calories Burned
```{r}
# Mean and median of Calories Burned, computed once and shared by both plots.
# na.rm = TRUE keeps the reference lines defined if any value is missing.
mean_calories <- mean(df$`Calories Burned`, na.rm = TRUE)
median_calories <- median(df$`Calories Burned`, na.rm = TRUE)

# Plot 1: count histogram (bins 100 wide) with mean (dashed red) and median
# (solid green) marked. Constant intercepts are passed outside aes() so they
# are not re-evaluated for every data row.
ggplot(df, aes(x = `Calories Burned`)) +
  geom_histogram(binwidth = 100, fill = "blue", color = "black") +
  geom_vline(xintercept = mean_calories, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_calories, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(title = "Distribution of Calories Burned with Mean and Median", x = "Calories Burned", y = "Count")

# Plot 2: the same bins on the density scale (slightly transparent) with a
# kernel density curve (purple) on the same axis.
ggplot(df, aes(x = `Calories Burned`)) +
  geom_histogram(aes(y = ..density..), binwidth = 100, fill = "blue", color = "black", alpha = 0.7) +
  geom_density(color = "purple", size = 1) +
  geom_vline(xintercept = mean_calories, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_calories, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(
    title = "Distribution of Calories Burned with Mean, Median, and Density Curve",
    x = "Calories Burned", y = "Density"
  )
```
### Histogram for Daily Steps
```{r}
# Mean and median of Daily Steps, computed once and shared by both plots.
# na.rm = TRUE keeps the reference lines defined if any value is missing.
mean_steps <- mean(df$`Daily Steps`, na.rm = TRUE)
median_steps <- median(df$`Daily Steps`, na.rm = TRUE)

# Bin width used by both histograms and by the density rescaling below.
steps_binwidth <- 1000

# Plot 1: count histogram with mean (dashed red) and median (solid green)
# marked. Constant intercepts are passed outside aes().
ggplot(df, aes(x = `Daily Steps`)) +
  geom_histogram(binwidth = steps_binwidth, fill = "purple", color = "black") +
  geom_vline(xintercept = mean_steps, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_steps, color = "green", linetype = "solid", size = 1) +
  theme_minimal() +
  labs(title = "Distribution of Daily Steps with Mean and Median", x = "Daily Steps", y = "Count")

# Plot 2: count histogram with a density curve on the count scale.
# A density becomes comparable to bin counts when multiplied by n * binwidth;
# stat_density's `count` is already density * n, so only the bin width is
# applied here. (Multiplying by the whole data range instead of the bin width
# makes the curve tower over the bars.)
ggplot(df, aes(x = `Daily Steps`)) +
  geom_histogram(binwidth = steps_binwidth, fill = "purple", color = "black") +
  geom_vline(xintercept = mean_steps, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_steps, color = "green", linetype = "solid", size = 1) +
  geom_density(aes(y = ..count.. * steps_binwidth), color = "blue", size = 1) +
  theme_minimal() +
  labs(
    title = "Distribution of Daily Steps with Mean, Median, and Density Curve",
    x = "Daily Steps", y = "Count"
  )
```
### Histogram for Bedtime Hour
```{r}
# Parse Bedtime with lubridate::hms() and derive a decimal hour of the day
# (hours plus minutes / 60). Note that this overwrites df$Bedtime in place.
df$Bedtime <- hms(df$Bedtime)
df$Bedtime_hour <- hour(df$Bedtime) + minute(df$Bedtime) / 60

# Plot 1: density-scaled histogram (24 bins) with a filled density curve.
ggplot(df, aes(x = Bedtime_hour)) +
  geom_histogram(bins = 24, fill = "skyblue", color = "black", aes(y = ..density..)) +
  geom_density(alpha = 0.2, fill = "blue") +
  labs(title = "Distribution of Bedtime", x = "Hour of the Day", y = "Density") +
  theme_minimal()

# Mean and median bedtime hour; rows whose Bedtime did not parse are ignored.
# NOTE(review): hour-of-day wraps at midnight, so a plain mean/median can land
# between late-evening and after-midnight bedtimes - confirm this is the
# summary that is wanted.
mean_bedtime <- mean(df$Bedtime_hour, na.rm = TRUE)
median_bedtime <- median(df$Bedtime_hour, na.rm = TRUE)

# Plot 2: same histogram with labelled mean (red) and median (green) lines.
# The intercepts are constants, so they are passed outside aes() rather than
# being evaluated once per data row.
ggplot(df, aes(x = Bedtime_hour)) +
  geom_histogram(bins = 24, fill = "skyblue", color = "black", aes(y = ..density..)) +
  geom_density(alpha = 0.2, fill = "blue") +
  geom_vline(xintercept = mean_bedtime, color = "red", linetype = "dashed", size = 1) +
  geom_vline(xintercept = median_bedtime, color = "green", linetype = "dashed", size = 1) +
  labs(title = "Distribution of Bedtime", x = "Hour of the Day", y = "Density") +
  theme_minimal() +
  annotate("text", x = mean_bedtime, y = 0.15, label = sprintf("Mean: %.2f", mean_bedtime), color = "red", hjust = -0.1) +
  annotate("text", x = median_bedtime, y = 0.15, label = sprintf("Median: %.2f", median_bedtime), color = "green", hjust = -0.1)
```
### Box plot for Age
```{r}
# Single box plot showing the spread of Age.
df %>%
  ggplot(mapping = aes(y = Age)) +
  geom_boxplot(color = "black", fill = "lightblue") +
  labs(title = "Box Plot of Age", y = "Age") +
  theme_minimal()
```
### Box plot for Sleep Quality
```{r}
# Single box plot showing the spread of Sleep Quality.
ggplot(data = df) +
  geom_boxplot(
    mapping = aes(y = `Sleep Quality`),
    color = "black",
    fill = "lightblue"
  ) +
  labs(title = "Box Plot of Sleep Quality", y = "Sleep Quality") +
  theme_minimal()
```
### Box plot for Daily Steps
```{r}
# Single box plot showing the spread of Daily Steps.
ggplot(data = df, mapping = aes(y = `Daily Steps`)) +
  geom_boxplot(color = "black", fill = "lightgreen") +
  ggtitle("Box Plot of Daily Steps") +
  ylab("Daily Steps") +
  theme_minimal()
```
### Box plot for Calories Burned
```{r}
# Single box plot showing the spread of Calories Burned.
df %>%
  ggplot(aes(y = `Calories Burned`)) +
  geom_boxplot(color = "black", fill = "lightcoral") +
  labs(y = "Calories Burned", title = "Box Plot of Calories Burned") +
  theme_minimal()
```
### countplot for Gender
```{r}
# Bar chart of the number of rows per Gender value.
df %>%
  ggplot(mapping = aes(x = Gender)) +
  geom_bar() +
  labs(title = "Gender Distribution")
```
### countplot for Physical Activity Level
```{r}
# Bar chart of the number of rows per Physical Activity Level value.
df %>%
  ggplot(mapping = aes(x = `Physical Activity Level`)) +
  geom_bar() +
  labs(
    title = "Physical Activity Level Distribution",
    x = "Physical Activity Level",
    y = "Count"
  )
```
### countplot for Dietary Habits
```{r}
# Bar chart of the number of rows per Dietary Habits value.
ggplot(data = df) +
  geom_bar(mapping = aes(x = `Dietary Habits`)) +
  labs(
    title = "Dietary Habits Distribution",
    x = "Dietary Habits",
    y = "Count"
  )
```
### countplot for Sleep Disorders
```{r}
# Bar chart of the number of rows per Sleep Disorders value.
df %>%
  ggplot(mapping = aes(x = `Sleep Disorders`)) +
  geom_bar() +
  labs(
    title = "Sleep Disorders Distribution",
    x = "Sleep Disorders",
    y = "Count"
  )
```
### countplot for Medication Usage
```{r}
# Bar chart of the number of rows per Medication Usage value.
ggplot(data = df, mapping = aes(x = `Medication Usage`)) +
  geom_bar() +
  labs(
    title = "Medication Usage Distribution",
    x = "Medication Usage",
    y = "Count"
  )
```
## Bivariate Analysis {.tabset}
### Scatter Plot of Daily Steps vs. Calories Burned
```{r}
# Daily Steps (x) against Calories Burned (y): blue points plus a red
# least-squares line drawn without its confidence band.
df %>%
  ggplot(mapping = aes(x = `Daily Steps`, y = `Calories Burned`)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Scatter Plot of Daily Steps vs. Calories Burned",
    x = "Daily Steps",
    y = "Calories Burned"
  ) +
  theme_minimal()
```
### Scatter Plot of Age vs. Sleep Quality
```{r}
# Age (x) against Sleep Quality (y): red points plus a blue least-squares
# line drawn without its confidence band.
df %>%
  ggplot(mapping = aes(x = Age, y = `Sleep Quality`)) +
  geom_point(color = "red") +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Scatter Plot of Age vs. Sleep Quality",
    x = "Age",
    y = "Sleep Quality"
  ) +
  theme_minimal()
```
### Scatter Plot of Calories Burned vs. Sleep Quality
```{r}
# Calories Burned (x) against Sleep Quality (y): purple points plus an orange
# least-squares line drawn without its confidence band.
ggplot(data = df, mapping = aes(x = `Calories Burned`, y = `Sleep Quality`)) +
  geom_point(color = "purple") +
  geom_smooth(method = "lm", se = FALSE, color = "orange") +
  labs(
    title = "Scatter Plot of Calories Burned vs. Sleep Quality",
    x = "Calories Burned",
    y = "Sleep Quality"
  ) +
  theme_minimal()
```
### Scatter Plot of Daily Steps vs. Sleep Quality
```{r}
# Daily Steps (x) against Sleep Quality (y): green points plus a purple
# least-squares line drawn without its confidence band.
ggplot(data = df, mapping = aes(x = `Daily Steps`, y = `Sleep Quality`)) +
  geom_point(color = "green") +
  geom_smooth(method = "lm", se = FALSE, color = "purple") +
  labs(
    title = "Scatter Plot of Daily Steps vs. Sleep Quality",
    x = "Daily Steps",
    y = "Sleep Quality"
  ) +
  theme_minimal()
```
## Multivariate Analysis {.tabset}
### Heatmap of Daily Steps vs. Calories Burned
```{r}
# Count observations on a grid of 1000-step by 100-calorie bins. Each axis
# runs from 0 up to its maximum rounded up to a whole bin.
df_summary_steps_calories <- count(
  mutate(
    df,
    Daily_Steps_Bin = cut(
      `Daily Steps`,
      breaks = seq(0, ceiling(max(`Daily Steps`) / 1000) * 1000, by = 1000)
    ),
    Calories_Burned_Bin = cut(
      `Calories Burned`,
      breaks = seq(0, ceiling(max(`Calories Burned`) / 100) * 100, by = 100)
    )
  ),
  Daily_Steps_Bin,
  Calories_Burned_Bin
)

# One tile per occupied bin pair, shaded white -> blue by row count.
heatmap_steps_calories <- df_summary_steps_calories %>%
  ggplot(mapping = aes(x = Daily_Steps_Bin, y = Calories_Burned_Bin, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  theme_minimal() +
  labs(
    title = "Heatmap of Daily Steps vs. Calories Burned",
    x = "Daily Steps",
    y = "Calories Burned",
    fill = "Count"
  )
heatmap_steps_calories
```
### Heatmap of Daily Steps vs. Calories Burned
```{r}
# Count observations on a grid of 1000-step by 100-calorie bins.
# The top break of each axis is the maximum rounded UP to a whole bin:
# seq(0, max, by = width) stops at the last multiple at or below the maximum,
# which leaves the largest values outside every interval (cut() returns NA).
df_summary <- df %>%
  mutate(
    Daily_Steps_Bin = cut(
      `Daily Steps`,
      breaks = seq(
        0,
        ceiling(max(`Daily Steps`, na.rm = TRUE) / 1000) * 1000,
        by = 1000
      )
    ),
    Calories_Burned_Bin = cut(
      `Calories Burned`,
      breaks = seq(
        0,
        ceiling(max(`Calories Burned`, na.rm = TRUE) / 100) * 100,
        by = 100
      )
    )
  ) %>%
  count(Daily_Steps_Bin, Calories_Burned_Bin)

# One tile per occupied bin pair, shaded white -> blue by row count.
ggplot(df_summary, aes(x = Daily_Steps_Bin, y = Calories_Burned_Bin, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  theme_minimal() +
  labs(title = "Heatmap of Daily Steps vs. Calories Burned", x = "Daily Steps", y = "Calories Burned", fill = "Count")
```
### Heatmap of Age vs. Sleep Quality
```{r}
# Count observations per (age decade, sleep-quality interval) cell.
# cut() builds right-closed intervals (a, b], so the lowest break has to sit
# BELOW the smallest score. Starting the breaks at min() itself leaves every
# minimum-score row outside all intervals (NA). The breaks therefore start one
# unit under the (rounded-up) minimum and end at the rounded-up maximum.
df_summary_age_sleep <- df %>%
  mutate(
    Age_Bin = cut(
      Age,
      breaks = seq(0, ceiling(max(Age, na.rm = TRUE) / 10) * 10, by = 10)
    ),
    Sleep_Quality_Bin = cut(
      `Sleep Quality`,
      breaks = seq(
        ceiling(min(`Sleep Quality`, na.rm = TRUE)) - 1,
        ceiling(max(`Sleep Quality`, na.rm = TRUE)),
        by = 1
      )
    )
  ) %>%
  count(Age_Bin, Sleep_Quality_Bin)

# One tile per occupied bin pair, shaded white -> purple by row count.
heatmap_age_sleep <- ggplot(df_summary_age_sleep, aes(x = Age_Bin, y = Sleep_Quality_Bin, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "purple") +
  theme_minimal() +
  labs(title = "Heatmap of Age vs. Sleep Quality", x = "Age", y = "Sleep Quality", fill = "Count")
print(heatmap_age_sleep)
```
### Heatmap of Daily Steps vs. Sleep Quality
```{r}
# Count observations per (1000-step bin, sleep-quality interval) cell.
# cut() builds right-closed intervals (a, b], so the lowest break has to sit
# BELOW the smallest score. Starting the breaks at min() itself leaves every
# minimum-score row outside all intervals (NA). The breaks therefore start one
# unit under the (rounded-up) minimum and end at the rounded-up maximum.
df_summary_steps_sleep <- df %>%
  mutate(
    Daily_Steps_Bin = cut(
      `Daily Steps`,
      breaks = seq(
        0,
        ceiling(max(`Daily Steps`, na.rm = TRUE) / 1000) * 1000,
        by = 1000
      )
    ),
    Sleep_Quality_Bin = cut(
      `Sleep Quality`,
      breaks = seq(
        ceiling(min(`Sleep Quality`, na.rm = TRUE)) - 1,
        ceiling(max(`Sleep Quality`, na.rm = TRUE)),
        by = 1
      )
    )
  ) %>%
  count(Daily_Steps_Bin, Sleep_Quality_Bin)

# One tile per occupied bin pair, shaded white -> green by row count.
heatmap_steps_sleep <- ggplot(df_summary_steps_sleep, aes(x = Daily_Steps_Bin, y = Sleep_Quality_Bin, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "green") +
  theme_minimal() +
  labs(title = "Heatmap of Daily Steps vs. Sleep Quality", x = "Daily Steps", y = "Sleep Quality", fill = "Count")
print(heatmap_steps_sleep)
```
### Heatmap of Calories Burned vs. Sleep Quality
```{r}
# Count observations per (100-calorie bin, sleep-quality interval) cell.
# cut() builds right-closed intervals (a, b], so the lowest break has to sit
# BELOW the smallest score. Starting the breaks at min() itself leaves every
# minimum-score row outside all intervals (NA). The breaks therefore start one
# unit under the (rounded-up) minimum and end at the rounded-up maximum.
df_summary_calories_sleep <- df %>%
  mutate(
    Calories_Burned_Bin = cut(
      `Calories Burned`,
      breaks = seq(
        0,
        ceiling(max(`Calories Burned`, na.rm = TRUE) / 100) * 100,
        by = 100
      )
    ),
    Sleep_Quality_Bin = cut(
      `Sleep Quality`,
      breaks = seq(
        ceiling(min(`Sleep Quality`, na.rm = TRUE)) - 1,
        ceiling(max(`Sleep Quality`, na.rm = TRUE)),
        by = 1
      )
    )
  ) %>%
  count(Calories_Burned_Bin, Sleep_Quality_Bin)

# One tile per occupied bin pair, shaded white -> orange by row count.
heatmap_calories_sleep <- ggplot(df_summary_calories_sleep, aes(x = Calories_Burned_Bin, y = Sleep_Quality_Bin, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "orange") +
  theme_minimal() +
  labs(title = "Heatmap of Calories Burned vs. Sleep Quality", x = "Calories Burned", y = "Sleep Quality", fill = "Count")
print(heatmap_calories_sleep)
```
### Correlation Heatmap of Age, Sleep Quality, Daily Steps, and Calories Burned
```{r}
# Pairwise correlations between the four selected columns.
correlation_matrix <- df[, c("Age", "Sleep Quality", "Daily Steps", "Calories Burned")] %>%
  cor()
# Reshape the square matrix to long form (Var1, Var2, value) for plotting.
melted_correlation_matrix <- reshape2::melt(correlation_matrix)

# Tile per variable pair on a diverging blue-white-red scale centred at 0,
# with the coefficient printed to two decimals in each tile.
melted_correlation_matrix %>%
  ggplot(mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(mapping = aes(label = sprintf("%.2f", value)), vjust = 1) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  labs(title = "Correlation Matrix", x = "", y = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```