Should include the data source here
# Data preparation ----
# Builds every data frame used by the plots in this report:
#   df        cleaned survey data, WITHOUT my own observation
#   df_all    df plus one extra row describing me (Person.ID == 375)
#   df_means  per-occupation mean of every numeric column of df_all
#   df4       per-occupation average sleep, average activity / 60, and their sum
#   df_small  rows of df_all whose Sleep.Disorder is "Insomnia"
#   df_me     the row of df_all with Person.ID == 375

# NOTE(review): this absolute path only resolves on the author's machine;
# consider a project-relative path before sharing the report.
df <- read.csv("/Users/samueldavenport/Documents/UVA/Fall 2023/STAT Data Viz/Datasets/Sleep_health_and_lifestyle_dataset.csv")

# Filter Dataset: shorten a few category labels, then drop two occupations.
# The two Occupation replacements run in order within the same mutate().
df <- df %>%
  mutate(
    Occupation = str_replace(Occupation, "Software Engineer", "Engineer"),
    Occupation = str_replace(Occupation, "Sales Representative", "Salesperson"),
    BMI.Category = str_replace(BMI.Category, "Normal Weight", "Normal")
  ) %>%
  filter(Occupation != "Manager", Occupation != "Scientist")

# df_all: data with my data (add observation for me, Person.ID == 375)
df_all <- df %>%
  add_row(
    Person.ID = 375, Gender = "Male", Age = 22, Occupation = "Student",
    Sleep.Duration = 7, Quality.of.Sleep = 6, Physical.Activity.Level = 50,
    Stress.Level = 8, BMI.Category = "Normal", Blood.Pressure = "122/70",
    Heart.Rate = 70, Daily.Steps = 8200, Sleep.Disorder = "Insomnia"
  )

# df_means: data grouped by occupation and numeric means.
# across(where(...)) replaces the superseded summarize_if().
df_means <- df_all %>%
  group_by(Occupation) %>%
  summarize(across(where(is.numeric), mean))

# df4: data of sleep and physical and total.
# Physical.Activity.Level is divided by 60 so it can be added to the sleep
# figure (the plots label the raw value in minutes and this one in hours).
df4 <- df_all %>%
  group_by(Occupation) %>%
  summarize(
    avg_sleep = mean(Sleep.Duration),
    avg_physical = mean(Physical.Activity.Level) / 60,
    Total = avg_sleep + avg_physical
  )

# df_small: Data Frame of people with Insomnia.
# filter() drops rows where the condition is NA, whereas base `[cond, ]`
# subsetting would return an all-NA row for each of them.
df_small <- df_all %>% filter(Sleep.Disorder == "Insomnia")

# df_me: Data Frame of just me
df_me <- df_all %>% filter(Person.ID == 375)
Large Population, Bivariate
# Columns of df_means that can be chosen for either axis of the scatterplot.
choices <- c("Quality.of.Sleep", "Sleep.Duration", "Physical.Activity.Level",
             "Daily.Steps")

# UI: two dropdowns (x and y variable) in the sidebar, the plot in the main
# panel.
ui <- fluidPage(
  titlePanel("Interactive Scatterplots"),
  sidebarLayout(
    sidebarPanel(
      selectInput("variable_1", "Select a Variable", choices = choices),
      selectInput("variable_2", "Select a Variable", choices = choices)
    ),
    mainPanel(
      plotOutput("plot")
    )
  )
)

# TODO: make the theme more consistent with the other plots.
# Server: redraws the scatterplot of per-occupation means whenever either
# dropdown changes.
server <- function(input, output) {
  output$plot <- renderPlot({
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    ggplot(df_means, aes(x = .data[[input$variable_1]],
                         y = .data[[input$variable_2]])) +
      # alpha and size are fixed values, so they belong outside aes(). Inside
      # aes() they were treated as data, rescaled, and needed their legends
      # hidden afterwards.
      geom_point(aes(color = Occupation), alpha = 0.7, size = 4) +
      my_theme +
      labs(title = paste(input$variable_1, "vs.",
                         input$variable_2),
           x = input$variable_1,
           y = input$variable_2,
           caption = ">> Engineers have better sleep habits than other occupations
>> However, engineers are not as active as other occupations
>> BIG QUESTION: Is Occupation a consistent factor in excercise and sleep habits?")
  })
}

# Run the Shiny App
shinyApp(ui, server)
Large Population, Univariate
# Boxplot of sleep duration per occupation. Uses df, i.e. the data without my
# own row, and adds two dashed reference lines: my sleep duration (dark red)
# and the maximum sleep duration in df_all (navy).
# NOTE(review): reorder() orders by the mean of -Sleep.Duration (its default
# summary function), while the subtitle talks about the median - confirm the
# two agree for this data.
g2 <- df %>%
  ggplot(aes(x = reorder(Occupation, -Sleep.Duration), y = Sleep.Duration,
             fill = Occupation)) +
  geom_boxplot() +
  # The intercepts are constants taken from other data frames, so they are
  # passed as parameters rather than mapped through aes().
  geom_hline(yintercept = df_me$Sleep.Duration, color = "darkred",
             linetype = "dashed") +
  geom_hline(yintercept = max(df_all$Sleep.Duration), color = "navy",
             linetype = "dashed") +
  my_theme + theme(legend.position = "none") +
  labs(title = "Boxplot of Sleep Duration by Occupation",
       subtitle = "Engineers have the largest median sleep duration!",
       x = "Occupation",
       y = "Sleep Duration",
       caption = ">> The navy line represents the max sleep duration in the dataset
>> The red line represents MY sleep duration") +
  # guides(<scale> = FALSE) is deprecated as of ggplot2 3.3.4; "none" is the
  # supported spelling and avoids the lifecycle warning.
  guides(color = "none", linetype = "none")
g2
Large Population, Univariate
# Bar chart of the average Quality.of.Sleep per occupation (from df_means),
# bars sorted from highest to lowest, each labelled with its value rounded to
# two decimals.
g3 <- df_means %>%
  ggplot(aes(x = reorder(Occupation, -Quality.of.Sleep), y = Quality.of.Sleep)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(aes(fill = Occupation)) +
  geom_text(aes(label = round(Quality.of.Sleep, 2)), vjust = -0.3, size = 3.5) +
  # The bars are coloured through `fill`, so the viridis palette has to be set
  # on the fill scale; the previous colour scale never affected the plot.
  my_theme + scale_fill_viridis_d() +
  labs(title = "Bar Graph of Average QUALITY of Sleep by Occupation",
       subtitle = "Engineers also have the Best Quality of Sleep!",
       x = "Occupation",
       y = "Average Quality of Sleep") +
  theme(legend.position = "none")
g3
Large Population, Bivariate
# Flag the occupation(s) whose combined sleep + exercise total is the largest.
df4 <- df4 %>%
  mutate(max_Total = ifelse(Total == max(Total), "highest", "not highest"))

# Scatterplot of average sleep against average exercise, one labelled point
# per occupation; points and labels are coloured by the max_Total flag.
g4 <- ggplot(df4, aes(x = avg_sleep, y = avg_physical)) +
  geom_point(aes(color = max_Total), stat = "identity", size = 3) +
  geom_text_repel(
    aes(label = Occupation, color = max_Total),
    box.padding = 0.5,
    point.padding = 0.2,
    size = 3
  ) +
  scale_color_manual(
    name = "Most Total\nSleep and Exercise",
    labels = c("Highest", "Not the highest"),
    values = c("highest" = "navy", "not highest" = "darkred")
  ) +
  # No size legend
  guides(size = "none") +
  # Shared report theme
  my_theme +
  labs(
    title = "Average Sleep vs. Average Exercise by Occupation",
    subtitle = "Engineers Sleep and Workout the MOST!",
    x = "Average Sleep Hours Per Day",
    y = "Average Physical Exercise Hours Per Day",
    color = "Total Sleep and\nExercise Per Day"
  )

# Show the plot
g4
Small Population, Univariate
# Number of people with insomnia per occupation (df_small holds only the
# insomnia rows).
insomnia_count <- summarize(group_by(df_small, Occupation), num = n())

# Treemap: one tile per occupation, tile area = number of people, each tile
# labelled with the occupation name and its count on separate lines.
# NOTE(review): scale_color_discrete() acts on the colour aesthetic, but the
# tiles are coloured through `fill` - it does not look like this scale changes
# the plot; confirm whether a fill scale was intended.
g5 <- ggplot(
  insomnia_count,
  aes(area = num, fill = Occupation, label = paste(Occupation, num, sep = "\n"))
) +
  geom_treemap() +
  geom_treemap_text(color = "white", place = "center", size = 15) +
  my_theme +
  theme(legend.position = "none", panel.border = element_rect(color = "white")) +
  scale_color_discrete(palette = "viridis") +
  ggtitle("Number of People with Insomnia by Occupation") +
  labs(caption = ">> Only 6 engineers have insomnia
>> Engineers sleep and eat the most... but it is also important to look at sleeping disorders")
g5
### ADDITIONS
# Add text to the text or something
Small Population, Bivariate
# Interactive heat map (small population: people with insomnia only).
# A hex-binned ggplot of Sleep.Duration against Quality.of.Sleep is built from
# df_small and handed straight to ggplotly(), so g6 ends up as the plotly
# version of the chart.
g6 <- ggplotly(
  ggplot(df_small, aes(x = Sleep.Duration, y = Quality.of.Sleep)) +
    geom_hex(bins = 5) +
    scale_fill_gradient(low = "red", high = "navy") +
    my_theme +
    labs(
      title = "Heat Map of Sleep Duration and Quality of Sleep",
      caption = "The majority of people with insomnia have
low sleep duration and poor sleep quality",
      x = "Sleep Duration per night (hours)",
      y = "Quality of Sleep (1-10)",
      subtitle = "This plot only includes people with insomnia"
    )
)
g6
### ADDITIONS
# Subtitle and/or caption
Large and Small Population, Bivariate
######### Patchwork Violin Plots ##########

# Builds one violin panel from df_all: the distribution of one column, split
# by Sleep.Disorder.
#   y_col    name of the df_all column to plot on the y axis (a string)
#   y_label  y-axis title
# Returns a ggplot object. The four panels below differ only in these two
# values, so they share this helper instead of four copies of the same code.
violin_by_disorder <- function(y_col, y_label) {
  ggplot(df_all, aes(x = Sleep.Disorder, y = .data[[y_col]])) +
    geom_violin(aes(fill = Sleep.Disorder)) +
    # The violins are coloured through `fill`, so the viridis palette is set on
    # the fill scale; the previous colour scale never affected the plot.
    scale_fill_viridis_d() +
    labs(x = "Sleep Disorder",
         y = y_label) +
    my_theme +
    # Smaller axis text so four panels fit side by side.
    theme(legend.position = "none",
          axis.title = element_text(size = 8),
          axis.text = element_text(size = 6))
}

# Number 1: Quality of Sleep
patch1 <- violin_by_disorder("Quality.of.Sleep", "Quality of Sleep (1-10)")
# Number 2: Sleep Duration
patch2 <- violin_by_disorder("Sleep.Duration", "Sleep Duration (Hours)")
# Number 3: Daily Steps
patch3 <- violin_by_disorder("Daily.Steps", "Steps per Day")
# Number 4: Physical Activity
patch4 <- violin_by_disorder("Physical.Activity.Level", "Exercise per Day (Mins)")

# Create the patchwork: combine the four panels and tag them A-D.
patch <- patch1 + patch2 + patch3 + patch4
g7 <- patch + plot_annotation(
  title = "Violin Plots on Sleep Disorders vs. Exercise & Sleep",
  subtitle = "People with insomnia sleep and exercise LESS compared to those without insomnia",
  tag_levels = "A"
)
g7
### ADDITIONS
# Need to use a consistent theme here
# Can add the legend, and should also center the title and subtitle
Small Population, Bivariate, Animated
##### Animated Plot ######
## 3D scatterplot of sleep duration, quality of sleep, and physical activity
## level for people with insomnia, one point per occupation.

# Per-occupation mean of every numeric column of the insomnia subset.
# across(where(...)) replaces the superseded summarize_if().
df_small_means <- df_small %>%
  group_by(Occupation) %>%
  summarize(across(where(is.numeric), mean))

g8 <- plot_ly(
  type = "scatter3d", mode = "markers",
  data = df_small_means,
  x = ~Sleep.Duration, y = ~Quality.of.Sleep, z = ~Physical.Activity.Level,
  color = ~Occupation,
  marker = list(size = 6)
) %>%
  layout(
    title = "3D Scatterplot of People with Insomnia",
    annotations = list(
      list(
        x = 0.5,
        y = 1,
        xref = "paper",
        yref = "paper",
        # plotly breaks lines on <br>, not on a newline character, so the two
        # sentences are joined with <br> to appear on separate lines.
        text = "Average values by Occupation<br>Engineers sleep habits don't stand out as much anymore!",
        showarrow = FALSE
      )
    ),
    # Starting camera position; the script below rotates it from here.
    scene = list(camera = list(
      eye = list(x = 1.25, y = 1.25, z = 1.25),
      center = list(x = 0, y = 0, z = 0)
    ))
  ) %>%
  # JavaScript run once the widget is rendered: on every animation frame it
  # turns the camera eye by PI/400 radians around the z axis (converting
  # x/y/z to radius/angle/z and back), which makes the plot spin continuously.
  onRender("
function(el, x){
var id = el.getAttribute('id');
var gd = document.getElementById(id);
Plotly.update(id).then(attach);
function attach() {
var cnt = 0;
function run() {
rotate('scene', Math.PI / 400);
requestAnimationFrame(run);
}
run();
function rotate(id, angle) {
var eye0 = gd.layout[id].camera.eye
var rtz = xyz2rtz(eye0);
rtz.t += angle;
var eye1 = rtz2xyz(rtz);
Plotly.relayout(gd, id + '.camera.eye', eye1)
}
function xyz2rtz(xyz) {
return {
r: Math.sqrt(xyz.x * xyz.x + xyz.y * xyz.y),
t: Math.atan2(xyz.y, xyz.x),
z: xyz.z
};
}
function rtz2xyz(rtz) {
return {
x: rtz.r * Math.cos(rtz.t),
y: rtz.r * Math.sin(rtz.t),
z: rtz.z
};
}
};
}
")
g8