Importing Libraries

Reading in Data

Should include the data source here

# ---- Data preparation ------------------------------------------------------
# Objects created here and used by the graphs below:
#   df        survey data only (without the author's own observation)
#   df_all    df plus one extra row for the author (Person.ID == 375)
#   df_means  per-occupation means of every numeric column of df_all
#   df4       per-occupation average sleep, average activity and their sum
#   df_small  rows of df_all whose Sleep.Disorder is "Insomnia"
#   df_me     the author's single row of df_all

# NOTE(review): absolute, machine-specific path -- the script only runs on a
# machine where this file exists; consider a project-relative path.
df <- read.csv("/Users/samueldavenport/Documents/UVA/Fall 2023/STAT Data Viz/Datasets/Sleep_health_and_lifestyle_dataset.csv")

# Merge near-duplicate category labels, then drop two occupations.
# NOTE(review): presumably Manager/Scientist are removed because they have too
# few observations -- confirm and record the reason here.
df <- df %>%
  mutate(
    Occupation = str_replace(Occupation, "Software Engineer", "Engineer"),
    Occupation = str_replace(Occupation, "Sales Representative", "Salesperson"),
    BMI.Category = str_replace(BMI.Category, "Normal Weight", "Normal")
  ) %>%
  filter(Occupation != "Manager", Occupation != "Scientist")

# df_all: survey data plus the author's own observation (Person.ID == 375)
df_all <- df %>%
  add_row(
    Person.ID = 375, Gender = "Male", Age = 22, Occupation = "Student",
    Sleep.Duration = 7, Quality.of.Sleep = 6, Physical.Activity.Level = 50,
    Stress.Level = 8, BMI.Category = "Normal", Blood.Pressure = "122/70",
    Heart.Rate = 70, Daily.Steps = 8200, Sleep.Disorder = "Insomnia"
  )

# df_means: mean of every numeric column per occupation.
# across(where(...)) replaces the superseded summarize_if().
df_means <- df_all %>%
  group_by(Occupation) %>%
  summarise(across(where(is.numeric), mean))

# df4: average sleep, average physical activity and their total per occupation.
# Physical.Activity.Level is divided by 60 so both averages share a unit
# (presumably minutes -> hours; confirm against the data dictionary).
df4 <- df_all %>%
  group_by(Occupation) %>%
  summarise(
    avg_sleep = mean(Sleep.Duration),
    avg_physical = mean(Physical.Activity.Level) / 60,
    Total = avg_sleep + avg_physical
  )

# df_small: people with insomnia.
# filter() drops rows where the condition is NA, whereas `[` with a logical
# index would return an all-NA row for each of them.
df_small <- df_all %>% filter(Sleep.Disorder == "Insomnia")

# df_me: just the author's row
df_me <- df_all %>% filter(Person.ID == 375)

Creating My Default Theme and Color Scheme

Graph 1

Interactive Scatterplots

Average Sleep Statistics vs. Average Exercise Statistics by Occupation

Large Population, Bivariate

# Columns of df_means that can be placed on either axis of the scatterplot.
choices <- c(
  "Quality.of.Sleep",
  "Sleep.Duration",
  "Physical.Activity.Level",
  "Daily.Steps"
)

# Page layout: two drop-downs in the sidebar (one per axis) and the plot in
# the main panel. The input/output ids are the ones `server` reads and writes.
ui <- fluidPage(
  titlePanel(title = "Interactive Scatterplots"),
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      selectInput(
        inputId = "variable_1",
        label = "Select a Variable",
        choices = choices
      ),
      selectInput(
        inputId = "variable_2",
        label = "Select a Variable",
        choices = choices
      )
    ),
    mainPanel = mainPanel(
      plotOutput(outputId = "plot")
    )
  )
)

# TODO: make the theme more consistent with the other graphs
# Shiny server function for the interactive scatterplot.
#
# Reads input$variable_1 (x axis) and input$variable_2 (y axis), both column
# names of `df_means`, and writes the resulting ggplot to output$plot.
server <- function(input, output) {
  output$plot <- renderPlot({
    x_var <- input$variable_1
    y_var <- input$variable_2

    # The inputs are column names held in strings, so look them up with the
    # `.data` pronoun; aes_string() is deprecated in ggplot2.
    ggplot(df_means, aes(x = .data[[x_var]], y = .data[[y_var]])) +
      # alpha and size are constants, so they are set outside aes(). Inside
      # aes() they were treated as data and passed through the alpha/size
      # scales, which also created legends that then had to be hidden.
      geom_point(aes(color = Occupation), alpha = 0.7, size = 4) +
      my_theme +
      labs(title = paste(x_var, "vs.", y_var),
           x = x_var,
           y = y_var,
           caption = ">> Engineers have better sleep habits than other occupations
>> However, engineers are not as active as other occupations
>> BIG QUESTION: Is Occupation a consistent factor in excercise and sleep habits?")
  })
}

# Launch the app from the layout and server logic defined above
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents

Graph 2

Boxplot on Sleep.Duration

Large Population, Univariate

# Boxplot of sleep duration per occupation (survey data only), with two
# dashed reference lines: the author's own sleep duration (dark red) and the
# maximum sleep duration in df_all (navy).
# Occupations are ordered by reorder()'s default summary (the mean) of
# -Sleep.Duration, i.e. by decreasing mean sleep duration.
g2 <- df %>%
  ggplot(aes(x = reorder(Occupation, -Sleep.Duration), y = Sleep.Duration,
             fill = Occupation)) +
  geom_boxplot() +
  # The intercepts are constants, so they are passed as parameters rather than
  # through aes(): this draws one line each instead of one per row of data.
  geom_hline(yintercept = df_me$Sleep.Duration,
             color = "darkred", linetype = "dashed") +
  geom_hline(yintercept = max(df_all$Sleep.Duration),
             color = "navy", linetype = "dashed") +
  # No colour or linetype aesthetic is mapped, so the former
  # guides(color = FALSE, linetype = FALSE) call had nothing to hide and only
  # raised a deprecation warning; legend.position = "none" hides the fill key.
  my_theme + theme(legend.position = "none") +
  labs(title = "Boxplot of Sleep Duration by Occupation",
       subtitle = "Engineers have the largest median sleep duration!",
       x = "Occupation",
       y = "Sleep Duration",
       caption = ">> The navy line represents the max sleep duration in the dataset
>> The red line represents MY sleep duration")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the boxplot
g2

Graph 3

Bar Graph on Quality of Sleep by Occupation

Large Population, Univariate

# Bar chart of the mean Quality.of.Sleep per occupation (from df_means),
# bars ordered from highest to lowest and labelled with the rounded value.
g3 <- df_means %>%
  ggplot(aes(x = reorder(Occupation, -Quality.of.Sleep),
             y = Quality.of.Sleep)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(aes(fill = Occupation)) +
  geom_text(aes(label = round(Quality.of.Sleep, 2)), vjust = -0.3, size = 3.5) +
  # The former scale_color_discrete(palette = "viridis") was removed: no
  # colour aesthetic is mapped here (only fill), so it could not affect the
  # plot, and `palette` is not a documented argument of that scale.
  # NOTE(review): if a viridis fill was intended, add scale_fill_viridis_d().
  my_theme +
  labs(title = "Bar Graph of Average QUALITY of Sleep by Occupation",
       subtitle = "Engineers also have the Best Quality of Sleep!",
       x = "Occupation",
       y = "Average Quality of Sleep") +
  theme(legend.position = "none")

# Display the plot
g3

Graph 4

Scatterplot of average sleep vs. exercise for occupations

Large Population, Bivariate

# Flag the occupation(s) whose combined average sleep + exercise is the
# largest; the flag drives the point and label colours below.
df4$max_Total <- ifelse(df4$Total == max(df4$Total), "highest", "not highest")

# Scatterplot of average sleep vs. average exercise, one labelled point per
# occupation, coloured by the max_Total flag.
g4 <- df4 %>%
  ggplot(aes(x = avg_sleep, y = avg_physical)) +
  geom_point(size = 3, aes(color = max_Total)) +
  geom_text_repel(aes(label = Occupation, color = max_Total),
                  box.padding = 0.5, point.padding = 0.2, size = 3) +
  # `labels` are matched to the levels in sorted order
  # ("highest", then "not highest"). The legend title is set here only: the
  # former labs(color = ...) entry was overridden by this `name`.
  scale_color_manual(name = "Most Total\nSleep and Exercise",
                     labels = c("Highest", "Not the highest"),
                     values = c("highest" = "navy", "not highest" = "darkred")) +
  # size is a constant, not a mapped aesthetic, so there is no size legend to
  # remove and the former guides(size = "none") call was dropped.
  my_theme +
  labs(
    title = "Average Sleep vs. Average Exercise by Occupation",
    subtitle = "Engineers Sleep and Workout the MOST!",
    x = "Average Sleep Hours Per Day",
    y = "Average Physical Exercise Hours Per Day"
  )

## Display the plot
g4

Graph 5

Treemap of People with Insomnia by Occupation

Small Population, Univariate

# Count the people with insomnia (rows of df_small) per occupation
insomnia_count <- df_small %>%
  group_by(Occupation) %>%
  summarise(num = n())

# Treemap of people with insomnia by occupation: tile area is the count and
# each tile is labelled with the occupation and its count.
g5 <- insomnia_count %>%
  ggplot(aes(area = num, fill = Occupation,
             label = paste(Occupation, num, sep = "\n"))) +
  geom_treemap() +
  geom_treemap_text(color = "white", place = "center", size = 15) +
  my_theme +
  theme(legend.position = "none",
        panel.border = element_rect(color = "white")) +
  # The former scale_color_discrete(palette = "viridis") was removed: only
  # `fill` is mapped, so a colour scale could not affect the plot, and
  # `palette` is not a documented argument of that scale.
  # NOTE(review): if a viridis fill was intended, add scale_fill_viridis_d().
  ggtitle("Number of People with Insomnia by Occupation") +
  # NOTE(review): the caption hard-codes a count ("6") -- confirm it still
  # matches insomnia_count whenever the data changes.
  labs(caption = ">> Only 6 engineers have insomnia
>> Engineers sleep and eat the most... but it is also important to look at sleeping disorders")

# Display the treemap
g5

### ADDITIONS
# Add text to the text or something 

Graph 6

Plotly Heat Map

Small Population, Bivariate

#### Interactive heat map (plotly) ####
# Small population: people with insomnia only (df_small).
# A hexagonal-bin heat map of sleep duration against sleep quality is built
# with ggplot2 and converted straight to a plotly widget with ggplotly().
g6 <- ggplotly(
  ggplot(data = df_small,
         mapping = aes(x = Sleep.Duration, y = Quality.of.Sleep)) +
    geom_hex(bins = 5) +
    scale_fill_gradient(low = "red", high = "navy") +
    my_theme +
    labs(
      title = "Heat Map of Sleep Duration and Quality of Sleep",
      subtitle = "This plot only includes people with insomnia",
      x = "Sleep Duration per night (hours)",
      y = "Quality of Sleep (1-10)",
      caption = "The majority of people with insomnia have \nlow sleep duration and poor sleep quality"
    )
)

# Display the widget
g6
### ADDITIONS
# Subtitle and/or caption

Graph 7

Violin Plot Sleep Disorder and Sleeping Quality

Large and Small Population, Bivariate

######### Patchwork Violin Plots ##########

# Build one violin plot of a df_all column split by Sleep.Disorder.
#
# y_var   - name (string) of the df_all column to put on the y axis
# y_label - y-axis label
#
# Returns a ggplot object. The four panels below differ only in these two
# values, so they share this helper instead of four copies of the same code.
# The scale_color_discrete(palette = "viridis") call that each copy carried is
# not reproduced: only `fill` is mapped, so a colour scale could not affect
# the plot, and `palette` is not a documented argument of that scale.
# NOTE(review): if a viridis fill was intended, add scale_fill_viridis_d().
make_disorder_violin <- function(y_var, y_label) {
  ggplot(df_all, aes(x = Sleep.Disorder, y = .data[[y_var]])) +
    geom_violin(aes(fill = Sleep.Disorder)) +
    labs(x = "Sleep Disorder",
         y = y_label) +
    my_theme +
    # Smaller axis text so four panels fit side by side in one figure
    theme(legend.position = "none",
          axis.title = element_text(size = 8),
          axis.text = element_text(size = 6))
}

# Number 1: Quality of Sleep
patch1 <- make_disorder_violin("Quality.of.Sleep", "Quality of Sleep (1-10)")

# Number 2: Sleep Duration
patch2 <- make_disorder_violin("Sleep.Duration", "Sleep Duration (Hours)")

# Number 3: Daily Steps
patch3 <- make_disorder_violin("Daily.Steps", "Steps per Day")

# Number 4: Physical Activity
patch4 <- make_disorder_violin("Physical.Activity.Level",
                               "Exercise per Day (Mins)")

# Create the patchwork
patch <- patch1 + patch2 + patch3 + patch4

# Shared title/subtitle; tag_levels = "A" tags the panels A, B, C, D
g7 <- patch + plot_annotation(
  title = "Violin Plots on Sleep Disorders vs. Exercise & Sleep",
  subtitle = "People with insomnia sleep and exercise LESS compared to those without insomnia",
  tag_levels = "A"
)

# Display the combined figure
g7

### ADDITIONS
# Need to use a consistent theme here
# Can Add the legend, and should also center title and subtitle

Graph 8

Animated Plot

Small Population, Bivariate, Animated

##### Animated Plot ######
## Rotating 3D scatterplot of sleep duration, quality of sleep and physical
## activity level: one point per occupation, people with insomnia only.

# Per-occupation means of every numeric column of df_small.
# across(where(...)) replaces the superseded summarize_if().
df_small_means <- df_small %>%
  group_by(Occupation) %>%
  summarise(across(where(is.numeric), mean))

g8 <- plot_ly(
  type = "scatter3d", mode = "markers",
  data = df_small_means,
  x = ~ Sleep.Duration, y = ~ Quality.of.Sleep, z = ~ Physical.Activity.Level,
  color = ~Occupation,
  marker = list(size = 6)
) %>%
  layout(
    title = "3D Scatterplot of People with Insomnia",
    annotations = list(
      list(
        # Positioned in "paper" coordinates: horizontally centred, at the top
        x = 0.5,
        y = 1,
        xref = "paper",
        yref = "paper",
        # plotly text breaks lines on <br>; a raw newline in the string is
        # not rendered as a line break.
        text = "Average values by Occupation<br>Engineers sleep habits don't stand out as much anymore!",
        showarrow = FALSE
      )
    ),
    # Initial camera position; the script below rotates `eye` around the z axis
    scene = list(camera = list(
      eye = list(x = 1.25, y = 1.25, z = 1.25),
      center = list(x = 0, y = 0, z = 0)
    ))
  ) %>%
  # Client-side animation: once the widget has rendered, advance the camera
  # angle by PI / 400 radians on every animation frame. The eye position is
  # converted to cylindrical coordinates (r, t, z), t is incremented, and the
  # result is converted back, so radius and height stay fixed.
  onRender("
function(el, x) {
  var id = el.getAttribute('id');
  var gd = document.getElementById(id);
  Plotly.update(id).then(attach);

  function attach() {
    function run() {
      rotate('scene', Math.PI / 400);
      requestAnimationFrame(run);
    }
    run();

    function rotate(id, angle) {
      var eye0 = gd.layout[id].camera.eye
      var rtz = xyz2rtz(eye0);
      rtz.t += angle;

      var eye1 = rtz2xyz(rtz);
      Plotly.relayout(gd, id + '.camera.eye', eye1)
    }

    function xyz2rtz(xyz) {
      return {
        r: Math.sqrt(xyz.x * xyz.x + xyz.y * xyz.y),
        t: Math.atan2(xyz.y, xyz.x),
        z: xyz.z
      };
    }

    function rtz2xyz(rtz) {
      return {
        x: rtz.r * Math.cos(rtz.t),
        y: rtz.r * Math.sin(rtz.t),
        z: rtz.z
      };
    }
  };
}
")

# Display the widget
g8