This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Load the required libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(googlesheets4)
library(plotly) # For interactive plot
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Import the survey responses from the shared Google Sheet.
# The sheet is identified by its full browser URL, passed as `ss`.
url <- "https://docs.google.com/spreadsheets/d/1zxn9liBPwH_Sm64A78-jzyVzVTCzggw1mJLldSrwNVE/edit#gid=1655556458"
SURVEY <- read_sheet(ss = url)
## ! Using an auto-discovered, cached token.
##   To suppress this message, modify your code or options to clearly consent to
##   the use of a cached token.
##   See gargle's "Non-interactive auth" vignette for more details:
##   <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googlesheets4 package is using a cached token for
##   'jvaugh30@vols.utk.edu'.
## ✔ Reading from "Normal Distribution Survey (Responses)".
## ✔ Range 'Form Responses 1'.
# Short, code-friendly replacements for the sheet's column headers.
# They are applied by position, so the order here has to match the order of
# the columns in SURVEY.
NAMES <- c(
  "Time",
  "Sleep",
  "Teeth",
  "WeeklyFastFood",
  "PhoneHours",
  "TimesAbroad",
  "DailyCupsWater",
  "MinMusic23",
  "GPA",
  "OneMinuteMile",
  "StatesTravlled",
  "WeeklySoda",
  "DailySoda"
)

# Swap the original headers for the short names
SURVEY <- setNames(SURVEY, NAMES)

# Descriptive statistics for the MinMusic23 column ----

# summary() overview of the column
min_music_summary <- summary(SURVEY[["MinMusic23"]])

# Spread: standard deviation, ignoring missing responses
sd_min_music <- sd(SURVEY[["MinMusic23"]], na.rm = TRUE)

# Centre: mean and median, ignoring missing responses
mean_min_music <- mean(SURVEY[["MinMusic23"]], na.rm = TRUE)
median_min_music <- median(SURVEY[["MinMusic23"]], na.rm = TRUE)

# Cumulative distribution of MinMusic23 ----

# Add each row's cumulative percentage (0-100): the empirical CDF is built
# from MinMusic23 and then evaluated at those same observed values.
cumulative_data <- SURVEY %>%
  mutate(cumulative_percentage = ecdf(MinMusic23)(MinMusic23) * 100)

# Static ggplot:
#   - blue line: the cumulative distribution
#   - grey ribbon: area under the curve for rows at or above the median
#   - red dashed line: the 50% level
# dplyr::filter is spelled out because filter() is masked several times by
# the attached packages (stats, dplyr, plotly).
p <- ggplot(
  data = cumulative_data,
  aes(x = MinMusic23, y = cumulative_percentage)
) +
  geom_line(color = "blue") +
  geom_ribbon(
    data = cumulative_data %>%
      dplyr::filter(MinMusic23 >= median_min_music),
    aes(ymax = cumulative_percentage, ymin = 0),
    fill = "grey",
    alpha = 0.3
  ) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  labs(
    title = "Cumulative Distribution of Minutes of Music Listened to in 2023",
    x = "Minutes of Music Listened to in 2023",
    y = "Cumulative Percentage"
  ) +
  theme_minimal()

# Convert the static plot to an interactive plotly widget
gp <- ggplotly(p)

# Show the widget by auto-printing it. An explicit print() call goes through
# print.htmlwidget() rather than knitr's knit_print(), so the widget would
# not be embedded in the rendered notebook output.
gp