Original graph from Homework 7 problem 4:

library(dplyr)
library(ggplot2)
library(tidyverse)

exped_clean = read_csv("exped_clean.csv")
peaks_clean = read_csv("peaks_clean.csv")

himalayas_joined = exped_clean %>%
  left_join(peaks_clean, by = "PEAKID") 
himalayas_joined |> 
  ggplot(aes(x = HEIGHTM, y = TOTDAYS, color = SEASON_FACTOR)) +
  geom_point(size=1) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  scale_color_manual(values = c("purple4", "darkgoldenrod2", "skyblue2", "seagreen4")) +
  theme_minimal() +
  guides(color = guide_legend(override.aes = list(linetype = "solid", shape = NA)))

library(tidyverse)
library(plotly)
library(dplyr)
library(stringr)

exped_clean <- read_csv("exped_clean.csv")
peaks_clean <- read_csv("peaks_clean.csv")

himalayas_joined <- exped_clean %>%
  left_join(peaks_clean, by = "PEAKID")

season_colors <- c(
  "Autumn" = "blue",
  "Spring" = "green",
  "Summer" = "red",
  "Winter" = "purple"
)

p <- plot_ly(width = 800, height = 600) %>%
  layout(
    title = list(
      text = "Length of Expedition vs Height of Himalayan Peak",
      y = 0.95   # Move title down a bit
    ),
    xaxis = list(title = "Height (meters)"),
    yaxis = list(title = "Total Days"),
    legend = list(title = list(text = "Season")),
    margin = list(t = 80)  # Adds extra space at top to prevent overlap
  )

seasons <- sort(unique(himalayas_joined$SEASON_FACTOR, na.rm=TRUE))

trace_indexes <- list()
current_index <- 0

for (season in seasons) {
  season_data <- himalayas_joined %>%
    filter(SEASON_FACTOR == season, !is.na(HEIGHTM), !is.na(TOTDAYS))

  start_index <- current_index + 1

  p <- p %>% add_trace(
    data = season_data,
    x = ~HEIGHTM,
    y = ~TOTDAYS,
    type = "scatter",
    mode = "markers",
    name = season,
    marker = list(size = 5, color = season_colors[[season]]),  # <--- color
    hoverinfo = "text",
    text = ~paste(
      "Height:", HEIGHTM, "m<br>",
      "Duration:", TOTDAYS, "days<br>",
      "Season:", SEASON_FACTOR
    )
  )
  current_index <- current_index + 1

  model <- lm(TOTDAYS ~ HEIGHTM, data = season_data)
  coefs <- coef(model)
  x_range <- range(season_data$HEIGHTM, na.rm = TRUE)
  x_seq <- seq(from = x_range[1], to = x_range[2], length.out = 100)
  y_seq <- coefs[1] + coefs[2]*x_seq

  slope <- round(coefs[2], 4)
  intercept <- round(coefs[1], 1)
  equation <- str_glue("{season}: y = {slope}x + {intercept}")

  p <- p %>% add_trace(
    x = x_seq,
    y = y_seq,
    type = "scatter",
    mode = "lines",
    name = paste(season, "Trend"),
    line = list(width = 2, color = season_colors[[season]]),  # <--- color
    hoverinfo = "text",
    hovertext = rep(equation, length(x_seq)),
    showlegend = FALSE
  )
  current_index <- current_index + 1

  trace_indexes[[season]] <- start_index:current_index
}

total_traces <- current_index

all_visible <- rep(TRUE, total_traces)
visibility_by_season <- lapply(seasons, function(s) {
  vis <- rep(FALSE, total_traces)
  vis[trace_indexes[[s]]] <- TRUE
  vis
})

button_list <- list(
  list(
    method = "update",
    args = list(
      list(visible = all_visible),
      list(title = "Length of Expedition vs Height of Himalayan Peak")
    ),
    label = "All Seasons"
  )
)

for (i in seq_along(seasons)) {
  season <- seasons[i]
  button_list[[i+1]] <- list(
    method = "update",
    args = list(
      list(visible = visibility_by_season[[i]]),
      list(title = paste("Length vs Height:", season))
    ),
    label = season
  )
}

p <- p %>%
  layout(
    updatemenus = list(
      list(
        type = "buttons",
        direction = "right",
        x = 0.1,
        y = 1.08,
        buttons = button_list
      )
    )
  )

p

I added tooltips for both the points and regression lines to make it easier to check the exact data for a specific expedition. The tooltips on the lines are especially useful, as users can see the exact equation of the trend rather than estimating it visually. To reduce clutter, I also added buttons that let users filter by season, displaying only one season’s points and trend line at a time. This makes it much easier to analyze a specific season’s trend.