Original graph from Homework 7 problem 4:
library(dplyr)
library(ggplot2)
library(tidyverse)
exped_clean = read_csv("exped_clean.csv")
peaks_clean = read_csv("peaks_clean.csv")
himalayas_joined = exped_clean %>%
left_join(peaks_clean, by = "PEAKID")
himalayas_joined |>
ggplot(aes(x = HEIGHTM, y = TOTDAYS, color = SEASON_FACTOR)) +
geom_point(size=1) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
scale_color_manual(values = c("purple4", "darkgoldenrod2", "skyblue2", "seagreen4")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(linetype = "solid", shape = NA)))
library(tidyverse)
library(plotly)
library(dplyr)
library(stringr)
exped_clean <- read_csv("exped_clean.csv")
peaks_clean <- read_csv("peaks_clean.csv")
himalayas_joined <- exped_clean %>%
left_join(peaks_clean, by = "PEAKID")
season_colors <- c(
"Autumn" = "blue",
"Spring" = "green",
"Summer" = "red",
"Winter" = "purple"
)
p <- plot_ly(width = 800, height = 600) %>%
layout(
title = list(
text = "Length of Expedition vs Height of Himalayan Peak",
y = 0.95 # Move title down a bit
),
xaxis = list(title = "Height (meters)"),
yaxis = list(title = "Total Days"),
legend = list(title = list(text = "Season")),
margin = list(t = 80) # Adds extra space at top to prevent overlap
)
seasons <- sort(unique(himalayas_joined$SEASON_FACTOR, na.rm=TRUE))
trace_indexes <- list()
current_index <- 0
for (season in seasons) {
season_data <- himalayas_joined %>%
filter(SEASON_FACTOR == season, !is.na(HEIGHTM), !is.na(TOTDAYS))
start_index <- current_index + 1
p <- p %>% add_trace(
data = season_data,
x = ~HEIGHTM,
y = ~TOTDAYS,
type = "scatter",
mode = "markers",
name = season,
marker = list(size = 5, color = season_colors[[season]]), # <--- color
hoverinfo = "text",
text = ~paste(
"Height:", HEIGHTM, "m<br>",
"Duration:", TOTDAYS, "days<br>",
"Season:", SEASON_FACTOR
)
)
current_index <- current_index + 1
model <- lm(TOTDAYS ~ HEIGHTM, data = season_data)
coefs <- coef(model)
x_range <- range(season_data$HEIGHTM, na.rm = TRUE)
x_seq <- seq(from = x_range[1], to = x_range[2], length.out = 100)
y_seq <- coefs[1] + coefs[2]*x_seq
slope <- round(coefs[2], 4)
intercept <- round(coefs[1], 1)
equation <- str_glue("{season}: y = {slope}x + {intercept}")
p <- p %>% add_trace(
x = x_seq,
y = y_seq,
type = "scatter",
mode = "lines",
name = paste(season, "Trend"),
line = list(width = 2, color = season_colors[[season]]), # <--- color
hoverinfo = "text",
hovertext = rep(equation, length(x_seq)),
showlegend = FALSE
)
current_index <- current_index + 1
trace_indexes[[season]] <- start_index:current_index
}
total_traces <- current_index
all_visible <- rep(TRUE, total_traces)
visibility_by_season <- lapply(seasons, function(s) {
vis <- rep(FALSE, total_traces)
vis[trace_indexes[[s]]] <- TRUE
vis
})
button_list <- list(
list(
method = "update",
args = list(
list(visible = all_visible),
list(title = "Length of Expedition vs Height of Himalayan Peak")
),
label = "All Seasons"
)
)
for (i in seq_along(seasons)) {
season <- seasons[i]
button_list[[i+1]] <- list(
method = "update",
args = list(
list(visible = visibility_by_season[[i]]),
list(title = paste("Length vs Height:", season))
),
label = season
)
}
p <- p %>%
layout(
updatemenus = list(
list(
type = "buttons",
direction = "right",
x = 0.1,
y = 1.08,
buttons = button_list
)
)
)
p
I added tooltips for both the points and regression lines to make it easier to check the exact data for a specific expedition. The tooltips on the lines are especially useful, as users can see the exact equation of the trend rather than estimating it visually. To reduce clutter, I also added buttons that let users filter by season, displaying only one season’s points and trend line at a time. This makes it much easier to analyze a specific season’s trend.