library(dplyr)
library(ggplot2)
library(tidyverse)
library(stringr)
library(stringdist)
library(plotly)
library(scales)
library(lubridate)
library(htmlwidgets)
library(viridisLite)
bb <- read.csv("/Users/jackholland/Downloads/Togetherhood CSVs/cleaned_aug_6 - Alter Start Date.csv")
View(bb)
course_terms <- bb |>
group_by(Course.name, Partner, Term) |>
summarize(first_date = min(Date),
last_date = max(Date),
sessions = n())
## `summarise()` has grouped output by 'Course.name', 'Partner'. You can override
## using the `.groups` argument.
View(course_terms)
spillover <- course_terms |>
group_by(Course.name, Partner) |>
filter(n() > 1)
View(spillover)
##(spillover$Course.name)
##Term over term, how many sessions do we offer, grouped by course name and unique partner? ##BY PARTNER
renewal_report_partners <- bb |>
group_by(Partner, Term) |>
summarize(sessions = n())
## `summarise()` has grouped output by 'Partner'. You can override using the
## `.groups` argument.
partner_term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")
renewal_report_partners <- renewal_report_partners |>
mutate(Term = factor(Term, levels = partner_term_order))
View(renewal_report_partners)
time_series_partner_plot <- plot_ly(
renewal_report_partners,
x = ~Term,
y = ~sessions,
color = ~Partner,
type = 'scatter',
mode = 'lines+markers',
colors = viridis(50)
) %>%
layout(
title = "Sessions per Partner Over Time",
xaxis = list(
title = "Term",
type = 'category',
categoryorder = "array",
categoryarray = unique(renewal_report_partners$Term)
),
yaxis = list(title = "Number of Sessions"),
legend = list(orientation = "h", x = 0, y = -0.2)
)
time_series_partner_plot
saveWidget(time_series_partner_plot, "/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Partner Plot.html")
##BY COURSE
renewal_report_course <- bb |>
group_by(Course.name, Term) |>
summarize(sessions = n())
## `summarise()` has grouped output by 'Course.name'. You can override using the
## `.groups` argument.
term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")
renewal_report_course <- renewal_report_course |>
mutate(Term = factor(Term, levels = term_order))
View(renewal_report_course)
time_series_course_plot <- plot_ly(renewal_report_course, x = ~Term, y = ~sessions, color = ~Course.name, type = 'scatter', mode = 'lines', colors = viridis(50)) %>%
layout(
title = "Sessions per Course Name Over Time",
xaxis = list(
title = "Term",
type = 'category',
categoryorder = "array",
categoryarray = unique(renewal_report_course$Term)
),
yaxis = list(title = "Number of Sessions"),
legend = list(orientation = "h", x = 0, y = -0.2)
)
# Display the plot
time_series_course_plot
saveWidget(time_series_course_plot, "/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Course Plot.html")
##BY PARTNER TYPE
renewal_report_type <- bb |>
group_by(partner_type, Term) |>
summarize(sessions = n())
## `summarise()` has grouped output by 'partner_type'. You can override using the
## `.groups` argument.
partner_term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")
renewal_report_type <- renewal_report_type |>
mutate(Term = factor(Term, levels = partner_term_order))
View(renewal_report_type)
unique(renewal_report_type$Term)
## [1] 2023-Fa 2023-S 2023-Su 2023-W 2024-Fa 2024-S 2024-Su 2024-W
## Levels: 2023-W 2023-S 2023-Su 2023-Fa 2024-W 2024-S 2024-Su 2024-Fa
###REVISIT time_series_type_plot <- plot_ly( renewal_report_type, x = ~Term, y = ~reorder(sessions, term_order), color = ~partner_type, type = ‘scatter’, mode = ‘lines+markers’, colors = viridis(50) ) %>% layout( title = “Sessions per Partner Type Over Time”, xaxis = list( title = “Term”, type = ‘category’, categoryorder = “array”, categoryarray = unique(renewal_report_type$Term) ), yaxis = list(title = “Number of Sessions”), legend = list(orientation = “h”, x = 0, y = -0.2) )
time_series_type_plot
saveWidget(time_series_type_plot, “/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Partner Type Plot.html”)
unique(bb$Punctuality)
## [1] "On Time" "" "30+ min late" "5-9 min late"
## [5] "10-29 min late" "TH - No Sub" "TH - Error"
##PROVIDER SCORECARD VERSION 1.3
spring_1.3_subset <- bb |>
filter(Term == "2024-S") |>
mutate(Punctuality_points = case_when(
Punctuality == "On Time" ~ 10,
Punctuality == "5-9 min late" ~ 7,
Punctuality == "10-29 min late" ~ 4,
Punctuality == "30+ min late" ~ 1,
Punctuality == "TH - No Sub" ~ 0,
TRUE ~ 0))
View(spring_1.3_subset)
spring_1.3_pivot <- spring_1.3_subset |>
group_by(Regular.Instructor) |>
summarize(
punctuality_instances = sum(Sub.Binary),
Punctuality_points = mean(Punctuality_points, na.rm = TRUE),
sub_instances = sum(Sub.Binary),
sub_score = (1 - mean(Sub.Binary, na.rm = TRUE)) * 10,
total_sessions = n(),
total_courses = n_distinct(Course.name),
total_locations = n_distinct(Address),
locations_score = ifelse(total_locations < 3, (total_locations / 3) * 10, 10),
tenure_score = ifelse(total_sessions <= 18, (total_sessions / 18) * 10, 10),
courses_score = ifelse(total_courses <= 3, (total_courses / 3) * 10, 10),
comp_score = (Punctuality_points / 2) + ((sub_score + locations_score + tenure_score + courses_score) / 8)
)
View(spring_1.3_pivot)
median(spring_1.3_pivot$total_sessions)
## [1] 15
median(spring_1.3_pivot$total_courses)
## [1] 1
median(spring_1.3_pivot$total_locations)
## [1] 1
mean(spring_1.3_pivot$total_sessions)
## [1] 18.34247
mean(spring_1.3_pivot$total_courses)
## [1] 1.59589
mean(spring_1.3_pivot$total_locations)
## [1] 1.780822
mean(spring_1.3_pivot$comp_score)
## [1] 8.211091
median(spring_1.3_pivot$comp_score)
## [1] 8.194444
##Meatmap #1 - Raw Occurances
col <- heat.colors(256)
heatmap_1.3_raw <- spring_1.3_pivot |>
select(Regular.Instructor, total_sessions, total_courses, total_locations, punctuality_instances, sub_instances)
heatmap_matrix <- as.matrix(heatmap_1.3_raw[,-1])
heatmap(heatmap_matrix, scale = "column", col = col,
main = "Heatmap - Variable Instances")
##Heatmap #2 - Scaled scoring
col <- heat.colors(256)
heatmap_1.3_scored <- spring_1.3_pivot |>
select(Regular.Instructor, tenure_score, courses_score, locations_score, Punctuality_points, sub_score)
heatmap_matrix <- as.matrix(heatmap_1.3_raw[,-1])
heatmap(heatmap_matrix, scale = "column", col = col,
main = "Heatmap - Scored Variables")
# Calculate mean and other statistics for annotations
mean_score <- mean(spring_1.3_pivot$comp_score, na.rm = TRUE)
low_score <- min(spring_1.3_pivot$comp_score, na.rm = TRUE)
# Create the plot
ggplot(spring_1.3_pivot, aes(x = reorder(Regular.Instructor, -comp_score), y = comp_score, fill = comp_score)) +
geom_bar(stat = "identity") +
scale_fill_gradient(low = "lavenderblush1", high = "lavenderblush4") +
geom_hline(yintercept = mean_score, linetype = "dashed", color = "green") +
geom_hline(yintercept = low_score, linetype = "dashed", color = "red") +
labs(x = "Regular Instructor", y = "Comp Score", title = "V1.3 Composite Score Histogram (Spring 2024)") +
theme(axis.text.x = element_blank()) +
annotate("text", x = nrow(spring_1.3_pivot)/2, y = mean_score, label = paste("Mean:", round(mean_score, 3)), vjust = -1, color = "darkgreen") +
annotate("text", x = nrow(spring_1.3_pivot)/2, y = low_score, label = paste("Low:", round(low_score, 3)), vjust = 1, color = "darkred")