Aug-6.knit

library(dplyr)
library(ggplot2)
library(tidyverse)
library(stringr)
library(stringdist)
library(plotly)
library(scales)
library(lubridate)
library(htmlwidgets)
library(viridisLite)

bb <- read.csv("/Users/jackholland/Downloads/Togetherhood CSVs/cleaned_aug_6 - Alter Start Date.csv")
View(bb)

course_terms <- bb |>
  group_by(Course.name, Partner, Term) |>
  summarize(first_date = min(Date),
            last_date = max(Date),
            sessions = n())

## `summarise()` has grouped output by 'Course.name', 'Partner'. You can override
## using the `.groups` argument.

View(course_terms)

spillover <- course_terms |>
    group_by(Course.name, Partner) |>
  filter(n() > 1)

View(spillover)
##(spillover$Course.name)

##Term over term, how many sessions do we offer, grouped by course name and unique partner? ##BY PARTNER

renewal_report_partners <- bb |>
  group_by(Partner, Term) |>
  summarize(sessions = n())

## `summarise()` has grouped output by 'Partner'. You can override using the
## `.groups` argument.

partner_term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")

renewal_report_partners <- renewal_report_partners |>
  mutate(Term = factor(Term, levels = partner_term_order))

View(renewal_report_partners)

time_series_partner_plot <- plot_ly(
  renewal_report_partners, 
  x = ~Term, 
  y = ~sessions, 
  color = ~Partner, 
  type = 'scatter', 
  mode = 'lines+markers', 
  colors = viridis(50)
) %>%
  layout(
    title = "Sessions per Partner Over Time",
    xaxis = list(
      title = "Term", 
      type = 'category',
      categoryorder = "array",
      categoryarray = unique(renewal_report_partners$Term)
    ),
    yaxis = list(title = "Number of Sessions"),
    legend = list(orientation = "h", x = 0, y = -0.2)
  )

time_series_partner_plot

saveWidget(time_series_partner_plot, "/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Partner Plot.html")

##BY COURSE

renewal_report_course <- bb |>
  group_by(Course.name, Term) |>
  summarize(sessions = n())

## `summarise()` has grouped output by 'Course.name'. You can override using the
## `.groups` argument.

term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")

renewal_report_course <- renewal_report_course |>
  mutate(Term = factor(Term, levels = term_order))

View(renewal_report_course)

time_series_course_plot <- plot_ly(renewal_report_course, x = ~Term, y = ~sessions, color = ~Course.name, type = 'scatter', mode = 'lines', colors = viridis(50)) %>%
  layout(
     title = "Sessions per Course Name Over Time",
    xaxis = list(
      title = "Term", 
      type = 'category',
      categoryorder = "array",
      categoryarray = unique(renewal_report_course$Term)
    ),
    yaxis = list(title = "Number of Sessions"),
    legend = list(orientation = "h", x = 0, y = -0.2)
  )

# Display the plot
time_series_course_plot

saveWidget(time_series_course_plot, "/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Course Plot.html")

##BY PARTNER TYPE

renewal_report_type <- bb |>
  group_by(partner_type, Term) |>
  summarize(sessions = n())

## `summarise()` has grouped output by 'partner_type'. You can override using the
## `.groups` argument.

partner_term_order <- c("2023-W", "2023-S", "2023-Su", "2023-Fa", "2024-W", "2024-S", "2024-Su", "2024-Fa")

renewal_report_type <- renewal_report_type |>
  mutate(Term = factor(Term, levels = partner_term_order))

View(renewal_report_type)
unique(renewal_report_type$Term)

## [1] 2023-Fa 2023-S  2023-Su 2023-W  2024-Fa 2024-S  2024-Su 2024-W 
## Levels: 2023-W 2023-S 2023-Su 2023-Fa 2024-W 2024-S 2024-Su 2024-Fa

###REVISIT time_series_type_plot <- plot_ly( renewal_report_type, x = ~Term, y = ~reorder(sessions, term_order), color = ~partner_type, type = ‘scatter’, mode = ‘lines+markers’, colors = viridis(50) ) %>% layout( title = “Sessions per Partner Type Over Time”, xaxis = list( title = “Term”, type = ‘category’, categoryorder = “array”, categoryarray = unique(renewal_report_type$Term) ), yaxis = list(title = “Number of Sessions”), legend = list(orientation = “h”, x = 0, y = -0.2) )

time_series_type_plot

saveWidget(time_series_type_plot, “/Users/jackholland/Downloads/Togetherhood CSVs/Time Series Partner Type Plot.html”)

unique(bb$Punctuality)

## [1] "On Time"        ""               "30+ min late"   "5-9 min late"  
## [5] "10-29 min late" "TH - No Sub"    "TH - Error"

##PROVIDER SCORECARD VERSION 1.3

spring_1.3_subset <- bb |>
  filter(Term == "2024-S") |>
  mutate(Punctuality_points = case_when(
      Punctuality == "On Time" ~ 10,
      Punctuality == "5-9 min late" ~ 7,
      Punctuality == "10-29 min late" ~ 4,
      Punctuality == "30+ min late" ~ 1,
      Punctuality == "TH - No Sub" ~ 0,
      TRUE ~ 0))

View(spring_1.3_subset)

spring_1.3_pivot <- spring_1.3_subset |>
  group_by(Regular.Instructor) |>
  summarize(
    punctuality_instances = sum(Sub.Binary),
    Punctuality_points = mean(Punctuality_points, na.rm = TRUE),
    sub_instances = sum(Sub.Binary),
    sub_score = (1 - mean(Sub.Binary, na.rm = TRUE)) * 10,
    total_sessions = n(),
    total_courses = n_distinct(Course.name),
    total_locations = n_distinct(Address),
    locations_score = ifelse(total_locations < 3, (total_locations / 3) * 10, 10),
    tenure_score = ifelse(total_sessions <= 18, (total_sessions / 18) * 10, 10),
    courses_score = ifelse(total_courses <= 3, (total_courses / 3) * 10, 10),
    comp_score = (Punctuality_points / 2) + ((sub_score + locations_score + tenure_score + courses_score) / 8)
  )

View(spring_1.3_pivot)

median(spring_1.3_pivot$total_sessions)

## [1] 15

median(spring_1.3_pivot$total_courses)

## [1] 1

median(spring_1.3_pivot$total_locations)

## [1] 1

mean(spring_1.3_pivot$total_sessions)

## [1] 18.34247

mean(spring_1.3_pivot$total_courses)

## [1] 1.59589

mean(spring_1.3_pivot$total_locations)

## [1] 1.780822

mean(spring_1.3_pivot$comp_score)

## [1] 8.211091

median(spring_1.3_pivot$comp_score)

## [1] 8.194444

##Meatmap #1 - Raw Occurances

col <- heat.colors(256)

heatmap_1.3_raw <- spring_1.3_pivot |>
  select(Regular.Instructor, total_sessions, total_courses, total_locations, punctuality_instances, sub_instances)

heatmap_matrix <- as.matrix(heatmap_1.3_raw[,-1])

heatmap(heatmap_matrix, scale = "column", col = col, 
        main = "Heatmap - Variable Instances")

##Heatmap #2 - Scaled scoring

col <- heat.colors(256)

heatmap_1.3_scored <- spring_1.3_pivot |>
  select(Regular.Instructor, tenure_score, courses_score, locations_score, Punctuality_points, sub_score)

heatmap_matrix <- as.matrix(heatmap_1.3_raw[,-1])

heatmap(heatmap_matrix, scale = "column", col = col, 
        main = "Heatmap - Scored Variables")

# Calculate mean and other statistics for annotations
mean_score <- mean(spring_1.3_pivot$comp_score, na.rm = TRUE)
low_score <- min(spring_1.3_pivot$comp_score, na.rm = TRUE)

# Create the plot
ggplot(spring_1.3_pivot, aes(x = reorder(Regular.Instructor, -comp_score), y = comp_score, fill = comp_score)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "lavenderblush1", high = "lavenderblush4") +
  geom_hline(yintercept = mean_score, linetype = "dashed", color = "green") +
  geom_hline(yintercept = low_score, linetype = "dashed", color = "red") +
  labs(x = "Regular Instructor", y = "Comp Score", title = "V1.3 Composite Score Histogram (Spring 2024)") +
  theme(axis.text.x = element_blank()) +
  annotate("text", x = nrow(spring_1.3_pivot)/2, y = mean_score, label = paste("Mean:", round(mean_score, 3)), vjust = -1, color = "darkgreen") +
  annotate("text", x = nrow(spring_1.3_pivot)/2, y = low_score, label = paste("Low:", round(low_score, 3)), vjust = 1, color = "darkred")