This analysis investigates the impact of recent changes made to the CI. The emphasis is on TTFF and total Elapsed time for ‘successful’, ‘failed’, and ‘canceled’ workflows in the date range July 2024 through October 3.
# Point at the CRAN cloud mirror so packages can be installed when publishing
# (RStudio Cloud).
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this analysis depends on.
required_packages <- c(
  "tidyverse", "lubridate", "ggplot2", "scales", "cowplot", "zoo"
)

# Install, one at a time, only the packages missing from the library.
installed_packages <- rownames(installed.packages())
for (pkg in setdiff(required_packages, installed_packages)) {
  install.packages(pkg)
}
# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(zoo)
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Load the raw CI export, keeping text columns as character vectors.
ci_data <- read.csv("data.csv", stringsAsFactors = FALSE)

# Show the first rows as a quick check that the import worked.
head(ci_data)

# Parse 'Created.At' (text of the form YYYY-MM-DDTHH:MM:SSZ) as a UTC
# timestamp, then derive:
#   Date      - the calendar date of the timestamp, without the time part
#   DayOfWeek - the weekday label of that date, with weeks starting on Monday
ci_data <- ci_data %>%
  mutate(
    Created.At = as.POSIXct(
      Created.At,
      format = "%Y-%m-%dT%H:%M:%OSZ",
      tz = "UTC"
    ),
    Date = as.Date(Created.At),
    DayOfWeek = lubridate::wday(Date, label = TRUE, week_start = 1)
  )
# Drop weekend runs (Saturday and Sunday).
# Weekdays are tested by number rather than by label: the labels produced by
# lubridate::wday(label = TRUE) follow the session's LC_TIME locale, so
# matching on "Sat"/"Sun" would silently keep weekends in a non-English
# locale. With week_start = 1, Monday is 1 and Saturday/Sunday are 6/7.
ci_data <- ci_data %>%
  filter(!lubridate::wday(Date, week_start = 1) %in% c(6, 7))
# In one pass:
#   1. tag every row with the number of rows sharing its Date (AttemptCount),
#   2. keep only dates that have at least 15 rows,
#   3. recode the TTFF sentinel -1 as NA (per the original note, -1 marks
#      successful runs without failures).
ci_data <- ci_data %>%
  add_count(Date, name = "AttemptCount") %>%
  filter(AttemptCount >= 15) %>%
  mutate(TTFF = ifelse(TTFF == -1, NA, TTFF))
# TTFF analysis set: failed or canceled workflows that have a recorded,
# non-negative TTFF. Conditions separated by commas are combined with AND.
ttff_data <- ci_data %>%
  filter(
    Status %in% c("failed", "canceled"),
    !is.na(TTFF),
    TTFF >= 0
  )
# Elapsed-time analysis set: failed and successful workflows ("canceled" is
# excluded).
#
# TTFF is NA for runs whose sentinel value -1 was recoded earlier in the
# script. A bare `TTFF >= 0` evaluates to NA for those rows and filter()
# drops NA rows, which silently removed the successful runs this set is
# meant to contain. Rows with a missing TTFF are therefore kept explicitly;
# only genuinely negative TTFF values are rejected.
#
# Rows without an Elapsed value are dropped here because the quantile()
# calls that follow do not accept missing values.
elapsed_data <- ci_data %>%
  filter(
    Status %in% c("failed", "success"),
    is.na(TTFF) | TTFF >= 0,
    !is.na(Elapsed)
  )
# Outlier fences for TTFF: first/third quartile, interquartile range, and
# the bounds 1.5 * IQR below Q1 and above Q3.
ttff_quartiles <- quantile(ttff_data$TTFF, c(0.25, 0.75))
Q1_TTFF <- ttff_quartiles[1]
Q3_TTFF <- ttff_quartiles[2]
IQR_TTFF <- Q3_TTFF - Q1_TTFF

lower_bound_TTFF <- Q1_TTFF - 1.5 * IQR_TTFF
upper_bound_TTFF <- Q3_TTFF + 1.5 * IQR_TTFF

# Keep only the TTFF observations that lie within the fences (inclusive).
ttff_data <- ttff_data %>%
  filter(
    TTFF >= lower_bound_TTFF,
    TTFF <= upper_bound_TTFF
  )
# Outlier fences for Elapsed: first/third quartile, interquartile range, and
# the bounds 1.5 * IQR below Q1 and above Q3.
elapsed_quartiles <- quantile(elapsed_data$Elapsed, c(0.25, 0.75))
Q1_Elapsed <- elapsed_quartiles[1]
Q3_Elapsed <- elapsed_quartiles[2]
IQR_Elapsed <- Q3_Elapsed - Q1_Elapsed

lower_bound_Elapsed <- Q1_Elapsed - 1.5 * IQR_Elapsed
upper_bound_Elapsed <- Q3_Elapsed + 1.5 * IQR_Elapsed

# Keep only the Elapsed observations that lie within the fences (inclusive).
elapsed_data <- elapsed_data %>%
  filter(
    Elapsed >= lower_bound_Elapsed,
    Elapsed <= upper_bound_Elapsed
  )
# Label every run as "Before" or "After" the 2024-09-03 cutoff.
#
# The cutoff is compared against the Date column rather than Created.At.
# Created.At is POSIXct (seconds since the epoch) while as.Date() yields a
# Date (days since the epoch); the two classes have conflicting comparison
# methods, so a mixed comparison falls back to the raw numbers and does not
# split the data at the cutoff.
#
# Created.At was already parsed to POSIXct when the data was loaded, so it
# is not parsed a second time here.
elapsed_data <- elapsed_data %>%
  mutate(
    Period = if_else(Date < as.Date("2024-09-03"), "Before", "After"),
    # Fixed level order so "Before" is drawn to the left of "After".
    Period = factor(Period, levels = c("Before", "After"))
  )

# Boxplot of Elapsed by period.
ggplot(elapsed_data, aes(x = Period, y = Elapsed, fill = Period)) +
  geom_boxplot() +
  labs(
    title = "Elapsed Time Before and After September 3rd",
    x = "Period",
    y = "Elapsed Time (Seconds)"
  ) +
  theme_minimal()
# Derive the period label from the run date. This always overwrites any
# existing Period column with plain character labels.
elapsed_data <- elapsed_data %>%
  mutate(Period = if_else(Date < as.Date("2024-09-03"), "Before", "After"))

# Elapsed values of each period, pulled out as vectors.
elapsed_before <- elapsed_data %>%
  filter(Period == "Before") %>%
  pull(Elapsed)
elapsed_after <- elapsed_data %>%
  filter(Period == "After") %>%
  pull(Elapsed)

# Two-sided Mann-Whitney U test (Wilcoxon rank sum test) comparing the two
# periods.
elapsed_test_result <- wilcox.test(
  elapsed_before,
  elapsed_after,
  alternative = "two.sided"
)
print(elapsed_test_result)
##
## Wilcoxon rank sum test with continuity correction
##
## data: elapsed_before and elapsed_after
## W = 554371, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
Since the p-value is far below the conventional significance level (e.g., 0.05), we reject the null hypothesis. This suggests that there is a statistically significant difference in Elapsed time between the two periods.
# Mean Elapsed per date, ordered by date, plus a trailing rolling mean.
# The window is 7 consecutive rows of the daily table (align = "right"), and
# rows without a full window are filled with NA. Weekends were filtered out
# earlier, so 7 rows are 7 retained dates rather than 7 calendar days.
daily_elapsed <- elapsed_data %>%
  group_by(Date) %>%
  summarise(Avg_Elapsed = mean(Elapsed)) %>%
  arrange(Date) %>%
  mutate(
    Rolling_Avg_Elapsed = zoo::rollmean(
      Avg_Elapsed,
      k = 7,
      fill = NA,
      align = "right"
    )
  )
# Plot Elapsed over time: daily mean in grey, rolling average in blue, and a
# dashed red marker at the 2024-09-03 cutoff.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot(daily_elapsed, aes(x = Date)) +
  geom_line(aes(y = Avg_Elapsed), color = "grey70") +
  geom_line(aes(y = Rolling_Avg_Elapsed), color = "blue", linewidth = 1) +
  geom_vline(
    xintercept = as.Date("2024-09-03"),
    linetype = "dashed",
    color = "red"
  ) +
  # Label sits 5 days right of the marker, at the height of the largest
  # daily mean.
  annotate(
    "text",
    x = as.Date("2024-09-03") + 5,
    y = max(daily_elapsed$Avg_Elapsed, na.rm = TRUE),
    label = "Optimizations Begin",
    color = "red"
  ) +
  labs(
    title = "Elapsed Over Time with 7-Day Rolling Average",
    x = "Date",
    y = "Elapsed (Seconds)"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Mean TTFF per date, ordered by date, plus a trailing rolling mean.
# The window is 7 consecutive rows of the daily table (align = "right"), and
# rows without a full window are filled with NA. Weekends were filtered out
# earlier, so 7 rows are 7 retained dates rather than 7 calendar days.
daily_ttff <- ttff_data %>%
  group_by(Date) %>%
  summarise(Avg_TTFF = mean(TTFF)) %>%
  arrange(Date) %>%
  mutate(
    Rolling_Avg_TTFF = zoo::rollmean(
      Avg_TTFF,
      k = 7,
      fill = NA,
      align = "right"
    )
  )
# Plot TTFF over time: daily mean in grey, rolling average in blue, and a
# dashed red marker at the 2024-09-03 cutoff.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot(daily_ttff, aes(x = Date)) +
  geom_line(aes(y = Avg_TTFF), color = "grey70") +
  geom_line(aes(y = Rolling_Avg_TTFF), color = "blue", linewidth = 1) +
  geom_vline(
    xintercept = as.Date("2024-09-03"),
    linetype = "dashed",
    color = "red"
  ) +
  # Label sits 5 days right of the marker, at the height of the largest
  # daily mean.
  annotate(
    "text",
    x = as.Date("2024-09-03") + 5,
    y = max(daily_ttff$Avg_TTFF, na.rm = TRUE),
    label = "Optimizations Begin",
    color = "red"
  ) +
  labs(
    title = "TTFF Over Time with 7-Day Rolling Average",
    x = "Date",
    y = "TTFF (Seconds)"
  ) +
  theme_minimal()
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
# One-row summary of TTFF: row count, then mean, median, standard deviation,
# minimum and maximum. The `.names` pattern yields the columns Mean_TTFF,
# Median_TTFF, SD_TTFF, Min_TTFF and Max_TTFF, in that order.
ttff_summary <- ttff_data %>%
  summarise(
    Count = n(),
    across(
      TTFF,
      list(Mean = mean, Median = median, SD = sd, Min = min, Max = max),
      .names = "{.fn}_{.col}"
    )
  )
print(ttff_summary)
## Count Mean_TTFF Median_TTFF SD_TTFF Min_TTFF Max_TTFF
## 1 1999 441.6411 378.198 356.4356 1.427 1704.282
# One-row summary of Elapsed: row count, then mean, median, standard
# deviation, minimum and maximum. The `.names` pattern yields the columns
# Mean_Elapsed, Median_Elapsed, SD_Elapsed, Min_Elapsed and Max_Elapsed, in
# that order.
elapsed_summary <- elapsed_data %>%
  summarise(
    Count = n(),
    across(
      Elapsed,
      list(Mean = mean, Median = median, SD = sd, Min = min, Max = max),
      .names = "{.fn}_{.col}"
    )
  )
print(elapsed_summary)
## Count Mean_Elapsed Median_Elapsed SD_Elapsed Min_Elapsed Max_Elapsed
## 1 1629 873.5088 775.625 379.2558 3.854 2106.584
# Label each TTFF observation as "Before" or "After" the 2024-09-03 cutoff.
# ttff_data is derived from ci_data, and the visible script only ever adds a
# Period column to elapsed_data, so Period has to be computed here for the
# split below to work. If the column already exists it is overwritten with
# the same definition.
ttff_data <- ttff_data %>%
  mutate(Period = if_else(Date < as.Date("2024-09-03"), "Before", "After"))

# Split TTFF into the two periods.
ttff_before <- ttff_data %>%
  filter(Period == "Before") %>%
  pull(TTFF)
ttff_after <- ttff_data %>%
  filter(Period == "After") %>%
  pull(TTFF)

# Two-sided Mann-Whitney U test (Wilcoxon rank sum test) comparing the two
# periods.
test_result <- wilcox.test(ttff_before, ttff_after, alternative = "two.sided")
print(test_result)
##
## Wilcoxon rank sum test with continuity correction
##
## data: ttff_before and ttff_after
## W = 695797, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
The p-value is less than 0.05 (in fact, it is extremely small: < 2.2e-16), which indicates strong evidence against the null hypothesis. The null hypothesis in this test is that there is no difference in the distributions of TTFF between the “Before” and “After” periods. Given the very small p-value, we can conclude that there is a statistically significant difference in TTFF between the two periods.
library(ggplot2)
library(dplyr)
# Per-date tally of runs by status, plus the total number of rows that day.
# The result is ungrouped.
daily_status_counts <- ci_data %>%
  group_by(Date) %>%
  summarise(
    Success = sum(Status == "success"),
    Failed = sum(Status == "failed"),
    Canceled = sum(Status == "canceled"),
    Total = n(),
    .groups = "drop"
  )

# Reshape to long form for the stacked area chart: one row per
# (Date, Status), with the tally in Count. Total is carried along unchanged.
daily_status_long <- daily_status_counts %>%
  pivot_longer(
    cols = all_of(c("Success", "Failed", "Canceled")),
    names_to = "Status",
    values_to = "Count"
  )
# Stacked area chart of daily run counts by status.
# `linewidth` replaces `size` for the white outline between areas; `size` is
# deprecated for lines since ggplot2 3.4.0.
ggplot(daily_status_long, aes(x = Date, y = Count, fill = Status)) +
  geom_area(alpha = 0.6, linewidth = 0.5, color = "white") +
  labs(
    title = "Daily Breakdown of Success, Failed, and Canceled Runs",
    x = "Date",
    y = "Number of Runs"
  ) +
  # Fixed fill color for each status.
  scale_fill_manual(
    values = c("Success" = "green", "Failed" = "red", "Canceled" = "orange")
  ) +
  theme_minimal() +
  # Slant the date labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))