CI Analysis at OP Labs Before and After September 3rd

Introduction

This analysis investigates the impact of recent changes made to the CI. The emphasis is on TTFF and total elapsed time for ‘successful’, ‘failed’, and ‘canceled’ workflows in the date range July 2024 through October 3.

Preparation

# Set the CRAN mirror so packages can be installed when publishing (RStudio Cloud)
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this analysis depends on
required_packages <- c("tidyverse", "lubridate", "ggplot2", "scales", "cowplot", "zoo")

# Install whichever of the required packages are not installed yet
installed_packages <- rownames(installed.packages())
if (!all(required_packages %in% installed_packages)) {
  install.packages(setdiff(required_packages, installed_packages))
}

# Load libraries
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(zoo)

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# Load the raw CI workflow records from the CSV file
ci_data <- read.csv("data.csv", stringsAsFactors = FALSE)

# Preview the first rows to confirm the data loaded correctly
head(ci_data)

# Parse 'Created.At' as a UTC datetime, then derive the calendar date and
# the weekday label (weeks start on Monday)
ci_data <- ci_data %>%
  mutate(
    Created.At = as.POSIXct(Created.At, format = "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC"),
    Date = as.Date(Created.At),
    DayOfWeek = lubridate::wday(Date, label = TRUE, week_start = 1)
  )

# Filter out weekends (Saturday and Sunday).
# The test uses the numeric weekday (with week_start = 1, Monday is 1 and
# Saturday/Sunday are 6/7) instead of the "Sat"/"Sun" labels: the labels
# produced by wday(label = TRUE) follow the session locale, so matching
# English abbreviations would silently keep weekends in a non-English
# locale. `%in%` never returns NA, so rows with a missing Date are kept,
# exactly as the label-based test kept them.
ci_data <- ci_data %>%
  filter(!lubridate::wday(Date, week_start = 1) %in% c(6, 7))

# Count the runs recorded on each remaining date (AttemptCount) and keep
# only the dates that have at least 15 runs
ci_data <- ci_data %>%
  add_count(Date, name = "AttemptCount") %>%
  filter(AttemptCount >= 15)

# Replace the sentinel TTFF value -1 with NA (successful runs without failures)
ci_data <- ci_data %>%
  mutate(TTFF = na_if(TTFF, -1))

# Workflows for the TTFF analysis: failed or canceled runs that have a
# recorded, non-negative TTFF
ttff_data <- ci_data %>%
  filter(
    Status %in% c("failed", "canceled"),
    !is.na(TTFF),     # Ensure TTFF is not missing
    TTFF >= 0         # Exclude negative TTFF values
  )

# Workflows for the elapsed time analysis: failed or successful runs
# ("canceled" is excluded).
# TTFF is NA for the runs recoded above, and filter() drops rows whose
# condition evaluates to NA, so a bare `TTFF >= 0` would silently discard
# those runs even though "success" is requested here. Rows with a missing
# TTFF are therefore kept and only negative recorded values are rejected.
# NOTE(review): this changes which rows reach the elapsed-time analysis;
# confirm that successful runs are meant to be included.
elapsed_data <- ci_data %>%
  filter(
    Status %in% c("failed", "success"),
    is.na(TTFF) | TTFF >= 0,
    !is.na(Elapsed)   # quantile() on Elapsed fails if missing values remain
  )

# Quartiles and interquartile range of TTFF
Q1_TTFF <- quantile(ttff_data[["TTFF"]], probs = 0.25)
Q3_TTFF <- quantile(ttff_data[["TTFF"]], probs = 0.75)
IQR_TTFF <- Q3_TTFF - Q1_TTFF

# Outlier thresholds: 1.5 * IQR beyond each quartile
upper_bound_TTFF <- Q3_TTFF + 1.5 * IQR_TTFF
lower_bound_TTFF <- Q1_TTFF - 1.5 * IQR_TTFF

# Keep only the TTFF values that lie within the thresholds (inclusive)
ttff_data <- filter(
  ttff_data,
  TTFF >= lower_bound_TTFF,
  TTFF <= upper_bound_TTFF
)

# Quartiles and interquartile range of Elapsed
Q1_Elapsed <- quantile(elapsed_data[["Elapsed"]], probs = 0.25)
Q3_Elapsed <- quantile(elapsed_data[["Elapsed"]], probs = 0.75)
IQR_Elapsed <- Q3_Elapsed - Q1_Elapsed

# Outlier thresholds: 1.5 * IQR beyond each quartile
upper_bound_Elapsed <- Q3_Elapsed + 1.5 * IQR_Elapsed
lower_bound_Elapsed <- Q1_Elapsed - 1.5 * IQR_Elapsed

# Keep only the Elapsed values that lie within the thresholds (inclusive)
elapsed_data <- filter(
  elapsed_data,
  Elapsed >= lower_bound_Elapsed,
  Elapsed <= upper_bound_Elapsed
)

Elapsed Time Analysis

Plotting Elapsed Over Time

Elapsed Visualization by Month

Compare Elapsed Before and After September 3rd

# Label each run "Before" or "After" relative to 2024-09-03.
# The comparison uses the Date column against a Date cutoff. Comparing the
# POSIXct 'Created.At' column with a Date mixes seconds-since-epoch with
# days-since-epoch: R warns about incompatible methods and compares the raw
# numbers, which labels every run "After".
# The factor levels are given explicitly so "Before" is ordered ahead of
# "After" in the plot.
elapsed_data <- elapsed_data %>%
  mutate(
    Period = factor(
      if_else(Date < as.Date("2024-09-03"), "Before", "After"),
      levels = c("Before", "After")
    )
  )

# Boxplot by period
ggplot(elapsed_data, aes(x = Period, y = Elapsed, fill = Period)) +
  geom_boxplot() +
  labs(title = "Elapsed Time Before and After September 3rd", x = "Period", y = "Elapsed Time (Seconds)") +
  theme_minimal()

Statistical Testing (Mann-Whitney U Test)

# (Re)compute the period indicator from the run date. It is built as a
# factor with "Before" ahead of "After" so that recomputing it here does
# not turn an existing Period factor into a plain character column and
# lose that level order.
elapsed_data <- elapsed_data %>%
  mutate(
    Period = factor(
      if_else(Date < as.Date("2024-09-03"), "Before", "After"),
      levels = c("Before", "After")
    )
  )

# Split data into two groups for Elapsed
elapsed_before <- elapsed_data %>% filter(Period == "Before") %>% pull(Elapsed)
elapsed_after <- elapsed_data %>% filter(Period == "After") %>% pull(Elapsed)

# Perform the Mann-Whitney U Test (Wilcoxon rank sum test), two-sided:
# it tests for a location shift in either direction between the periods
elapsed_test_result <- wilcox.test(elapsed_before, elapsed_after, alternative = "two.sided")
print(elapsed_test_result)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  elapsed_before and elapsed_after
## W = 554371, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Interpretation

Since the p-value is far below the conventional significance level (e.g., 0.05), we reject the null hypothesis. This suggests that there is a statistically significant difference in Elapsed time between the two periods.

Elapsed Over Time with 7-Day Rolling Average

# Calculate daily average Elapsed
daily_elapsed <- elapsed_data %>%
  group_by(Date) %>%
  summarise(Avg_Elapsed = mean(Elapsed))

# Calculate 7-day rolling average.
# rollmean() slides over rows, so the window is the current date plus the 6
# preceding dates present in the data (align = "right"), not 7 calendar
# days; the first 6 rows have no full window and are filled with NA.
daily_elapsed <- daily_elapsed %>%
  arrange(Date) %>%
  mutate(Rolling_Avg_Elapsed = zoo::rollmean(Avg_Elapsed, k = 7, fill = NA, align = "right"))

# Plot Elapsed over time with rolling average.
# `linewidth` replaces `size` for lines, which is deprecated since
# ggplot2 3.4.0.
ggplot(daily_elapsed, aes(x = Date)) +
  geom_line(aes(y = Avg_Elapsed), color = "grey70") +
  geom_line(aes(y = Rolling_Avg_Elapsed), color = "blue", linewidth = 1) +
  geom_vline(xintercept = as.Date("2024-09-03"), linetype = "dashed", color = "red") +
  annotate("text", x = as.Date("2024-09-03") + 5, y = max(daily_elapsed$Avg_Elapsed, na.rm = TRUE),
           label = "Optimizations Begin", color = "red") +
  labs(title = "Elapsed Over Time with 7-Day Rolling Average", x = "Date", y = "Elapsed (Seconds)") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).

TTFF Over Time with 7-Day Rolling Average

# Calculate daily average TTFF
daily_ttff <- ttff_data %>%
  group_by(Date) %>%
  summarise(Avg_TTFF = mean(TTFF))

# Calculate 7-day rolling average.
# rollmean() slides over rows, so the window is the current date plus the 6
# preceding dates present in the data (align = "right"), not 7 calendar
# days; the first 6 rows have no full window and are filled with NA.
daily_ttff <- daily_ttff %>%
  arrange(Date) %>%
  mutate(Rolling_Avg_TTFF = zoo::rollmean(Avg_TTFF, k = 7, fill = NA, align = "right"))

# Plot TTFF over time with rolling average.
# `linewidth` replaces `size` for lines, which is deprecated since
# ggplot2 3.4.0.
ggplot(daily_ttff, aes(x = Date)) +
  geom_line(aes(y = Avg_TTFF), color = "grey70") +
  geom_line(aes(y = Rolling_Avg_TTFF), color = "blue", linewidth = 1) +
  geom_vline(xintercept = as.Date("2024-09-03"), linetype = "dashed", color = "red") +
  annotate("text", x = as.Date("2024-09-03") + 5, y = max(daily_ttff$Avg_TTFF, na.rm = TRUE),
           label = "Optimizations Begin", color = "red") +
  labs(title = "TTFF Over Time with 7-Day Rolling Average", x = "Date", y = "TTFF (Seconds)") +
  theme_minimal()

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).

Exploratory Data Analysis

Exploratory Data Analysis of TTFF

# One-row summary of TTFF: number of runs plus mean, median, standard
# deviation, minimum and maximum. `.names` puts the statistic first so the
# columns are Mean_TTFF, Median_TTFF, SD_TTFF, Min_TTFF, Max_TTFF.
ttff_summary <- summarise(
  ttff_data,
  Count = n(),
  across(
    TTFF,
    list(Mean = mean, Median = median, SD = sd, Min = min, Max = max),
    .names = "{.fn}_{.col}"
  )
)
print(ttff_summary)

##   Count Mean_TTFF Median_TTFF  SD_TTFF Min_TTFF Max_TTFF
## 1  1999  441.6411     378.198 356.4356    1.427 1704.282

Exploratory Data Analysis of Elapsed Time

# One-row summary of Elapsed: number of runs plus mean, median, standard
# deviation, minimum and maximum. `.names` puts the statistic first so the
# columns are Mean_Elapsed, Median_Elapsed, SD_Elapsed, Min_Elapsed,
# Max_Elapsed.
elapsed_summary <- summarise(
  elapsed_data,
  Count = n(),
  across(
    Elapsed,
    list(Mean = mean, Median = median, SD = sd, Min = min, Max = max),
    .names = "{.fn}_{.col}"
  )
)

print(elapsed_summary)

##   Count Mean_Elapsed Median_Elapsed SD_Elapsed Min_Elapsed Max_Elapsed
## 1  1629     873.5088        775.625   379.2558       3.854    2106.584

TTFF Analysis

Plotting TTFF Over Time

TTFF Visualization by Month

Compare TTFF Before and After September 3rd

Statistical Testing (Mann-Whitney U Test)

# Make sure ttff_data carries the period indicator before splitting on it.
# If the column is missing, derive it from the run date with the same
# 2024-09-03 cutoff used for the elapsed-time comparison; an existing
# Period column is left untouched.
if (!"Period" %in% names(ttff_data)) {
  ttff_data <- ttff_data %>%
    mutate(Period = if_else(Date < as.Date("2024-09-03"), "Before", "After"))
}

# Split data into two groups
ttff_before <- ttff_data %>% filter(Period == "Before") %>% pull(TTFF)
ttff_after <- ttff_data %>% filter(Period == "After") %>% pull(TTFF)

# Perform the Mann-Whitney U Test (Wilcoxon rank sum test), two-sided
test_result <- wilcox.test(ttff_before, ttff_after, alternative = "two.sided")
print(test_result)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ttff_before and ttff_after
## W = 695797, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Interpretation

The p-value is less than 0.05 (in fact, it is extremely small: < 2.2e-16), which indicates strong evidence against the null hypothesis. The null hypothesis in this test is that there is no difference in the distributions of TTFF between the “Before” and “After” periods. Given the very small p-value, we can conclude that there is a statistically significant difference in TTFF between the two periods.

library(ggplot2)
library(dplyr)

# Calculate daily status counts: one row per date with the number of
# success, failed and canceled runs, plus the total number of runs that day
daily_status_counts <- ci_data %>%
  group_by(Date) %>%
  summarise(
    Success = sum(Status == "success"),
    Failed = sum(Status == "failed"),
    Canceled = sum(Status == "canceled"),
    Total = n(),
    .groups = 'drop'
  )

# Convert to long format (one row per date and status) for stacking in the
# area chart
daily_status_long <- daily_status_counts %>%
  pivot_longer(cols = c(Success, Failed, Canceled), names_to = "Status", values_to = "Count")

# Plot the stacked area chart.
# The outline width is set with `linewidth`; `size` is deprecated for
# line widths since ggplot2 3.4.0.
ggplot(daily_status_long, aes(x = Date, y = Count, fill = Status)) +
  geom_area(alpha = 0.6, linewidth = 0.5, color = "white") +
  labs(title = "Daily Breakdown of Success, Failed, and Canceled Runs", 
       x = "Date", 
       y = "Number of Runs") +
  scale_fill_manual(values = c("Success" = "green", "Failed" = "red", "Canceled" = "orange")) +  # Colors for each status
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))