library(readxl)
library(readr)
library(dplyr)
library(lubridate)
library(tsibble)
library(ggplot2)
library(fable)
library(feasts)
library(slider)
# Load the dataset ----
ufc_data <- read_excel("~/Downloads/UFC_Dataset.xls")

# Parse a date column that may mix month-first, day-first and ISO strings.
#
# Each parser runs exactly once with `quiet = TRUE` (no "failed to parse"
# warnings); coalesce() then keeps, per element, the first successful parse in
# the priority mdy > dmy > ymd. Values that no parser understands stay NA.
# A column that read_excel() already delivered as Date/POSIXct is converted
# directly instead of being round-tripped through text.
parse_mixed_date <- function(x) {
  if (is.Date(x) || is.POSIXt(x)) {
    return(as_date(x))
  }
  x <- as.character(x)
  coalesce(
    mdy(x, quiet = TRUE),
    dmy(x, quiet = TRUE),
    ymd(x, quiet = TRUE)
  )
}

# One row per event date: average the two corners per fight, then average all
# fights that share a date.
ufc_data_subset <- ufc_data |>
  select(Date, RedAvgSigStrLanded, BlueAvgSigStrLanded) |>
  mutate(
    Date = parse_mixed_date(Date),
    # Row-wise mean of the red and blue corner; NaN when both are missing.
    AvgStrikesLanded = rowMeans(
      cbind(RedAvgSigStrLanded, BlueAvgSigStrLanded),
      na.rm = TRUE
    )
  ) |>
  # Remove rows with unparseable dates or no strike data (is.na() is also
  # TRUE for the NaN produced when both corners are missing).
  filter(!is.na(Date), !is.na(AvgStrikesLanded)) |>
  group_by(Date) |>
  summarise(AvgStrikesLanded = mean(AvgStrikesLanded, na.rm = TRUE))
# Check for duplicates and ensure the Date column is properly formatted ----
# Both steps are defensive: as_tsibble() requires a unique index, so keep only
# the first row per Date, and coerce Date to class Date in case an upstream
# step left it as a date-time.
ufc_data_subset <- ufc_data_subset |>
  distinct(Date, .keep_all = TRUE) |>
  mutate(Date = as_date(Date))
# Create tsibble ----
# UFC events fall on irregular calendar dates. Indexing the tsibble by the raw
# event date makes tsibble infer a fixed interval from the spacing of the
# dates, leaving most time points as implicit gaps; the feasts/fable tools
# used on this series expect a regularly spaced, gap-free index.
# Aggregate to a regular monthly series instead (the column is still called
# `Date`, but now holds a yearmonth), then turn any month without an event
# into an explicit NA row with fill_gaps().
# NOTE(review): the monthly value is the mean of per-event means, so every
# event counts equally regardless of how many fights it had -- confirm that
# this weighting is what the analysis wants.
ufc_tsibble <- ufc_data_subset |>
  mutate(Date = yearmonth(Date)) |>
  group_by(Date) |>
  summarise(AvgStrikesLanded = mean(AvgStrikesLanded, na.rm = TRUE)) |>
  as_tsibble(index = Date) |>
  fill_gaps()

# Check the tsibble: the header should report a regular interval.
print(ufc_tsibble)
# Plot the average strikes landed over time ----
ggplot(ufc_tsibble, aes(x = Date, y = AvgStrikesLanded)) +
  geom_line() +
  labs(
    title = "Average Significant Strikes Landed Over Time",
    x = "Date",
    y = "Average Strikes Landed"
  ) +
  theme_minimal()
# Linear regression for strikes over time ----
# Fit a time-series linear model with a linear trend term only.
linear_model <- ufc_tsibble |>
  model(trend_model = TSLM(AvgStrikesLanded ~ trend()))

# Print the coefficient table and fit statistics.
report(linear_model)

# Extrapolate the fitted trend one year past the end of the series.
linear_fc <- linear_model |>
  forecast(h = "1 year")

# Overlay the forecast (with its prediction intervals) on the observed series.
# `series =` is an argument of the forecast package's autolayer(); it is not
# among the documented arguments of the fable-forecast method used here, so
# it is dropped.
autoplot(ufc_tsibble, AvgStrikesLanded) +
  autolayer(linear_fc) +
  labs(
    title = "Linear Trend of Average Strikes Landed",
    x = "Date",
    y = "Average Strikes Landed"
  ) +
  theme_minimal()
# Smoothing to detect seasonality ----
# Trailing moving average over 5 observations (the current value plus the 4
# before it). `.complete = TRUE` leaves the first 4 positions NA instead of
# averaging a partial window.
ufc_tsibble |>
  mutate(
    MA_AvgStrikesLanded = slide_dbl(
      AvgStrikesLanded, mean,
      .before = 4, .complete = TRUE
    )
  ) |>
  ggplot(aes(x = Date)) +
  # Raw series in translucent blue, smoothed series in red on top.
  # Each `+` must end its line: a line that starts with `+` is parsed as a
  # new expression and the layers after it are never added to the plot.
  geom_line(aes(y = AvgStrikesLanded), color = "blue", alpha = 0.5) +
  geom_line(aes(y = MA_AvgStrikesLanded), color = "red") +
  labs(
    title = "Average Strikes Landed with Smoothing (Moving Average)",
    x = "Date",
    y = "Average Strikes Landed"
  ) +
  theme_minimal()
# Seasonality detection using ACF/PACF ----
# Autocorrelation: spikes recurring at a fixed lag would suggest seasonality.
ufc_tsibble |>
  ACF(AvgStrikesLanded) |>
  autoplot() +
  labs(title = "ACF for Average Strikes Landed")

# Partial autocorrelation: correlation at each lag after removing the effect
# of the shorter lags.
ufc_tsibble |>
  PACF(AvgStrikesLanded) |>
  autoplot() +
  labs(title = "PACF for Average Strikes Landed")
# I give up; I think I was doing something wrong with the tsibble.