Abstract
This report summarises mixed martial arts (MMA) data from bouts under the Ultimate Fighting Championship (UFC) promotion. The data used was scraped from ufcstats.com and made publicly available through kaggle.com (https://www.kaggle.com/datasets/danmcinerney/mma-differentials-and-elo?resource=download). The analysis and visualisation were conducted using dplyr and ggplot2.
Competition time vs bout number
Between 2005 and 2022, Flyweights accumulated significantly more competition time (seconds) than Heavyweights for the same number of bouts fought. This is a consequence of there being fewer stoppages and thus more fights going the full distance to a decision.
Between 2005 and 2022, there was no significant difference between the number of significant strikes landed by Flyweights and by Heavyweights. This result is surprising, as it is assumed that Flyweights are more active and therefore land more significant strikes in the same competition time.
The above box plots describe the different spread of fight outcomes (submission, decision, knockout or disqualification) between Heavyweight and Flyweight divisions.
Between 2005 and 2022, Heavyweights accumulated significantly more knockdowns than Flyweights for the same amount of competition time (seconds).
Between 2005 and 2022, Heavyweights accumulated significantly more knockouts for the same amount of competition time compared to Flyweights.
# Set up ------------------------------------------------------------------
library(ggplot2)
library(dplyr)
library(gganimate)
library(purrr)
library(gapminder)
library(lubridate)
library(gifski)
# Show the directory the script was started from, then move into the data
# folder so the CSV can be read by its bare file name. Every relative path
# used later in the script resolves against this folder.
getwd()
setwd("./mmadata_up_to_20220622")
mma <- read.csv("masterdataframe.csv")

# Men's weight classes; used further down to filter the win-streak animation.
Mens_division <- c(
  "Heavyweight",
  "Light Heavyweight",
  "Middleweight",
  "Welterweight",
  "Lightweight",
  "Featherweight",
  "Bantamweight",
  "Flyweight"
)

# One-column data frame holding the raw column names, kept for inspection.
column_names <- data.frame(colnames(mma))

# Derived columns: the bout date parsed as POSIXct, and a constant 1 per row
# so that sum(count) inside a summarise() yields a row count.
mma[["POSixTime"]] <- as.POSIXct(mma$date, format = "%Y-%m-%d")
mma[["count"]] <- 1
# Total Fight time by fighter ---------------------------------------------
# For every (fighter, division) pair, add up competition time and the number
# of rows dated after 2005-01-01, keeping Flyweight and Heavyweight only.
fight_time <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    count = sum(count)
  )

# Scatter of bouts against competition time with one linear fit per division.
# NOTE(review): `size = 3` is given to ggplot() rather than geom_point(), so it
# probably does not change the point size - confirm the intent.
plot_1 <-
  ggplot(
    data = fight_time,
    mapping = aes(x = total_comp_time, y = count, color = division),
    size = 3
  ) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of bouts",
    title = "Number of bouts vs competition time"
  ) +
  # Zoom only; coord_cartesian() does not drop points from the fit.
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 20))
# Significant strikes by fight time ---------------------------------------
# For every (fighter, division) pair, add up competition time and significant
# strikes landed for rows dated after 2005-01-01, then keep Flyweight and
# Heavyweight only.
fight_sig_strikes <- mma %>%
  filter(POSixTime > "2005-01-01") %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    sig_strikes_landed = sum(sig_strikes_landed)
  ) %>%
  filter(division %in% c("Flyweight", "Heavyweight"))

# Scatter of significant strikes against competition time with one linear fit
# per division. The y-axis label now names the plotted variable (it previously
# read "Number of bouts") and the title spelling is corrected.
# NOTE(review): `size = 3` is given to ggplot() rather than geom_point(), so it
# probably does not change the point size - confirm the intent.
plot_2 <-
  ggplot(
    fight_sig_strikes,
    aes(
      x = total_comp_time,
      y = sig_strikes_landed,
      color = division
    ),
    size = 3
  ) +
  geom_point() +
  geom_smooth(formula = y ~ x, method = "lm") +
  # Zoom only; coord_cartesian() does not drop points from the fit.
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 1000)) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of significant strikes",
    title = "Number of significant strikes vs competition time"
  )
# knockdowns by fight time --------------------------------------
# For every (fighter, division) pair, add up competition time and knockdowns
# for rows dated after 2005-01-01, keeping Flyweight and Heavyweight only.
fight_kd <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    knockdowns = sum(knockdowns)
  )

# Scatter of knockdowns against competition time with one linear fit per
# division.
# NOTE(review): `size = 3` is given to ggplot() rather than geom_point(), so it
# probably does not change the point size - confirm the intent.
plot_3 <-
  ggplot(
    data = fight_kd,
    mapping = aes(x = total_comp_time, y = knockdowns, color = division),
    size = 3
  ) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of knockdowns",
    title = "Number of knockdowns vs competition time"
  ) +
  # Zoom only; coord_cartesian() does not drop points from the fit.
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 15))
# KO's by fight time ----------------------------------------------------------
# Number of KO/TKO rows per (fighter, division). The same 2005 cut-off used for
# competition time below is applied here as well; previously the KO count
# covered all dates while the competition time covered post-2005 rows only.
# NOTE(review): this counts every row whose method is "KO/TKO", without
# checking `result`, so it may include bouts the fighter lost by KO/TKO -
# confirm whether only wins should be counted.
ko_count <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    method == "KO/TKO",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(KO = sum(count))

# Total competition time per (fighter, division) after 2005-01-01, joined to
# the KO counts. The inner join keeps only fighters that appear in ko_count,
# i.e. fighters with at least one KO/TKO row.
fight_ko <- mma %>%
  filter(POSixTime > "2005-01-01") %>%
  group_by(fighter, division) %>%
  summarise(total_comp_time = sum(total_comp_time)) %>%
  filter(division %in% c("Flyweight", "Heavyweight")) %>%
  # Explicit keys: the same columns the implicit natural join used.
  inner_join(ko_count, by = c("fighter", "division"))

# Scatter of KO count against competition time with one linear fit per
# division.
# NOTE(review): `size = 3` is given to ggplot() rather than geom_point(), so it
# probably does not change the point size - confirm the intent.
plot_4 <-
  ggplot(
    fight_ko,
    aes(
      x = total_comp_time,
      y = KO,
      color = division
    ),
    size = 3
  ) +
  geom_point() +
  geom_smooth(formula = y ~ x, method = "lm") +
  # Zoom only; coord_cartesian() does not drop points from the fit.
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 15)) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of knockouts",
    title = "Number of knockouts vs competition time"
  )
# Method of victory boxplots ----------------------------------------------
# Box plots facet wrapped (by finish type) for DEC,SUB,KO/TKO
# Each point representing fighter, y = count of finished, x = HW or FW

# Lookup that collapses the raw `method` values into four outcome groups.
# NOTE(review): the labels are matched to unique(mma$method) purely by
# position, so this is only correct while the data yields exactly these seven
# values in this order. mutate() fails if the number of distinct methods is
# not seven, but a reordering would go unnoticed - verify against the data.
method_2 <-
  data.frame(method = unique(mma$method)) %>%
  mutate(
    method_2 = c(
      "Submission",
      "KO/TKO",
      "Decision",
      "Decision",
      "Decision",
      "Decision",
      "Disqualification"
    )
  )

# Rows per (fighter, division, outcome group) after 2005-01-01 for Flyweight
# and Heavyweight.
method_count <- mma %>%
  # Explicit key: the only column the lookup shares with mma by construction.
  inner_join(method_2, by = "method") %>%
  filter(POSixTime > "2005-01-01") %>%
  filter(division %in% c("Flyweight", "Heavyweight")) %>%
  group_by(fighter, division, method_2) %>%
  summarise(method_3 = sum(count))

# One facet per outcome group; jittered per-fighter points behind a box plot.
plot_5 <-
  ggplot(method_count, aes(x = division, y = method_3, colour = method_2)) +
  geom_jitter(color = "azure4", size = 0.7) +
  # alpha is a fixed setting, so it belongs outside aes(); inside aes() the
  # constant is treated as data and rescaled by the alpha scale.
  geom_boxplot(alpha = 0.8) +
  facet_wrap(~method_2) +
  labs(
    x = "Weight division",
    y = "Number of fights",
    title = "Number of finishes by method and weight division"
  ) +
  theme(legend.position = "none")
# Longest win streak ------------------------------------------------------
# xy scatter with x = number of fights, y = number of wins and z = time (year)
# and size = cumulative win streak

# column names for input df
gganimate_col_names <- c(
  "year", "fighter", "division", "total_fights", "streak_to_date"
)

# month-year label for the time lapse ("%m-%Y" text, not a Date)
# The former as.Date(mma$year_month) call was dropped: its result was
# discarded, and "%m-%Y" strings are not a format as.Date() can parse.
mma$year_month <- format(mma$POSixTime, "%m-%Y")

# Running totals per fighter, computed oldest bout first.
# The former sort(mma$POSixTime, decreasing = T) discarded its result and left
# the rows in file order; arrange() puts the rows in chronological order
# before the cumulative calculations run.
# For a given fighter, check `result`: if 1 add to the previous streak, if 0
# reset the streak to 0.
winstreak <-
  mma %>%
  arrange(POSixTime) %>%
  group_by(fighter) %>%
  mutate(
    winstreak = accumulate(result, ~ ifelse(.y == 0, .y, .x + .y)),
    fights_to_date = cumsum(count),
    wins_to_date = cumsum(result)
  ) %>%
  select(c(
    "POSixTime",
    "year_month",
    "fighter",
    "fights_to_date",
    "wins_to_date",
    "winstreak",
    "division"
  ))
# Animation input: the per-bout running totals restricted to men's divisions.
# `winstreak` already has one row per row of mma with exactly these columns,
# so it is used directly. Joining it back onto mma by (fighter, POSixTime)
# added nothing and duplicated rows whenever that key pair was not unique
# (for example several rows with a missing date for one fighter).
winstreak_animate <- winstreak %>%
  ungroup() %>%
  select(c(
    "POSixTime",
    "year_month",
    "fighter",
    "fights_to_date",
    "wins_to_date",
    "winstreak",
    "division"
  )) %>%
  filter(division %in% Mens_division)

# date fixing: snap every bout date to the first day of its month so the
# animation advances in monthly steps.
winstreak_animate2 <- winstreak_animate %>%
  mutate(Date2 = floor_date(as_date(POSixTime), "month"))
# Animated scatter: fights to date against wins to date, coloured by division
# and sized by the current win streak, stepping through Date2.
# The title previously repeated the box plot's title; it now describes this
# chart.
plot_6 <-
  ggplot(
    winstreak_animate2,
    aes(
      x = fights_to_date,
      y = wins_to_date,
      colour = division,
      size = winstreak
    )
  ) +
  geom_point() +
  labs(
    x = "Fights to date",
    y = "Wins to date",
    title = "Wins vs fights to date by weight division"
  ) +
  transition_time(Date2)

# Render once and keep the result: anim_save() below needs this object, which
# was never created while the assignment was commented out.
plot_6_animated <-
  animate(plot_6, fps = 3, renderer = gifski_renderer())
anim_save("mma_xy_scatter.gif", plot_6_animated)

# Print any warnings collected while the script ran.
warnings()