Results

All Men's Divisions: Wins vs Total Fights

Heavyweight vs Flyweight Activity

Competition time vs bout number

Between 2005 and 2022, Flyweights accumulated significantly more competition time (seconds) for the same number of bouts fought. This is a consequence of there being fewer stoppages and thus more fights going the full distance to a decision.

Between 2005 and 2022, there was no significant difference between the number of significant strikes landed by Flyweights and Heavyweights. This result is surprising, as it is commonly assumed that Flyweights are more active and therefore land more significant strikes for the same amount of competition time.

Heavyweight vs Flyweight Outcomes

The box plots above show how the distribution of fight outcomes (submission, decision, knockout or disqualification) differs between the Heavyweight and Flyweight divisions.

Between 2005 and 2022, Heavyweights accumulated significantly more knockdowns than Flyweights for the same amount of competition time (seconds).

Between 2005 and 2022, Heavyweights accumulated significantly more knockouts than Flyweights for the same amount of competition time (seconds).



# Set up ------------------------------------------------------------------

library(ggplot2)
library(dplyr)
library(gganimate)
library(purrr)
library(gapminder)
library(lubridate)
library(gifski)

# Show the directory the script was started from, then move into the data
# folder so the relative file names used below (and any output written later
# with a relative path) resolve there.
getwd()
setwd("./mmadata_up_to_20220622")

# Men's weight classes; used further down to restrict the win-streak data.
Mens_division <- c(
  "Heavyweight",
  "Light Heavyweight",
  "Middleweight",
  "Welterweight",
  "Lightweight",
  "Featherweight",
  "Bantamweight",
  "Flyweight"
)

# Master table of bout records.
# NOTE(review): later sections treat this as one row per fighter per bout --
# confirm against the CSV.
mma <- read.csv("masterdataframe.csv")

# Single-column data frame of the CSV's column names, kept for reference.
# Built before the helper columns below are added.
column_names <- data.frame(colnames(mma))

# Helper columns: the `date` strings parsed to date-times, and a constant 1
# that later sections sum to count rows per group.
mma[["POSixTime"]] <- as.POSIXct(mma$date, format = "%Y-%m-%d")
mma[["count"]] <- 1



# Total Fight time by fighter ---------------------------------------------


# Per-fighter totals after 2005-01-01 for the two divisions being compared:
# summed competition time and number of bouts (the sum of the constant
# `count` column). Rows are filtered before aggregating so only the relevant
# divisions are summarised; `.groups = "drop"` returns an ungrouped table.
fight_time <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    count = sum(count),
    .groups = "drop"
  )

# Scatter of bouts against competition time, one point per fighter/division,
# with a linear fit per division.
# `size = 3` used to be passed to ggplot(), where extra arguments are not
# used; it is now set on geom_point() so the point size actually applies.
# coord_cartesian() zooms the view without dropping points from the fit.
plot_1 <-
  ggplot(
    fight_time,
    aes(
      x = total_comp_time,
      y = count,
      color = division
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(formula = y ~ x, method = "lm") +
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 20)) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of bouts",
    title = "Number of bouts vs competition time"
  )



# Significant strikes by fight time ---------------------------------------


# Per-fighter totals after 2005-01-01 for Flyweight and Heavyweight:
# summed competition time and summed significant strikes landed.
# Rows are filtered before aggregating; `.groups = "drop"` returns an
# ungrouped table.
fight_sig_strikes <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    sig_strikes_landed = sum(sig_strikes_landed),
    .groups = "drop"
  )

# Scatter of significant strikes landed against competition time with a
# linear fit per division.
# Fixes: the y-axis label previously read "Number of bouts" (copied from the
# first plot) although y is `sig_strikes_landed`; the title misspelled
# "significant"; and `size = 3` was passed to ggplot(), where it is not
# used, so it now sits on geom_point().
plot_2 <-
  ggplot(
    fight_sig_strikes,
    aes(
      x = total_comp_time,
      y = sig_strikes_landed,
      color = division
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(formula = y ~ x, method = "lm") +
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 1000)) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of significant strikes landed",
    title = "Number of significant strikes vs competition time"
  )




# knockdowns by fight time --------------------------------------



# Per-fighter totals after 2005-01-01 for Flyweight and Heavyweight:
# summed competition time and summed knockdowns.
# Rows are filtered before aggregating; `.groups = "drop"` returns an
# ungrouped table.
fight_kd <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(
    total_comp_time = sum(total_comp_time),
    knockdowns = sum(knockdowns),
    .groups = "drop"
  )

# Scatter of knockdowns against competition time with a linear fit per
# division.
# `size = 3` used to be passed to ggplot(), where extra arguments are not
# used; it is now set on geom_point() so the point size actually applies.
plot_3 <-
  ggplot(
    fight_kd,
    aes(
      x = total_comp_time,
      y = knockdowns,
      color = division
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(formula = y ~ x, method = "lm") +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of knockdowns",
    title = "Number of knockdowns vs competition time"
  ) +
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 15))





# KO's by fight time ----------------------------------------------------------


# Number of KO/TKO bouts per fighter and division.
# The same 2005-01-01 cut-off used for competition time is now applied here
# too; previously KO/TKO bouts from any date were counted and then plotted
# against competition time accumulated only after 2005.
# NOTE(review): every row whose `method` is "KO/TKO" is counted. If the data
# holds one row per fighter per bout, this includes bouts the fighter lost
# by KO/TKO -- confirm that is intended.
ko_count <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    method == "KO/TKO",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(KO = sum(count), .groups = "drop")

# Competition time per fighter and division after 2005-01-01, joined to the
# KO/TKO counts. The join keys are spelled out instead of being inferred.
# The inner join keeps only fighters with at least one KO/TKO bout.
fight_ko <- mma %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division) %>%
  summarise(total_comp_time = sum(total_comp_time), .groups = "drop") %>%
  inner_join(ko_count, by = c("fighter", "division"))

# Scatter of KO/TKO count against competition time with a linear fit per
# division.
# `size = 3` used to be passed to ggplot(), where extra arguments are not
# used; it is now set on geom_point() so the point size actually applies.
plot_4 <-
  ggplot(
    fight_ko,
    aes(
      x = total_comp_time,
      y = KO,
      color = division
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(formula = y ~ x, method = "lm") +
  coord_cartesian(xlim = c(0, 15000), ylim = c(0, 15)) +
  labs(
    x = "Total Competition Time (seconds)",
    y = "Number of knockouts",
    title = "Number of knockouts vs competition time"
  )




# Method of victory boxplots ----------------------------------------------

# Box plots, facet wrapped by outcome type (Decision, Submission, KO/TKO,
# Disqualification).
# Each point represents a fighter: y = number of fights with that outcome,
# x = division (Heavyweight or Flyweight).



# lookup to aggregate decisions

# Lookup from the raw `method` values to four coarser outcome groups.
# NOTE(review): the labels are matched purely by position to the order in
# which each method first appears in `mma$method`, and exactly seven distinct
# values are expected (mutate() errors otherwise). If the CSV's row order or
# set of methods changes, the mapping shifts silently -- verify against the
# data.
method_2 <-
  data.frame(method = unique(mma$method), stringsAsFactors = FALSE) %>%
  mutate(
    method_2 = c(
      "Submission",
      "KO/TKO",
      "Decision",
      "Decision",
      "Decision",
      "Decision",
      "Disqualification"
    )
  )

# Bouts per fighter, division and outcome group after 2005-01-01 for
# Flyweight and Heavyweight. The join key is spelled out instead of being
# inferred; `.groups = "drop"` returns an ungrouped table.
method_count <- mma %>%
  inner_join(method_2, by = "method") %>%
  filter(
    POSixTime > "2005-01-01",
    division %in% c("Flyweight", "Heavyweight")
  ) %>%
  group_by(fighter, division, method_2) %>%
  summarise(method_3 = sum(count), .groups = "drop")

# One facet per outcome group: jittered per-fighter points with a box plot
# per division drawn over them.
# alpha is now set as a constant on the layer. Inside aes() the value 0.8 was
# treated as data and sent through the alpha scale, so the boxes were not
# drawn at 0.8 opacity.
plot_5 <-
  ggplot(
    method_count,
    aes(x = division, y = method_3, colour = method_2)
  ) +
  geom_jitter(color = "azure4", size = 0.7) +
  geom_boxplot(alpha = 0.8) +
  facet_wrap(~method_2) +
  labs(
    x = "Weight division",
    y = "Number of fights",
    title = "Number of finishes by method and weight division"
  ) +
  theme(legend.position = "none")





# Longest win streak ------------------------------------------------------

#xy scatter with x = number of fights, y = number of wins and z = time(year) and size = cumulative win streak


#column names for input df

# Column names for the animation input data frame (not referenced below).
gganimate_col_names <- c("year","fighter","division","total_fights","streak_to_date")


# Month-year label for each bout, formatted as "%m-%Y".
# The former `as.Date(mma$year_month)` call was removed: its result was never
# stored, and as.Date() without a `format` cannot parse month-year strings.
mma$year_month <- format(mma$POSixTime, "%m-%Y")


# Running totals per fighter.
# The rows are ordered oldest-to-newest first, because the running streak and
# the cumulative sums depend on row order. The previous stand-alone
# `sort(mma$POSixTime, decreasing = T)` discarded its result and so never
# reordered the data.
# NOTE(review): bouts on the same date keep their order from the CSV.
#
# Streak rule: for each bout, if `result` is 0 the streak resets to 0,
# otherwise `result` is added to the streak carried over from the previous
# bout.
winstreak <- mma %>%
  arrange(POSixTime) %>%
  group_by(fighter) %>%
  mutate(
    winstreak = accumulate(result, ~ ifelse(.y == 0, .y, .x + .y)),
    fights_to_date = cumsum(count),
    wins_to_date = cumsum(result)
  ) %>%
  ungroup() %>%
  select(c("POSixTime",
           "year_month",
           "fighter",
           "fights_to_date",
           "wins_to_date",
           "winstreak",
           "division"))


# Animation input: the running totals restricted to the men's divisions.
# `winstreak` already carries every column needed, so the earlier join of
# `mma` back onto it by fighter and date was dropped. That join added no
# columns and duplicated rows whenever a fighter had more than one bout on
# the same date.
winstreak_animate <- winstreak %>%
  filter(division %in% Mens_division)


# Date fixing: snap each bout to the first day of its month so the animation
# advances in monthly steps.
winstreak_animate2 <- winstreak_animate %>%
  mutate(Date2 = floor_date(as_date(POSixTime), "month"))

# Animated scatter: wins to date against fights to date, coloured by
# division, point size = current win streak, one frame per month.
# The title previously repeated the box-plot title ("Number of finishes by
# method and weight division") and did not describe this chart.
plot_6 <-
  ggplot(winstreak_animate2,
         aes(x = fights_to_date,
             y = wins_to_date,
             colour = division,
             size = winstreak)) +
  geom_point() +
  labs(x = "Fights to date",
       y = "Wins to date",
       title = "Wins vs fights to date by weight division") +
  transition_time(Date2)

# Render the animation and keep the result. The assignment used to be
# commented out, which left `plot_6_animated` undefined for anim_save().
plot_6_animated <-
  animate(plot_6, fps = 3, renderer = gifski_renderer())

anim_save("mma_xy_scatter.gif", plot_6_animated)
warnings()
