library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
library(pwr)
# Load the raw fight-level data and prepare it for the analysis below.
# Steps, in order:
#   1. keep only rows where Gender is "MALE";
#   2. build the response variable TotalAvgSigStrLanded as the sum of the
#      RedAvgSigStrLanded and BlueAvgSigStrLanded columns;
#   3. drop rows with a missing WeightClass or a missing response value;
#   4. drop rows whose WeightClass is "Catch Weight".
# Note: no date filter is applied here; all dates in the file are retained.
dataset <- read_excel("~/Downloads/UFC_Dataset.xls") |>
  filter(Gender == "MALE") |>
  mutate(TotalAvgSigStrLanded = RedAvgSigStrLanded + BlueAvgSigStrLanded) |>
  filter(
    !is.na(WeightClass),
    !is.na(TotalAvgSigStrLanded),
    WeightClass != "Catch Weight"
  )

# Preview the first rows of the cleaned data.
head(dataset)
## # A tibble: 6 × 119
## RedFighter BlueFighter RedOdds BlueOdds RedExpectedValue BlueExpectedValue
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Jack Hermanss… Joe Pyfer 205 -250 205 40
## 2 Dan Ige Andre Fili -185 154 54.1 154
## 3 Robert Bryczek Ihor Potie… -230 190 43.5 190
## 4 Brad Tavares Gregory Ro… 190 -230 190 43.5
## 5 Michael Johns… Darrius Fl… -155 130 64.5 130
## 6 Rodolfo Vieira Armen Petr… -105 -115 95.2 87.0
## # ℹ 113 more variables: Date <dttm>, Location <chr>, Country <chr>,
## # Winner <chr>, TitleBout <lgl>, WeightClass <chr>, Gender <chr>,
## # NumberOfRounds <dbl>, BlueCurrentLoseStreak <dbl>,
## # BlueCurrentWinStreak <dbl>, BlueDraws <dbl>, BlueAvgSigStrLanded <dbl>,
## # BlueAvgSigStrPct <dbl>, BlueAvgSubAtt <dbl>, BlueAvgTDLanded <dbl>,
## # BlueAvgTDPct <dbl>, BlueLongestWinStreak <dbl>, BlueLosses <dbl>,
## # BlueTotalRoundsFought <dbl>, BlueTotalTitleBouts <dbl>, …
Hypothesis #1:
Null: There is no difference in mean TotalAvgSigStrLanded
between weight classes.
Alternative: At least one weight class has a different mean
TotalAvgSigStrLanded from the others.
# One-way ANOVA: does the mean of TotalAvgSigStrLanded differ across the
# levels of WeightClass?
anova_result <- aov(
  formula = TotalAvgSigStrLanded ~ WeightClass,
  data = dataset
)

# Print the ANOVA table (Df, sums of squares, F value and p-value).
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## WeightClass 7 53091 7584 6.368 1.81e-07 ***
## Residuals 4343 5172716 1191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interpretation: By observing both the F value and the p-value provided,
we can conclude that there is significant evidence to reject the null
hypothesis. The large F value indicates that the variation between the
weight-class means is large relative to the variation within each weight
class, and since the p-value is close to zero, a difference of this size
is very unlikely to be due to chance alone. We can therefore say that
there is a statistically significant difference in the strikes landed
across weight classes. I was talking to a friend this weekend who is
also passionate about UFC and expressing how I was having trouble
finding meaningful insights into my data; he made me realize that weight
class is likely an important factor that I was not taking into account.
This shows the uniqueness of each weight class and how each must be
adjusted for and held to different standards.
# Box plot of TotalAvgSigStrLanded with one box per WeightClass.
ggplot(
  data = dataset,
  mapping = aes(x = WeightClass, y = TotalAvgSigStrLanded)
) +
  geom_boxplot() +
  labs(
    title = "Total Significant Strikes Landed by Weight Class",
    x = "Weight Class",
    y = "Total Significant Strikes Landed"
  ) +
  # Rotate the x-axis tick labels by 90 degrees so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Interpretation: This graph supports the claims made above and depicts
that each weight class has its own typical (median) number of strikes
landed and its own spread in that statistic as well. Not taking this
into account and only filtering for males might have prevented us from
seeing the whole picture.
Linear Regression Model
Purpose: I want to see if weight has a linear relationship with
strikes landed. This will test the hypothesis: does the number of
strikes landed decrease as you move up in weight class?
# Simple linear regression of TotalAvgSigStrLanded on the RedWeightLbs column.
linear_reg <- lm(
  formula = TotalAvgSigStrLanded ~ RedWeightLbs,
  data = dataset
)

# Print coefficients, standard errors, R-squared and the overall F test.
summary(linear_reg)
##
## Call:
## lm(formula = TotalAvgSigStrLanded ~ RedWeightLbs, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.379 -34.927 0.279 26.554 135.270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.17787 2.70253 20.047 < 2e-16 ***
## RedWeightLbs -0.06239 0.01559 -4.002 6.39e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.6 on 4349 degrees of freedom
## Multiple R-squared: 0.003669, Adjusted R-squared: 0.00344
## F-statistic: 16.01 on 1 and 4349 DF, p-value: 6.392e-05
Interpretation: Based on the values above, we can conclude that
weight does in fact have a statistically significant effect on the
number of strikes landed. The coefficient on RedWeightLbs suggests a
negative relationship between weight and strikes landed, thus suggesting
that heavier weight classes generally land fewer strikes. Note, however,
that the R-squared is only about 0.004, so weight on its own explains
well under 1% of the variation in strikes landed; the effect is real but
very small. Not considering this in my past calculations is likely a
contributor as to why it was difficult to find significant relationships
between a KPI and victory. A suggestion that can be made from this data
is that the higher up in weight you go, the more a fighter should focus
on quality rather than quantity: one great strike is more effective than
many okay ones at higher weights.