library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(dplyr)
library(ggplot2)
library(readxl)
library(pwr)
# Load the fight records, keep only men's bouts with a known weight class
# (Catch Weight fights excluded), and encode the outcome as a 0/1 response:
# WinnerBinary is 1 when Winner is "Red" and 0 for any other recorded winner.
ufc_data <- read_excel("~/Downloads/UFC_Dataset.xls") |>
  filter(Gender == "MALE") |>
  filter(!is.na(WeightClass)) |>
  filter(WeightClass != "Catch Weight") |>
  mutate(WinnerBinary = as.numeric(Winner == "Red"))
# Fit one logistic regression per weight class:
#   WinnerBinary ~ ReachDif + AgeDif
# and print the model summary plus confidence intervals for each.
weight_classes <- unique(ufc_data$WeightClass)

# Keep every fitted model, keyed by weight class. Without this, each iteration
# overwrites `model` and only the last weight class can be inspected or
# diagnosed once the loop has finished.
models <- setNames(vector("list", length(weight_classes)), weight_classes)

for (weight_class in weight_classes) {
  cat("\nAnalyzing weight class:", weight_class, "\n")

  # Restrict to this weight class and to the columns used by the model
  ufc_model_data <- ufc_data |>
    filter(WeightClass == weight_class) |>
    select(ReachDif, AgeDif, WinnerBinary)

  # Build model
  model <- glm(
    WinnerBinary ~ ReachDif + AgeDif,
    data = ufc_model_data,
    family = binomial
  )
  print(summary(model))

  # Confidence intervals for the coefficients (log-odds scale)
  conf_interval <- confint(model)
  print(conf_interval)

  # `model` / `ufc_model_data` still hold the last fit after the loop, as
  # before; `models` additionally retains all of them.
  models[[weight_class]] <- model
}
##
## Analyzing weight class: Middleweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.214551 0.075112 2.856 0.00428 **
## ReachDif -0.017672 0.009195 -1.922 0.05461 .
## AgeDif -0.012544 0.014743 -0.851 0.39489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1002.03 on 728 degrees of freedom
## Residual deviance: 996.93 on 726 degrees of freedom
## AIC: 1002.9
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.06764782 0.3622197703
## ReachDif -0.03581043 0.0002802113
## AgeDif -0.04153042 0.0163388490
##
## Analyzing weight class: Featherweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.460625 0.079129 5.821 5.84e-09 ***
## ReachDif -0.017168 0.009834 -1.746 0.080858 .
## AgeDif -0.062059 0.017487 -3.549 0.000387 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 928.53 on 693 degrees of freedom
## Residual deviance: 910.29 on 691 degrees of freedom
## AIC: 916.29
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.30644642 0.6168034875
## ReachDif -0.03706758 0.0005142276
## AgeDif -0.09671205 -0.0280757656
##
## Analyzing weight class: Lightweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.356282 0.064151 5.554 2.8e-08 ***
## ReachDif -0.003570 0.008131 -0.439 0.6606
## AgeDif -0.030801 0.013246 -2.325 0.0201 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1371.1 on 1010 degrees of freedom
## Residual deviance: 1365.2 on 1008 degrees of freedom
## AIC: 1371.2
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.23099049 0.482545670
## ReachDif -0.01953952 0.012364723
## AgeDif -0.05690991 -0.004937326
##
## Analyzing weight class: Welterweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.278224 0.065510 4.247 2.17e-05 ***
## ReachDif -0.014299 0.008034 -1.780 0.0751 .
## AgeDif -0.011689 0.012863 -0.909 0.3635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1319.9 on 964 degrees of freedom
## Residual deviance: 1315.6 on 962 degrees of freedom
## AIC: 1321.6
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.15017859 0.407068572
## ReachDif -0.03013047 0.001396362
## AgeDif -0.03696039 0.013506773
##
## Analyzing weight class: Light Heavyweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.22481 0.09546 2.355 0.0185 *
## ReachDif -0.04643 0.01127 -4.118 3.81e-05 ***
## AgeDif -0.03345 0.01677 -1.995 0.0461 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 645.02 on 470 degrees of freedom
## Residual deviance: 620.75 on 468 degrees of freedom
## AIC: 626.75
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.03806720 0.4125742405
## ReachDif -0.06899186 -0.0247123720
## AgeDif -0.06663783 -0.0007744874
##
## Analyzing weight class: Bantamweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.417907 0.082620 5.058 4.23e-07 ***
## ReachDif 0.004860 0.009612 0.506 0.613
## AgeDif -0.022834 0.015893 -1.437 0.151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 829.84 on 616 degrees of freedom
## Residual deviance: 827.57 on 614 degrees of freedom
## AIC: 833.57
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.25682010 0.580872035
## ReachDif -0.01399037 0.023746863
## AgeDif -0.05415514 0.008239373
##
## Analyzing weight class: Flyweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.437172 0.117135 3.732 0.00019 ***
## ReachDif 0.004964 0.017129 0.290 0.77197
## AgeDif -0.023576 0.023457 -1.005 0.31486
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 414.59 on 308 degrees of freedom
## Residual deviance: 413.50 on 306 degrees of freedom
## AIC: 419.5
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.20932729 0.66902865
## ReachDif -0.02867570 0.03867288
## AgeDif -0.07003136 0.02218537
##
## Analyzing weight class: Heavyweight
##
## Call:
## glm(formula = WinnerBinary ~ ReachDif + AgeDif, family = binomial,
## data = ufc_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.401025 0.096474 4.157 3.23e-05 ***
## ReachDif -0.033811 0.009769 -3.461 0.000538 ***
## AgeDif 0.004389 0.016638 0.264 0.791921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 621.21 on 461 degrees of freedom
## Residual deviance: 607.67 on 459 degrees of freedom
## AIC: 613.67
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.21295253 0.59143204
## ReachDif -0.05327294 -0.01501337
## AgeDif -0.02823718 0.03712179
# Diagnostic plots for `model`.
# NOTE: at this point `model` is whatever the preceding loop fitted last, so
# these plots describe only the final weight class, not every weight class.
#
# Predictor values are read from the model frame rather than from
# `ufc_model_data`: glm() drops rows with missing values by default, so only
# the model frame is guaranteed to line up one-to-one with the residuals.
model_resid <- resid(model) # deviance residuals (the default for a glm)
model_frame <- model.frame(model)

# Residuals vs Fitted Values
plot(
  fitted(model), model_resid,
  main = "Residuals vs Fitted Values",
  xlab = "Fitted Values", ylab = "Residuals"
)
abline(h = 0, col = "red")

# Residuals vs Predictor Variables
plot(
  model_frame[["ReachDif"]], model_resid,
  xlab = "Reach Difference", ylab = "Residuals",
  main = "Residuals vs Reach Difference"
)
abline(h = 0, col = "red")
plot(
  model_frame[["AgeDif"]], model_resid,
  xlab = "Age Difference", ylab = "Residuals",
  main = "Residuals vs Age Difference"
)
abline(h = 0, col = "red")

# Q-Q Plot
# NOTE(review): residuals from a binary-response model are not expected to be
# normally distributed, so departures from the line here should be read with
# caution rather than as evidence of misfit on their own.
qqnorm(model_resid, main = "Normal Q-Q Plot")
qqline(model_resid, col = "red")

# Cook's Distance by Observation
cooksD <- cooks.distance(model)
plot(
  seq_along(cooksD), cooksD,
  type = "h", main = "Cook's Distance by Observation",
  xlab = "Observation", ylab = "Cook's Distance"
)
# Rule-of-thumb cutoff 4 / (n - k - 1), with n = observations actually used in
# the fit and k = number of predictors; length(coef(model)) is k + 1 because
# it includes the intercept.
abline(h = 4 / (nobs(model) - length(coef(model))), col = "red")
-Neither reach nor age seems to have a statistically significant effect on victory.
-Neither reach nor age seems to have a statistically significant effect on victory.
-The coefficient for reach is marginally significant (p = .081), suggesting that a reach difference may affect winning at this weight class. However, the confidence interval includes zero, so we should be skeptical of the relationship. Age is significant at this weight class and favors the younger fighter.
-Reach appears to be insignificant at this weight class, as suggested by the p-value and the fact that the confidence interval includes zero. Age, however, is significant at this weight class, as supported by the p-value and confidence interval, suggesting that younger fighters have an advantage in this weight class.
-Reach is marginally significant (p = .075), but the confidence interval includes zero, indicating that it is not significant at the 5% level. Age is not significant, since the p-value is .364 and the confidence interval includes zero.
-Results suggest that there is a weak relationship between having an advantage in reach and victory. However, the p-value (.055) is not quite below .05, indicating that the effect is only weakly or marginally significant. The confidence interval also includes zero, indicating that we should be skeptical of this relationship. The p-value and confidence interval indicate that age is not a statistically significant factor in winning a fight at this weight class.
-Reach in this division is highly significant, suggesting that having a larger reach in this weight class aids victory. The confidence interval supports this as well. Age is also significant, as supported by its p-value and confidence interval, suggesting that the younger fighter has an edge in this division.
-Reach seems to be very significant with regard to victory in this weight class. The p-value is remarkably low, and the confidence interval also suggests that the relationship is strong. Age, however, does not seem to have any effect on winning in this weight class; this is supported by both the p-value and the confidence interval.
-The Cook’s distance plot and the Q-Q plot suggest that we should be cautious: some results that appear significant may not actually be. Further analysis may be needed to confirm significance.