Gender In Powerlifting Competitions: The Relationship between Lifting Capacity & Gender

Powerlifting is a strength sport in which competitors are tested on three lifts: the squat, the bench press, and the deadlift. Strength training builds muscle, increases bone density, and improves brain health by slowing down neurodegeneration. Numerous misconceptions plague the powerlifting community; one is that the sport is quite harmful because it involves movements, such as squats, for which perfect form is difficult to achieve (poorly executed squats can lead to knee injuries and lower back pain). The powerlifting community has a growing female presence, and many women are teaching their daughters to lift as well, to encourage a healthy lifestyle throughout the family. I am using the February 2015 Open Powerlifting dataset from Kaggle. I am interested in how males and females vary in lifting capacity. I am also interested in seeing how bodyweight impacts lifting capacity and in determining the average lift per gender.

Data Import

**Open Powerlifting February 2015 Data:**

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maxLik)
## Loading required package: miscTools
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(ggplot2)
# Load the raw OpenPowerlifting results into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
powerlift <- read_csv(file = "C:/Users/Skippz/Desktop/openpowerlifting.csv")
## Parsed with column specification:
## cols(
##   MeetID = col_double(),
##   Name = col_character(),
##   Sex = col_character(),
##   Equipment = col_character(),
##   Age = col_double(),
##   Division = col_character(),
##   BodyweightKg = col_double(),
##   WeightClassKg = col_character(),
##   Squat4Kg = col_double(),
##   BestSquatKg = col_double(),
##   Bench4Kg = col_double(),
##   BestBenchKg = col_double(),
##   Deadlift4Kg = col_double(),
##   BestDeadliftKg = col_double(),
##   TotalKg = col_double(),
##   Place = col_character(),
##   Wilks = col_double()
## )
# Size of the raw import, printed as (rows, columns).
c(nrow(powerlift), ncol(powerlift))
## [1] 386414     17
# Build the analysis table from the raw import.
pwlift <- powerlift %>%
  # Drop rows whose Age equals 99 (presumably a placeholder value - confirm).
  # Rows with a missing Age are dropped here too, because filter() discards
  # rows where the condition evaluates to NA.
  filter(Age != 99) %>%
  # Recode Sex "0" -> "M" and "1" -> "F"; any other value passes through
  # unchanged because no default is supplied.
  mutate(Gender = recode(Sex, "0" = "M", "1" = "F")) %>%
  select(
    Gender, Equipment, Age, Division, BodyweightKg, WeightClassKg,
    BestSquatKg, BestBenchKg, BestDeadliftKg, TotalKg
  ) %>%
  # Keep only rows with no missing value in any of the retained columns.
  filter(complete.cases(.))
dim(pwlift)
## [1] 96786    10

Relationship Between Best Squat Mean & Body Weight

# Gaussian log-likelihood of the linear model
#   BestSquatKg ~ intercept + BodyweightKg
# evaluated on the global data frame `pwlift`.
#
# param: numeric vector of length 3. param[1] is the residual standard
#        deviation (sigma); param[-1] holds the intercept and the
#        BodyweightKg slope, in that order.
#
# Returns the summed normal log-density of the observed squats, or NA when
# sigma is missing or not strictly positive. Returning NA marks the point as
# out of range for the optimiser instead of letting dnorm() emit NaN (with a
# warning) for a negative sd, or +/-Inf for sd = 0.
ols.lf <- function(param) {
  sigma <- param[1]
  beta <- param[-1]
  if (is.na(sigma) || sigma <= 0) {
    return(NA_real_)
  }
  y <- as.vector(pwlift$BestSquatKg)
  x <- cbind(1, pwlift$BodyweightKg)
  # Drop the n x 1 matrix shape so dnorm() works on plain vectors.
  mu <- as.vector(x %*% beta)
  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
}
# Maximise ols.lf with Nelder-Mead, starting every parameter at 1.
# NOTE(review): the summary printed for this fit reports NaN/zero standard
# errors, which suggests the search may not have reached the optimum from
# these start values - confirm before interpreting the estimates.
Max1 <- maxLik(
  logLik = ols.lf,
  start = c(sigma = 1, beta1 = 1, beta2 = 1),
  method = "nm"
)
summary(Max1)
## Warning in sqrt(diag(vc)): NaNs produced

## Warning in sqrt(diag(vc)): NaNs produced
## --------------------------------------------
## Maximum Likelihood estimation
## Nelder-Mead maximization, 90 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -518042.8 
## 3  free parameters
## Estimates:
##       Estimate Std. error t value Pr(> t)    
## sigma  51.2947     0.1311   391.3  <2e-16 ***
## beta1   4.9420         NA      NA      NA    
## beta2   2.0252     0.0000     Inf  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------
# Gaussian log-likelihood of the linear model
#   BestSquatKg ~ intercept + BodyweightKg + Age
# evaluated on the global data frame `pwlift`.
#
# param: numeric vector of length 4. param[1] is the residual standard
#        deviation (sigma); param[-1] holds the intercept, the BodyweightKg
#        slope and the Age slope, in that order.
#
# Returns the summed normal log-density of the observed squats, or NA when
# sigma is missing or not strictly positive. Returning NA marks the point as
# out of range for the optimiser instead of letting dnorm() emit NaN (with a
# warning) for a negative sd, or +/-Inf for sd = 0.
ols.lf2 <- function(param) {
  sigma <- param[1]
  beta <- param[-1]
  if (is.na(sigma) || sigma <= 0) {
    return(NA_real_)
  }
  y <- as.vector(pwlift$BestSquatKg)
  x <- cbind(1, pwlift$BodyweightKg, pwlift$Age)
  # Drop the n x 1 matrix shape so dnorm() works on plain vectors.
  mu <- as.vector(x %*% beta)
  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
}
# Maximise ols.lf2 with Nelder-Mead, starting every parameter at 1.
# NOTE(review): the log-likelihood printed for this fit is lower than the one
# printed for the nested bodyweight-only model, and the intercept shows a
# zero standard error; both suggest the search stopped short of the optimum -
# confirm before interpreting the estimates.
M2 <- maxLik(
  logLik = ols.lf2,
  start = c(sigma = 1, beta1 = 1, beta2 = 1, beta3 = 1),
  method = "nm"
)
summary(M2)
## --------------------------------------------
## Maximum Likelihood estimation
## Nelder-Mead maximization, 179 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -524806.1 
## 4  free parameters
## Estimates:
##         Estimate Std. error t value Pr(> t)    
## sigma  51.679382   0.104878  492.76  <2e-16 ***
## beta1 -65.257216   0.000000    -Inf  <2e-16 ***
## beta2   2.715047   0.004476  606.56  <2e-16 ***
## beta3   0.219802   0.012290   17.89  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------

Plotting Age & Best Squat Kg

# Scatter of best squat against age with a smoothed trend line; both layers
# share the same aesthetic mapping.
ggplot(pwlift, aes(x = Age, y = BestSquatKg)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Relationship Between Bench Kg & Bodyweight & Age

# Gaussian log-likelihood of the linear model
#   BestBenchKg ~ intercept + BodyweightKg + Age
# evaluated on the global data frame `pwlift`.
#
# param: numeric vector of length 4. param[1] is the residual standard
#        deviation (sigma); param[-1] holds the intercept, the BodyweightKg
#        slope and the Age slope, in that order.
#
# Returns the summed normal log-density of the observed bench presses, or NA
# when sigma is missing or not strictly positive. Returning NA marks the
# point as out of range for the optimiser instead of letting dnorm() emit NaN
# (with a warning) for a negative sd, or +/-Inf for sd = 0.
ols.lf3 <- function(param) {
  sigma <- param[1]
  beta <- param[-1]
  if (is.na(sigma) || sigma <= 0) {
    return(NA_real_)
  }
  y <- as.vector(pwlift$BestBenchKg)
  x <- cbind(1, pwlift$BodyweightKg, pwlift$Age)
  # Drop the n x 1 matrix shape so dnorm() works on plain vectors.
  mu <- as.vector(x %*% beta)
  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
}
# Maximise the bench-press log-likelihood with Nelder-Mead, starting every
# parameter at 1.
# NOTE(review): the summary printed for this fit reports an NA standard error
# for the intercept, which suggests the search may not have reached the
# optimum from these start values - confirm before interpreting.
M3 <- maxLik(
  logLik = ols.lf3,
  start = c(sigma = 1, beta1 = 1, beta2 = 1, beta3 = 1),
  method = "nm"
)
summary(M3)
## Warning in sqrt(diag(vc)): NaNs produced

## Warning in sqrt(diag(vc)): NaNs produced
## --------------------------------------------
## Maximum Likelihood estimation
## Nelder-Mead maximization, 195 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -488004.7 
## 4  free parameters
## Estimates:
##         Estimate Std. error t value Pr(> t)    
## sigma  36.913443   0.077347 477.242  <2e-16 ***
## beta1 -24.428222         NA      NA      NA    
## beta2   1.635304   0.003103 526.930  <2e-16 ***
## beta3   0.006867   0.008734   0.786   0.432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------
# Scatter of best bench press against age with a smoothed trend line; both
# layers share the same aesthetic mapping.
ggplot(pwlift, aes(x = Age, y = BestBenchKg)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Relationship Between Deadlift Kg & Bodyweight & Age

# Gaussian log-likelihood of the linear model
#   BestDeadliftKg ~ intercept + BodyweightKg + Age
# evaluated on the global data frame `pwlift`.
#
# NOTE(review): this reuses the name `ols.lf3`, so it replaces the
# bench-press likelihood defined earlier in the file under the same name.
#
# param: numeric vector of length 4. param[1] is the residual standard
#        deviation (sigma); param[-1] holds the intercept, the BodyweightKg
#        slope and the Age slope, in that order.
#
# Returns the summed normal log-density of the observed deadlifts, or NA when
# sigma is missing or not strictly positive. Returning NA marks the point as
# out of range for the optimiser instead of letting dnorm() emit NaN (with a
# warning) for a negative sd, or +/-Inf for sd = 0.
ols.lf3 <- function(param) {
  sigma <- param[1]
  beta <- param[-1]
  if (is.na(sigma) || sigma <= 0) {
    return(NA_real_)
  }
  y <- as.vector(pwlift$BestDeadliftKg)
  x <- cbind(1, pwlift$BodyweightKg, pwlift$Age)
  # Drop the n x 1 matrix shape so dnorm() works on plain vectors.
  mu <- as.vector(x %*% beta)
  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
}
# Maximise the deadlift log-likelihood with Nelder-Mead, starting every
# parameter at 1.
# NOTE(review): the result is stored in `M3`, the same name used for the
# bench-press fit earlier in the file, so that earlier fit is overwritten.
M3 <- maxLik(
  logLik = ols.lf3,
  start = c(sigma = 1, beta1 = 1, beta2 = 1, beta3 = 1),
  method = "nm"
)
summary(M3)
## --------------------------------------------
## Maximum Likelihood estimation
## Nelder-Mead maximization, 237 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -525142.3 
## 4  free parameters
## Estimates:
##         Estimate Std. error t value Pr(> t)    
## sigma  51.643997   0.093080  554.83  <2e-16 ***
## beta1 -57.869572   0.110717 -522.68  <2e-16 ***
## beta2   2.712848   0.004649  583.54  <2e-16 ***
## beta3   0.706484   0.012376   57.08  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------
# Scatter of best deadlift against age with a smoothed trend line; both
# layers share the same aesthetic mapping.
ggplot(pwlift, aes(x = Age, y = BestDeadliftKg)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

=