injury<- fread("/Users/claire/Desktop/injury_data.csv", header=T, data.table = F)
wellness<- fread("/Users/claire/Desktop/wellness.csv", header=T, data.table = F)
load<- fread("/Users/claire/Desktop/external_load.csv", header=T, data.table=F)
str(injury)
str(wellness)
str(load)
table(injury$playerid)
table(wellness$playerid)
table(load$playerid)
wellness:
date
sleep hours
soreness
comment
body area (1,2,3)
soreness (1,2,3)
injury:
injury date
injury type
body part
mechanism
context
days lost
severity
load: - session date
drill
duration
RPE
total distance
high speed distance
high speed intensity distance
player load
exertion index
sprint numbers (?)
accelerations
decelerations
max speed
max accell
max decel
The comments seem relatively uniform, but as of right now I am not too sure what we are getting out of them. Some are sleep-related and some are general so it might be a bit too all over the place.
I removed any total sleep hours greater than or equal to 15 as it is likely unrealistic or might encompass a sick night or something if true. The low end of sleep is a bit harder to discern if real or not, so for now I will just exclude nights where students slept zero hours. Total soreness appears to be a likert scale from 1 to 7. The mean overall soreness is 4.0, while the mean soreness for individual body parts is a bit higher, around 5.2-5.7 which makes sense as it is a more isolated measure.
If an athelete has multiple entries on one date, take their average for total soreness and sleep hours.
head(wellness)
table(wellness$comment)
hist(wellness$sleep_hours)
hist(wellness$overall_soreness)
summary(wellness$overall_soreness)
summary(wellness$soreness1)
summary(wellness$soreness2)
summary(wellness$soreness3)
summary(wellness$soreness4)
wellness<- wellness %>%
mutate(
date= as.Date(date, format = "%m/%d/%Y"),
sleep_hours = case_when(sleep_hours >= 15 ~ NA,
sleep_hours < 1 ~ NA,
TRUE~ sleep_hours))
wellness_clean<- wellness %>% group_by(playerid, date) %>%
mutate(
sleep_hours_clean= mean(sleep_hours),
overall_soreness_clean= mean(overall_soreness)
) %>% slice(1) %>% ungroup()
There does not appear to be a realtionship between total soreness and sleep hours on any given night.
wellness_clean %>%
ggplot(aes(x = sleep_hours_clean, y = overall_soreness_clean)) +
geom_jitter()+
geom_smooth(method='lm', formula= y~x) +
theme(axis.text.x = element_text(hjust = 0.5, size = 15),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(x = "Sleep Hours", y = "Overall Soreness", title = "Relationship Between Soreness and Sleep Hours on a Given Date")
The correlation alone between sleep hours and soreness is -0.06, however, individuals likely have differnet baselines of soreness tolerance so it would be good to include a random effect for player ID.
summary(lm(overall_soreness_clean~sleep_hours_clean, data=wellness_clean))
##
## Call:
## lm(formula = overall_soreness_clean ~ sleep_hours_clean, data = wellness_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0801 -1.8599 0.0436 1.6276 3.2464
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.35486 0.50041 8.703 <2e-16 ***
## sleep_hours_clean -0.06013 0.07044 -0.854 0.394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.917 on 526 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.001383, Adjusted R-squared: -0.0005153
## F-statistic: 0.7286 on 1 and 526 DF, p-value: 0.3937
Adding a random intercept for each player, we still do not see much of a relationship between total soreness and sleep hours.
summary(lmer(overall_soreness~sleep_hours + (1|playerid), data=wellness_clean))
## boundary (singular) fit: see help('isSingular')
## Linear mixed model fit by REML ['lmerMod']
## Formula: overall_soreness ~ sleep_hours + (1 | playerid)
## Data: wellness_clean
##
## REML criterion at convergence: 2255
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.50026 -0.95124 0.04115 1.02243 1.59528
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.000 0.00
## Residual 3.961 1.99
## Number of obs: 534, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.12134 0.49627 8.305
## sleep_hours -0.02964 0.06981 -0.425
##
## Correlation of Fixed Effects:
## (Intr)
## sleep_hours -0.985
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
iso<- wellness %>%
filter(!is.na(body_area1)) %>% # select only rows with non NA body areas
select(playerid, date, soreness1, soreness2, soreness3, soreness4,
sleep_hours) %>% # get soreness for each body part
group_by(playerid, date) %>%
slice(1) %>% # if a player still has more than one entry per date, select one. not sure how to tell which is correct
pivot_longer(names_to = "body_area", values_to= "soreness", cols = starts_with(c("soreness"))) %>%
mutate(body_area = case_when(grepl("1", body_area) ~ "body_area1",
grepl("2", body_area) ~ "body_area2",
grepl("3", body_area) ~ "body_area3",
grepl("4", body_area) ~ "body_area4"))
iso_body<- wellness %>%
filter(!is.na(body_area1)) %>% # select only rows with non NA body areas
select(playerid, date, contains("body_area")) %>% # get soreness for each body part
group_by(playerid, date) %>%
slice(1) %>% # if a player still has more than one entry per date, select one. not sure how to tell which is correct
pivot_longer(names_to = "body_area", values_to= "area", cols = starts_with("body_area"))
iso<- merge(iso, iso_body, by=c("playerid", "date", "body_area"), all=T)
iso$playeriddate<- paste0(iso$playerid, iso$date)
Here, I created a dataset that has nested data containing total sleep hours and soreness of body parts on any given date in long form. This is intended to show if there is any relationship between isolated soreness and sleep hours. On a general level, we still do not see a relationship between isolated soreness and sleep hours, controlling for the athlete and date because the data are dependent at those levels.
summary(lmer(soreness~sleep_hours+(1|playeriddate), data=iso))
## Linear mixed model fit by REML ['lmerMod']
## Formula: soreness ~ sleep_hours + (1 | playeriddate)
## Data: iso
##
## REML criterion at convergence: 5584.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5348 -0.8206 -0.1183 0.8741 1.5898
##
## Random effects:
## Groups Name Variance Std.Dev.
## playeriddate (Intercept) 0.1119 0.3345
## Residual 8.5397 2.9223
## Number of obs: 1117, groups: playeriddate, 435
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.33847 0.51428 10.380
## sleep_hours 0.01005 0.07266 0.138
##
## Correlation of Fixed Effects:
## (Intr)
## sleep_hours -0.985
Looking at the same analysis, but controlling for the specific body area, we start to see some possible group differences in sleep hours predicting isolated soreness, but nothing statistically significant.
summary(lmer(soreness~sleep_hours+area+(1|playeriddate), data=iso))
## Linear mixed model fit by REML ['lmerMod']
## Formula: soreness ~ sleep_hours + area + (1 | playeriddate)
## Data: iso
##
## REML criterion at convergence: 5570.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.79782 -0.87225 -0.07111 0.89352 1.76643
##
## Random effects:
## Groups Name Variance Std.Dev.
## playeriddate (Intercept) 0.1068 0.3268
## Residual 8.5098 2.9172
## Number of obs: 1117, groups: playeriddate, 435
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.65783 0.60644 9.330
## sleep_hours 0.01807 0.07280 0.248
## areaGroin -0.18091 0.44042 -0.411
## areaLeft Ankle -0.96935 0.47513 -2.040
## areaLeft Hamstring -0.15993 0.44965 -0.356
## areaLeft Quad -0.77149 0.44365 -1.739
## areaLeft Shoulder -0.86536 0.43264 -2.000
## areaLower Back -0.47706 0.43909 -1.086
## areaNeck -0.19041 0.43951 -0.433
## areaRight Ankle -0.45217 0.43040 -1.051
## areaRight Hamstring -0.41365 0.44243 -0.935
## areaRight Quad 0.47668 0.47154 1.011
## areaRight Shoulder -0.17668 0.46446 -0.380
## areaUpper Back -0.56530 0.44933 -1.258
##
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
iso %>% filter(!area=="") %>%
ggplot(aes(x = sleep_hours, y = soreness)) +
geom_jitter()+
geom_smooth(method='lm', formula= y~x) +
theme(axis.text.x = element_text(hjust = 0.5, size = 15),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(x = "Sleep Hours", y = "Isolated Soreness", title = "Relationship Between Soreness and Sleep Hours on a Given Date by Body Area") + facet_wrap(~area)
One final thing to keep in mind regarding sleep data is that sleep hours often shows a quadratic relationship with variables such that too much and too litle sleep can incur negative outcomes. For example, does sleeping 14 hours relate to back soreness? While at the same time does sleeping 3 hours relate to more back soreness because perhaps the soreness is contributing to the lack of sleep?
Modelling sleep hours squared does not seem to change the model much, indiicating perhaps just no real relationship between soreness and sleep hours in these data.
summary(lmer(soreness~sleep_hours^2+area+(1|playeriddate), data=iso))
## Linear mixed model fit by REML ['lmerMod']
## Formula: soreness ~ sleep_hours^2 + area + (1 | playeriddate)
## Data: iso
##
## REML criterion at convergence: 5570.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.79782 -0.87225 -0.07111 0.89352 1.76643
##
## Random effects:
## Groups Name Variance Std.Dev.
## playeriddate (Intercept) 0.1068 0.3268
## Residual 8.5098 2.9172
## Number of obs: 1117, groups: playeriddate, 435
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.65783 0.60644 9.330
## sleep_hours 0.01807 0.07280 0.248
## areaGroin -0.18091 0.44042 -0.411
## areaLeft Ankle -0.96935 0.47513 -2.040
## areaLeft Hamstring -0.15993 0.44965 -0.356
## areaLeft Quad -0.77149 0.44365 -1.739
## areaLeft Shoulder -0.86536 0.43264 -2.000
## areaLower Back -0.47706 0.43909 -1.086
## areaNeck -0.19041 0.43951 -0.433
## areaRight Ankle -0.45217 0.43040 -1.051
## areaRight Hamstring -0.41365 0.44243 -0.935
## areaRight Quad 0.47668 0.47154 1.011
## areaRight Shoulder -0.17668 0.46446 -0.380
## areaUpper Back -0.56530 0.44933 -1.258
##
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
The injury data distributions appear to need some cleaning. I would expect the “severe” injury distribution of days lost to be a bit tighter. Moderate and minor seems ok, but I would not expect severe injuries to overlap quite so much as I feel they should be distinctly their own category.
If any severity classifications are missing I will classify them based on their days lost and the ranges below.
tapply(injury$days_lost, injury$severity, range, na.rm=T)
## [[1]]
## [1] 5 87
##
## $Minor
## [1] 1 6
##
## $Moderate
## [1] 7 27
##
## $Severe
## [1] 28 99
injury<- injury %>% mutate(severity = case_when(severity=="" & days_lost < 7 ~ "Minor",
severity=="" & days_lost >6 & days_lost < 28 ~ "Moderate",
severity=="" & days_lost >27~ "Severe",
TRUE~ severity))
injury %>% filter(!severity=="") %>%
ggplot(aes(x = days_lost, y = severity, fill = severity)) +
geom_density_ridges() +
theme_ridges() +
theme(axis.text.x = element_text(hjust = 0.5, size = 15),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(x = "Days Lost", y = "", title = "Distribution of Days Lost by Injury Severity")
## Picking joint bandwidth of 2.8
tapply(injury$days_lost, injury$injury_type, mean, na.rm=T)
## ACL Tear Ankle Sprain Back Pain
## 46.56731 48.70213 40.59211
## Calf Strain Concussion Groin Strain
## 50.52941 51.89855 47.00000
## Hamstring Strain Shoulder Dislocation
## 48.30120 48.63750
By assessing these distributions once again but by injury type, we see that pretty much across all injury types the severe injuries have non normal distributions. However, some injuries, such as ACL tears, probably shouldn’t have minor, moderate and severe outcomes. Normally I might investigate that further to see if all minor and moderate ACL injuries should just be removed, for example, but because the average days lost for an ACL tear of any severity is 45.6 days, I can only assume this is a function of the simulated data and I am going to move past it.
injury %>% filter(!severity=="") %>%
ggplot(aes(x = days_lost, y = severity, fill = severity)) +
geom_density_ridges() +
theme_ridges() +
theme(axis.text.x = element_text(hjust = 0.5, size = 15),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(x = "Days Lost", y = "", title = "Distribution of Days Lost by Injury Severity") + facet_wrap(~injury_type)
## Picking joint bandwidth of 4.28
## Picking joint bandwidth of 4.05
## Picking joint bandwidth of 4.37
## Picking joint bandwidth of 4.18
## Picking joint bandwidth of 4.46
## Picking joint bandwidth of 3.94
## Picking joint bandwidth of 3.99
## Picking joint bandwidth of 3.92
Across all injury types, there does not appear to be any group differences in days lost comparing contact and non contact injuries. This indicates we cannot necessarily tell if any injury is going to be worse if it is non contact compared to contact.
summary(lm(days_lost ~ mechanism*injury_type, data=injury))
##
## Call:
## lm(formula = days_lost ~ mechanism * injury_type, data = injury)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.872 -24.817 -0.575 24.175 56.744
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 45.8475 3.7532
## mechanismNon-Contact 1.6637 5.7057
## injury_typeAnkle Sprain 1.7525 5.9045
## injury_typeBack Pain -3.5910 5.9495
## injury_typeCalf Strain 7.0249 5.6364
## injury_typeConcussion 6.4935 5.7424
## injury_typeGroin Strain 1.2807 5.9495
## injury_typeHamstring Strain 1.9081 5.7057
## injury_typeShoulder Dislocation -1.0225 5.9045
## mechanismNon-Contact:injury_typeAnkle Sprain 0.2549 8.2899
## mechanismNon-Contact:injury_typeBack Pain -5.0822 8.7365
## mechanismNon-Contact:injury_typeCalf Strain -6.9044 8.4917
## mechanismNon-Contact:injury_typeConcussion -2.8846 9.2026
## mechanismNon-Contact:injury_typeGroin Strain -1.9169 8.6396
## mechanismNon-Contact:injury_typeHamstring Strain -0.4718 8.5378
## mechanismNon-Contact:injury_typeShoulder Dislocation 5.9613 8.6087
## t value Pr(>|t|)
## (Intercept) 12.216 <2e-16 ***
## mechanismNon-Contact 0.292 0.771
## injury_typeAnkle Sprain 0.297 0.767
## injury_typeBack Pain -0.604 0.546
## injury_typeCalf Strain 1.246 0.213
## injury_typeConcussion 1.131 0.259
## injury_typeGroin Strain 0.215 0.830
## injury_typeHamstring Strain 0.334 0.738
## injury_typeShoulder Dislocation -0.173 0.863
## mechanismNon-Contact:injury_typeAnkle Sprain 0.031 0.975
## mechanismNon-Contact:injury_typeBack Pain -0.582 0.561
## mechanismNon-Contact:injury_typeCalf Strain -0.813 0.416
## mechanismNon-Contact:injury_typeConcussion -0.313 0.754
## mechanismNon-Contact:injury_typeGroin Strain -0.222 0.824
## mechanismNon-Contact:injury_typeHamstring Strain -0.055 0.956
## mechanismNon-Contact:injury_typeShoulder Dislocation 0.692 0.489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.83 on 654 degrees of freedom
## (58 observations deleted due to missingness)
## Multiple R-squared: 0.01499, Adjusted R-squared: -0.0076
## F-statistic: 0.6636 on 15 and 654 DF, p-value: 0.8212
Here, I am imputting missing dates from the range of injury data. This will allow us to see if having a prior injury within a certain time frame (1,3,5 months)? makes you more likely to have a second injury.
injury$date<- as.Date(injury$injury_date, format = "%m/%d/%Y")
range(injury$date)
## [1] "2025-01-01" "2025-06-30"
date_dat<- data.frame(date=rep(seq(as.Date("2025/1/1"), as.Date("2025/6/30"), "days"), times=length(unique(injury$playerid)), each=1))
ids<- data.frame()
for (i in unique(injury$playerid)) {
d<- data.frame(playerid= rep(i, 1, 181))
ids<- rbind(ids, d)
}
date_dat<- cbind(date_dat, ids)
injury2<- merge(injury, date_dat, by=c("date", "playerid"), all=T)
After creating a simple binary flag that states whether or not someone was injured on a given date, we can get the number of injuries that person has had in the last X number of days.
injury2<- injury2 %>% group_by(playerid) %>%
mutate(injured= case_when(!is.na(days_lost) ~ 1, TRUE~0),
injuries_7 = rollsumr(injured, k = 7, fill = NA),
injuries_30 = rollsumr(injured, k = 30, fill = NA),
injuries_90 = rollsumr(injured, k = 90, fill = NA),
injuries_150 = rollsumr(injured, k = 150, fill = NA),
) %>% ungroup()
Binomial linear regression models tell us something very interesting, but perhaps expected here. We can see that with at least one injury in the previous 7 days, the odds of being injured on that specific day increases X%. Having at least one injury in the last 30 days, the odds of being injured on that specific day increases X%. With an injury in the last 90 days, one’s chances of being injured increase X%, and finally with an injury in the last 150 days, the odds of being injured is still significant but marginal, at X%.
summary(glm(injured~injuries_7, family="binomial", data=injury2))
##
## Call:
## glm(formula = injured ~ injuries_7, family = "binomial", data = injury2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1693 -0.4573 -0.2666 -0.2666 2.1492
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.31955 0.08373 -39.65 <2e-16 ***
## injuries_7 1.11453 0.04689 23.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3938.5 on 5320 degrees of freedom
## Residual deviance: 3272.2 on 5319 degrees of freedom
## (180 observations deleted due to missingness)
## AIC: 3276.2
##
## Number of Fisher Scoring iterations: 5
summary(glm(injured~injuries_30, family="binomial", data=injury2))
##
## Call:
## glm(formula = injured ~ injuries_30, family = "binomial", data = injury2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0583 -0.5072 -0.4436 -0.3873 2.4052
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.11876 0.11462 -27.21 <2e-16 ***
## injuries_30 0.28320 0.02401 11.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3423.4 on 4630 degrees of freedom
## Residual deviance: 3282.5 on 4629 degrees of freedom
## (870 observations deleted due to missingness)
## AIC: 3286.5
##
## Number of Fisher Scoring iterations: 5
summary(glm(injured~injuries_90, family="binomial", data=injury2))
##
## Call:
## glm(formula = injured ~ injuries_90, family = "binomial", data = injury2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7910 -0.5205 -0.4666 -0.4178 2.4145
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.32361 0.23134 -14.367 < 2e-16 ***
## injuries_90 0.11610 0.01923 6.038 1.56e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2046.6 on 2830 degrees of freedom
## Residual deviance: 2010.3 on 2829 degrees of freedom
## (2670 observations deleted due to missingness)
## AIC: 2014.3
##
## Number of Fisher Scoring iterations: 5
summary(glm(injured~injuries_150, family="binomial", data=injury2))
##
## Call:
## glm(formula = injured ~ injuries_150, family = "binomial", data = injury2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6581 -0.5174 -0.4767 -0.4388 2.2097
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.10631 0.46385 -6.697 2.13e-11 ***
## injuries_150 0.05816 0.02378 2.446 0.0145 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 745.69 on 1030 degrees of freedom
## Residual deviance: 739.78 on 1029 degrees of freedom
## (4470 observations deleted due to missingness)
## AIC: 743.78
##
## Number of Fisher Scoring iterations: 4
mod1<- (glm(injured~injuries_7, family="binomial", data=injury2))
mod2<- (glm(injured~injuries_30, family="binomial", data=injury2))
mod3<- (glm(injured~injuries_90, family="binomial", data=injury2))
mod4<- (glm(injured~injuries_150, family="binomial", data=injury2))
logit2prob <- function(logit){
odds <- exp(logit)
prob <- odds / (1 + odds)
return(prob)
}
After running a predictive model and converting the logit coefficients to probabilities, we can see the assumed risk an athlete takes on by playing/training based on the number of injuries they have had in a certain number of prior days.
injury2$pred_7<- logit2prob(predict(mod1, injury2))
injury2$pred_30<- logit2prob(predict(mod2, injury2))
injury2$pred_90<- logit2prob(predict(mod3, injury2))
injury2$pred_150<- logit2prob(predict(mod4, injury2))
a<- injury2 %>% group_by(injuries_7) %>%
mutate(Group= "Past 7 Days",
pct_risk= mean(pred_7, na.rm=T)) %>%
slice(1) %>% select(Group, injuries_7, pct_risk) %>%
rename(injuries= injuries_7)
b<- injury2 %>% group_by(injuries_30) %>%
mutate(Group= "Past 30 Days",
pct_risk= mean(pred_30, na.rm=T)) %>%
slice(1) %>% select(Group, injuries_30, pct_risk) %>%
rename(injuries= injuries_30)
c<- injury2 %>% group_by(injuries_90) %>%
mutate(Group= "Past 90 Days",
pct_risk= mean(pred_7, na.rm=T)) %>%
slice(1) %>% select(Group, injuries_90, pct_risk) %>%
rename(injuries= injuries_90)
d<- injury2 %>% group_by(injuries_150) %>%
mutate(Group= "Past 150 Days",
pct_risk= mean(pred_150, na.rm=T)) %>%
slice(1) %>% select(Group, injuries_150, pct_risk) %>%
rename(injuries= injuries_150)
plot<- rbind(a,b,c,d)
The plot very clearly displays that the risk is much steeper with more injuries in a shorter window.
plot %>%
ggplot(aes(x = injuries, y = pct_risk, color = Group)) +
geom_jitter()+
geom_smooth(method='lm', formula= y~x) +
theme(axis.text.x = element_text(hjust = 0.5, size = 15),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(y = "Avg Injury Risk", x = "Number of Prior Injuries", title = "Number of Injuries in the Past Days and Predicted Injury Risk") +
scale_y_continuous(labels = scales::percent)
In this dataset, the variable DrillName seems relatively messy so I cleaned it by removing all underscores and replacing them with spaces and then Camel Casing the text. Now, we only have 11 different drills.
head(load)
## playerid Athlete Name sessionDate DrillName Duration(min) RPE
## 1 1009 Riley Murph 20250105 Small-sided_Game 57 7
## 2 1018 Quinn Brown 02262025 Shooting Drill 31 9
## 3 1003 Casey Williams 02032025 rondo 26 6
## 4 1006 Devon Hernandez 20250205 Shooting Drill 58 9
## 5 1026 Alex Smith 01262025 High-intensity_Grid 28 5
## 6 1011 Taylor Williams 01272025 1v1 Zone Battle 28 8
## totalDistance HighSpeedDist_m high_intensity_distance_m PlayerLoad
## 1 3373.989 1051.242 1078.6320 478.2714
## 2 4922.466 1390.362 1039.9317 496.3927
## 3 4388.231 1188.889 661.5158 415.6558
## 4 3124.828 1234.508 934.2680 437.1894
## 5 5343.845 1430.669 566.9115 517.3194
## 6 6521.398 1052.719 766.3384 490.9539
## exertion_index Sprint# Accelerations Decelerations MaxSpeed(m/s) MaxAccel
## 1 3.774067 7 23 19 9.179136 2.506681
## 2 4.188510 7 34 17 8.583080 3.453429
## 3 3.593683 11 16 16 7.875342 2.845395
## 4 5.213197 5 20 20 6.872046 2.645797
## 5 6.051950 17 26 16 6.531567 3.293965
## 6 6.226405 9 27 26 9.735645 3.029176
## MaxDecel
## 1 -3.234291
## 2 -3.107749
## 3 -3.376078
## 4 -3.756857
## 5 -2.457203
## 6 -3.372392
load$DrillName<- gsub("_", " ", load$DrillName)
load$DrillName<- str_to_title(load$DrillName)
table(load$DrillName)
##
## 1v1 Zone Battle Box To Box Shuttle Chaos Circuit Conditioning Run
## 86 67 95 100
## High-Intensity Grid Rondo Scrimmage Shooting Drill
## 77 91 82 86
## Small-Sided Game Tactical Drill Technical Warmup
## 98 71 83
load<- load[!duplicated(load), ]
load$date<- ymd(load$sessionDate)
## Warning: 488 failed to parse.
load<- load %>% mutate(date= case_when(
grepl("-", sessionDate) ~ as.Date(sessionDate, format="%d-%m-%Y"),
is.na(date) ~ mdy(sessionDate),
TRUE~ date))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = case_when(...)`.
## Caused by warning:
## ! 436 failed to parse.
Taking a first look at these features, the variables themselves do not show any super strong signs of correlations.
corrs<- load[,c(5:(ncol(load)-1))]
res2 <- rcorr(as.matrix(corrs))
corrs<- res2$r
corrplot(corrs, method = "color",
type = "full", number.cex = .6,
addCoef.col = "black",
tl.col = "black", tl.srt = 90)
This makes me wonder if the data are a bit messy still. The only major looking outlier I see is for total distance.
load<- load %>%
rename(dur= "Duration(min)",
num_sprints= 'Sprint#',
maxspeed = 'MaxSpeed(m/s)')
for (i in colnames(load)[5:(ncol(load)-1)]) {
eval(parse(text=paste0("hist(load$", i, ")")))
}
summary(load$totalDistance)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 50 4248 4974 5078 5646 20000 21
hist(load$totalDistance)
load<- load %>% mutate(totalDistance = case_when(totalDistance>=10000~ NA, TRUE~totalDistance))
flags<- load %>% group_by(playerid) %>%
mutate(avg_load= mean(PlayerLoad, na.rm=T),
sd_load= sd(PlayerLoad, na.rm=T),
diff_load= PlayerLoad - avg_load,
flag = case_when(abs(diff_load) > 2*sd_load ~ 1, TRUE~0)
) %>% select(playerid, `Athlete Name`, date, PlayerLoad, avg_load, sd_load, diff_load, flag) %>% filter(flag==1) %>%
mutate(name_date = paste(`Athlete Name`, date)) %>%
group_by(name_date) %>% slice(1)
ggplot(flags,aes(x=name_date))+
geom_boxplot(aes(lower=avg_load-sd_load,upper=avg_load+sd_load,middle=avg_load,ymin=avg_load-3*sd_load,ymax=avg_load+3*sd_load, fill=`Athlete Name`),stat="identity") +
geom_point(aes(x = name_date, y = PlayerLoad), color = "red", shape = 19, size=8) +
theme(axis.text.x = element_text(hjust = 0.5, vjust = 1, size = 10, angle = 90),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 12,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=20),
legend.position="none",
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(y = "Player Load", x = "Athlete - Date", title = "Average Player Load Distribution and Flagged Dates", subtitle = "Dates are flagged when player load is 2+ standard deviations away from the player mean. \nError bars represent 3 standard deviations")
Assessing the relationship between severity of injury and abnormal load shows there does not seem to be a difference in how severe your injury is relative to how abnormal your load is.
flags<- merge(flags, injury, by=c("playerid", "date"), all.x=T)
flags<- flags %>% mutate(severity = case_when(is.na(severity)~ "No Injury", TRUE~severity))
summary(lm(diff_load~severity, data=flags))
##
## Call:
## lm(formula = diff_load ~ severity, data = flags)
##
## Residuals:
## Min 1Q Median 3Q Max
## -255.9 -165.7 28.3 154.7 237.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -173.4 103.1 -1.682 0.1012
## severityNo Injury 176.8 108.5 1.630 0.1118
## severitySevere 212.8 120.9 1.760 0.0869 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 178.6 on 36 degrees of freedom
## Multiple R-squared: 0.08208, Adjusted R-squared: 0.03108
## F-statistic: 1.61 on 2 and 36 DF, p-value: 0.214
flags<- load %>% group_by(playerid) %>%
mutate(avg_load= mean(high_intensity_distance_m, na.rm=T),
sd_load= sd(high_intensity_distance_m, na.rm=T),
diff_load= high_intensity_distance_m - avg_load,
flag = case_when(abs(diff_load) > 2*sd_load ~ 1, TRUE~0)
) %>% select(playerid, `Athlete Name`, date, high_intensity_distance_m, avg_load, sd_load, diff_load, flag) %>% filter(flag==1)
Overall, there does not seem to be a relationship between abnormal load days and injury.
flags<- merge(flags, injury, by=c("playerid", "date"), all.x=T)
flags<- flags %>% mutate(severity = case_when(is.na(severity)~ "No Injury", TRUE~severity))
summary(lm(diff_load~severity, data=flags))
##
## Call:
## lm(formula = diff_load ~ severity, data = flags)
##
## Residuals:
## Min 1Q Median 3Q Max
## -678.6 -461.1 50.3 447.7 603.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -368.797 333.445 -1.106 0.275
## severityNo Injury 374.095 341.679 1.095 0.280
## severitySevere -4.639 471.562 -0.010 0.992
##
## Residual standard error: 471.6 on 41 degrees of freedom
## Multiple R-squared: 0.05349, Adjusted R-squared: 0.00732
## F-statistic: 1.159 on 2 and 41 DF, p-value: 0.324
On days when players did have an abnormal load amount, there were 8 occurrences of severe injuries, 3 occurrences of moderate injuries and 28 no injury days. This indicates that load is likely not a great predictor of injury, at least in this small of a sample.
summary<- flags %>% group_by(severity) %>%
mutate(diff_load_abs= mean(abs(diff_load)),
num=n()) %>%
slice(1) %>% select(severity, num, diff_load_abs)
flags %>% group_by(severity) %>%
mutate(diff_load_abs= mean(abs(diff_load))) %>%
ggplot(aes(x=severity, y=diff_load_abs, fill = as.factor(severity))) +
geom_bar(stat = "summary", fun.y = "mean") +
theme(axis.text.x = element_text(hjust = 0.5, vjust = 1, size = 10),
strip.text.x = element_text(size = 15, colour = "blue"),
axis.text.y = element_text(hjust = 0.5, size=15),
axis.title.x = element_text(size = 15,
vjust = 0.5,
# margin = margin(t = 10),
face = 'bold',
color = "black"),
plot.subtitle = element_text(size = 8,
hjust = 0.5,
color = "black"),
axis.title.y = element_text(size = 15,
hjust = 0.5,
# margin = margin(r = 10),
color = "black",
face = 'bold'),
plot.title = element_text(hjust = 0.5, size=10),
legend.position="none",
# plot.margin = margin(1.15, .5, .5, -.25, unit = 'in'),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite"),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
labs(y = "Average Load", x = "Result", title = "Average Player Load on Flagged Days and Performance Outcome", subtitle = "Dates are flagged when player load is 2+ standard deviations away from the player mean.")
## No summary function supplied, defaulting to `mean_se()`
I am offsetting the data for the wellness/ sleep data because I want to align with the previous day’s load and injury report. i.e if you exerted a lot did you sleep poorly? Also I will offset sleep to get sleep before a workout, to see if tii much or too little sleep effects performance.
dat<- merge(injury2, load, by=c("playerid", "date"), all=T)
sleep_post_workout<- wellness_clean %>% select(playerid, date, sleep_hours_clean, overall_soreness_clean) %>%
mutate(date= date - 1)
colnames(sleep_post_workout)[3:4] <- paste0(colnames(sleep_post_workout)[3:4], "post_workout_data")
dat<- merge(dat, sleep_post_workout, by=c("playerid", "date"),all=T)
###
sleep_pre_workout<- wellness_clean %>% select(playerid, date, sleep_hours_clean, overall_soreness_clean) %>%
mutate(date= date + 1)
colnames(sleep_pre_workout)[3:4] <- paste0(colnames(sleep_pre_workout)[3:4], "pre_workout_data")
dat<- merge(dat, sleep_pre_workout, by=c("playerid", "date"), all=T)
dat <- dat %>% group_by(playerid) %>%
mutate(avg_load= mean(PlayerLoad, na.rm=T),
sd_load= sd(PlayerLoad, na.rm=T),
diff_load= PlayerLoad - avg_load,
flag = case_when(abs(diff_load) > 2*sd_load ~ 1, TRUE~0),
severity= case_when(severity=="" ~ "No Injury",
is.na(severity) ~ "No Injury",
TRUE~ severity)
)
There do not appear to be very strong relationships among all the variables.
corrs<- dat[,c(23:ncol(dat))]
res2 <- rcorr(as.matrix(corrs))
corrs<- res2$r
corrplot(corrs, method = "color",
type = "full", number.cex = .6,
addCoef.col = "black",
tl.col = "black", tl.srt = 90)
Running various models to assess the relationships between sleep, injury and intensity.
summary(lmer(overall_soreness_cleanpost_workout_data~PlayerLoad + (1 | playerid), data=dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: overall_soreness_cleanpost_workout_data ~ PlayerLoad + (1 | playerid)
## Data: dat
##
## REML criterion at convergence: 1090.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.75940 -0.75553 -0.01105 0.86188 1.99387
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.3999 0.6324
## Residual 3.6233 1.9035
## Number of obs: 257, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.7199113 0.6801313 5.469
## PlayerLoad 0.0002161 0.0014867 0.145
##
## Correlation of Fixed Effects:
## (Intr)
## PlayerLoad -0.969
summary(lmer(sleep_hours_cleanpost_workout_data~PlayerLoad + (1 | playerid), data=dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sleep_hours_cleanpost_workout_data ~ PlayerLoad + (1 | playerid)
## Data: dat
##
## REML criterion at convergence: 732.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.61314 -0.57145 -0.01044 0.67423 2.29637
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.1143 0.3381
## Residual 1.1488 1.0718
## Number of obs: 236, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.7044459 0.3955218 16.95
## PlayerLoad 0.0008347 0.0008692 0.96
##
## Correlation of Fixed Effects:
## (Intr)
## PlayerLoad -0.971
dat$sleep_pre_centered<- dat$sleep_hours_cleanpre_workout_data-mean(dat$sleep_hours_cleanpre_workout_data, na.rm=T)
summary(lmer(high_intensity_distance_m~overall_soreness_cleanpre_workout_data + (1 | playerid), data=dat))
## boundary (singular) fit: see help('isSingular')
## Linear mixed model fit by REML ['lmerMod']
## Formula: high_intensity_distance_m ~ overall_soreness_cleanpre_workout_data +
## (1 | playerid)
## Data: dat
##
## REML criterion at convergence: 3651
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.62486 -0.66753 0.01553 0.63681 2.90987
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0 0.0
## Residual 35942 189.6
## Number of obs: 275, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 796.800 25.632 31.086
## overall_soreness_cleanpre_workout_data -3.294 5.791 -0.569
##
## Correlation of Fixed Effects:
## (Intr)
## ovrll_sr___ -0.895
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(lmer(high_intensity_distance_m~sleep_pre_centered^2 + (1 | playerid), data=dat))
## boundary (singular) fit: see help('isSingular')
## Linear mixed model fit by REML ['lmerMod']
## Formula: high_intensity_distance_m ~ sleep_pre_centered^2 + (1 | playerid)
## Data: dat
##
## REML criterion at convergence: 3429
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5311 -0.6506 0.0517 0.6255 2.9558
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0 0.0
## Residual 34915 186.9
## Number of obs: 259, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 784.82 11.61 67.580
## sleep_pre_centered -23.21 10.63 -2.183
##
## Correlation of Fixed Effects:
## (Intr)
## slp_pr_cntr -0.021
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(glmer(injured~sleep_pre_centered+high_intensity_distance_m+ (1 | playerid), family="binomial", data=dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00548545 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: injured ~ sleep_pre_centered + high_intensity_distance_m + (1 |
## playerid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 172.2 186.4 -82.1 164.2 255
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.6169 -0.3527 -0.2443 -0.2076 3.4954
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.9904 0.9952
## Number of obs: 259, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9226178 1.0273402 -1.871 0.0613 .
## sleep_pre_centered -0.2448364 0.2122174 -1.154 0.2486
## high_intensity_distance_m -0.0008072 0.0012326 -0.655 0.5125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) slp_p_
## slp_pr_cntr -0.039
## hgh_ntnst__ -0.924 0.130
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00548545 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glmer(injured~sleep_pre_centered+high_intensity_distance_m+ (1 | playerid), family="binomial", data=dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00548545 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: injured ~ sleep_pre_centered + high_intensity_distance_m + (1 |
## playerid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 172.2 186.4 -82.1 164.2 255
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.6169 -0.3527 -0.2443 -0.2076 3.4954
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.9904 0.9952
## Number of obs: 259, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9226178 1.0273402 -1.871 0.0613 .
## sleep_pre_centered -0.2448364 0.2122174 -1.154 0.2486
## high_intensity_distance_m -0.0008072 0.0012326 -0.655 0.5125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) slp_p_
## slp_pr_cntr -0.039
## hgh_ntnst__ -0.924 0.130
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00548545 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glmer(injured~sleep_pre_centered+scale(high_intensity_distance_m)+ (1 | playerid), family="binomial", data=dat))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: injured ~ sleep_pre_centered + scale(high_intensity_distance_m) +
## (1 | playerid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 172.2 186.4 -82.1 164.2 255
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.6169 -0.3527 -0.2443 -0.2076 3.4954
##
## Random effects:
## Groups Name Variance Std.Dev.
## playerid (Intercept) 0.9904 0.9952
## Number of obs: 259, groups: playerid, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.5615 0.3931 -6.516 7.2e-11 ***
## sleep_pre_centered -0.2448 0.2122 -1.154 0.249
## scale(high_intensity_distance_m) -0.1594 0.2433 -0.655 0.513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) slp_p_
## slp_pr_cntr 0.221
## scl(hgh___) 0.069 0.131
summary(glm(injured~DrillName*sleep_pre_centered, data=dat))
##
## Call:
## glm(formula = injured ~ DrillName * sleep_pre_centered, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.57497 -0.13145 -0.05439 0.00000 0.91178
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.029891 0.053124 0.563
## DrillNameBox To Box Shuttle 0.139696 0.100225 1.394
## DrillNameChaos Circuit -0.029891 0.086632 -0.345
## DrillNameConditioning Run -0.029891 0.087760 -0.341
## DrillNameHigh-Intensity Grid 0.019043 0.085217 0.223
## DrillNameRondo 0.078277 0.079109 0.989
## DrillNameScrimmage 0.041871 0.079165 0.529
## DrillNameShooting Drill 0.119813 0.077185 1.552
## DrillNameSmall-Sided Game 0.247019 0.073880 3.344
## DrillNameTactical Drill 0.019654 0.081992 0.240
## DrillNameTechnical Warmup 0.038613 0.077622 0.497
## sleep_pre_centered 0.054170 0.055414 0.978
## DrillNameBox To Box Shuttle:sleep_pre_centered -0.069006 0.090509 -0.762
## DrillNameChaos Circuit:sleep_pre_centered -0.054170 0.077525 -0.699
## DrillNameConditioning Run:sleep_pre_centered -0.054170 0.083349 -0.650
## DrillNameHigh-Intensity Grid:sleep_pre_centered -0.176418 0.082253 -2.145
## DrillNameRondo:sleep_pre_centered 0.047641 0.088560 0.538
## DrillNameScrimmage:sleep_pre_centered -0.005742 0.074866 -0.077
## DrillNameShooting Drill:sleep_pre_centered -0.034491 0.072201 -0.478
## DrillNameSmall-Sided Game:sleep_pre_centered -0.202924 0.072961 -2.781
## DrillNameTactical Drill:sleep_pre_centered -0.119303 0.082450 -1.447
## DrillNameTechnical Warmup:sleep_pre_centered -0.022989 0.074906 -0.307
## Pr(>|t|)
## (Intercept) 0.574188
## DrillNameBox To Box Shuttle 0.164675
## DrillNameChaos Circuit 0.730372
## DrillNameConditioning Run 0.733703
## DrillNameHigh-Intensity Grid 0.823368
## DrillNameRondo 0.323440
## DrillNameScrimmage 0.597370
## DrillNameShooting Drill 0.121926
## DrillNameSmall-Sided Game 0.000961 ***
## DrillNameTactical Drill 0.810763
## DrillNameTechnical Warmup 0.619331
## sleep_pre_centered 0.329290
## DrillNameBox To Box Shuttle:sleep_pre_centered 0.446567
## DrillNameChaos Circuit:sleep_pre_centered 0.485401
## DrillNameConditioning Run:sleep_pre_centered 0.516373
## DrillNameHigh-Intensity Grid:sleep_pre_centered 0.032985 *
## DrillNameRondo:sleep_pre_centered 0.591117
## DrillNameScrimmage:sleep_pre_centered 0.938924
## DrillNameShooting Drill:sleep_pre_centered 0.633295
## DrillNameSmall-Sided Game:sleep_pre_centered 0.005850 **
## DrillNameTactical Drill:sleep_pre_centered 0.149223
## DrillNameTechnical Warmup:sleep_pre_centered 0.759185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.08429217)
##
## Null deviance: 23.390 on 258 degrees of freedom
## Residual deviance: 19.977 on 237 degrees of freedom
## (5446 observations deleted due to missingness)
## AIC: 117.39
##
## Number of Fisher Scoring iterations: 2
The most interesting and concerning relationship we see is that Intensity Grid and Small Sided Game, compounded with poor sleep (less sleep), lead to an increased injury risk.
p<- dat %>% rename(`Injury Risk` = injured,
`Hours Slept Pre Workout` = sleep_hours_cleanpre_workout_data)
int<- (glm(`Injury Risk`~DrillName*`Hours Slept Pre Workout`, data=p))
summary(int)
##
## Call:
## glm(formula = `Injury Risk` ~ DrillName * `Hours Slept Pre Workout`,
## data = p)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.57497 -0.13145 -0.05439 0.00000 0.91178
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -0.349924 0.395623
## DrillNameBox To Box Shuttle 0.623536 0.655477
## DrillNameChaos Circuit 0.349924 0.553140
## DrillNameConditioning Run 0.349924 0.603498
## DrillNameHigh-Intensity Grid 1.256003 0.583987
## DrillNameRondo -0.255759 0.634304
## DrillNameScrimmage 0.082134 0.539067
## DrillNameShooting Drill 0.361652 0.512443
## DrillNameSmall-Sided Game 1.669832 0.518631
## DrillNameTactical Drill 0.856156 0.567176
## DrillNameTechnical Warmup 0.199801 0.539411
## `Hours Slept Pre Workout` 0.054170 0.055414
## DrillNameBox To Box Shuttle:`Hours Slept Pre Workout` -0.069006 0.090509
## DrillNameChaos Circuit:`Hours Slept Pre Workout` -0.054170 0.077525
## DrillNameConditioning Run:`Hours Slept Pre Workout` -0.054170 0.083349
## DrillNameHigh-Intensity Grid:`Hours Slept Pre Workout` -0.176418 0.082253
## DrillNameRondo:`Hours Slept Pre Workout` 0.047641 0.088560
## DrillNameScrimmage:`Hours Slept Pre Workout` -0.005742 0.074866
## DrillNameShooting Drill:`Hours Slept Pre Workout` -0.034491 0.072201
## DrillNameSmall-Sided Game:`Hours Slept Pre Workout` -0.202924 0.072961
## DrillNameTactical Drill:`Hours Slept Pre Workout` -0.119303 0.082450
## DrillNameTechnical Warmup:`Hours Slept Pre Workout` -0.022989 0.074906
## t value Pr(>|t|)
## (Intercept) -0.884 0.37733
## DrillNameBox To Box Shuttle 0.951 0.34244
## DrillNameChaos Circuit 0.633 0.52760
## DrillNameConditioning Run 0.580 0.56258
## DrillNameHigh-Intensity Grid 2.151 0.03251 *
## DrillNameRondo -0.403 0.68716
## DrillNameScrimmage 0.152 0.87903
## DrillNameShooting Drill 0.706 0.48104
## DrillNameSmall-Sided Game 3.220 0.00146 **
## DrillNameTactical Drill 1.510 0.13250
## DrillNameTechnical Warmup 0.370 0.71141
## `Hours Slept Pre Workout` 0.978 0.32929
## DrillNameBox To Box Shuttle:`Hours Slept Pre Workout` -0.762 0.44657
## DrillNameChaos Circuit:`Hours Slept Pre Workout` -0.699 0.48540
## DrillNameConditioning Run:`Hours Slept Pre Workout` -0.650 0.51637
## DrillNameHigh-Intensity Grid:`Hours Slept Pre Workout` -2.145 0.03299 *
## DrillNameRondo:`Hours Slept Pre Workout` 0.538 0.59112
## DrillNameScrimmage:`Hours Slept Pre Workout` -0.077 0.93892
## DrillNameShooting Drill:`Hours Slept Pre Workout` -0.478 0.63329
## DrillNameSmall-Sided Game:`Hours Slept Pre Workout` -2.781 0.00585 **
## DrillNameTactical Drill:`Hours Slept Pre Workout` -1.447 0.14922
## DrillNameTechnical Warmup:`Hours Slept Pre Workout` -0.307 0.75919
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.08429217)
##
## Null deviance: 23.390 on 258 degrees of freedom
## Residual deviance: 19.977 on 237 degrees of freedom
## (5446 observations deleted due to missingness)
## AIC: 117.39
##
## Number of Fisher Scoring iterations: 2
plot_model(int, type = "pred", terms = c("DrillName", "Hours Slept Pre Workout"), title = "Association Between Predicted Injury Risk and the Interaction of Drill and Sleep")