# Drop rows with a missing `minutes` value, then summarise the column.
raw_data = raw_data %>%
  filter(!is.na(minutes))
raw_data %>%
  select(minutes) %>%
  summary()## minutes
## Min. : 18.0
## 1st Qu.: 80.0
## Median :104.0
## Mean :110.8
## 3rd Qu.:133.0
## Max. :309.0
# Keep only rows where all four rank / rank-points columns are present,
# then summarise every column whose name contains "rank".
raw_data = raw_data %>%
  filter(
    !is.na(winner_rank),
    !is.na(winner_rank_points),
    !is.na(loser_rank),
    !is.na(loser_rank_points)
  )
raw_data %>%
  select(contains("rank")) %>%
  summary()## winner_rank winner_rank_points loser_rank loser_rank_points
## Min. : 1.00 Min. : 17 Min. : 1.00 Min. : 2.0
## 1st Qu.: 19.00 1st Qu.: 716 1st Qu.: 34.00 1st Qu.: 591.2
## Median : 45.00 Median : 1023 Median : 61.00 Median : 871.5
## Mean : 57.45 Mean : 1779 Mean : 79.48 Mean : 1137.3
## 3rd Qu.: 76.00 3rd Qu.: 1820 3rd Qu.: 96.75 3rd Qu.: 1243.0
## Max. :503.00 Max. :12415 Max. :1491.00 Max. :12355.0
## Parsing the score column into multiple columns ##
# Peek at the raw score strings before splitting them.
raw_data %>%
  select(score) %>%
  head()## # A tibble: 6 × 1
## score
## <chr>
## 1 6-4 3-6 6-2
## 2 7-6(6) 6-2
## 3 6-2 6-2
## 4 6-4 7-6(2)
## 5 6-7(2) 6-3 6-4
## 6 6-4 3-6 7-6(4)
# Dry run of the split (result not stored): one column per space-separated
# piece of `score`, NA where the score has fewer than five pieces.
raw_data %>%
  separate(
    score,
    into = c("first_set", "second_set", "third_set", "fourth_set", "fifth_set"),
    sep = " "
  ) %>%
  select(contains("set")) %>%
  head()## # A tibble: 6 × 5
## first_set second_set third_set fourth_set fifth_set
## <chr> <chr> <chr> <chr> <chr>
## 1 6-4 3-6 6-2 <NA> <NA>
## 2 7-6(6) 6-2 <NA> <NA> <NA>
## 3 6-2 6-2 <NA> <NA> <NA>
## 4 6-4 7-6(2) <NA> <NA> <NA>
## 5 6-7(2) 6-3 6-4 <NA> <NA>
## 6 6-4 3-6 7-6(4) <NA> <NA>
# Replace `score` with five per-set columns, splitting on single spaces.
raw_data = separate(
  raw_data,
  score,
  into = c("first_set", "second_set", "third_set", "fourth_set", "fifth_set"),
  sep = " "
)
#### Extracting Games from Sets ####
# First Set
# first_sg1: the text before the first "-" in each first_set value
# (e.g. "7-6(6)" -> "7"), kept as character.
# The column is split once as a whole instead of being re-split for every row
# inside a loop; this also avoids the 1:length() trap, which iterates over
# c(1, 0) and fails when the data has zero rows.
raw_data$first_sg1 = str_split_fixed(raw_data$first_set, "-", n = 2)[, 1]
# Check the first few extracted values.
raw_data %>%
  select(first_sg1) %>%
  head()## # A tibble: 6 × 1
## first_sg1
## <chr>
## 1 6
## 2 7
## 3 6
## 4 6
## 5 6
## 6 6
\(H_0\) = Match surface has no impact on the average set length
\(H_A\) = Match surface does have an impact on the average set length
M1 = lm(average_set_length ~ surface, data = data)

| | Estimate | Std. Error | t value | Pr(>\|t\|) |
|---|---|---|---|---|
| (Intercept) | 43.71895 | 0.30599 | 142.87511 | 0e+00 |
| surfaceGrass | -3.06884 | 0.56502 | -5.43144 | 0e+00 |
| surfaceHard | -1.46960 | 0.37782 | -3.88970 | 1e-04 |
M2 = aov(average_set_length ~ surface, data = data)

| | Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|---|
| surface | 2 | 2296.459 | 1148.22969 | 16.157 | 1.1e-07 |
| Residuals | 2518 | 178946.745 | 71.06702 | NA | NA |
| Term | Comparison | Null Value | Estimate | Lower Bound | Upper Bound | Adj P-Value |
|---|---|---|---|---|---|---|
| surface | Grass-Clay | 0 | -3.068845 | -4.3938557 | -1.7438334 | 0.00000018 |
| surface | Hard-Clay | 0 | -1.469595 | -2.3556106 | -0.5835802 | 0.00030317 |
| surface | Hard-Grass | 0 | 1.599249 | 0.3700932 | 2.8284051 | 0.00651235 |
\(H_0\) = Hand combination has no impact on the match duration and/or average set length
\(H_A\) = Hand combination does impact the match duration and/or average set length
| Hand Combination | Mean | Variance | StDev | Obs | Mean | Variance | StDev | Obs |
|---|---|---|---|---|---|---|---|---|
| LR | 104.1327 | 967.3956 | 31.10298 | 226 | 155.2923 | 1775.273 | 42.13398 | 65 |
| RR | 101.4967 | 911.1103 | 30.18460 | 1524 | 157.4669 | 2347.862 | 48.45475 | 347 |
| LL | 101.2368 | 982.4018 | 31.34329 | 38 | 147.6667 | 1529.333 | 39.10669 | 3 |
| RL | 100.5369 | 1021.4595 | 31.96028 | 244 | 152.7162 | 2349.740 | 48.47412 | 74 |
| Term | DF | Sum Sq | Mean Sq | F Stat | P Value | DF | Sum Sq | Mean Sq | F Stat | P Value |
|---|---|---|---|---|---|---|---|---|---|---|
| hand_combo | 3 | 1736.887 | 578.9623 | 0.6212855 | 0.6012237 | 3 | 1707.717 | 569.239 | 0.2508532 | 0.8607317 |
| Residuals | 2028 | 1889848.538 | 931.8780 | NA | NA | 485 | 1100567.522 | 2269.211 | NA | NA |
| Comparison | Null Value | Estimate | Lower Bound | Upper Bound | Adj P-Value | Null Value | Estimate | Lower Bound | Upper Bound | Adj P-Value |
|---|---|---|---|---|---|---|---|---|---|---|
| LR-LL | 0 | 2.8959013 | -10.865576 | 16.657378 | 0.9489460 | 0 | 7.625641 | -64.89319 | 80.14447 | 0.9930293 |
| RL-LL | 0 | -0.6999569 | -14.388176 | 12.988262 | 0.9991887 | 0 | 5.049550 | -67.27446 | 77.37356 | 0.9979279 |
| RR-LL | 0 | 0.2598770 | -12.630491 | 13.150246 | 0.9999500 | 0 | 9.800192 | -61.40674 | 81.00713 | 0.9846743 |
| RL-LR | 0 | -3.5958581 | -10.842034 | 3.650318 | 0.5784185 | 0 | -2.576091 | -23.45215 | 18.29997 | 0.9888512 |
| RR-LR | 0 | -2.6360242 | -8.230784 | 2.958735 | 0.6196308 | 0 | 2.174551 | -14.42288 | 18.77198 | 0.9867204 |
| RR-RL | 0 | 0.9598339 | -4.452229 | 6.371897 | 0.9684699 | 0 | 4.750643 | -10.97376 | 20.47504 | 0.8640336 |
\(H_0\) = The absolute value of player ranking distance has no negative impact on a match’s duration
\(H_A\) = The absolute value of player ranking distance negatively impacts a match’s duration
| Term | Beta | SE | T-Stat | P-Value | Beta | SE | T-Stat | P-Value |
|---|---|---|---|---|---|---|---|---|
| (Intercept) | 101.36111 | 0.86693 | 116.91923 | 0.00000 | 157.5944 | 2.75153 | 57.27509 | 0.00000 |
| abs_ranking_distance | 0.00568 | 0.00996 | 0.57018 | 0.56862 | -0.0181 | 0.02599 | -0.69648 | 0.48646 |
\(H_0\) = The match round within a tournament has no impact on how long the match will take
\(H_A\) = The match round positively impacts how long the match will take
| Term | Beta | SE | T-Stat | P-Value | Beta | SE | T-Stat | P-Value |
|---|---|---|---|---|---|---|---|---|
| (Intercept) | 103.90323 | 3.86056 | 26.91402 | 0.00000 | 150.87045 | 3.00017 | 50.28724 | 0.00000 |
| roundR64 | -0.45666 | 4.29311 | -0.10637 | 0.91530 | 10.44360 | 5.23212 | 1.99606 | 0.04649 |
| roundR32 | -5.14452 | 4.00665 | -1.28400 | 0.19929 | 10.08194 | 6.65514 | 1.51491 | 0.13045 |
| roundR16 | -1.45569 | 4.10885 | -0.35428 | 0.72317 | 11.35536 | 8.98438 | 1.26390 | 0.20688 |
| roundQF | 2.35651 | 4.34789 | 0.54199 | 0.58789 | 7.99622 | 12.53867 | 0.63773 | 0.52396 |
| roundSF | 0.28165 | 4.76120 | 0.05915 | 0.95283 | 5.50455 | 16.93836 | 0.32498 | 0.74534 |
| roundF | 0.98566 | 5.43795 | 0.18126 | 0.85618 | 72.12955 | 23.76584 | 3.03501 | 0.00254 |