This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
Importing Data and Packages. Merging datasets
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Load the seven CSV exports used in this notebook. Six of them pair a prefix
# (SA / MW) with a suffix (STD / ADV / BAT); MLB is a separate table.
SASTD <- read.csv("AML - SA_STD.csv")
SAADV <- read.csv("AML - SA_ADV.csv")
SABAT <- read.csv("AML - SA_BAT.csv")
MWSTD <- read.csv("AML - MW_STD.csv")
MWADV <- read.csv("AML - MW_ADV.csv")
MWBAT <- read.csv("AML - MW_BAT.csv")
MLB <- read.csv("AML - MLB.csv")
# Combine the three SA tables into one, matching rows on the "Name" column.
# inner_join keeps only names present on both sides of each join.
SA1 <- inner_join(SASTD, SAADV, by = "Name")
SA <- inner_join(SA1, SABAT, by = "Name")
# Same two-step join for the MW tables.
# NOTE(review): the captured warnings below report a many-to-many match on
# "Name" for both MW joins, i.e. at least one Name value occurs more than once
# on each side, so the joined table contains extra row combinations for it.
# Confirm whether "Name" alone identifies a row, or whether a second key
# column is needed.
MW1 <- inner_join(MWSTD, MWADV, by = "Name")
## Warning in inner_join(MWSTD, MWADV, by = "Name"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 25 of `x` matches multiple rows in `y`.
## ℹ Row 16 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
MW <- inner_join(MW1, MWBAT, by = "Name")
## Warning in inner_join(MW1, MWBAT, by = "Name"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 25 of `x` matches multiple rows in `y`.
## ℹ Row 442 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# Stack the SA and MW tables, then attach the MLB columns by Name. left_join
# keeps every row of MILB; non-key columns that exist in both inputs get the
# suffix "_A" (from MILB) or "_MLB" (from MLB).
MILB <- bind_rows(SA, MW)
data <- left_join(MILB, MLB, by = "Name", suffix = c("_A", "_MLB"))
# For each listed column: strip the first "%" character, parse the remainder
# as a number and divide by 100.
data <- mutate(
  data,
  across(
    c(LD., GB., FB., IFFB., HR.FB, Pull., Cent., Oppo., SwStr., BB._A, K._A),
    ~ as.numeric(sub("%", "", .)) / 100
  )
)
head(data)
# fin_age is the numeric value of the last two characters of Age.x.
data$fin_age <- as.numeric(
  substr(data$Age.x, nchar(data$Age.x) - 1, nchar(data$Age.x))
)
Out of 996 players in A ball, only 99 (just under 10%) have had at least 500 plate appearances in the major leagues. My goal is to find ranges of statistics that identify groups of A-ball players who reach the 500 PA threshold at a higher rate.
Getting train and test sets
# Fix the RNG seed so the same 600 rows are drawn on every run; the rows that
# are not drawn form the test set.
set.seed(4036)
train_index <- sample.int(nrow(data), 600)
train <- data[train_index, ]
test <- data[-train_index, ]
# Show, then count, the training rows with more than 400 MLB plate appearances.
# NOTE(review): the cut-off here is 400, while the search functions further
# down use PA_MLB > 499 -- confirm which threshold is intended.
filter(train, PA_MLB > 400)
nrow(filter(train, PA_MLB > 400))
## [1] 59
Creating first split: The goal is to find a range of a statistic that maximizes the percentage of players who have made it to the MLB. I’m going to begin with line drive%
# Scan the LD. column (line drive %) for value ranges whose players reached
# the majors at a high rate.
#
# Reads the global data frames `train` and `test`. For every bin count from 2
# to 20, the observed LD. range of `train` is cut into equal-width bins. A bin
# is recorded when more than 16% of its training rows have PA_MLB > 499 and it
# holds more than 5 training rows; the same bin is then scored on `test`.
# The table of recorded bins is cumulative across bin counts and is printed
# once per bin count.
mlld <- function() {
  ld_floor <- min(train$LD.)
  ld_ceiling <- max(train$LD.)

  # Accumulators start as NULL so that data.frame() below yields the
  # 0-column / 0-row frame until the first bin qualifies.
  kept_rate <- NULL
  kept_lower <- NULL
  kept_upper <- NULL
  kept_n <- NULL
  kept_test_rate <- NULL
  kept_test_n <- NULL

  for (n_bins in 2:20) {
    bin_width <- (ld_ceiling - ld_floor) / n_bins

    # Each bin is the half-open interval [bin_lo, bin_lo + bin_width).
    for (bin_lo in seq(from = ld_floor, to = ld_ceiling, by = bin_width)) {
      bin_hi <- bin_lo + bin_width
      train_bin <- train %>% filter(LD. >= bin_lo, LD. < bin_hi)
      n_train <- nrow(train_bin)
      rate_train <- nrow(train_bin %>% filter(PA_MLB > 499)) / n_train

      # Bins with a high enough success rate and enough players are kept;
      # they are later assembled into the printed data frame.
      if (rate_train > 0.16 && n_train > 5) {
        test_bin <- test %>% filter(LD. >= bin_lo, LD. < bin_hi)
        n_test <- nrow(test_bin)
        rate_test <- nrow(test_bin %>% filter(PA_MLB > 499)) / n_test

        kept_upper <- c(kept_upper, bin_hi)
        kept_lower <- c(kept_lower, bin_lo)
        kept_n <- c(kept_n, n_train)
        kept_rate <- c(kept_rate, rate_train)
        kept_test_rate <- c(kept_test_rate, rate_test)
        kept_test_n <- c(kept_test_n, n_test)
      }
    }

    success_ranges <- data.frame(
      success = kept_rate,
      lower = kept_lower,
      upper = kept_upper,
      total = kept_n,
      test_success = kept_test_rate,
      test_total = kept_test_n
    )
    print(success_ranges)
  }
}
mlld()
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## success lower upper total test_success test_total
## 1 0.1875 0.104 0.125 16 0.1176471 17
## success lower upper total test_success test_total
## 1 0.1875 0.104 0.125 16 0.1176471 17
## success lower upper total test_success test_total
## 1 0.1875 0.104 0.125 16 0.1176471 17
## success lower upper total test_success test_total
## 1 0.1875000 0.1040 0.125 16 0.1176471 17
## 2 0.1666667 0.2645 0.281 6 0.2500000 4
## success lower upper total test_success test_total
## 1 0.1875000 0.1040 0.1250 16 0.1176471 17
## 2 0.1666667 0.2645 0.2810 6 0.2500000 4
## 3 0.1666667 0.2678 0.2832 6 0.3333333 3
## success lower upper total test_success test_total
## 1 0.1875000 0.104000 0.1250000 16 0.11764706 17
## 2 0.1666667 0.264500 0.2810000 6 0.25000000 4
## 3 0.1666667 0.267800 0.2832000 6 0.33333333 3
## 4 0.1764706 0.111875 0.1263125 17 0.18181818 11
## 5 0.1607143 0.140750 0.1551875 56 0.08108108 37
## success lower upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000 16 0.11764706 17
## 2 0.1666667 0.2645000 0.2810000 6 0.25000000 4
## 3 0.1666667 0.2678000 0.2832000 6 0.33333333 3
## 4 0.1764706 0.1118750 0.1263125 17 0.18181818 11
## 5 0.1607143 0.1407500 0.1551875 56 0.08108108 37
## 6 0.1666667 0.1101765 0.1237647 12 0.20000000 10
## success lower upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000 16 0.11764706 17
## 2 0.1666667 0.2645000 0.2810000 6 0.25000000 4
## 3 0.1666667 0.2678000 0.2832000 6 0.33333333 3
## 4 0.1764706 0.1118750 0.1263125 17 0.18181818 11
## 5 0.1607143 0.1407500 0.1551875 56 0.08108108 37
## 6 0.1666667 0.1101765 0.1237647 12 0.20000000 10
## 7 0.1666667 0.1215000 0.1343333 18 0.12500000 8
## success lower upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000 16 0.11764706 17
## 2 0.1666667 0.2645000 0.2810000 6 0.25000000 4
## 3 0.1666667 0.2678000 0.2832000 6 0.33333333 3
## 4 0.1764706 0.1118750 0.1263125 17 0.18181818 11
## 5 0.1607143 0.1407500 0.1551875 56 0.08108108 37
## 6 0.1666667 0.1101765 0.1237647 12 0.20000000 10
## 7 0.1666667 0.1215000 0.1343333 18 0.12500000 8
## 8 0.1666667 0.1194737 0.1316316 18 0.33333333 6
## 9 0.1777778 0.1437895 0.1559474 45 0.09375000 32
## success lower upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000 16 0.11764706 17
## 2 0.1666667 0.2645000 0.2810000 6 0.25000000 4
## 3 0.1666667 0.2678000 0.2832000 6 0.33333333 3
## 4 0.1764706 0.1118750 0.1263125 17 0.18181818 11
## 5 0.1607143 0.1407500 0.1551875 56 0.08108108 37
## 6 0.1666667 0.1101765 0.1237647 12 0.20000000 10
## 7 0.1666667 0.1215000 0.1343333 18 0.12500000 8
## 8 0.1666667 0.1194737 0.1316316 18 0.33333333 6
## 9 0.1777778 0.1437895 0.1559474 45 0.09375000 32
## 10 0.1875000 0.1176500 0.1292000 16 0.25000000 8
Now I’m going to extend the model to two variables at once: line drive % and BB/K.
# Two-variable bin search over LD. (line drive %) and BB.K_A (BB/K).
#
# For every bin count in `splits`, the observed range of each statistic in the
# training data is cut into that many equal-width bins, and every
# (LD. bin, BB.K_A bin) cell is examined. A cell is recorded when it holds more
# than `min_total` training rows and more than `min_success` of them have
# PA_MLB > 499. The identical cell is then scored on the test data.
#
# Each statistic is binned with its own width: BB.K_A cells span
# [lower2, lower2 + BB.K_A width), so the training and test rows are always
# selected with the same bounds.
#
# Args:
#   train_data:  data frame with columns LD., BB.K_A and PA_MLB; defaults to
#                the global `train`.
#   test_data:   data frame with the same columns; defaults to the global
#                `test`.
#   splits:      bin counts to try (default 2:20).
#   min_success: training success rate a cell must exceed (default 0.3).
#   min_total:   number of training rows a cell must exceed (default 5).
#
# Returns (invisibly) the data frame of recorded cells with columns success,
# lower, upper, lower2, upper2, total, test_success and test_total. The table
# is cumulative across bin counts and, as before, is printed once per bin
# count. test_success is NaN when no test row falls in the cell.
ml2 <- function(train_data = train, test_data = test, splits = 2:20,
                min_success = 0.3, min_total = 5) {
  # Rows of `df` inside [ld_lo, ld_hi) x [bbk_lo, bbk_hi). which() drops rows
  # where either statistic is NA.
  rows_in_cell <- function(df, ld_lo, ld_hi, bbk_lo, bbk_hi) {
    keep <- df$LD. >= ld_lo & df$LD. < ld_hi &
      df$BB.K_A >= bbk_lo & df$BB.K_A < bbk_hi
    df[which(keep), , drop = FALSE]
  }

  # Share of rows with PA_MLB > 499; a missing PA_MLB counts as not reaching
  # the threshold. NaN for an empty set of rows.
  mlb_rate <- function(df) {
    sum(df$PA_MLB > 499, na.rm = TRUE) / nrow(df)
  }

  ld_min <- min(train_data$LD., na.rm = TRUE)
  ld_max <- max(train_data$LD., na.rm = TRUE)
  bbk_min <- min(train_data$BB.K_A, na.rm = TRUE)
  bbk_max <- max(train_data$BB.K_A, na.rm = TRUE)

  hits <- list()
  success_ranges <- data.frame()

  for (n_bins in splits) {
    ld_width <- (ld_max - ld_min) / n_bins
    bbk_width <- (bbk_max - bbk_min) / n_bins

    for (ld_lo in seq(from = ld_min, to = ld_max, by = ld_width)) {
      ld_hi <- ld_lo + ld_width

      for (bbk_lo in seq(from = bbk_min, to = bbk_max, by = bbk_width)) {
        bbk_hi <- bbk_lo + bbk_width

        train_cell <- rows_in_cell(train_data, ld_lo, ld_hi, bbk_lo, bbk_hi)
        total <- nrow(train_cell)
        # Checking the size first also guarantees the rate below is not 0/0.
        if (total <= min_total) next

        success_rate <- mlb_rate(train_cell)
        if (success_rate <= min_success) next

        test_cell <- rows_in_cell(test_data, ld_lo, ld_hi, bbk_lo, bbk_hi)
        hits[[length(hits) + 1L]] <- data.frame(
          success = success_rate,
          lower = ld_lo,
          upper = ld_hi,
          lower2 = bbk_lo,
          upper2 = bbk_hi,
          total = total,
          test_success = mlb_rate(test_cell),
          test_total = nrow(test_cell)
        )
      }
    }

    if (length(hits) > 0) {
      success_ranges <- do.call(rbind, hits)
    }
    print(success_ranges)
  }

  invisible(success_ranges)
}
ml2()
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.16 0.3955556 0.5533333 6 0 20
## success lower upper lower2 upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.1600 0.3955556 0.5533333 6 0.00000000 20
## 2 0.3333333 0.1754000 0.1908 0.2693333 0.3640000 6 0.09090909 11
## success lower upper lower2 upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333 6 0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000 6 0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500 8 0.00000000
## test_total
## 1 20
## 2 11
## 3 16
## success lower upper lower2 upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333 6 0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000 6 0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500 8 0.00000000
## test_total
## 1 20
## 2 11
## 3 16
## success lower upper lower2 upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333 6 0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000 6 0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500 8 0.00000000
## test_total
## 1 20
## 2 11
## 3 16
## success lower upper lower2 upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333 6 0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000 6 0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500 8 0.00000000
## test_total
## 1 20
## 2 11
## 3 16
## success lower upper lower2 upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333 6 0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000 6 0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500 8 0.00000000
## test_total
## 1 20
## 2 11
## 3 16
Now the goal is to iterate through 3 statistics, each from a different category. This function takes a couple of minutes to run.
# Top-level setup: the bin counts to try, plus a set of empty (NULL)
# accumulator vectors. NULL is exactly what c() returns.
splitind <- 2:10
uppercol <- lowercol <- totalcol <- NULL
uppercol2 <- lowercol2 <- totalcol2 <- NULL
uppercol3 <- lowercol3 <- totalcol3 <- NULL
srcol <- test_succcol <- testtotalcol <- comb_success <- NULL
stat1 <- stat2 <- stat3 <- NULL
# Three-variable bin search: same idea as the one- and two-variable versions,
# with one nested loop per variable.
#
# Reads the global data frames `train` and `test` and the global vector of bin
# counts `splitind`. For each bin count, the training range of every variable
# is cut into that many equal-width bins and each 3-D cell is examined. A cell
# is recorded when
#   * it holds more than 10 training rows,
#   * more than 20% of them have PA_MLB > 499, and
#   * at least one test row falls in the cell and more than 20% of the test
#     rows have PA_MLB > 499.
#
# All bookkeeping is local to the call, so the result does not depend on the
# contents of any same-named vectors in the calling environment.
#
# Args:
#   var1, var2, var3: column names (character) present in `train` and `test`.
#
# Returns a data frame with one row per recorded cell and the columns success,
# total, test_success, test_total, overall_success (row-weighted mean of the
# two rates), metric1/lower/upper, metric2/lower2/upper2 and
# metric3/lower3/upper3. An empty data frame is returned when no cell
# qualifies.
myfun <- function(var1, var2, var3) {
  # Logical mask of the rows of `df` inside the cell whose lower corner is
  # (lo1, lo2, lo3) and whose side lengths are (w1, w2, w3). Entries are NA
  # where a needed value is missing; callers sum with na.rm = TRUE.
  cell_mask <- function(df, lo1, lo2, lo3, w1, w2, w3) {
    df[[var1]] >= lo1 & df[[var1]] < (lo1 + w1) &
      df[[var2]] >= lo2 & df[[var2]] < (lo2 + w2) &
      df[[var3]] >= lo3 & df[[var3]] < (lo3 + w3)
  }

  min1 <- min(train[[var1]], na.rm = TRUE)
  max1 <- max(train[[var1]], na.rm = TRUE)
  min2 <- min(train[[var2]], na.rm = TRUE)
  max2 <- max(train[[var2]], na.rm = TRUE)
  min3 <- min(train[[var3]], na.rm = TRUE)
  max3 <- max(train[[var3]], na.rm = TRUE)

  hits <- list()

  for (n_bins in splitind) {
    w1 <- (max1 - min1) / n_bins
    w2 <- (max2 - min2) / n_bins
    w3 <- (max3 - min3) / n_bins
    starts1 <- seq(from = min1, to = max1, by = w1)
    starts2 <- seq(from = min2, to = max2, by = w2)
    starts3 <- seq(from = min3, to = max3, by = w3)

    for (lo1 in starts1) {
      for (lo2 in starts2) {
        for (lo3 in starts3) {
          train_mask <- cell_mask(train, lo1, lo2, lo3, w1, w2, w3)
          total <- sum(train_mask, na.rm = TRUE)
          if (total <= 10) next

          successes <- sum(train_mask & train$PA_MLB > 499, na.rm = TRUE)
          success_rate <- successes / total
          if (success_rate <= 0.2) next

          test_mask <- cell_mask(test, lo1, lo2, lo3, w1, w2, w3)
          testtotal <- sum(test_mask, na.rm = TRUE)
          # A cell with no test rows has no test rate and is not recorded.
          if (testtotal == 0) next

          test_successes <- sum(test_mask & test$PA_MLB > 499, na.rm = TRUE)
          test_success <- test_successes / testtotal
          if (test_success <= 0.2) next

          hits[[length(hits) + 1L]] <- data.frame(
            success = success_rate,
            total = total,
            test_success = test_success,
            test_total = testtotal,
            overall_success = (total * success_rate + testtotal * test_success) /
              (total + testtotal),
            metric1 = var1,
            lower = lo1,
            upper = lo1 + w1,
            metric2 = var2,
            lower2 = lo2,
            upper2 = lo2 + w2,
            metric3 = var3,
            lower3 = lo3,
            upper3 = lo3 + w3
          )
        }
      }
    }
  }

  if (length(hits) == 0) {
    return(data.frame())
  }
  do.call(rbind, hits)
}
# Four groups of statistic column names.
cat1 <- c("LD.", "FB.", "GB.", "GB.FB", "HR.FB")
cat2 <- c("Pull.", "Cent.", "Oppo.")
cat3 <- c("BB._A", "K._A", "BB.K_A")
cat4 <- c("AVG.y", "OBP_A", "SLG_A", "OPS_A", "ISO_A")
all_sr <- data.frame()
sr1 <- data.frame()
# Run the search for every (cat1, cat4, cat3) combination and stack the
# results in sr1. cat1 varies slowest and cat3 fastest.
for (first_stat in cat1) {
  for (second_stat in cat4) {
    for (third_stat in cat3) {
      sr1 <- rbind(sr1, myfun(first_stat, second_stat, third_stat))
    }
  }
}
The function below compares groups of different sizes across different variables: each statistic now gets its own number of bins. I also noticed that the success rate among 19-year-olds is significantly higher than at other ages, which makes intuitive sense because they are younger and have more time to develop. I decided to include only 19-year-olds to try to increase the success rate.
# set.seed(4036)
# Draw a new 498-row training sample. The seed call above is disabled, so the
# draw continues from the current RNG state. Then keep only the rows with
# fin_age equal to 19 in both the training and the test set.
train_index <- sample.int(nrow(data), 498)
train <- filter(data[train_index, ], fin_age == 19)
test <- filter(data[-train_index, ], fin_age == 19)
# Three-statistic bin search in which each statistic gets its own bin count.
#
# Reads the global data frames `train` and `test`. For every combination of
# bin counts (i, i2, i3) taken from 2:5, the function walks over all 3-D cells
# and records a cell when more than 50% of its training rows AND more than 50%
# of its test rows have PA_MLB > 499, with more than 5 rows on each side.
#
# Args:
#   var1, var2, var3: column names (character), looked up with [[ ]] in both
#                     `train` and `test`.
#
# Returns a data frame with one row per recorded cell: success / total
# (training rate and row count), test_success / test_total, overall_success
# (row-weighted mean of the two rates), success_diff (training rate minus test
# rate), and the metric name plus lower/upper bound for each of the three
# statistics. With no recorded cells every column is NULL, so the result is an
# empty data frame.
myfun <- function(var1, var2, var3) {
# Candidate bin counts, used independently for each statistic below.
splitind <- 2:5
# Accumulators; one element is appended to each for every recorded cell.
# totalcol2 and totalcol3 are initialised but never used afterwards.
uppercol <- c()
lowercol <- c()
totalcol <- c()
uppercol2 <- c()
lowercol2 <- c()
totalcol2 <- c()
uppercol3 <- c()
lowercol3 <- c()
totalcol3 <- c()
srcol <- c()
test_succcol <- c()
testtotalcol <- c()
comb_success <- c()
stat1 <- c()
stat2 <- c()
stat3 <- c()
differ <- c()
# Initial min and max for each variable
min <- min(train[[var1]], na.rm = TRUE)
max <- max(train[[var1]], na.rm = TRUE)
min2 <- min(train[[var2]], na.rm = TRUE)
max2 <- max(train[[var2]], na.rm = TRUE)
min3 <- min(train[[var3]], na.rm = TRUE)
max3 <- max(train[[var3]], na.rm = TRUE)
# Outer three loops choose the bin count for var1, var2 and var3; the bin
# width and the vector of bin lower bounds are derived from the current
# min/max of each variable.
for (i in splitind) {
group <- (max - min) / i
range <- seq(from = min, to = max, by = group)
for (i2 in splitind) {
group2 <- (max2 - min2) / i2
# Skip this bin count when the var2 width is not a positive finite number.
# (There is no equivalent guard for the var1 width above.)
if (!is.finite(group2) || group2 <= 0) next
range2 <- seq(from = min2, to = max2, by = group2)
for (i3 in splitind) {
group3 <- (max3 - min3) / i3
# Same guard for the var3 width.
if (!is.finite(group3) || group3 <= 0) next
range3 <- seq(from = min3, to = max3, by = group3)
# Inner three loops visit every cell [y, y + group) x [q, q + group2) x
# [s, s + group3).
for (y in range) {
for (q in range2) {
for (s in range3) {
# Filter rows dynamically
# valid_rows is a logical mask over train; entries are NA where a needed
# value is missing.
valid_rows <- train[[var1]] >= y & train[[var1]] < (y + group) &
train[[var2]] >= q & train[[var2]] < (q + group2) &
train[[var3]] >= s & train[[var3]] < (s + group3)
# Skip if no valid rows
if (sum(valid_rows, na.rm = TRUE) == 0) next
# Recalculate min and max for ranges
# The min/max of var2 and var3 are taken over the training rows of this cell.
temp_min2 <- min(train[[var2]][valid_rows], na.rm = TRUE)
temp_max2 <- max(train[[var2]][valid_rows], na.rm = TRUE)
temp_min3 <- min(train[[var3]][valid_rows], na.rm = TRUE)
temp_max3 <- max(train[[var3]][valid_rows], na.rm = TRUE)
# Skip the cell when either statistic is non-finite or constant within it.
if (!is.finite(temp_min2) || !is.finite(temp_max2) || temp_min2 == temp_max2 ||
!is.finite(temp_min3) || !is.finite(temp_max3) || temp_min3 == temp_max3) next
# These assignments overwrite the function-level min2/max2/min3/max3. The
# grids already being iterated (range2, range3) are unchanged, but the
# enclosing i2 and i3 loops read these variables when they compute group2 /
# range2 and group3 / range3 on their next iteration, so later grids span only
# the range seen in the most recent qualifying cell.
# NOTE(review): confirm this carry-over between iterations is intended; it
# makes the result depend on the order in which cells are visited.
min2 <- temp_min2
max2 <- temp_max2
min3 <- temp_min3
max3 <- temp_max3
# Calculate successes and totals
# "Success" means PA_MLB > 499; rows with a missing PA_MLB are not counted as
# successes (na.rm = TRUE drops the NA results).
successes <- sum(
train[[var1]] >= y & train[[var1]] < (y + group) &
train[[var2]] >= q & train[[var2]] < (q + group2) &
train[[var3]] >= s & train[[var3]] < (s + group3) &
train$PA_MLB > 499,
na.rm = TRUE
)
total <- sum(
train[[var1]] >= y & train[[var1]] < (y + group) &
train[[var2]] >= q & train[[var2]] < (q + group2) &
train[[var3]] >= s & train[[var3]] < (s + group3),
na.rm = TRUE
)
test_successes <- sum(
test[[var1]] >= y & test[[var1]] < (y + group) &
test[[var2]] >= q & test[[var2]] < (q + group2) &
test[[var3]] >= s & test[[var3]] < (s + group3) &
test$PA_MLB > 499,
na.rm = TRUE
)
testtotal <- sum(
test[[var1]] >= y & test[[var1]] < (y + group) &
test[[var2]] >= q & test[[var2]] < (q + group2) &
test[[var3]] >= s & test[[var3]] < (s + group3),
na.rm = TRUE
)
# NA (rather than a 0/0 division) when no test row falls in the cell.
test_success <- ifelse(testtotal > 0, test_successes / testtotal, NA)
# total repeats the sum(valid_rows) count checked above, so it is always
# positive by the time this line is reached.
if (total > 0) {
success_rate <- successes / total
# Record the cell only when both the training and the test rate exceed 0.5
# and both sides hold more than 5 rows. Note that the test set therefore
# takes part in selecting cells, not only in evaluating them.
if (!is.na(success_rate) && success_rate > 0.5 && total > 5 &&
!is.na(test_success) && test_success > 0.5 && testtotal > 5) {
uppercol <- c(uppercol, y + group)
lowercol <- c(lowercol, y)
uppercol2 <- c(uppercol2, q + group2)
lowercol2 <- c(lowercol2, q)
uppercol3 <- c(uppercol3, s + group3)
lowercol3 <- c(lowercol3, s)
totalcol <- c(totalcol, total)
srcol <- c(srcol, success_rate)
test_succcol <- c(test_succcol, test_success)
testtotalcol <- c(testtotalcol, testtotal)
comb_success <- c(comb_success, (total * success_rate + testtotal * test_success) / (total + testtotal))
stat1 <- c(stat1, var1)
stat2 <- c(stat2, var2)
stat3 <- c(stat3, var3)
differ <- c(differ, success_rate - test_success)
}
}
}
}
}
}
}
}
# Constructing the final data frame
success_ranges <- data.frame(
success = srcol,
total = totalcol,
test_success = test_succcol,
test_total = testtotalcol,
overall_success = comb_success,
success_diff = differ,
metric1 = stat1,
lower = lowercol,
upper = uppercol,
metric2 = stat2,
lower2 = lowercol2,
upper2 = uppercol2,
metric3 = stat3,
lower3 = lowercol3,
upper3 = uppercol3
)
return(success_ranges)
}
cat1 <- c("LD.", "FB.", "GB.", "GB.FB", "HR.FB", "Pull.", "Cent.", "Oppo.", "wSB_A", "BB._A", "K._A", "BB.K_A", "SwStr.", "AVG.y", "OBP_A", "SLG_A", "OPS_A", "wRC._A", "wOBA_A")
sr19adv <- data.frame()
# Run the search for every unordered triple of statistics. combn() yields the
# triples in lexicographic order of position (first element slowest, last
# element fastest), each triple keeping the order the names have in cat1.
for (triple in combn(cat1, 3, simplify = FALSE)) {
  sr19adv <- rbind(sr19adv, myfun(triple[[1]], triple[[2]], triple[[3]]))
}
print(sr19adv)
## success total test_success test_total overall_success success_diff metric1
## 1 0.6250000 8 0.6666667 6 0.6428571 -0.04166667 LD.
## 2 0.5714286 7 0.5714286 7 0.5714286 0.00000000 LD.
## 3 0.5714286 7 0.5714286 7 0.5714286 0.00000000 LD.
## 4 0.5833333 12 0.6666667 6 0.6111111 -0.08333333 FB.
## 5 0.5384615 13 0.5714286 7 0.5500000 -0.03296703 FB.
## 6 0.5555556 9 0.5714286 7 0.5625000 -0.01587302 GB.
## lower upper metric2 lower2 upper2 metric3 lower3 upper3
## 1 0.188 0.282 GB.FB 0.6300 1.390 wOBA_A 0.337 0.439
## 2 0.188 0.282 wSB_A -0.0500 3.300 OPS_A 0.740 0.981
## 3 0.188 0.282 wSB_A -0.0500 3.300 wRC._A 113.000 181.000
## 4 0.374 0.504 GB. 0.3170 0.439 OBP_A 0.318 0.403
## 5 0.374 0.504 GB.FB 0.6300 1.390 OBP_A 0.318 0.403
## 6 0.317 0.439 BB._A 0.0825 0.145 SwStr. 0.042 0.134
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.