This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Importing Data and Packages. Merging datasets

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Load the raw exports: three stat tables (STD, ADV, BAT) for each of the two
# minor-league file prefixes (SA, MW), plus one MLB table.
SASTD <- read.csv("AML - SA_STD.csv")
SAADV <- read.csv("AML - SA_ADV.csv")
SABAT <- read.csv("AML - SA_BAT.csv")
MWSTD <- read.csv("AML - MW_STD.csv")
MWADV <- read.csv("AML - MW_ADV.csv")
MWBAT <- read.csv("AML - MW_BAT.csv")
MLB <- read.csv("AML - MLB.csv")

# Combine the three SA tables into one row per matching Name. Columns that
# occur in more than one table get dplyr's default ".x"/".y" suffixes, which
# later code relies on (e.g. Age.x, AVG.y).
SA1 <- inner_join(SASTD, SAADV, by = "Name")
SA <- inner_join(SA1, SABAT, by = "Name")

# Same three-way join for the MW tables.
# NOTE(review): the warnings recorded below show that Name is not unique in
# the MW tables, so these joins duplicate rows for the repeated names -
# confirm whether a second key (or de-duplication) is needed.
MW1 <- inner_join(MWSTD, MWADV, by = "Name")
## Warning in inner_join(MWSTD, MWADV, by = "Name"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 25 of `x` matches multiple rows in `y`.
## ℹ Row 16 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
MW <- inner_join(MW1, MWBAT, by = "Name")
## Warning in inner_join(MW1, MWBAT, by = "Name"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 25 of `x` matches multiple rows in `y`.
## ℹ Row 442 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Stack both minor-league tables into one.
MILB <- bind_rows(SA, MW)

# Attach the MLB columns by Name. Players without an MLB row keep NA in the
# MLB columns; columns present on both sides are suffixed "_A" (minor-league
# side) and "_MLB".
data <- left_join(MILB, MLB, by = "Name", suffix = c("_A", "_MLB"))

# Turn the percentage strings into proportions: drop the first "%" and divide
# by 100.
data <- data %>%
  mutate(across(c(LD., GB., FB., IFFB., HR.FB, Pull., Cent., Oppo., SwStr., BB._A, K._A), 
                ~ as.numeric(sub("%", "", .)) / 100))
head(data)
# fin_age is the last two characters of Age.x, converted to a number.
# NOTE(review): assumes Age.x always ends in a two-digit age - confirm.
data$fin_age <- substr(data$Age.x, nchar(data$Age.x) - 1, nchar(data$Age.x))
data$fin_age <- as.numeric(data$fin_age)

Out of 996 players in A ball, only 99 (just under 10%) have had at least 500 plate appearances in the major leagues. My goal is to find ranges of statistics that pick out A-ball players who reach the 500 PA threshold at a higher rate than that baseline.

Getting train and test sets

# Fix the RNG so the 600-row training draw is repeatable; every row that is
# not drawn goes to the test set.
set.seed(4036)
train_index <- sample(nrow(data), 600)
train <- data[train_index, ]
test <- data[-train_index, ]
# Show, then count, the training players with more than 400 MLB plate
# appearances.
# NOTE(review): this check uses a 400 PA cut-off, while the scans in this
# notebook count PA_MLB > 499 as a success - confirm which one is intended.
filter(train, PA_MLB > 400)
nrow(filter(train, PA_MLB > 400))
## [1] 59

Creating the first split: the goal is to find a range of a statistic that maximizes the percentage of players who have made it to the MLB. I’m going to begin with line drive %.

# Scan line-drive rate (LD.) for bins in which players reach the majors at an
# unusually high rate.
#
# For every split count in `splits`, the LD. range observed in the training
# data is stepped through in bins of equal width. A bin is kept when it holds
# more than `min_total` training players and more than `min_rate` of them have
# PA_MLB > `pa_cutoff`; the same bin is then scored on the test data.
#
# Args:
#   train_data, test_data: data frames with numeric columns LD. and PA_MLB.
#     They default to the global `train` and `test` objects.
#   splits: split counts to try.
#   min_rate: training success rate a bin must exceed.
#   min_total: number of training players a bin must exceed.
#   pa_cutoff: a player is a success when PA_MLB is above this value.
#
# Prints the table of kept bins after each split count. The table is
# cumulative: bins kept for earlier split counts stay in it. The final table
# is also returned invisibly.
mlld <- function(train_data = train, test_data = test, splits = 2:20,
                 min_rate = 0.16, min_total = 5, pa_cutoff = 499) {
  ld_min <- min(train_data$LD.)
  ld_max <- max(train_data$LD.)

  # Size and success rate of the LD. bin [lo, hi) in `df`. Rows whose PA_MLB
  # is NA count towards the size but never as a success. An empty bin has a
  # NaN rate.
  bin_stats <- function(df, lo, hi) {
    in_bin <- df$LD. >= lo & df$LD. < hi
    size <- sum(in_bin, na.rm = TRUE)
    hits <- sum(in_bin & df$PA_MLB > pa_cutoff, na.rm = TRUE)
    list(total = size, rate = hits / size)
  }

  # Result columns start as NULL on purpose: data.frame() of NULL columns
  # prints as "data frame with 0 columns and 0 rows" while nothing is kept.
  success <- lower <- upper <- total <- test_success <- test_total <- c()
  success_ranges <- data.frame()

  for (n_bins in splits) {
    width <- (ld_max - ld_min) / n_bins

    for (lo in seq(from = ld_min, to = ld_max, by = width)) {
      hi <- lo + width
      train_bin <- bin_stats(train_data, lo, hi)

      # The size test runs first, so the NaN rate of an empty bin is never
      # compared.
      if (train_bin$total > min_total && train_bin$rate > min_rate) {
        test_bin <- bin_stats(test_data, lo, hi)

        success <- c(success, train_bin$rate)
        lower <- c(lower, lo)
        upper <- c(upper, hi)
        total <- c(total, train_bin$total)
        test_success <- c(test_success, test_bin$rate)
        test_total <- c(test_total, test_bin$total)
      }
    }

    success_ranges <- data.frame(
      success = success,
      lower = lower,
      upper = upper,
      total = total,
      test_success = test_success,
      test_total = test_total
    )
    print(success_ranges)
  }

  invisible(success_ranges)
}
  
# Run the LD. scan; it prints its table of qualifying ranges once per split count.
mlld()
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
##   success lower upper total test_success test_total
## 1  0.1875 0.104 0.125    16    0.1176471         17
##   success lower upper total test_success test_total
## 1  0.1875 0.104 0.125    16    0.1176471         17
##   success lower upper total test_success test_total
## 1  0.1875 0.104 0.125    16    0.1176471         17
##     success  lower upper total test_success test_total
## 1 0.1875000 0.1040 0.125    16    0.1176471         17
## 2 0.1666667 0.2645 0.281     6    0.2500000          4
##     success  lower  upper total test_success test_total
## 1 0.1875000 0.1040 0.1250    16    0.1176471         17
## 2 0.1666667 0.2645 0.2810     6    0.2500000          4
## 3 0.1666667 0.2678 0.2832     6    0.3333333          3
##     success    lower     upper total test_success test_total
## 1 0.1875000 0.104000 0.1250000    16   0.11764706         17
## 2 0.1666667 0.264500 0.2810000     6   0.25000000          4
## 3 0.1666667 0.267800 0.2832000     6   0.33333333          3
## 4 0.1764706 0.111875 0.1263125    17   0.18181818         11
## 5 0.1607143 0.140750 0.1551875    56   0.08108108         37
##     success     lower     upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000    16   0.11764706         17
## 2 0.1666667 0.2645000 0.2810000     6   0.25000000          4
## 3 0.1666667 0.2678000 0.2832000     6   0.33333333          3
## 4 0.1764706 0.1118750 0.1263125    17   0.18181818         11
## 5 0.1607143 0.1407500 0.1551875    56   0.08108108         37
## 6 0.1666667 0.1101765 0.1237647    12   0.20000000         10
##     success     lower     upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000    16   0.11764706         17
## 2 0.1666667 0.2645000 0.2810000     6   0.25000000          4
## 3 0.1666667 0.2678000 0.2832000     6   0.33333333          3
## 4 0.1764706 0.1118750 0.1263125    17   0.18181818         11
## 5 0.1607143 0.1407500 0.1551875    56   0.08108108         37
## 6 0.1666667 0.1101765 0.1237647    12   0.20000000         10
## 7 0.1666667 0.1215000 0.1343333    18   0.12500000          8
##     success     lower     upper total test_success test_total
## 1 0.1875000 0.1040000 0.1250000    16   0.11764706         17
## 2 0.1666667 0.2645000 0.2810000     6   0.25000000          4
## 3 0.1666667 0.2678000 0.2832000     6   0.33333333          3
## 4 0.1764706 0.1118750 0.1263125    17   0.18181818         11
## 5 0.1607143 0.1407500 0.1551875    56   0.08108108         37
## 6 0.1666667 0.1101765 0.1237647    12   0.20000000         10
## 7 0.1666667 0.1215000 0.1343333    18   0.12500000          8
## 8 0.1666667 0.1194737 0.1316316    18   0.33333333          6
## 9 0.1777778 0.1437895 0.1559474    45   0.09375000         32
##      success     lower     upper total test_success test_total
## 1  0.1875000 0.1040000 0.1250000    16   0.11764706         17
## 2  0.1666667 0.2645000 0.2810000     6   0.25000000          4
## 3  0.1666667 0.2678000 0.2832000     6   0.33333333          3
## 4  0.1764706 0.1118750 0.1263125    17   0.18181818         11
## 5  0.1607143 0.1407500 0.1551875    56   0.08108108         37
## 6  0.1666667 0.1101765 0.1237647    12   0.20000000         10
## 7  0.1666667 0.1215000 0.1343333    18   0.12500000          8
## 8  0.1666667 0.1194737 0.1316316    18   0.33333333          6
## 9  0.1777778 0.1437895 0.1559474    45   0.09375000         32
## 10 0.1875000 0.1176500 0.1292000    16   0.25000000          8

Now I’m going to extend the search to two variables at once: line drive % and BB/K.

# Scan line-drive rate (LD.) and walk-to-strikeout ratio (BB.K_A) together for
# cells in which players reach the majors at an unusually high rate.
#
# For every split count in `splits`, the training range of each variable is
# stepped through in bins of equal width, and every LD. bin is paired with
# every BB.K_A bin. A cell is kept when it holds more than `min_total`
# training players and more than `min_rate` of them have
# PA_MLB > `pa_cutoff`; the same cell is then scored on the test data.
#
# Args:
#   train_data, test_data: data frames with numeric columns LD., BB.K_A and
#     PA_MLB. They default to the global `train` and `test` objects.
#   splits: split counts to try (the same count is used for both variables).
#   min_rate: training success rate a cell must exceed.
#   min_total: number of training players a cell must exceed.
#   pa_cutoff: a player is a success when PA_MLB is above this value.
#
# Prints the table of kept cells after each split count. The table is
# cumulative: cells kept for earlier split counts stay in it. The final table
# is also returned invisibly.
#
# The BB.K_A bin is bounded with the BB.K_A bin width on the training data as
# well. Earlier code used the LD. width there, so the training cell did not
# match the reported and test-scored cell; results recorded before that fix
# need to be regenerated.
ml2 <- function(train_data = train, test_data = test, splits = 2:20,
                min_rate = 0.3, min_total = 5, pa_cutoff = 499) {
  ld_min <- min(train_data$LD.)
  ld_max <- max(train_data$LD.)
  bbk_min <- min(train_data$BB.K_A)
  bbk_max <- max(train_data$BB.K_A)

  # Size and success rate of the cell [ld_lo, ld_hi) x [bbk_lo, bbk_hi) in
  # `df`. Rows whose PA_MLB is NA count towards the size but never as a
  # success. An empty cell has a NaN rate.
  cell_stats <- function(df, ld_lo, ld_hi, bbk_lo, bbk_hi) {
    in_cell <- df$LD. >= ld_lo & df$LD. < ld_hi &
      df$BB.K_A >= bbk_lo & df$BB.K_A < bbk_hi
    size <- sum(in_cell, na.rm = TRUE)
    hits <- sum(in_cell & df$PA_MLB > pa_cutoff, na.rm = TRUE)
    list(total = size, rate = hits / size)
  }

  # Result columns start as NULL on purpose: data.frame() of NULL columns
  # prints as "data frame with 0 columns and 0 rows" while nothing is kept.
  success <- lower <- upper <- lower2 <- upper2 <- c()
  total <- test_success <- test_total <- c()
  success_ranges <- data.frame()

  for (n_bins in splits) {
    ld_width <- (ld_max - ld_min) / n_bins
    bbk_width <- (bbk_max - bbk_min) / n_bins
    ld_starts <- seq(from = ld_min, to = ld_max, by = ld_width)
    bbk_starts <- seq(from = bbk_min, to = bbk_max, by = bbk_width)

    for (ld_lo in ld_starts) {
      ld_hi <- ld_lo + ld_width

      for (bbk_lo in bbk_starts) {
        bbk_hi <- bbk_lo + bbk_width
        train_cell <- cell_stats(train_data, ld_lo, ld_hi, bbk_lo, bbk_hi)

        # The size test runs first, so the NaN rate of an empty cell is never
        # compared.
        if (train_cell$total > min_total && train_cell$rate > min_rate) {
          test_cell <- cell_stats(test_data, ld_lo, ld_hi, bbk_lo, bbk_hi)

          success <- c(success, train_cell$rate)
          lower <- c(lower, ld_lo)
          upper <- c(upper, ld_hi)
          lower2 <- c(lower2, bbk_lo)
          upper2 <- c(upper2, bbk_hi)
          total <- c(total, train_cell$total)
          test_success <- c(test_success, test_cell$rate)
          test_total <- c(test_total, test_cell$total)
        }
      }
    }

    success_ranges <- data.frame(
      success = success,
      lower = lower,
      upper = upper,
      lower2 = lower2,
      upper2 = upper2,
      total = total,
      test_success = test_success,
      test_total = test_total
    )
    print(success_ranges)
  }

  invisible(success_ranges)
}

# Run the LD. x BB.K_A scan; it prints its table of qualifying cells once per split count.
ml2()
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
## data frame with 0 columns and 0 rows
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333  0.16 0.3955556 0.5533333     6            0         20
##     success     lower  upper    lower2    upper2 total test_success test_total
## 1 0.3333333 0.1343333 0.1600 0.3955556 0.5533333     6   0.00000000         20
## 2 0.3333333 0.1754000 0.1908 0.2693333 0.3640000     6   0.09090909         11
##     success     lower     upper    lower2    upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333     6   0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000     6   0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500     8   0.00000000
##   test_total
## 1         20
## 2         11
## 3         16
##     success     lower     upper    lower2    upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333     6   0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000     6   0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500     8   0.00000000
##   test_total
## 1         20
## 2         11
## 3         16
##     success     lower     upper    lower2    upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333     6   0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000     6   0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500     8   0.00000000
##   test_total
## 1         20
## 2         11
## 3         16
##     success     lower     upper    lower2    upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333     6   0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000     6   0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500     8   0.00000000
##   test_total
## 1         20
## 2         11
## 3         16
##     success     lower     upper    lower2    upper2 total test_success
## 1 0.3333333 0.1343333 0.1600000 0.3955556 0.5533333     6   0.00000000
## 2 0.3333333 0.1754000 0.1908000 0.2693333 0.3640000     6   0.09090909
## 3 0.3750000 0.1696250 0.1840625 0.2575000 0.3462500     8   0.00000000
##   test_total
## 1         20
## 2         11
## 3         16

Now the goal is to iterate through 3 statistics, each from a different category. This function takes a couple of minutes to run.

# Global starting values for the three-statistic scan: the split counts to
# try, and one empty (NULL) vector per result column.
splitind <- 2:10
uppercol <- lowercol <- totalcol <- c()
uppercol2 <- lowercol2 <- totalcol2 <- c()
uppercol3 <- lowercol3 <- totalcol3 <- c()
srcol <- test_succcol <- testtotalcol <- comb_success <- c()
stat1 <- stat2 <- stat3 <- c()
# Scan three statistics at once for cells with a high MLB success rate.
#
# For each split count, the training range of every variable is stepped
# through in bins of equal width, and every combination of one bin per
# variable (a "cell") is scored. A cell is kept when it holds more than 10
# training players, more than 20% of them have PA_MLB > 499, and the test
# success rate for the same cell is also above 20%.
#
# Args:
#   var1, var2, var3: names (strings) of the numeric columns to scan.
#   train_data, test_data: data frames holding those columns and PA_MLB.
#     They default to the global `train` and `test` objects.
#   splits: split counts to try (the same count is used for all three
#     variables). Defaults to the global `splitind`.
#
# Returns a data frame with one row per kept cell: training and test success
# rates and sizes, the size-weighted overall rate, and the name and
# [lower, upper) bounds of each variable. It has no columns when nothing
# qualifies.
#
# All result storage is local, so the outcome does not depend on the contents
# of any global accumulator vectors.
myfun <- function(var1, var2, var3, train_data = train, test_data = test,
                  splits = splitind) {
  lo1 <- min(train_data[[var1]], na.rm = TRUE)
  hi1 <- max(train_data[[var1]], na.rm = TRUE)
  lo2 <- min(train_data[[var2]], na.rm = TRUE)
  hi2 <- max(train_data[[var2]], na.rm = TRUE)
  lo3 <- min(train_data[[var3]], na.rm = TRUE)
  hi3 <- max(train_data[[var3]], na.rm = TRUE)

  # TRUE where `values` falls in [start, start + width).
  in_bin <- function(values, start, width) {
    values >= start & values < (start + width)
  }

  kept <- list()

  for (n_bins in splits) {
    width1 <- (hi1 - lo1) / n_bins
    width2 <- (hi2 - lo2) / n_bins
    width3 <- (hi3 - lo3) / n_bins
    starts1 <- seq(from = lo1, to = hi1, by = width1)
    starts2 <- seq(from = lo2, to = hi2, by = width2)
    starts3 <- seq(from = lo3, to = hi3, by = width3)

    for (s1 in starts1) {
      # Masks are built up one variable at a time so the outer conditions are
      # not re-evaluated inside the inner loops.
      train1 <- in_bin(train_data[[var1]], s1, width1)
      test1 <- in_bin(test_data[[var1]], s1, width1)

      for (s2 in starts2) {
        train12 <- train1 & in_bin(train_data[[var2]], s2, width2)
        test12 <- test1 & in_bin(test_data[[var2]], s2, width2)

        for (s3 in starts3) {
          train_cell <- train12 & in_bin(train_data[[var3]], s3, width3)
          total <- sum(train_cell, na.rm = TRUE)
          if (total <= 10) next

          # Rows with NA PA_MLB are in the cell but are never a success.
          success_rate <-
            sum(train_cell & train_data$PA_MLB > 499, na.rm = TRUE) / total
          if (success_rate <= 0.2) next

          test_cell <- test12 & in_bin(test_data[[var3]], s3, width3)
          testtotal <- sum(test_cell, na.rm = TRUE)
          # A cell with no test players has no test rate and is rejected.
          if (testtotal == 0) next

          test_success <-
            sum(test_cell & test_data$PA_MLB > 499, na.rm = TRUE) / testtotal
          if (test_success <= 0.2) next

          kept[[length(kept) + 1L]] <- list(
            success = success_rate,
            total = total,
            test_success = test_success,
            test_total = testtotal,
            overall_success =
              (total * success_rate + testtotal * test_success) /
                (total + testtotal),
            lower = s1, upper = s1 + width1,
            lower2 = s2, upper2 = s2 + width2,
            lower3 = s3, upper3 = s3 + width3
          )
        }
      }
    }
  }

  # One column per field; NULL (and so no column at all) when nothing was kept.
  column <- function(field) unlist(lapply(kept, `[[`, field))
  metric <- function(name) if (length(kept) > 0) rep(name, length(kept)) else NULL

  data.frame(
    success = column("success"),
    total = column("total"),
    test_success = column("test_success"),
    test_total = column("test_total"),
    overall_success = column("overall_success"),
    metric1 = metric(var1),
    lower = column("lower"),
    upper = column("upper"),
    metric2 = metric(var2),
    lower2 = column("lower2"),
    upper2 = column("upper2"),
    metric3 = metric(var3),
    lower3 = column("lower3"),
    upper3 = column("upper3")
  )
}

# Statistic groups. The scan below takes one statistic each from cat1, cat4
# and cat3; cat2 and all_sr are defined but not used by it.
cat1 <- c("LD.", "FB.", "GB.", "GB.FB", "HR.FB")
cat2 <- c("Pull.", "Cent.", "Oppo.")
cat3 <- c("BB._A", "K._A", "BB.K_A")
cat4 <- c("AVG.y", "OBP_A", "SLG_A", "OPS_A", "ISO_A")

all_sr <- data.frame()

# Run myfun() on every (cat1, cat4, cat3) triple. The loops iterate over the
# vectors themselves, so adding or removing a statistic needs no index change,
# and the per-triple results are collected in a pre-sized list and bound once
# instead of growing a data frame inside the loop.
sr_parts <- vector("list", length(cat1) * length(cat4) * length(cat3))
part_index <- 0L
for (b in cat1) {
  for (d in cat4) {
    for (f in cat3) {
      part_index <- part_index + 1L
      sr_parts[[part_index]] <- myfun(b, d, f)
    }
  }
}
# The leading empty data frame keeps sr1 a data frame even if no triple ran.
sr1 <- do.call(rbind, c(list(data.frame()), sr_parts))

The function below compares groups of different sizes across the variables: each variable gets its own number of splits. Along with this, I noticed that the success rate among 19-year-olds is significantly higher than for the other ages, which makes intuitive sense because they are younger and have more time to develop. I decided to include only 19-year-olds to try to increase the success rate.

# Draw a fresh 498-row training sample, then keep only the 19-year-olds
# (fin_age == 19) on both sides of the split.
# NOTE(review): the seed call is commented out, so this draw depends on
# whatever RNG state precedes it - confirm that is intended.
# set.seed(4036)
train_index <- sample(nrow(data), 498)
train <- filter(data[train_index, ], fin_age == 19)
test <- filter(data[-train_index, ], fin_age == 19)

  # Scan three statistics for cells with a high MLB success rate, trying every
  # combination of split counts (2 to 5) across the three variables.
  #
  # Args:
  #   var1, var2, var3: names (strings) of numeric columns in the global
  #     `train` and `test` data frames.
  #
  # Returns a data frame with one row per kept cell. A cell is kept when it has
  # more than 5 training rows and more than 5 test rows, and more than half of
  # each have PA_MLB > 499. Columns: training/test success rates and sizes, the
  # size-weighted overall rate, the train-minus-test rate difference, and the
  # name and [lower, upper) bounds of each variable.
  myfun <- function(var1, var2, var3) {
  # Split counts tried independently for each variable.
  splitind <- 2:5
  # Result columns, grown one element per kept cell.
  # (totalcol2 and totalcol3 are initialised but never written or read.)
  uppercol <- c()
  lowercol <- c()
  totalcol <- c()
  uppercol2 <- c()
  lowercol2 <- c()
  totalcol2 <- c()
  uppercol3 <- c()
  lowercol3 <- c()
  totalcol3 <- c()
  srcol <- c()
  test_succcol <- c()
  testtotalcol <- c()
  comb_success <- c()
  stat1 <- c()
  stat2 <- c()
  stat3 <- c()
  differ <- c()

  # Initial min and max of each variable over the training rows. The bounds
  # for var2 and var3 are overwritten further down.
  min <- min(train[[var1]], na.rm = TRUE)
  max <- max(train[[var1]], na.rm = TRUE)
  min2 <- min(train[[var2]], na.rm = TRUE)
  max2 <- max(train[[var2]], na.rm = TRUE)
  min3 <- min(train[[var3]], na.rm = TRUE)
  max3 <- max(train[[var3]], na.rm = TRUE)

  # Outer three loops: number of splits for var1 (i), var2 (i2), var3 (i3).
  for (i in splitind) {
    group <- (max - min) / i
    range <- seq(from = min, to = max, by = group)
    
    for (i2 in splitind) {
      # Bin width for var2 from the current min2/max2; skip unusable widths.
      group2 <- (max2 - min2) / i2
      if (!is.finite(group2) || group2 <= 0) next
      range2 <- seq(from = min2, to = max2, by = group2)
      
      for (i3 in splitind) {
        # Bin width for var3 from the current min3/max3; skip unusable widths.
        group3 <- (max3 - min3) / i3
        if (!is.finite(group3) || group3 <= 0) next
        range3 <- seq(from = min3, to = max3, by = group3)
        
        # Inner three loops: lower bound of the bin for var1, var2, var3.
        for (y in range) {
          for (q in range2) {
            for (s in range3) {
              # Training rows that fall inside the current cell (NA where a
              # value is missing and no other condition is FALSE).
              valid_rows <- train[[var1]] >= y & train[[var1]] < (y + group) &
                            train[[var2]] >= q & train[[var2]] < (q + group2) &
                            train[[var3]] >= s & train[[var3]] < (s + group3)

              # Skip if no valid rows
              if (sum(valid_rows, na.rm = TRUE) == 0) next
              
              # Min and max of var2 and var3 among the rows of this cell.
              temp_min2 <- min(train[[var2]][valid_rows], na.rm = TRUE)
              temp_max2 <- max(train[[var2]][valid_rows], na.rm = TRUE)
              temp_min3 <- min(train[[var3]][valid_rows], na.rm = TRUE)
              temp_max3 <- max(train[[var3]][valid_rows], na.rm = TRUE)

              # Skip cells in which var2 or var3 takes a single value (or has
              # no finite value); such cells are never scored.
              if (!is.finite(temp_min2) || !is.finite(temp_max2) || temp_min2 == temp_max2 ||
                  !is.finite(temp_min3) || !is.finite(temp_max3) || temp_min3 == temp_max3) next

              
              # NOTE(review): this overwrites the var2/var3 bounds with those
              # of the current cell. range2/range3 for the running i2/i3
              # iteration are already built, so the new bounds only take
              # effect when group2/range2 and group3/range3 are recomputed on
              # later i2/i3 iterations. The scanned ranges therefore depend on
              # which cell was visited last - confirm this is intended.
              min2 <- temp_min2
              max2 <- temp_max2
              min3 <- temp_min3
              max3 <- temp_max3

              # Training rows in the cell with PA_MLB > 499 (rows with NA
              # PA_MLB are not counted as successes).
              successes <- sum(
                train[[var1]] >= y & train[[var1]] < (y + group) &
                  train[[var2]] >= q & train[[var2]] < (q + group2) &
                  train[[var3]] >= s & train[[var3]] < (s + group3) &
                  train$PA_MLB > 499,
                na.rm = TRUE
              )
              
              # Training rows in the cell.
              total <- sum(
                train[[var1]] >= y & train[[var1]] < (y + group) &
                  train[[var2]] >= q & train[[var2]] < (q + group2) &
                  train[[var3]] >= s & train[[var3]] < (s + group3),
                na.rm = TRUE
              )

              # Same two counts on the test rows, using the same cell bounds.
              test_successes <- sum(
                test[[var1]] >= y & test[[var1]] < (y + group) &
                  test[[var2]] >= q & test[[var2]] < (q + group2) &
                  test[[var3]] >= s & test[[var3]] < (s + group3) &
                  test$PA_MLB > 499,
                na.rm = TRUE
              )
              
              testtotal <- sum(
                test[[var1]] >= y & test[[var1]] < (y + group) &
                  test[[var2]] >= q & test[[var2]] < (q + group2) &
                  test[[var3]] >= s & test[[var3]] < (s + group3),
                na.rm = TRUE
              )
              
              # NA when the cell holds no test rows, which rejects it below.
              test_success <- ifelse(testtotal > 0, test_successes / testtotal, NA)
              
              
              if (total > 0) {
                success_rate <- successes / total
                
                # Keep the cell only if both rates exceed 50% and both
                # samples have more than 5 rows.
                if (!is.na(success_rate) && success_rate > 0.5 && total > 5 &&
                    !is.na(test_success) && test_success > 0.5 && testtotal > 5) {
                  
                  uppercol <- c(uppercol, y + group)
                  lowercol <- c(lowercol, y)
                  uppercol2 <- c(uppercol2, q + group2)
                  lowercol2 <- c(lowercol2, q)
                  uppercol3 <- c(uppercol3, s + group3)
                  lowercol3 <- c(lowercol3, s)
                  totalcol <- c(totalcol, total)
                  srcol <- c(srcol, success_rate)
                  test_succcol <- c(test_succcol, test_success)
                  testtotalcol <- c(testtotalcol, testtotal)
                  # Success rate over train and test rows together.
                  comb_success <- c(comb_success, (total * success_rate + testtotal * test_success) / (total + testtotal))
                  stat1 <- c(stat1, var1)
                  stat2 <- c(stat2, var2)
                  stat3 <- c(stat3, var3)
                  # Positive when the training rate is above the test rate.
                  differ <- c(differ, success_rate - test_success)
                }
              }
            }
          }
        }
      }
    }
  }

  # Constructing the final data frame (no columns when nothing was kept,
  # because every input vector is then still NULL).
  success_ranges <- data.frame(
    success = srcol,
    total = totalcol,
    test_success = test_succcol,
    test_total = testtotalcol,
    overall_success = comb_success,
    success_diff = differ,
    metric1 = stat1,
    lower = lowercol,
    upper = uppercol,
    metric2 = stat2,
    lower2 = lowercol2,
    upper2 = uppercol2,
    metric3 = stat3,
    lower3 = lowercol3,
    upper3 = uppercol3
  )
  
  return(success_ranges)
}


# Every statistic considered for the 19-year-old scan.
cat1 <- c("LD.", "FB.", "GB.", "GB.FB", "HR.FB", "Pull.", "Cent.", "Oppo.", "wSB_A", "BB._A", "K._A", "BB.K_A", "SwStr.", "AVG.y", "OBP_A", "SLG_A", "OPS_A", "wRC._A", "wOBA_A")

# All unordered triples of statistics, in the order in which they appear in
# cat1 (combn() enumerates index combinations lexicographically, which is the
# order three nested index loops with a < c < e would produce).
stat_triples <- combn(cat1, 3, simplify = FALSE)

# Score every triple, then bind the per-triple tables once instead of growing
# a data frame inside a loop. The leading empty data frame keeps the result a
# data frame even when no triple yields a row.
sr19adv_parts <- lapply(
  stat_triples,
  function(stats) myfun(stats[1], stats[2], stats[3])
)
sr19adv <- do.call(rbind, c(list(data.frame()), sr19adv_parts))


print(sr19adv)
##     success total test_success test_total overall_success success_diff metric1
## 1 0.6250000     8    0.6666667          6       0.6428571  -0.04166667     LD.
## 2 0.5714286     7    0.5714286          7       0.5714286   0.00000000     LD.
## 3 0.5714286     7    0.5714286          7       0.5714286   0.00000000     LD.
## 4 0.5833333    12    0.6666667          6       0.6111111  -0.08333333     FB.
## 5 0.5384615    13    0.5714286          7       0.5500000  -0.03296703     FB.
## 6 0.5555556     9    0.5714286          7       0.5625000  -0.01587302     GB.
##   lower upper metric2  lower2 upper2 metric3  lower3  upper3
## 1 0.188 0.282   GB.FB  0.6300  1.390  wOBA_A   0.337   0.439
## 2 0.188 0.282   wSB_A -0.0500  3.300   OPS_A   0.740   0.981
## 3 0.188 0.282   wSB_A -0.0500  3.300  wRC._A 113.000 181.000
## 4 0.374 0.504     GB.  0.3170  0.439   OBP_A   0.318   0.403
## 5 0.374 0.504   GB.FB  0.6300  1.390   OBP_A   0.318   0.403
## 6 0.317 0.439   BB._A  0.0825  0.145  SwStr.   0.042   0.134

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.