# Set the CRAN mirror
options(repos = "https://cran.r-project.org")

# Install necessary packages
install.packages("rpart")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'rpart' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'rpart'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\C00303097\AppData\Local\R\win-library\4.3\00LOCK\rpart\libs\x64\rpart.dll
to C:\Users\C00303097\AppData\Local\R\win-library\4.3\rpart\libs\x64\rpart.dll:
Permission denied
Warning: restored 'rpart'
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
install.packages("rpart.plot")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'rpart.plot' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
install.packages("tidyverse")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'tidyverse' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
install.packages("rattle")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'rattle' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
install.packages("TTR")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'TTR' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
install.packages("readr")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'readr' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'readr'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\C00303097\AppData\Local\R\win-library\4.3\00LOCK\readr\libs\x64\readr.dll
to C:\Users\C00303097\AppData\Local\R\win-library\4.3\readr\libs\x64\readr.dll:
Permission denied
Warning: restored 'readr'
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
# Load required libraries
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.3.3
Warning: package 'ggplot2' was built under R version 4.3.3
Warning: package 'tidyr' was built under R version 4.3.3
Warning: package 'readr' was built under R version 4.3.3
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rpart)
Warning: package 'rpart' was built under R version 4.3.3
library(rattle)
Warning: package 'rattle' was built under R version 4.3.3
Loading required package: bitops
Rattle: A free graphical interface for data science with R.
Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
Type 'rattle()' to shake, rattle, and roll your data.
library(TTR)
Warning: package 'TTR' was built under R version 4.3.3
library(readr)
library(rpart.plot)
Warning: package 'rpart.plot' was built under R version 4.3.3
pl_training <- read_csv("pl_training.csv")
Rows: 600 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): team, wdl_ft, wdl_ht, home_or_away
dbl (8): ftg_diff, htg_diff, s_diff, st_diff, f_diff, c_diff, y_diff, r_diff
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pl_testing <- read_csv("pl_testing.csv")
Rows: 160 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): team, wdl_ft, wdl_ht, home_or_away
dbl (8): ftg_diff, htg_diff, s_diff, st_diff, f_diff, c_diff, y_diff, r_diff
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
2a
# Load required libraries
library(rpart)
library(rpart.plot)

# Step 1: Load the data (Replace 'pl_training.csv' with your actual file name)
data <- read.csv("pl_training.csv")

# Step 2: Data Preprocessing (if needed)

# Step 3: Train the Classification Tree Model
# Assuming 'home_or_away' is the target variable to classify a team as either home or away
# and the other columns are features

# Create and train the classification tree model
model <- rpart(home_or_away ~ ., data = data, method = "class")

# Visualize the trained model with adjusted plot parameters
rpart.plot(model, yesno = 2, type = 2, extra = 101, cex = 0.6, tweak = 0.9, fallen.leaves = TRUE)
Warning: cex and tweak both specified, applying both
2bi
# Load required libraries
library(rpart)
library(rpart.plot)

# Step 1: Load the data (Replace 'pl_training.csv' with your actual file name)
data <- read.csv("pl_training.csv")

# Step 2: Train the Classification Tree Model
# Assuming 'home_or_away' is the target variable and the other columns are features

# Create and train the classification tree model
model <- rpart(home_or_away ~ ., data = data, method = "class")

# Step 3: Visualize the Model
# Visualize the trained model with adjusted plot parameters
rpart.plot(model, yesno = 2, type = 2, extra = 101, cex = 0.6, tweak = 0.9, fallen.leaves = TRUE)
Warning: cex and tweak both specified, applying both
# Step 4: Extract Rules and Assess Node Purity

# Function to extract rules from an rpart object
extract_rules <- function(tree, prefix = "") {
  if (is.null(tree)) return(NULL)
  # If the node is terminal, return the prediction
  if (is.null(tree$frame$var)) {
    rule <- paste(prefix, "=>", ifelse(tree$frame$yval == "home", "home", "away"))
    return(rule)
  }
  # Extract rules for the left and right children
  left_rule  <- extract_rules(tree$left,  paste(prefix, tree$frame$var, "<=", round(tree$frame$split, 2)))
  right_rule <- extract_rules(tree$right, paste(prefix, tree$frame$var, ">",  round(tree$frame$split, 2)))
  return(list(left_rule, right_rule))
}

# Extract rules from the model
rules <- extract_rules(model)

# Print rules
cat("Rules for predicting if a team is the home team:\n")
Rules for predicting if a team is the home team:
for (i in 1:length(rules)) {
  if (!is.null(rules[[i]])) {
    cat("i. ", rules[[i]], "\n")
  }
}
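Because an rpart fit does not store its child nodes as `$left`/`$right` components, the recursive helper above returns no rules, which is why nothing is printed under the header. A minimal alternative sketch, assuming the fitted `model` object from the chunk above, reads the leaf rules directly with rpart.plot:

# Sketch: list the decision rule for each leaf of the fitted tree
# (assumes `model` is the rpart object trained above)
library(rpart.plot)
rules_tbl <- rpart.rules(model, cover = TRUE)  # one row per leaf, with predicted class and coverage
print(rules_tbl)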
2bii
# Function to extract rules from an rpart object
extract_rules <- function(tree, prefix = "") {
  if (is.null(tree)) return(NULL)
  # If the node is terminal, return the prediction
  if (length(tree$frame$var) == 1 && tree$frame$var == "<leaf>") {
    rule <- paste(prefix, "=>", ifelse(tree$frame$yval == "away", "Away team", "Not Away team"))
    return(rule)
  }
  # Extract rules for the left and right children
  left_rule  <- extract_rules(tree$left,  paste(prefix, tree$frame$var, "<=", round(tree$frame$split, 2)))
  right_rule <- extract_rules(tree$right, paste(prefix, tree$frame$var, ">",  round(tree$frame$split, 2)))
  return(list(left_rule, right_rule))
}

# Extract rules from the model
rules <- extract_rules(model)

# Print rules
cat("Rules for predicting if a team is the away team:\n")
Rules for predicting if a team is the away team:
for (i in 1:length(rules)) {
  if (!is.null(rules[[i]])) {
    cat("i. ", rules[[i]], "\n")
  }
}
2biii
# Extract variable importance values from the model
var_importance <- model$variable.importance

# Sort the variable importance values in descending order
sorted_importance <- sort(var_importance, decreasing = TRUE)

# Print the variable importance values
cat("Variable Importance for Predicting Home or Away Team:\n")
Variable Importance for Predicting Home or Away Team:
for (i in 1:length(sorted_importance)) {
  cat(i, ". ", names(sorted_importance)[i], ": ", sorted_importance[i], "\n")
}
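A quick plot can make the importance ranking easier to read; the sketch below assumes the `sorted_importance` vector computed above:

# Sketch: visualise the sorted importance scores (assumes `sorted_importance` from above)
barplot(sorted_importance,
        horiz = TRUE, las = 1, cex.names = 0.7,
        main = "Variable importance (rpart)",
        xlab = "Importance")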
library(caret)
Warning: package 'caret' was built under R version 4.3.3
Loading required package: lattice
Attaching package: 'caret'
The following object is masked from 'package:purrr':
lift
# Step 1: Load the data (Replace 'pl_training.csv' with your actual file name)
data <- read.csv("pl_training.csv")

# Step 2: Split the data into training and testing sets
set.seed(123)  # for reproducibility
train_index <- createDataPartition(data$home_or_away, p = 0.7, list = FALSE)
train_data <- data[train_index, ]
test_data  <- data[-train_index, ]

# Step 3: Train the Classification Tree Model
# Create and train the classification tree model
model <- rpart(home_or_away ~ ., data = train_data, method = "class")

# Step 4: Assess the accuracy on training data
train_pred <- predict(model, train_data, type = "class")
train_accuracy <- mean(train_pred == train_data$home_or_away)
cat("Accuracy on training data:", train_accuracy, "\n")
Accuracy on training data: 0.7505938
# Step 5: Assess the accuracy on testing data
test_pred <- predict(model, test_data, type = "class")
test_accuracy <- mean(test_pred == test_data$home_or_away)
cat("Accuracy on testing data:", test_accuracy, "\n")
Accuracy on testing data: 0.5195531
# These results show that the classification tree is noticeably more accurate on the training data (about 75%)
# than on the held-out testing data (about 52%). Such a drop on unseen data suggests the model is overfitting
# the training dataset.
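One common response to this kind of overfitting is cost-complexity pruning. The sketch below is a minimal illustration, assuming the `model`, `train_data` and `test_data` objects created in the chunk above:

# Sketch: prune at the complexity parameter with the lowest cross-validated error
# (assumes `model`, `train_data` and `test_data` from the chunk above)
best_cp <- model$cptable[which.min(model$cptable[, "xerror"]), "CP"]
pruned_model <- prune(model, cp = best_cp)

# Re-check accuracy of the pruned tree on the held-out data
pruned_pred <- predict(pruned_model, test_data, type = "class")
cat("Pruned tree accuracy on testing data:", mean(pruned_pred == test_data$home_or_away), "\n")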
3ai
# Load the dplyr package
library(dplyr)

# Now, you can use the %>% operator
modified_data <- data %>%
  mutate(
    rpts_diff = ftg_diff,        # Example: assuming 'ftg_diff' represents the goal difference
    dist_diff = s_diff - c_diff  # Example: assuming 's_diff' is the shots difference and 'c_diff' the corners difference
  ) %>%
  select(
    team,          # Include the team variable if needed
    home_or_away,  # Ensure the target variable is kept
    rpts_diff,     # Goal difference variable
    dist_diff      # Shots difference minus corners difference
    # Add other relevant variables as needed
  )

# View the modified dataset
head(modified_data)
team home_or_away rpts_diff dist_diff
1 Watford Home 1 9
2 Southampton Home 0 -5
3 Crystal Palace Home 1 0
4 Bournemouth Home 1 -3
5 West Ham Home -3 -9
6 Leicester Home 0 1
# Convert 'home_or_away' to a factor with appropriate levels
# (note: the levels must match the case used in the data, e.g. "Home"/"Away" if the values are capitalised)
data$home_or_away <- factor(data$home_or_away, levels = c("home", "away"))

# Check the levels of the factor variable
levels(data$home_or_away)
[1] "home" "away"
3aii
Clearly state the regression equation.
$$y = \ln\left(\frac{\pi}{1-\pi}\right) = b_0 + b_1 X_1 + b_2 X_2 + \cdots + b_K X_K$$
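To turn this general form into a fitted equation, the coefficients can be read off a logistic regression fitted to the two derived predictors. A minimal sketch, assuming the `modified_data` frame from 3ai (the `home_flag` helper name is illustrative, with the home team coded as 1):

# Sketch: fit the logistic model and report the coefficients b0, b1, b2
# (assumes `modified_data` from 3ai; home team coded as 1, away as 0)
modified_data$home_flag <- ifelse(modified_data$home_or_away == "Home", 1, 0)
logit_fit <- glm(home_flag ~ rpts_diff + dist_diff,
                 data = modified_data, family = binomial(link = "logit"))
coef(logit_fit)  # intercept b0 and slopes b1 (rpts_diff), b2 (dist_diff)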
3aiii
# Extract variable importance values from the model
var_importance <- model$variable.importance

# Sort the variable importance values in descending order
sorted_importance <- sort(var_importance, decreasing = TRUE)

# Print the variable importance values
cat("Variable Importance for Predicting Home or Away Team:\n")
Variable Importance for Predicting Home or Away Team:
for (i in 1:length(sorted_importance)) {
  cat(names(sorted_importance)[i], ": ", sorted_importance[i], "\n")
}
summary(model)
Call:
rpart(formula = home_or_away ~ ., data = train_data, method = "class")
n= 421
CP nsplit rel error xerror xstd
1 0.22966507 0 1.0000000 1.1722488 0.04842304
2 0.04545455 1 0.7703349 0.8612440 0.04856881
3 0.02153110 3 0.6794258 0.9330144 0.04895355
4 0.01913876 5 0.6363636 0.8947368 0.04877996
5 0.01435407 7 0.5980861 0.8755981 0.04866612
6 0.01196172 12 0.5263158 0.8755981 0.04866612
7 0.01000000 14 0.5023923 0.8947368 0.04877996
Variable importance
team s_diff f_diff ftg_diff st_diff c_diff y_diff htg_diff
24 21 13 10 10 6 5 5
wdl_ht wdl_ft
3 3
Node number 1: 421 observations, complexity param=0.2296651
predicted class=Away expected loss=0.4964371 P(node) =1
class counts: 212 209
probabilities: 0.504 0.496
left son=2 (221 obs) right son=3 (200 obs)
Primary splits:
s_diff < 0.5 to the left, improve=11.633930, (0 missing)
ftg_diff < 0.5 to the left, improve=10.229100, (0 missing)
wdl_ft splits as LLR, improve=10.229100, (0 missing)
st_diff < -0.5 to the left, improve= 8.647258, (0 missing)
c_diff < -3.5 to the left, improve= 5.798350, (0 missing)
Surrogate splits:
st_diff < 0.5 to the left, agree=0.812, adj=0.605, (0 split)
c_diff < -0.5 to the left, agree=0.705, adj=0.380, (0 split)
team splits as RLLRLRLLRRRLLLLLRLLL, agree=0.703, adj=0.375, (0 split)
ftg_diff < 0.5 to the left, agree=0.672, adj=0.310, (0 split)
wdl_ft splits as LLR, agree=0.672, adj=0.310, (0 split)
Node number 2: 221 observations, complexity param=0.04545455
predicted class=Away expected loss=0.3846154 P(node) =0.5249406
class counts: 136 85
probabilities: 0.615 0.385
left son=4 (171 obs) right son=5 (50 obs)
Primary splits:
ftg_diff < 0.5 to the left, improve=4.933747, (0 missing)
wdl_ft splits as LLR, improve=4.933747, (0 missing)
team splits as RLRRLRLRLRLLLLLLLRRL, improve=4.101378, (0 missing)
s_diff < -12.5 to the left, improve=3.281703, (0 missing)
htg_diff < 0.5 to the left, improve=2.683966, (0 missing)
Surrogate splits:
htg_diff < 0.5 to the left, agree=0.869, adj=0.42, (0 split)
wdl_ht splits as LLR, agree=0.869, adj=0.42, (0 split)
st_diff < 0.5 to the left, agree=0.801, adj=0.12, (0 split)
team splits as LLLRLLLLLLLLLLLLLLLL, agree=0.783, adj=0.04, (0 split)
y_diff < -4.5 to the right, agree=0.778, adj=0.02, (0 split)
Node number 3: 200 observations, complexity param=0.0215311
predicted class=Home expected loss=0.38 P(node) =0.4750594
class counts: 76 124
probabilities: 0.380 0.620
left son=6 (140 obs) right son=7 (60 obs)
Primary splits:
team splits as LLRLRLRRLLLRLLRRLLRR, improve=3.687619, (0 missing)
f_diff < 0.5 to the right, improve=3.527685, (0 missing)
ftg_diff < -1.5 to the left, improve=3.240000, (0 missing)
c_diff < 5.5 to the left, improve=2.157821, (0 missing)
s_diff < 4.5 to the left, improve=1.800440, (0 missing)
Surrogate splits:
s_diff < 1.5 to the right, agree=0.745, adj=0.150, (0 split)
st_diff < -2.5 to the right, agree=0.710, adj=0.033, (0 split)
c_diff < -9.5 to the right, agree=0.710, adj=0.033, (0 split)
f_diff < 8 to the left, agree=0.705, adj=0.017, (0 split)
Node number 4: 171 observations, complexity param=0.01435407
predicted class=Away expected loss=0.3274854 P(node) =0.4061758
class counts: 115 56
probabilities: 0.673 0.327
left son=8 (37 obs) right son=9 (134 obs)
Primary splits:
s_diff < -12.5 to the left, improve=3.493884, (0 missing)
team splits as RLLRRLLLLRLLLLLLLRRL, improve=3.107897, (0 missing)
c_diff < -3.5 to the left, improve=2.635426, (0 missing)
f_diff < 0.5 to the right, improve=1.411128, (0 missing)
y_diff < -0.5 to the right, improve=1.246140, (0 missing)
Surrogate splits:
st_diff < -7.5 to the left, agree=0.842, adj=0.270, (0 split)
c_diff < -9.5 to the left, agree=0.842, adj=0.270, (0 split)
ftg_diff < -4.5 to the left, agree=0.789, adj=0.027, (0 split)
Node number 5: 50 observations, complexity param=0.04545455
predicted class=Home expected loss=0.42 P(node) =0.1187648
class counts: 21 29
probabilities: 0.420 0.580
left son=10 (25 obs) right son=11 (25 obs)
Primary splits:
team splits as LLRLLRRR--RRLLLLRRRL, improve=9.000000, (0 missing)
wdl_ht splits as LRR, improve=2.605322, (0 missing)
s_diff < -2.5 to the right, improve=1.458039, (0 missing)
htg_diff < 0.5 to the left, improve=1.388571, (0 missing)
c_diff < -5.5 to the right, improve=0.858645, (0 missing)
Surrogate splits:
f_diff < 2.5 to the left, agree=0.68, adj=0.36, (0 split)
s_diff < -4.5 to the right, agree=0.64, adj=0.28, (0 split)
y_diff < -1.5 to the left, agree=0.62, adj=0.24, (0 split)
htg_diff < 0.5 to the left, agree=0.60, adj=0.20, (0 split)
wdl_ht splits as LRR, agree=0.60, adj=0.20, (0 split)
Node number 6: 140 observations, complexity param=0.0215311
predicted class=Home expected loss=0.4428571 P(node) =0.3325416
class counts: 62 78
probabilities: 0.443 0.557
left son=12 (57 obs) right son=13 (83 obs)
Primary splits:
f_diff < 0.5 to the right, improve=3.561301, (0 missing)
ftg_diff < -1.5 to the left, improve=3.053007, (0 missing)
s_diff < 4.5 to the left, improve=2.786395, (0 missing)
c_diff < 5.5 to the left, improve=2.166733, (0 missing)
wdl_ft splits as RLR, improve=1.921921, (0 missing)
Surrogate splits:
team splits as RR-R-L--RRL-LR--RL--, agree=0.679, adj=0.211, (0 split)
c_diff < -0.5 to the left, agree=0.664, adj=0.175, (0 split)
y_diff < 1.5 to the right, agree=0.643, adj=0.123, (0 split)
s_diff < 2.5 to the left, agree=0.629, adj=0.088, (0 split)
ftg_diff < -1.5 to the left, agree=0.600, adj=0.018, (0 split)
Node number 7: 60 observations
predicted class=Home expected loss=0.2333333 P(node) =0.1425178
class counts: 14 46
probabilities: 0.233 0.767
Node number 8: 37 observations
predicted class=Away expected loss=0.1351351 P(node) =0.08788599
class counts: 32 5
probabilities: 0.865 0.135
Node number 9: 134 observations, complexity param=0.01435407
predicted class=Away expected loss=0.380597 P(node) =0.3182898
class counts: 83 51
probabilities: 0.619 0.381
left son=18 (90 obs) right son=19 (44 obs)
Primary splits:
team splits as RLLRRLLLLRLLLLRLLRRL, improve=3.560923, (0 missing)
f_diff < 2.5 to the right, improve=1.912438, (0 missing)
ftg_diff < -2.5 to the right, improve=1.813744, (0 missing)
c_diff < 4.5 to the right, improve=1.111644, (0 missing)
st_diff < -6.5 to the right, improve=1.016406, (0 missing)
Surrogate splits:
st_diff < -6.5 to the right, agree=0.716, adj=0.136, (0 split)
s_diff < -11.5 to the right, agree=0.694, adj=0.068, (0 split)
c_diff < -8.5 to the right, agree=0.687, adj=0.045, (0 split)
f_diff < 9.5 to the left, agree=0.679, adj=0.023, (0 split)
Node number 10: 25 observations
predicted class=Away expected loss=0.28 P(node) =0.05938242
class counts: 18 7
probabilities: 0.720 0.280
Node number 11: 25 observations
predicted class=Home expected loss=0.12 P(node) =0.05938242
class counts: 3 22
probabilities: 0.120 0.880
Node number 12: 57 observations, complexity param=0.01435407
predicted class=Away expected loss=0.4210526 P(node) =0.1353919
class counts: 33 24
probabilities: 0.579 0.421
left son=24 (28 obs) right son=25 (29 obs)
Primary splits:
team splits as LL-L-R--RLR-LR--LR--, improve=2.0160750, (0 missing)
st_diff < -0.5 to the left, improve=1.2351880, (0 missing)
ftg_diff < -0.5 to the left, improve=0.8894737, (0 missing)
wdl_ft splits as RLR, improve=0.8894737, (0 missing)
s_diff < 14.5 to the left, improve=0.7741676, (0 missing)
Surrogate splits:
c_diff < 0.5 to the right, agree=0.667, adj=0.321, (0 split)
ftg_diff < 1.5 to the right, agree=0.596, adj=0.179, (0 split)
htg_diff < 0.5 to the right, agree=0.596, adj=0.179, (0 split)
s_diff < 7.5 to the right, agree=0.596, adj=0.179, (0 split)
y_diff < 0.5 to the left, agree=0.596, adj=0.179, (0 split)
Node number 13: 83 observations, complexity param=0.01913876
predicted class=Home expected loss=0.3493976 P(node) =0.1971496
class counts: 29 54
probabilities: 0.349 0.651
left son=26 (20 obs) right son=27 (63 obs)
Primary splits:
s_diff < 4.5 to the left, improve=3.309543, (0 missing)
f_diff < -8.5 to the left, improve=2.795214, (0 missing)
team splits as RR-L-L--LLL-RL--RL--, improve=2.158220, (0 missing)
c_diff < 5.5 to the left, improve=1.807308, (0 missing)
st_diff < 4.5 to the left, improve=1.777832, (0 missing)
Surrogate splits:
team splits as RR-R-R--RRR-LL--RR--, agree=0.795, adj=0.15, (0 split)
c_diff < -0.5 to the left, agree=0.783, adj=0.10, (0 split)
Node number 18: 90 observations
predicted class=Away expected loss=0.3 P(node) =0.2137767
class counts: 63 27
probabilities: 0.700 0.300
Node number 19: 44 observations, complexity param=0.01435407
predicted class=Home expected loss=0.4545455 P(node) =0.1045131
class counts: 20 24
probabilities: 0.455 0.545
left son=38 (15 obs) right son=39 (29 obs)
Primary splits:
ftg_diff < -1.5 to the left, improve=2.0480670, (0 missing)
f_diff < 5.5 to the left, improve=1.6770050, (0 missing)
wdl_ft splits as RL-, improve=1.6174100, (0 missing)
y_diff < 1.5 to the right, improve=0.9546582, (0 missing)
team splits as L--LL----R----L--RR-, improve=0.6687565, (0 missing)
Surrogate splits:
htg_diff < -1.5 to the left, agree=0.750, adj=0.267, (0 split)
st_diff < -4.5 to the left, agree=0.727, adj=0.200, (0 split)
team splits as R--RR----R----L--RR-, agree=0.682, adj=0.067, (0 split)
c_diff < 2.5 to the right, agree=0.682, adj=0.067, (0 split)
y_diff < -1.5 to the left, agree=0.682, adj=0.067, (0 split)
Node number 24: 28 observations
predicted class=Away expected loss=0.2857143 P(node) =0.06650831
class counts: 20 8
probabilities: 0.714 0.286
Node number 25: 29 observations
predicted class=Home expected loss=0.4482759 P(node) =0.06888361
class counts: 13 16
probabilities: 0.448 0.552
Node number 26: 20 observations, complexity param=0.01913876
predicted class=Away expected loss=0.4 P(node) =0.04750594
class counts: 12 8
probabilities: 0.600 0.400
left son=52 (10 obs) right son=53 (10 obs)
Primary splits:
team splits as RR-L-L--LLL-RR--R---, improve=3.600000, (0 missing)
htg_diff < 0.5 to the right, improve=1.034343, (0 missing)
wdl_ht splits as RRL, improve=1.034343, (0 missing)
st_diff < 0.5 to the right, improve=0.632967, (0 missing)
y_diff < -0.5 to the left, improve=0.632967, (0 missing)
Surrogate splits:
s_diff < 3.5 to the right, agree=0.75, adj=0.5, (0 split)
st_diff < -0.5 to the right, agree=0.75, adj=0.5, (0 split)
f_diff < -2.5 to the right, agree=0.70, adj=0.4, (0 split)
ftg_diff < 1.5 to the right, agree=0.65, adj=0.3, (0 split)
htg_diff < 0.5 to the right, agree=0.65, adj=0.3, (0 split)
Node number 27: 63 observations, complexity param=0.01435407
predicted class=Home expected loss=0.2698413 P(node) =0.1496437
class counts: 17 46
probabilities: 0.270 0.730
left son=54 (7 obs) right son=55 (56 obs)
Primary splits:
f_diff < -8.5 to the left, improve=3.1111110, (0 missing)
wdl_ht splits as LLR, improve=1.6253970, (0 missing)
htg_diff < 0.5 to the left, improve=1.6253970, (0 missing)
team splits as RR-L-R--RRR-RR--RL--, improve=1.5704950, (0 missing)
ftg_diff < 1.5 to the left, improve=0.8396825, (0 missing)
Surrogate splits:
ftg_diff < -1.5 to the left, agree=0.905, adj=0.143, (0 split)
s_diff < 21.5 to the right, agree=0.905, adj=0.143, (0 split)
Node number 38: 15 observations
predicted class=Away expected loss=0.3333333 P(node) =0.03562945
class counts: 10 5
probabilities: 0.667 0.333
Node number 39: 29 observations, complexity param=0.01196172
predicted class=Home expected loss=0.3448276 P(node) =0.06888361
class counts: 10 19
probabilities: 0.345 0.655
left son=78 (22 obs) right son=79 (7 obs)
Primary splits:
f_diff < 5 to the left, improve=2.194357, (0 missing)
y_diff < 1.5 to the right, improve=1.987659, (0 missing)
c_diff < 0.5 to the left, improve=1.829764, (0 missing)
htg_diff < -0.5 to the right, improve=1.718833, (0 missing)
wdl_ht splits as LR-, improve=1.718833, (0 missing)
Surrogate splits:
team splits as L--LL----R----L--LL-, agree=0.793, adj=0.143, (0 split)
Node number 52: 10 observations
predicted class=Away expected loss=0.1 P(node) =0.02375297
class counts: 9 1
probabilities: 0.900 0.100
Node number 53: 10 observations
predicted class=Home expected loss=0.3 P(node) =0.02375297
class counts: 3 7
probabilities: 0.300 0.700
Node number 54: 7 observations
predicted class=Away expected loss=0.2857143 P(node) =0.01662708
class counts: 5 2
probabilities: 0.714 0.286
Node number 55: 56 observations
predicted class=Home expected loss=0.2142857 P(node) =0.1330166
class counts: 12 44
probabilities: 0.214 0.786
Node number 78: 22 observations, complexity param=0.01196172
predicted class=Home expected loss=0.4545455 P(node) =0.05225653
class counts: 10 12
probabilities: 0.455 0.545
left son=156 (7 obs) right son=157 (15 obs)
Primary splits:
y_diff < 1.5 to the right, improve=3.328139, (0 missing)
f_diff < 1 to the right, improve=2.194805, (0 missing)
c_diff < 0.5 to the left, improve=1.994805, (0 missing)
htg_diff < -0.5 to the right, improve=1.644134, (0 missing)
wdl_ht splits as LR-, improve=1.644134, (0 missing)
Surrogate splits:
f_diff < 1 to the right, agree=0.864, adj=0.571, (0 split)
team splits as R--RR---------R--LL-, agree=0.773, adj=0.286, (0 split)
s_diff < -1.5 to the right, agree=0.727, adj=0.143, (0 split)
st_diff < -5.5 to the left, agree=0.727, adj=0.143, (0 split)
Node number 79: 7 observations
predicted class=Home expected loss=0 P(node) =0.01662708
class counts: 0 7
probabilities: 0.000 1.000
Node number 156: 7 observations
predicted class=Away expected loss=0.1428571 P(node) =0.01662708
class counts: 6 1
probabilities: 0.857 0.143
Node number 157: 15 observations
predicted class=Home expected loss=0.2666667 P(node) =0.03562945
class counts: 4 11
probabilities: 0.267 0.733
# Extract significant predictor variables (assuming the p-value is less than 0.05)
# (coef_summary is assumed to hold the coefficient table of a fitted logistic model,
#  e.g. summary(logistic_model)$coefficients)
significant_vars <- coef_summary[coef_summary[, "Pr(>|z|)"] < 0.05, ]

# Function to calculate impact based on a coefficient
calculate_impact <- function(coefficient) {
  odds_ratio <- exp(coefficient)    # Calculate the odds ratio
  impact <- (odds_ratio - 1) * 100  # Convert to a percentage change in the odds
  return(impact)
}

# Print significant variables
print(significant_vars)
NULL
# Calculate impact for each significant predictor variable
impact_values <- sapply(significant_vars[, "Estimate"], calculate_impact)

# Print results
cat("Impact of significant predictor variables on the odds of a team being classified as the home team:\n")
Impact of significant predictor variables on the odds of a team being classified as the home team:
for (i in 1:length(impact_values)) {
  cat(names(impact_values)[i], ": ",
      ifelse(is.na(impact_values[i]), "NA", paste0(impact_values[i], "%")), "\n")
}
: NULL%
:
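For a concrete sense of what `calculate_impact()` reports (using a hypothetical coefficient, since no significant coefficients were available above): a coefficient of 0.5 corresponds to an odds ratio of $e^{0.5} \approx 1.65$, i.e. roughly a 65% increase in the odds of being classified as the home team for a one-unit increase in that predictor, with the other predictors held fixed.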
# Coefficients for each predictor variable
coefficients <- c(
  team     = 29.07178,
  s_diff   = 25.14559,
  f_diff   = 15.59095,
  ftg_diff = 12.6297,
  st_diff  = 11.86843,
  c_diff   = 7.390272,
  y_diff   = 6.520717,
  htg_diff = 5.858338,
  wdl_ht   = 3.872174,
  wdl_ft   = 3.606517
)

# Function to calculate impact based on a coefficient
calculate_impact <- function(coefficient) {
  odds_ratio <- exp(coefficient)  # Calculate the odds ratio
  return(odds_ratio)              # Return the odds ratio
}

# Calculate impact for each predictor variable
impact_values <- sapply(coefficients, calculate_impact)

# Print results
cat("Impact of significant predictor variables on the odds of a team being classified as the home team:\n")
Impact of significant predictor variables on the odds of a team being classified as the home team:
for (var in names(impact_values)) {
  cat(var, ": ", round(impact_values[var], digits = 2), "\n")
}
# Load required libraries
library(rpart)
library(rpart.plot)
library(dplyr)
library(caret)

# Step 1: Load the data (Replace 'pl_training.csv' with your actual file name)
data <- read.csv("pl_training.csv")

# Step 2: Data Preprocessing (if needed)
# Assuming no preprocessing is required

# Step 3: Train the Classification Tree Model
# Create and train the classification tree model
tree_model <- rpart(home_or_away ~ ., data = data, method = "class")

# Step 4: Train the Binary Logistic Regression Model
# Recode target variable to binary (0 and 1)
data$home_or_away <- ifelse(data$home_or_away == "home", 1, 0)

# Create and train the logistic regression model
logistic_model <- glm(home_or_away ~ ., data = data, family = binomial(link = "logit"))
Warning: glm.fit: algorithm did not converge
# Step 5: Model Evaluation
# Compare Accuracy

# Classification Tree Model
tree_accuracy <- sum(predict(tree_model, type = "class") == data$home_or_away) / nrow(data)

# Binary Logistic Regression Model
logistic_pred <- ifelse(predict(logistic_model, type = "response") > 0.5, 1, 0)
logistic_accuracy <- sum(logistic_pred == data$home_or_away) / nrow(data)

# Print Accuracy Comparison
cat("Accuracy Comparison:\n")
Accuracy Comparison:
cat("Classification Tree Model Accuracy:", tree_accuracy, "\n")
Classification Tree Model Accuracy: 0
cat("Binary Logistic Regression Model Accuracy:", logistic_accuracy, "\n")
Binary Logistic Regression Model Accuracy: 1
# Step 6: Important Predictors Comparison

# Classification Tree Model
# Extract important predictors from the tree
tree_important_predictors <- tree_model$variable.importance

# Binary Logistic Regression Model
# Extract significant predictors from the logistic regression model
logistic_significant_predictors <- summary(logistic_model)$coefficients[
  summary(logistic_model)$coefficients[, "Pr(>|z|)"] < 0.05, ]

# Print Important Predictors Comparison
cat("\nImportant Predictors Comparison:\n")
Important Predictors Comparison:
cat("Classification Tree Model Important Predictors:\n")
cat("\nBinary Logistic Regression Model Significant Predictors:\n")
Binary Logistic Regression Model Significant Predictors:
print(logistic_significant_predictors)
Estimate Std. Error z value Pr(>|z|)
The Binary Logistic Regression model appears to achieve perfect accuracy while the Classification Tree model scores zero, but both figures are artefacts of the recoding step: the target values are "Home"/"Away" while the recoding tests for lowercase "home", so the logistic model is fitted to an all-zero response (hence the convergence warning) and the tree's class-label predictions are compared against 0/1 values.
The Classification Tree model identified several important predictors, such as team, c_diff, and ftg_diff, whereas no significant predictors were shown for the Binary Logistic Regression model.
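A minimal sketch of a more like-for-like check, keeping a single factor coding of the target for both models (assuming pl_training.csv as loaded earlier):

# Sketch: compare the two models on a common factor coding of the target
# (assumes pl_training.csv as loaded earlier; "Home" is the second factor level)
data <- read.csv("pl_training.csv")
data$home_or_away <- factor(data$home_or_away)  # one coding for both models

tree_model  <- rpart(home_or_away ~ ., data = data, method = "class")
logit_model <- glm(home_or_away ~ ., data = data, family = binomial)

tree_pred  <- predict(tree_model, type = "class")
logit_pred <- ifelse(predict(logit_model, type = "response") > 0.5,
                     levels(data$home_or_away)[2], levels(data$home_or_away)[1])

cat("Tree accuracy:    ", mean(tree_pred  == data$home_or_away), "\n")
cat("Logistic accuracy:", mean(logit_pred == data$home_or_away), "\n")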
Question 2
1
# Read the CSV file into a data frame
baseball_data <- read.csv("baseball_hof.csv")

# Display the first few rows of the data frame
head(baseball_data)
Yes, scaling the data helps to ensure that the clustering algorithms treat all variables equally and produce meaningful clusters based on the actual relationships between the data points, rather than being biased by differences in variable scales.
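As a small illustration of this point (a sketch assuming the numeric columns of `baseball_data`, with the playerID column in position one excluded):

# Sketch: compare column spreads before and after scaling
# (assumes baseball_data as read above, with the first column being playerID)
num_cols <- baseball_data[, -1]
apply(num_cols, 2, sd)         # raw standard deviations differ widely across variables
apply(scale(num_cols), 2, sd)  # after scale(), every variable has sd = 1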
# Import the baseball_hof.csv file into R
baseball_data <- read.csv("baseball_hof.csv")

# Display the structure of the data
str(baseball_data)
# Scale the data
scaled_data <- scale(baseball_data[, -1])  # Exclude the playerID column for scaling

# Perform hierarchical clustering
hc <- hclust(dist(scaled_data), method = "complete")

# Plot the dendrogram
x11()
plot(hc, main = "Dendrogram of Baseball Players")

# Perform K-means clustering
set.seed(123)  # For reproducibility
kmeans_result <- kmeans(scaled_data, centers = 3)

# Add cluster labels to the original data
baseball_data$cluster <- as.factor(kmeans_result$cluster)

# Display the first few rows of the data with cluster labels
head(baseball_data)
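The choice of three centres above is one option; a quick elbow plot can help justify the number of clusters. A minimal sketch, assuming the `scaled_data` matrix from the chunk above:

# Sketch: total within-cluster sum of squares for k = 1..10 (elbow method)
# (assumes `scaled_data` from the chunk above)
set.seed(123)
wss <- sapply(1:10, function(k) kmeans(scaled_data, centers = k, nstart = 25)$tot.withinss)
plot(1:10, wss, type = "b",
     xlab = "Number of clusters k",
     ylab = "Total within-cluster sum of squares")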
# Import the baseball_hof.csv file into R
baseball_data <- read.csv("baseball_hof.csv")

# Scale the data
scaled_data <- scale(baseball_data[, -1])  # Exclude the playerID column for scaling

# Create a distance matrix using Euclidean distance
distance_matrix <- dist(scaled_data)

# Display the distance matrix
distance_matrix
# Import the baseball_hof.csv file into R
baseball_data <- read.csv("baseball_hof.csv")

# Scale the data
scaled_data <- scale(baseball_data[, -1])  # Exclude the playerID column for scaling

# Perform hierarchical clustering with method = 'ward.D'
hc <- hclust(dist(scaled_data), method = "ward.D")

# Plot the dendrogram
x11()
plot(hc, main = "Dendrogram of Baseball Players (Ward's Method)")
3c
# Install the required packages
install.packages("gplots")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'gplots' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
library(gplots) # For heatmap
Warning: package 'gplots' was built under R version 4.3.3
Attaching package: 'gplots'
The following object is masked from 'package:stats':
lowess
# Import the baseball_hof.csv file into R
baseball_data <- read.csv("baseball_hof.csv")

# Scale the data
scaled_data <- scale(baseball_data[, -1])  # Exclude the playerID column for scaling

# Perform hierarchical clustering with method = 'ward.D'
hc <- hclust(dist(scaled_data), method = "ward.D")

# Plot the dendrogram
x11()
plot(hc, main = "Dendrogram of Baseball Players (Ward's Method)")

# Display the heatmap directly in the RStudio plot window
heatmap(as.matrix(scaled_data),
        Rowv = as.dendrogram(hc),
        Colv = NA,
        scale = "none",
        main = "Heatmap of Baseball Players",
        margins = c(5, 10),
        col = colorRampPalette(c("blue", "white", "red"))(100),
        labRow = base::abbreviate(rownames(scaled_data), minlength = 15),
        labCol = base::abbreviate(colnames(scaled_data), minlength = 15),
        cexRow = 0.7,
        cexCol = 0.7)
The heatmap reveals clusters of baseball players with similar performance attributes, indicating distinct groupings based on their statistical profiles.
3d
# Install and load the 'fpc' package
install.packages("fpc")
Installing package into 'C:/Users/C00303097/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'fpc' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\C00303097\AppData\Local\Temp\RtmpC8QyEh\downloaded_packages
library(fpc)
Warning: package 'fpc' was built under R version 4.3.3
# Perform K-means clustering with 4 clusters
set.seed(123)  # For reproducibility
kmeans_result <- kmeans(scaled_data, centers = 4)

# Assess the quality of the clustering solution
# Compute the average silhouette width
sil_width <- cluster.stats(dist(scaled_data), kmeans_result$cluster)$avg.silwidth

# Print quality assessment metric
cat("Average Silhouette Width:", sil_width, "\n")
Average Silhouette Width: 0.2884172
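An average silhouette width of about 0.29 indicates fairly weak cluster structure; one way to check whether another k fits better is to repeat the calculation over a range of cluster counts. A minimal sketch, assuming `scaled_data` and the fpc package loaded above:

# Sketch: average silhouette width for k = 2..8
# (assumes `scaled_data` and library(fpc) from above)
set.seed(123)
d <- dist(scaled_data)
sil_by_k <- sapply(2:8, function(k) {
  cluster.stats(d, kmeans(scaled_data, centers = k, nstart = 25)$cluster)$avg.silwidth
})
names(sil_by_k) <- 2:8
print(round(sil_by_k, 3))  # higher is better; pick the k with the largest value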
3e
# Assign clusters to each player
baseball_data$Cluster <- kmeans_result$cluster

# Load required libraries
library(dplyr)
library(ggplot2)

# Calculate summary statistics for numerical variables within each cluster
summary_stats <- baseball_data %>%
  group_by(Cluster) %>%
  summarise(across(where(is.numeric), list(mean = mean, median = median, sd = sd)))

# Create visualizations for each numerical variable
for (var in c("hits", "runs", "home_runs", "rbi", "stolen_bases")) {
  # Create boxplots
  print(ggplot(baseball_data, aes(x = as.factor(Cluster), y = !!sym(var))) +
          geom_boxplot() +
          labs(title = paste("Boxplot of", var),
               x = "Cluster",
               y = var) +
          theme_minimal())

  # Create histograms
  print(ggplot(baseball_data, aes(x = !!sym(var), fill = as.factor(Cluster))) +
          geom_histogram(binwidth = 10, position = "identity", alpha = 0.7) +
          labs(title = paste("Histogram of", var),
               x = var,
               y = "Frequency",
               fill = "Cluster") +
          theme_minimal() +
          facet_wrap(~Cluster, nrow = 1))
}
# Read the data
baseball_data <- read.csv("baseball_hof.csv")

# Check the structure of the dataset
str(baseball_data)