# Assignment_3_Inference

# 2 x 2 contingency table of accident outcomes by seat-belt use.
# Rows are the seat-belt groups and columns are the injury outcomes; the counts
# are entered row by row so the code reads like the printed table.
# (The workspace is deliberately not cleared here: wiping the caller's global
# environment is a side effect a script should not have.)
accident_data <- matrix(
  c(1601, 162527,   # None:      Fatal, Nonfatal
    510,  412368),  # Seat Belt: Fatal, Nonfatal
  nrow = 2,
  byrow = TRUE,
  dimnames = list(Seat_Belt = c("None", "Seat Belt"),
                  Injury = c("Fatal", "Nonfatal"))
)

print(accident_data)

##            Injury
## Seat_Belt   Fatal Nonfatal
##   None       1601   162527
##   Seat Belt   510   412368

# Fatality proportion within each seat-belt group:
# fatal count divided by that group's row total
group_totals <- rowSums(accident_data)
prop_fatal_none <- accident_data[1, 1] / group_totals[[1]]
prop_fatal_belt <- accident_data[2, 1] / group_totals[[2]]
prop_fatal_belt

## [1] 0.001235232

prop_fatal_none

## [1] 0.009754582

# Difference in proportions (no belt minus belt)
diff_prop <- prop_fatal_none - prop_fatal_belt
diff_prop

## [1] 0.00851935

# Relative risk (RR): ratio of the two fatality proportions
relative_risk <- prop_fatal_none / prop_fatal_belt
relative_risk

## [1] 7.896965

# Odds ratio (OR): fatal-to-nonfatal odds of each group, then their ratio
fatal_counts <- accident_data[, 1]
nonfatal_counts <- accident_data[, 2]
odds_none <- fatal_counts[[1]] / nonfatal_counts[[1]]
odds_belt <- fatal_counts[[2]] / nonfatal_counts[[2]]
odds_ratio <- odds_none / odds_belt
odds_ratio

## [1] 7.964905

# Draw the four summary charts on a 2 x 2 grid.
# par() returns the previous settings; they are restored at the end so that
# plots drawn later in the script are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2), mar = c(5, 4, 4, 2) + 0.1)

# Panel 1: raw counts, stacked by injury outcome within each seat-belt group
barplot(t(accident_data),
        main = "Accident Outcomes by Seat Belt Use\n(Counts)",
        xlab = "Seat Belt Use",
        ylab = "Number of Accidents",
        col = c("red", "blue"))

# Panel 2: row-wise proportions (each seat-belt group sums to 1)
prop_data <- prop.table(accident_data, margin = 1)
barplot(t(prop_data),
        main = "Accident Outcomes by Seat Belt Use\n(Proportions)",
        xlab = "Seat Belt Use",
        ylab = "Proportion",
        col = c("red", "blue"),
        ylim = c(0, 1))

# Panel 3: fatality rates expressed in percent
fatality_rates <- c(prop_fatal_none, prop_fatal_belt)
barplot(fatality_rates * 100,
        names.arg = c("No Belt", "Seat Belt"),
        main = "Fatality Rates by Seat Belt Use",
        xlab = "Seat Belt Use",
        ylab = "Fatality Rate (%)",
        col = c("red", "blue"),
        ylim = c(0, 2))

# Panel 4: the three risk measures side by side
# (the difference is shown in percentage points, the other two are ratios)
risk_measures <- c(Difference = diff_prop * 100,
                   Relative_Risk = relative_risk,
                   Odds_Ratio = odds_ratio)
# barplot() returns the x-coordinates of the bar midpoints; these, not 1:3,
# are where the value labels must be placed. The y-axis is extended so the
# labels drawn above the tallest bar stay inside the plot region.
bar_mids <- barplot(risk_measures,
                    main = "Risk Measures for No Seat Belt vs Seat Belt",
                    ylab = "Value",
                    col = "blue",
                    las = 2,
                    ylim = c(0, max(risk_measures) * 1.2))
abline(h = 1, col = "red", lty = 2, lwd = 2)  # reference line: ratio of 1
text(bar_mids, risk_measures + 0.5, labels = round(risk_measures, 2))

par(old_par)

# Problem 3
# Load the skull data and split it into training and validation sets using
# the Holdout column (0 = training rows, 1 = validation rows).
# The original malformed `rm(list=)` call has been dropped: it had an empty
# `list` argument and a script should not clear the workspace anyway.
# NOTE(review): the path below is specific to one machine; point it at your
# own copy of TSkull_19.csv before running.
skull_path <- "C:/Users/Sabuj Ganguly/OneDrive/Documents/PhD 1st sem/STAT 5034 Inference/TSkull_19.csv"
if (!file.exists(skull_path)) {
  stop("Data file not found: ", skull_path, call. = FALSE)
}
Data <- read.csv(skull_path)


training_set <- Data[Data$Holdout == 0, ]
validation_set <- Data[Data$Holdout == 1, ]

# Measurements compared between the two types throughout Problem 3
variables <- c("Length", "Breadth", "Height", "Fheight", "Fbreadth")

# Scatterplot matrix of the training measurements;
# points are red where Type == 1 and blue otherwise
type_colours <- ifelse(training_set$Type == 1, "red", "blue")
pairs(
  training_set[, variables],
  main = "Pairwise Scatter Plots by Type\n(Red: Type 1, Blue: Type 2)",
  col = type_colours,
  pch = 16
)

# Rank the variables by how well they separate the two types.
# Score = |difference of group means| / (sd of type 1 + sd of type 2);
# a larger score means less overlap between the groups.
type1 <- training_set[training_set$Type == 1, variables]
type2 <- training_set[training_set$Type == 2, variables]

mean_diffs <- abs(colMeans(type1) - colMeans(type2))
combined_sd <- vapply(type1, sd, numeric(1)) + vapply(type2, sd, numeric(1))
overlap_metric <- mean_diffs / combined_sd

ranking <- order(overlap_metric, decreasing = TRUE)
ranked_variables <- variables[ranking]
cat("1.", ranked_variables[1], "- Shows good separation between types\n")

## 1. Length - Shows good separation between types

cat("2.", ranked_variables[2], "- Moderate separation\n")

## 2. Fheight - Moderate separation

cat("3.", ranked_variables[3], "- Some separation visible\n")

## 3. Fbreadth - Some separation visible

cat("4.", ranked_variables[4], "- Limited separation\n")

## 4. Height - Limited separation

cat("5.", ranked_variables[5], "- Poor separation\n\n")

## 5. Breadth - Poor separation

# For every variable, print the training-set mean of each type and the
# difference (Type 2 minus Type 1). The same difference is printed again as
# the unstandardized effect size.
type1_rows <- training_set$Type == 1
type2_rows <- training_set$Type == 2

for (var in variables) {
  values <- training_set[[var]]
  mean_type1 <- mean(values[type1_rows])
  mean_type2 <- mean(values[type2_rows])
  mean_diff <- mean_type2 - mean_type1
  shown_diff <- round(mean_diff, 3)

  cat(var, ":\n")
  cat("  Type 1 mean:", round(mean_type1, 3), "\n")
  cat("  Type 2 mean:", round(mean_type2, 3), "\n")
  cat("  Mean difference:", shown_diff, "\n")
  cat("  Effect size (unstandardized):", shown_diff, "\n\n")
}

## Length :
##   Type 1 mean: 175.538 
##   Type 2 mean: 187.136 
##   Mean difference: 11.598 
##   Effect size (unstandardized): 11.598 
## 
## Breadth :
##   Type 1 mean: 139.154 
##   Type 2 mean: 138.773 
##   Mean difference: -0.381 
##   Effect size (unstandardized): -0.381 
## 
## Height :
##   Type 1 mean: 133.077 
##   Type 2 mean: 135.545 
##   Mean difference: 2.469 
##   Effect size (unstandardized): 2.469 
## 
## Fheight :
##   Type 1 mean: 70.385 
##   Type 2 mean: 76.818 
##   Mean difference: 6.434 
##   Effect size (unstandardized): 6.434 
## 
## Fbreadth :
##   Type 1 mean: 130.269 
##   Type 2 mean: 137.5 
##   Mean difference: 7.231 
##   Effect size (unstandardized): 7.231

#install.packages("rpart")
#install.packages("rpart.plot")
#install.packages("caret")

library(rpart)

## Warning: package 'rpart' was built under R version 4.5.1

library(rpart.plot)

## Warning: package 'rpart.plot' was built under R version 4.5.1

library(caret)

## Warning: package 'caret' was built under R version 4.5.1

## Loading required package: ggplot2

## Loading required package: lattice

# Fixed seed so the random parts of the analysis are repeatable
set.seed(123)

# Model: Type explained by the five skull measurements
formula <- Type ~ Length + Breadth + Height + Fheight + Fbreadth

# Candidate tuning values; every combination becomes one row of tune_grid
minsplit_values <- c(2, 5, 10)           # minimum observations to attempt a split
minbucket_values <- c(1, 3, 5)           # minimum observations in a terminal node
cp_values <- c(0.001, 0.01, 0.1, 0.5)    # complexity parameter
tune_grid <- expand.grid(
  minsplit = minsplit_values,
  minbucket = minbucket_values,
  cp = cp_values
)

# Fit one classification tree per row of tune_grid and record its accuracy on
# the training and validation sets.
#
# Results:
#   tree_results           - named list of fitted rpart objects ("tree_1", ...)
#   train_performance      - one row per tree: settings, training accuracy, nodes
#   validation_performance - same layout, with validation accuracy
#
# Per-tree rows are collected in preallocated lists and bound once after the
# loop instead of growing the data frames with rbind() on every iteration.
n_fits <- nrow(tune_grid)
tree_names <- paste0("tree_", seq_len(n_fits))

tree_results <- setNames(vector("list", n_fits), tree_names)
train_rows <- vector("list", n_fits)
validation_rows <- vector("list", n_fits)

# Build the one-row summary for tree number i with the given accuracy.
performance_row <- function(i, tree, accuracy) {
  data.frame(Tree = tree_names[i],
             Minsplit = tune_grid$minsplit[i],
             Minbucket = tune_grid$minbucket[i],
             CP = tune_grid$cp[i],
             Accuracy = accuracy,
             Nodes = nrow(tree$frame))
}

# Fitting trees (seq_len() keeps the loop from running on an empty grid)
for (i in seq_len(n_fits)) {
  cat("Fitting tree", i, "of", n_fits, "\n")

  # Fit tree with current parameters
  tree <- rpart(formula,
                data = training_set,
                method = "class",
                control = rpart.control(
                  minsplit = tune_grid$minsplit[i],
                  minbucket = tune_grid$minbucket[i],
                  cp = tune_grid$cp[i],
                  xval = 10  # 10-fold cross-validation
                ))
  tree_results[[i]] <- tree

  # Training performance: share of training rows predicted correctly
  train_pred <- predict(tree, training_set, type = "class")
  train_accuracy <- mean(train_pred == training_set$Type)

  # Validation performance: same measure on the held-out rows
  val_pred <- predict(tree, validation_set, type = "class")
  val_accuracy <- mean(val_pred == validation_set$Type)

  train_rows[[i]] <- performance_row(i, tree, train_accuracy)
  validation_rows[[i]] <- performance_row(i, tree, val_accuracy)
}

train_performance <- do.call(rbind, train_rows)
validation_performance <- do.call(rbind, validation_rows)

## Fitting tree 1 of 36 
## Fitting tree 2 of 36 
## Fitting tree 3 of 36 
## Fitting tree 4 of 36 
## Fitting tree 5 of 36 
## Fitting tree 6 of 36 
## Fitting tree 7 of 36 
## Fitting tree 8 of 36 
## Fitting tree 9 of 36 
## Fitting tree 10 of 36 
## Fitting tree 11 of 36 
## Fitting tree 12 of 36 
## Fitting tree 13 of 36 
## Fitting tree 14 of 36 
## Fitting tree 15 of 36 
## Fitting tree 16 of 36 
## Fitting tree 17 of 36 
## Fitting tree 18 of 36 
## Fitting tree 19 of 36 
## Fitting tree 20 of 36 
## Fitting tree 21 of 36 
## Fitting tree 22 of 36 
## Fitting tree 23 of 36 
## Fitting tree 24 of 36 
## Fitting tree 25 of 36 
## Fitting tree 26 of 36 
## Fitting tree 27 of 36 
## Fitting tree 28 of 36 
## Fitting tree 29 of 36 
## Fitting tree 30 of 36 
## Fitting tree 31 of 36 
## Fitting tree 32 of 36 
## Fitting tree 33 of 36 
## Fitting tree 34 of 36 
## Fitting tree 35 of 36 
## Fitting tree 36 of 36

# Combine training and validation results into one table. Rows are matched on
# the tree name and its settings; the two Accuracy columns receive the
# suffixes _Train and _Validation.
key_columns <- c("Tree", "Minsplit", "Minbucket", "CP", "Nodes")
performance <- merge(
  x = train_performance,
  y = validation_performance,
  by = key_columns,
  suffixes = c("_Train", "_Validation")
)

# Overfitting gap: training accuracy minus validation accuracy
performance[["Overfitting_Gap"]] <-
  performance[["Accuracy_Train"]] - performance[["Accuracy_Validation"]]

# Highest validation accuracy first
validation_order <- order(performance[["Accuracy_Validation"]], decreasing = TRUE)
performance <- performance[validation_order, ]


print(performance)

##       Tree Minsplit Minbucket    CP Nodes Accuracy_Train Accuracy_Validation
## 1   tree_1        2         1 0.001     7      1.0000000               0.625
## 2  tree_10        2         1 0.010     7      1.0000000               0.625
## 3  tree_11        5         1 0.010     7      1.0000000               0.625
## 4  tree_12       10         1 0.010     7      1.0000000               0.625
## 12  tree_2        5         1 0.001     7      1.0000000               0.625
## 23  tree_3       10         1 0.001     7      1.0000000               0.625
## 5  tree_13        2         3 0.010     3      0.9166667               0.500
## 6  tree_14        5         3 0.010     3      0.9166667               0.500
## 7  tree_15       10         3 0.010     3      0.9166667               0.500
## 8  tree_16        2         5 0.010     3      0.9166667               0.500
## 9  tree_17        5         5 0.010     3      0.9166667               0.500
## 10 tree_18       10         5 0.010     3      0.9166667               0.500
## 11 tree_19        2         1 0.100     3      0.9166667               0.500
## 13 tree_20        5         1 0.100     3      0.9166667               0.500
## 14 tree_21       10         1 0.100     3      0.9166667               0.500
## 15 tree_22        2         3 0.100     3      0.9166667               0.500
## 16 tree_23        5         3 0.100     3      0.9166667               0.500
## 17 tree_24       10         3 0.100     3      0.9166667               0.500
## 18 tree_25        2         5 0.100     3      0.9166667               0.500
## 19 tree_26        5         5 0.100     3      0.9166667               0.500
## 20 tree_27       10         5 0.100     3      0.9166667               0.500
## 21 tree_28        2         1 0.500     3      0.9166667               0.500
## 22 tree_29        5         1 0.500     3      0.9166667               0.500
## 24 tree_30       10         1 0.500     3      0.9166667               0.500
## 25 tree_31        2         3 0.500     3      0.9166667               0.500
## 26 tree_32        5         3 0.500     3      0.9166667               0.500
## 27 tree_33       10         3 0.500     3      0.9166667               0.500
## 28 tree_34        2         5 0.500     3      0.9166667               0.500
## 29 tree_35        5         5 0.500     3      0.9166667               0.500
## 30 tree_36       10         5 0.500     3      0.9166667               0.500
## 31  tree_4        2         3 0.001     3      0.9166667               0.500
## 32  tree_5        5         3 0.001     3      0.9166667               0.500
## 33  tree_6       10         3 0.001     3      0.9166667               0.500
## 34  tree_7        2         5 0.001     3      0.9166667               0.500
## 35  tree_8        5         5 0.001     3      0.9166667               0.500
## 36  tree_9       10         5 0.001     3      0.9166667               0.500
##    Overfitting_Gap
## 1        0.3750000
## 2        0.3750000
## 3        0.3750000
## 4        0.3750000
## 12       0.3750000
## 23       0.3750000
## 5        0.4166667
## 6        0.4166667
## 7        0.4166667
## 8        0.4166667
## 9        0.4166667
## 10       0.4166667
## 11       0.4166667
## 13       0.4166667
## 14       0.4166667
## 15       0.4166667
## 16       0.4166667
## 17       0.4166667
## 18       0.4166667
## 19       0.4166667
## 20       0.4166667
## 21       0.4166667
## 22       0.4166667
## 24       0.4166667
## 25       0.4166667
## 26       0.4166667
## 27       0.4166667
## 28       0.4166667
## 29       0.4166667
## 30       0.4166667
## 31       0.4166667
## 32       0.4166667
## 33       0.4166667
## 34       0.4166667
## 35       0.4166667
## 36       0.4166667

# Tree with the highest validation accuracy
# (which.max() returns the first row among ties)
best_val_idx <- which.max(performance[["Accuracy_Validation"]])
best_val_tree <- performance[best_val_idx, ]
best_tree <- tree_results[[best_val_tree[["Tree"]]]]

# Tree with the fewest nodes (which.min() likewise returns the first among ties)
simplest_idx <- which.min(performance[["Nodes"]])
simplest_tree <- performance[simplest_idx, ]
simple_tree_obj <- tree_results[[simplest_tree[["Tree"]]]]

# Tree with the most nodes
complex_idx <- which.max(performance[["Nodes"]])
complex_tree <- performance[complex_idx, ]
complex_tree_obj <- tree_results[[complex_tree[["Tree"]]]]

print(best_val_tree)

##     Tree Minsplit Minbucket    CP Nodes Accuracy_Train Accuracy_Validation
## 1 tree_1        2         1 0.001     7              1               0.625
##   Overfitting_Gap
## 1           0.375

print(simplest_tree)

##      Tree Minsplit Minbucket   CP Nodes Accuracy_Train Accuracy_Validation
## 5 tree_13        2         3 0.01     3      0.9166667                 0.5
##   Overfitting_Gap
## 5       0.4166667

print(complex_tree)

##     Tree Minsplit Minbucket    CP Nodes Accuracy_Train Accuracy_Validation
## 1 tree_1        2         1 0.001     7              1               0.625
##   Overfitting_Gap
## 1           0.375

# Draw a fitted tree and label its nodes.
#   tree  - fitted tree object accepted by plot() and text()
#   title - main title shown above the plot
# Branches use uniform vertical spacing; every node (not only the leaves) is
# labelled, including observation counts.
plot_tree <- function(tree, title) {
  plot(tree, main = title, margin = 0.1, uniform = TRUE)
  text(tree, cex = 0.8, all = TRUE, use.n = TRUE)
}

# Print and plot the three trees of interest

# --- 1. Best validation tree ---
cat("1. Best Validation Tree (Tree", best_val_tree[["Tree"]], "):\n")

## 1. Best Validation Tree (Tree tree_1 ):

print(best_tree)

## n= 24 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 24 11 1 (0.54166667 0.45833333)  
##   2) Length< 180.75 13  1 1 (0.92307692 0.07692308)  
##     4) Breadth< 151.75 12  0 1 (1.00000000 0.00000000) *
##     5) Breadth>=151.75 1  0 2 (0.00000000 1.00000000) *
##   3) Length>=180.75 11  1 2 (0.09090909 0.90909091)  
##     6) Breadth>=148.25 1  0 1 (1.00000000 0.00000000) *
##     7) Breadth< 148.25 10  0 2 (0.00000000 1.00000000) *

best_title <- paste("Best Validation Tree - Accuracy:",
                    round(best_val_tree[["Accuracy_Validation"]], 3))
plot_tree(best_tree, best_title)

# --- 2. Simplest tree ---
cat("\n2. Simplest Tree (Tree", simplest_tree[["Tree"]], "):\n")

## 
## 2. Simplest Tree (Tree tree_13 ):

print(simple_tree_obj)

## n= 24 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 24 11 1 (0.54166667 0.45833333)  
##   2) Length< 180.75 13  1 1 (0.92307692 0.07692308) *
##   3) Length>=180.75 11  1 2 (0.09090909 0.90909091) *

simplest_title <- paste("Simplest Tree -", simplest_tree[["Nodes"]], "nodes")
plot_tree(simple_tree_obj, simplest_title)

# --- 3. Most complex tree ---
cat("\n3. Most Complex Tree (Tree", complex_tree[["Tree"]], "):\n")

## 
## 3. Most Complex Tree (Tree tree_1 ):

print(complex_tree_obj)

## n= 24 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 24 11 1 (0.54166667 0.45833333)  
##   2) Length< 180.75 13  1 1 (0.92307692 0.07692308)  
##     4) Breadth< 151.75 12  0 1 (1.00000000 0.00000000) *
##     5) Breadth>=151.75 1  0 2 (0.00000000 1.00000000) *
##   3) Length>=180.75 11  1 2 (0.09090909 0.90909091)  
##     6) Breadth>=148.25 1  0 1 (1.00000000 0.00000000) *
##     7) Breadth< 148.25 10  0 2 (0.00000000 1.00000000) *

complex_title <- paste("Most Complex Tree -", complex_tree[["Nodes"]], "nodes")
plot_tree(complex_tree_obj, complex_title)

# Overfitting: trees whose training accuracy exceeds their validation
# accuracy by more than the threshold
overfit_threshold <- 0.2
exceeds_gap <- performance$Overfitting_Gap > overfit_threshold
overfit_trees <- performance[exceeds_gap, ]

cat("TREES WITH SIGNIFICANT OVERFITTING (Gap >", overfit_threshold, "):\n")

## TREES WITH SIGNIFICANT OVERFITTING (Gap > 0.2 ):

overfit_columns <- c("Tree", "Nodes", "Accuracy_Train",
                     "Accuracy_Validation", "Overfitting_Gap")
if (nrow(overfit_trees) == 0) {
  cat("No significant overfitting detected\n")
} else {
  print(overfit_trees[, overfit_columns])
}

##       Tree Nodes Accuracy_Train Accuracy_Validation Overfitting_Gap
## 1   tree_1     7      1.0000000               0.625       0.3750000
## 2  tree_10     7      1.0000000               0.625       0.3750000
## 3  tree_11     7      1.0000000               0.625       0.3750000
## 4  tree_12     7      1.0000000               0.625       0.3750000
## 12  tree_2     7      1.0000000               0.625       0.3750000
## 23  tree_3     7      1.0000000               0.625       0.3750000
## 5  tree_13     3      0.9166667               0.500       0.4166667
## 6  tree_14     3      0.9166667               0.500       0.4166667
## 7  tree_15     3      0.9166667               0.500       0.4166667
## 8  tree_16     3      0.9166667               0.500       0.4166667
## 9  tree_17     3      0.9166667               0.500       0.4166667
## 10 tree_18     3      0.9166667               0.500       0.4166667
## 11 tree_19     3      0.9166667               0.500       0.4166667
## 13 tree_20     3      0.9166667               0.500       0.4166667
## 14 tree_21     3      0.9166667               0.500       0.4166667
## 15 tree_22     3      0.9166667               0.500       0.4166667
## 16 tree_23     3      0.9166667               0.500       0.4166667
## 17 tree_24     3      0.9166667               0.500       0.4166667
## 18 tree_25     3      0.9166667               0.500       0.4166667
## 19 tree_26     3      0.9166667               0.500       0.4166667
## 20 tree_27     3      0.9166667               0.500       0.4166667
## 21 tree_28     3      0.9166667               0.500       0.4166667
## 22 tree_29     3      0.9166667               0.500       0.4166667
## 24 tree_30     3      0.9166667               0.500       0.4166667
## 25 tree_31     3      0.9166667               0.500       0.4166667
## 26 tree_32     3      0.9166667               0.500       0.4166667
## 27 tree_33     3      0.9166667               0.500       0.4166667
## 28 tree_34     3      0.9166667               0.500       0.4166667
## 29 tree_35     3      0.9166667               0.500       0.4166667
## 30 tree_36     3      0.9166667               0.500       0.4166667
## 31  tree_4     3      0.9166667               0.500       0.4166667
## 32  tree_5     3      0.9166667               0.500       0.4166667
## 33  tree_6     3      0.9166667               0.500       0.4166667
## 34  tree_7     3      0.9166667               0.500       0.4166667
## 35  tree_8     3      0.9166667               0.500       0.4166667
## 36  tree_9     3      0.9166667               0.500       0.4166667

# Underfitting: trees whose training accuracy is below the threshold
underfit_threshold <- 0.6
below_threshold <- performance$Accuracy_Train < underfit_threshold
underfit_trees <- performance[below_threshold, ]

cat("\nTREES WITH POTENTIAL UNDERFITTING (Train Accuracy <", underfit_threshold, "):\n")

## 
## TREES WITH POTENTIAL UNDERFITTING (Train Accuracy < 0.6 ):

underfit_columns <- c("Tree", "Nodes", "Accuracy_Train", "Accuracy_Validation")
if (nrow(underfit_trees) == 0) {
  cat("No significant underfitting detected\n")
} else {
  print(underfit_trees[, underfit_columns])
}

## No significant underfitting detected

# Average the performance columns over all trees sharing a cp value
cat("Effect of Complexity Parameter (cp):\n")

## Effect of Complexity Parameter (cp):

cp_effect <- aggregate(
  cbind(Accuracy_Train, Accuracy_Validation, Nodes, Overfitting_Gap) ~ CP,
  data = performance,
  FUN = mean
)
print(round(cp_effect, 3))

##      CP Accuracy_Train Accuracy_Validation Nodes Overfitting_Gap
## 1 0.001          0.944               0.542 4.333           0.403
## 2 0.010          0.944               0.542 4.333           0.403
## 3 0.100          0.917               0.500 3.000           0.417
## 4 0.500          0.917               0.500 3.000           0.417

# Same averaging, grouped by the minsplit setting
cat("\nEffect of Minsplit:\n")

## 
## Effect of Minsplit:

minsplit_effect <- aggregate(
  cbind(Accuracy_Train, Accuracy_Validation, Nodes) ~ Minsplit,
  data = performance,
  FUN = mean
)
print(round(minsplit_effect, 3))

##   Minsplit Accuracy_Train Accuracy_Validation Nodes
## 1        2          0.931               0.521 3.667
## 2        5          0.931               0.521 3.667
## 3       10          0.931               0.521 3.667

# Analysis of the best tree on the validation set

# Predictions on validation set (class labels and class probabilities)
val_pred <- predict(best_tree, validation_set, type = "class")
val_prob <- predict(best_tree, validation_set, type = "prob")

# Confusion matrix.
# Both margins use the same set of class levels so the table is always
# square: without this, a class missing from the validation labels would
# drop a column, making diag() pick the wrong cells and silently skipping
# the metrics below.
class_levels <- union(levels(val_pred),
                      as.character(sort(unique(validation_set$Type))))
conf_matrix <- table(
  Predicted = factor(val_pred, levels = class_levels),
  Actual = factor(validation_set$Type, levels = class_levels)
)
cat("Confusion Matrix (Validation Set):\n")

## Confusion Matrix (Validation Set):

print(conf_matrix)

##          Actual
## Predicted 1 2
##         1 4 3
##         2 0 1

# Performance metrics
# Accuracy: correctly classified cases (the diagonal) over all cases
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
# The remaining metrics are defined for two classes only; the second class
# level is treated as the positive class.
if (nrow(conf_matrix) == 2 && ncol(conf_matrix) == 2) {
  sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[, 2])  # True positive rate
  specificity <- conf_matrix[1, 1] / sum(conf_matrix[, 1])  # True negative rate
  precision <- conf_matrix[2, 2] / sum(conf_matrix[2, ])    # Positive predictive value

  cat("\nPerformance Metrics:\n")
  cat("Accuracy:", round(accuracy, 3), "\n")
  cat("Sensitivity:", round(sensitivity, 3), "\n")
  cat("Specificity:", round(specificity, 3), "\n")
  cat("Precision:", round(precision, 3), "\n")
}

## 
## Performance Metrics:
## Accuracy: 0.625 
## Sensitivity: 0.25 
## Specificity: 1 
## Precision: 1

# Variable importance of the best tree, largest first.
# imp_sorted is initialised to NULL so that results_summary below can still be
# built when the tree carries no importance values; previously it was only
# defined inside the if-block and the list() call would then fail.
imp <- best_tree$variable.importance
imp_sorted <- NULL
if (!is.null(imp)) {
  imp_sorted <- sort(imp, decreasing = TRUE)
  print(round(imp_sorted, 3))

  barplot(imp_sorted, main = "Variable Importance in Best Tree",
          ylab = "Importance", las = 2, cex.names = 0.8)
}

##   Length Fbreadth  Fheight   Height  Breadth 
##    8.252    4.501    4.501    4.501    4.415

# Bundle the main results of the tree analysis into one list
results_summary <- list(
  Best_Tree = best_val_tree,
  Best_Tree_Object = best_tree,
  Performance_Summary = performance,
  Validation_Confusion_Matrix = conf_matrix,
  Variable_Importance = imp_sorted
)

results_summary

## $Best_Tree
##     Tree Minsplit Minbucket    CP Nodes Accuracy_Train Accuracy_Validation
## 1 tree_1        2         1 0.001     7              1               0.625
##   Overfitting_Gap
## 1           0.375
## 
## $Best_Tree_Object
## n= 24 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 24 11 1 (0.54166667 0.45833333)  
##   2) Length< 180.75 13  1 1 (0.92307692 0.07692308)  
##     4) Breadth< 151.75 12  0 1 (1.00000000 0.00000000) *
##     5) Breadth>=151.75 1  0 2 (0.00000000 1.00000000) *
##   3) Length>=180.75 11  1 2 (0.09090909 0.90909091)  
##     6) Breadth>=148.25 1  0 1 (1.00000000 0.00000000) *
##     7) Breadth< 148.25 10  0 2 (0.00000000 1.00000000) *
## 
## $Performance_Summary
##       Tree Minsplit Minbucket    CP Nodes Accuracy_Train Accuracy_Validation
## 1   tree_1        2         1 0.001     7      1.0000000               0.625
## 2  tree_10        2         1 0.010     7      1.0000000               0.625
## 3  tree_11        5         1 0.010     7      1.0000000               0.625
## 4  tree_12       10         1 0.010     7      1.0000000               0.625
## 12  tree_2        5         1 0.001     7      1.0000000               0.625
## 23  tree_3       10         1 0.001     7      1.0000000               0.625
## 5  tree_13        2         3 0.010     3      0.9166667               0.500
## 6  tree_14        5         3 0.010     3      0.9166667               0.500
## 7  tree_15       10         3 0.010     3      0.9166667               0.500
## 8  tree_16        2         5 0.010     3      0.9166667               0.500
## 9  tree_17        5         5 0.010     3      0.9166667               0.500
## 10 tree_18       10         5 0.010     3      0.9166667               0.500
## 11 tree_19        2         1 0.100     3      0.9166667               0.500
## 13 tree_20        5         1 0.100     3      0.9166667               0.500
## 14 tree_21       10         1 0.100     3      0.9166667               0.500
## 15 tree_22        2         3 0.100     3      0.9166667               0.500
## 16 tree_23        5         3 0.100     3      0.9166667               0.500
## 17 tree_24       10         3 0.100     3      0.9166667               0.500
## 18 tree_25        2         5 0.100     3      0.9166667               0.500
## 19 tree_26        5         5 0.100     3      0.9166667               0.500
## 20 tree_27       10         5 0.100     3      0.9166667               0.500
## 21 tree_28        2         1 0.500     3      0.9166667               0.500
## 22 tree_29        5         1 0.500     3      0.9166667               0.500
## 24 tree_30       10         1 0.500     3      0.9166667               0.500
## 25 tree_31        2         3 0.500     3      0.9166667               0.500
## 26 tree_32        5         3 0.500     3      0.9166667               0.500
## 27 tree_33       10         3 0.500     3      0.9166667               0.500
## 28 tree_34        2         5 0.500     3      0.9166667               0.500
## 29 tree_35        5         5 0.500     3      0.9166667               0.500
## 30 tree_36       10         5 0.500     3      0.9166667               0.500
## 31  tree_4        2         3 0.001     3      0.9166667               0.500
## 32  tree_5        5         3 0.001     3      0.9166667               0.500
## 33  tree_6       10         3 0.001     3      0.9166667               0.500
## 34  tree_7        2         5 0.001     3      0.9166667               0.500
## 35  tree_8        5         5 0.001     3      0.9166667               0.500
## 36  tree_9       10         5 0.001     3      0.9166667               0.500
##    Overfitting_Gap
## 1        0.3750000
## 2        0.3750000
## 3        0.3750000
## 4        0.3750000
## 12       0.3750000
## 23       0.3750000
## 5        0.4166667
## 6        0.4166667
## 7        0.4166667
## 8        0.4166667
## 9        0.4166667
## 10       0.4166667
## 11       0.4166667
## 13       0.4166667
## 14       0.4166667
## 15       0.4166667
## 16       0.4166667
## 17       0.4166667
## 18       0.4166667
## 19       0.4166667
## 20       0.4166667
## 21       0.4166667
## 22       0.4166667
## 24       0.4166667
## 25       0.4166667
## 26       0.4166667
## 27       0.4166667
## 28       0.4166667
## 29       0.4166667
## 30       0.4166667
## 31       0.4166667
## 32       0.4166667
## 33       0.4166667
## 34       0.4166667
## 35       0.4166667
## 36       0.4166667
## 
## $Validation_Confusion_Matrix
##          Actual
## Predicted 1 2
##         1 4 3
##         2 0 1
## 
## $Variable_Importance
##   Length Fbreadth  Fheight   Height  Breadth 
## 8.252331 4.501271 4.501271 4.501271 4.414548

# Assignment_3_Inference

# Sabuj Ganguly

# 2025-10-08