library(mlbench)
library(e1071)
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:e1071':
##
## element
library(reshape2)
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
library(caret)
## Loading required package: lattice
library(AppliedPredictiveModeling)
library(GGally)
library(gridExtra)
library(moments)
##
## Attaching package: 'moments'
## The following objects are masked from 'package:e1071':
##
## kurtosis, moment, skewness
data(Glass)
str(Glass)
## 'data.frame': 214 obs. of 10 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
head(Glass)
## RI Na Mg Al Si K Ca Ba Fe Type
## 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0 0.00 1
## 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0 0.00 1
## 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0 0.00 1
## 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0 0.00 1
## 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0 0.00 1
## 6 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0 0.26 1
dim(Glass)
## [1] 214 10
summary(Glass)
## RI Na Mg Al
## Min. :1.511 Min. :10.73 Min. :0.000 Min. :0.290
## 1st Qu.:1.517 1st Qu.:12.91 1st Qu.:2.115 1st Qu.:1.190
## Median :1.518 Median :13.30 Median :3.480 Median :1.360
## Mean :1.518 Mean :13.41 Mean :2.685 Mean :1.445
## 3rd Qu.:1.519 3rd Qu.:13.82 3rd Qu.:3.600 3rd Qu.:1.630
## Max. :1.534 Max. :17.38 Max. :4.490 Max. :3.500
## Si K Ca Ba
## Min. :69.81 Min. :0.0000 Min. : 5.430 Min. :0.000
## 1st Qu.:72.28 1st Qu.:0.1225 1st Qu.: 8.240 1st Qu.:0.000
## Median :72.79 Median :0.5550 Median : 8.600 Median :0.000
## Mean :72.65 Mean :0.4971 Mean : 8.957 Mean :0.175
## 3rd Qu.:73.09 3rd Qu.:0.6100 3rd Qu.: 9.172 3rd Qu.:0.000
## Max. :75.41 Max. :6.2100 Max. :16.190 Max. :3.150
## Fe Type
## Min. :0.00000 1:70
## 1st Qu.:0.00000 2:76
## Median :0.00000 3:17
## Mean :0.05701 5:13
## 3rd Qu.:0.10000 6: 9
## Max. :0.51000 7:29
predictors <- c("RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe")
sapply(Glass[,predictors], sd)
## RI Na Mg Al Si K
## 0.003036864 0.816603556 1.442407845 0.499269646 0.774545795 0.652191846
## Ca Ba Fe
## 1.423153487 0.497219261 0.097438701
# Histograms of all Predictors
par(mfrow = c(3, 3), mar = c(2, 2, 2, 1))
for (pred in predictors){
  mn <- round(mean(Glass[[pred]]), 3)
  med <- round(median(Glass[[pred]]), 3)
  hist(Glass[, pred],
       main = paste(pred, "(skew =", round(skewness(Glass[[pred]]), 2), ")"),
       xlab = pred,
       col = "lightblue",
       border = "white",
       breaks = 25)
  abline(v = mean(Glass[[pred]]), col = "red", lwd = 2, lty = 2)
  abline(v = median(Glass[[pred]]), col = "navy", lwd = 2, lty = 3)
  legend("topright",
         legend = c(paste("Mean =", mn), paste("Median =", med)),
         col = c("red", "navy"), lwd = 3, lty = c(2, 3), cex = 0.7)
}
The histograms depict the distribution of each chemical measurement in the Glass dataset. Several predictors are not normally distributed, and many show right skewness. Variables such as K, Ba, and Fe contain mostly low values with a few large observations, creating long right tails. Because an RBF-kernel SVM relies on distances between centered and scaled predictors, this skewness matters: a handful of extreme values can dominate those distances.
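As a rough numeric complement to the visual check, a Shapiro-Wilk test can confirm the departure from normality. This is a minimal sketch, not part of the original analysis; it reuses the predictors vector defined above.
# Hedged sketch: Shapiro-Wilk normality test per predictor (n = 214 is
# within the test's supported range); small p-values reject normality.
shapiro_p <- sapply(Glass[, predictors], function(x) shapiro.test(x)$p.value)
round(sort(shapiro_p), 4)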
par(mfrow = c(1,1))
# Boxplots by Glass Type
par(mfrow=c(3,3), mar = c(2,2,2,1))
type_colors <- c("1" = "#E91E63", "2" = "#2196F3", "3" = "#4CAF50",
"5" = "#FF9800", "6" = "#9C27B0", "7" = "#00BCD4")
for (pred in predictors){
boxplot(Glass[,pred] ~ Glass$Type,
main = pred,
xlab = "Glass Type",
ylab = pred,
col = type_colors,
border = "gray30",
outline = TRUE)
}
Predictors such as RI, Na, Al, and Ca show noticeable differences in median values across classes. This suggests that these predictors may be useful for classification. Several predictors contain extreme values beyond the whiskers, indicating possible outliers.
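To put a number on the visual separation, one option is a nonparametric Kruskal-Wallis test per predictor; the sketch below is an illustrative addition, chosen because the test is robust to the skewness and outliers noted above.
# Hedged sketch: Kruskal-Wallis test of whether each predictor's
# distribution differs across the six glass types.
kw_p <- sapply(predictors, function(p) kruskal.test(Glass[[p]] ~ Glass$Type)$p.value)
round(sort(kw_p), 4)  # smaller p-values suggest stronger class separation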
par(mfrow = c(1,1))
# Scatterplot matrix
corr_matrix <- cor(Glass[, predictors])
round(corr_matrix, 3)
## RI Na Mg Al Si K Ca Ba Fe
## RI 1.000 -0.192 -0.122 -0.407 -0.542 -0.290 0.810 0.000 0.143
## Na -0.192 1.000 -0.274 0.157 -0.070 -0.266 -0.275 0.327 -0.241
## Mg -0.122 -0.274 1.000 -0.482 -0.166 0.005 -0.444 -0.492 0.083
## Al -0.407 0.157 -0.482 1.000 -0.006 0.326 -0.260 0.479 -0.074
## Si -0.542 -0.070 -0.166 -0.006 1.000 -0.193 -0.209 -0.102 -0.094
## K -0.290 -0.266 0.005 0.326 -0.193 1.000 -0.318 -0.043 -0.008
## Ca 0.810 -0.275 -0.444 -0.260 -0.209 -0.318 1.000 -0.113 0.125
## Ba 0.000 0.327 -0.492 0.479 -0.102 -0.043 -0.113 1.000 -0.059
## Fe 0.143 -0.241 0.083 -0.074 -0.094 -0.008 0.125 -0.059 1.000
corrplot(corr_matrix,
method = "color",
type = "lower",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
tl.srt = 45,
col = colorRampPalette(c("#F44336","white","#2196F3"))(200),
title = "Correlation Matrix of Predictors",
mar = c(0,0,2,0))
The correlation matrix reveals the relationships among predictors. Several pairs show strong positive or negative correlation, indicating redundancy. The strongest correlation occurs between RI and Ca (r = 0.81), indicating that higher calcium levels are strongly associated with a higher refractive index. RI also shows a moderate negative relationship with Si, suggesting that glass with more silicon tends to have a lower refractive index. Most other variables have weak correlations, meaning they vary largely independently. These results suggest that Ca and RI provide overlapping information, which could be an important factor for model building.
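One way to act on this redundancy is caret's findCorrelation(), which flags predictors whose pairwise correlations exceed a cutoff; the 0.75 threshold below is an illustrative choice, not part of the original analysis.
# Hedged sketch: flag candidates for removal before modeling.
high_corr <- findCorrelation(corr_matrix, cutoff = 0.75, names = TRUE)
high_corr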
#Top Correlations by absolute value
corr_long <- as.data.frame(as.table(corr_matrix))
corr_long <- corr_long[corr_long$Var1 != corr_long$Var2, ]
corr_long <- corr_long[!duplicated(t(apply(corr_long[,1:2], 1, sort))), ]
corr_long$AbsFreq <- abs(corr_long$Freq)
head(corr_long[order(-corr_long$AbsFreq), ], 10)
## Var1 Var2 Freq AbsFreq
## 7 Ca RI 0.8104027 0.8104027
## 5 Si RI -0.5420522 0.5420522
## 26 Ba Mg -0.4922621 0.4922621
## 22 Al Mg -0.4817985 0.4817985
## 35 Ba Al 0.4794039 0.4794039
## 25 Ca Mg -0.4437500 0.4437500
## 4 Al RI -0.4073260 0.4073260
## 17 Ba Na 0.3266029 0.3266029
## 33 K Al 0.3259584 0.3259584
## 52 Ca K -0.3178362 0.3178362
# Outlier Detection (Z-score)
z_scores <- scale(Glass[,predictors])
outlier_idx <- abs(z_scores) > 3
outlier_counts <- colSums(outlier_idx)
print(outlier_counts)
## RI Na Mg Al Si K Ca Ba Fe
## 3 2 0 3 6 3 7 6 3
outlier_report <- which(outlier_idx, arr.ind = TRUE)
colnames(outlier_report) <- c("Row", "Predictor_Col")
outlier_report <- as.data.frame(outlier_report)
outlier_report$Predictor <- predictors[outlier_report$Predictor_Col]
outlier_report$Value <- mapply(function(r, p) Glass[r,p],
outlier_report$Row, outlier_report$Predictor)
outlier_report$ZScore <- mapply(function(r, c) z_scores[r,c],
outlier_report$Row, outlier_report$Predictor_Col)
print(outlier_report[order(outlier_report$Predictor), c("Row", "Predictor", "Value", "ZScore")])
## Row Predictor Value ZScore
## X164 164 Al 3.50000 4.116199
## X172 172 Al 3.04000 3.194854
## X173 173 Al 3.02000 3.154795
## X107.4 107 Ba 3.15000 5.983182
## X164.2 164 Ba 2.20000 4.072556
## X190 190 Ba 1.68000 3.026740
## X204 204 Ba 1.71000 3.087075
## X208 208 Ba 2.88000 5.440162
## X214 214 Ba 1.67000 3.006628
## X106 106 Ca 13.24000 3.009540
## X107.3 107 Ca 13.30000 3.051700
## X108.2 108 Ca 16.19000 5.082401
## X111 111 Ca 14.68000 4.021377
## X112 112 Ca 14.96000 4.218124
## X113.1 113 Ca 14.40000 3.824631
## X132 132 Ca 13.44000 3.150073
## X146 146 Fe 0.35000 3.006923
## X163 163 Fe 0.37000 3.212180
## X175 175 Fe 0.51000 4.648981
## X172.1 172 K 6.21000 8.759606
## X173.1 173 K 6.21000 8.759606
## X202.1 202 K 2.70000 3.377754
## X107.1 107 Na 10.73000 -3.279254
## X185 185 Na 17.38000 4.864232
## X107 107 RI 1.53125 4.242726
## X108 108 RI 1.53393 5.125215
## X113 113 RI 1.52777 3.096807
## X107.2 107 Si 69.81000 -3.667872
## X108.1 108 Si 70.16000 -3.215994
## X164.1 164 Si 69.89000 -3.564585
## X185.1 185 Si 75.41000 3.562172
## X189 189 Si 70.26000 -3.086886
## X202 202 Si 75.18000 3.265224
par(mfrow = c(3,3), mar = c(2,2,2,1))
for(i in seq_along(predictors)){
pred <- predictors[i]
z_col <- abs(z_scores[, i])
is_out <- z_col > 3
plot(seq_len(nrow(Glass)), Glass[[pred]],
col = ifelse(is_out, "red", "steelblue"),
pch = ifelse(is_out, 17, 16),
cex = ifelse(is_out, 1.2, 0.5),
main = paste0(pred, " (", sum(is_out), " outlier(s))"),
xlab = "Observation Index",
ylab = pred)
}
Outliers were detected using Z-scores, with observations beyond ±3 standard deviations flagged. Ca, Si, and Ba contained the highest numbers of outliers, indicating considerable variation in these variables. K had the most extreme individual outliers, with Z-scores above 8, while Si showed both high and low outliers. These outliers may represent chemically distinct glass categories.
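Because the mean and standard deviation used above are themselves inflated by the very outliers they flag, a robust cross-check with median/MAD scaling may be worthwhile. The sketch below is an illustrative complement, not part of the original analysis.
# Hedged sketch: MAD-based robust Z-scores. Predictors that are mostly
# zeros (e.g. Ba, Fe) have MAD = 0, so they are skipped as NA here.
robust_z <- sapply(Glass[, predictors], function(x) {
  m <- mad(x)
  if (m == 0) rep(NA_real_, length(x)) else (x - median(x)) / m
})
colSums(abs(robust_z) > 3, na.rm = TRUE)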
skew_vals <- sapply(Glass[, predictors], skewness)
kurt_vals <- sapply(Glass[, predictors], kurtosis)
skew_sorted <- sort(skew_vals, decreasing = TRUE)
print(round(skew_sorted, 4))
## K Ba Ca Fe RI Al Na Si Mg
## 6.5056 3.3924 2.0327 1.7420 1.6140 0.9009 0.4510 -0.7253 -1.1445
skew_df <- data.frame(
Predictor = names(skew_vals),
Skewness = round(skew_vals, 4),
Kurtosis = round(kurt_vals, 4),
Severity = ifelse(abs(skew_vals) > 1, "High",
ifelse(abs(skew_vals) > 0.5, "Moderate", "Low"))
)
print(skew_df[order(-abs(skew_df$Skewness)), ])
## Predictor Skewness Kurtosis Severity
## K K 6.5056 56.3923 High
## Ba Ba 3.3924 15.2221 High
## Ca Ca 2.0327 9.4990 High
## Fe Fe 1.7420 5.5723 High
## RI RI 1.6140 7.7894 High
## Mg Mg -1.1445 2.5713 High
## Al Al 0.9009 4.9848 Moderate
## Si Si -0.7253 5.8711 Moderate
## Na Na 0.4510 5.9535 Low
barplot(skew_sorted,
main = "Skewness of Predictor Variables",
xlab = "Predictor",
ylab = "Skewness",
col = ifelse(abs(skew_sorted) > 1, "firebrick",
ifelse(abs(skew_sorted) > 0.5, "darkorange", "forestgreen")),
las = 2,
border = "white",
ylim = c(min(skew_sorted) - 0.5, max(skew_sorted) + 0.5))
legend("topright",
       legend = c("|skew| > 1 (High)", "|skew| > 0.5 (Moderate)", "|skew| <= 0.5 (Low)"),
       fill = c("firebrick", "darkorange", "forestgreen"), cex = 0.8)
Skewness values indicate that several predictors are moderately to highly skewed. K shows extreme positive skewness and very high kurtosis, indicating that most samples have low K values while a few contain large concentrations; the same pattern holds for Ba, Ca, and Fe. Mg shows strong negative skewness, while Na has low skewness and a roughly symmetric distribution.
skewed_preds <- c("K", "Ba", "Ca", "Fe")
par(mfrow = c(4,3), mar = c(2,2,2,1))
for(pred in skewed_preds){
orig <- Glass[[pred]]
log_t <- log1p(orig)
sqrt_t <- sqrt(orig)
skews <- c(round(skewness(orig), 2),
round(skewness(log_t), 2),
round(skewness(sqrt_t), 2))
hist(orig, breaks = 20, col = "steelblue", border = "white",
main = paste0(pred, ": Original (skew=", skews[1], ")"),
xlab = pred)
hist(log_t, breaks = 20, col = "forestgreen", border = "white",
main = paste0(pred, ": log(x+1) (skew=", skews[2], ")"),
xlab = paste("log(", pred, "+1)"))
hist(sqrt_t, breaks = 20, col = "darkorange", border = "white",
main = paste0(pred, ": sqrt(x) (skew=", skews[3], ")"),
xlab = paste("sqrt(", pred, ")"))
}
Because K, Ba, Ca, and Fe are highly right-skewed, transformations were explored. Log and square-root transformations reduce skewness by compressing large values. After transformation, the distributions become more symmetric, which can improve model training by reducing the influence of extreme observations.
par(mfrow = c(1,1))
Glass_pos <- Glass[, predictors]
Glass_pos$K <- Glass_pos$K + 0.001
Glass_pos$Ba <- Glass_pos$Ba + 0.001
Glass_pos$Fe <- Glass_pos$Fe + 0.001
Glass_pos$Mg <- Glass_pos$Mg + 0.001
pp_boxcox <- preProcess(Glass_pos, method = "BoxCox")
Glass_bc <- predict(pp_boxcox, Glass_pos)
skew_before <- round(sapply(Glass[, predictors], skewness), 3)
skew_after <- round(sapply(Glass_bc, skewness), 3)
skew_comparison <- data.frame(
Predictor = predictors,
Skew_Before = skew_before,
Skew_After = skew_after,
Improvement = round(abs(skew_before) - abs(skew_after), 3)
)
print(skew_comparison[order(-abs(skew_comparison$Skew_Before)), ])
## Predictor Skew_Before Skew_After Improvement
## K K 6.506 0.127 6.379
## Ba Ba 3.392 1.688 1.704
## Ca Ca 2.033 -0.195 1.838
## Fe Fe 1.742 0.743 0.999
## RI RI 1.614 1.577 0.037
## Mg Mg -1.144 -1.308 -0.164
## Al Al 0.901 0.092 0.809
## Si Si -0.725 -0.655 0.070
## Na Na 0.451 0.034 0.417
Box-Cox transformations show improvement in several predictors, suggesting the transformed predictors may improve classification performance. Most notably, K, Ca, Ba, and Fe showed the largest improvements, demonstrating the transformation's effectiveness for strongly right-skewed features.
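For reference, the estimated lambda values can be pulled out of the preProcess object; this sketch assumes caret's convention of storing the per-predictor BoxCoxTrans objects in the object's $bc element.
# Hedged sketch: inspect the Box-Cox lambda chosen for each predictor
# (NA means no suitable lambda was found).
sapply(pp_boxcox$bc, function(x) x$lambda)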
pp_yj <- preProcess(Glass[, predictors], method = "YeoJohnson")
Glass_yj <- predict(pp_yj, Glass[, predictors])
skew_yj <- round(sapply(Glass_yj, skewness), 3)
print(data.frame(Predictor = predictors, Skew_Raw = skew_before, Skew_YJ = skew_yj))
## Predictor Skew_Raw Skew_YJ
## RI RI 1.614 1.614
## Na Na 0.451 -0.009
## Mg Mg -1.144 -0.883
## Al Al 0.901 0.000
## Si Si -0.725 -0.725
## K K 6.506 -0.071
## Ca Ca 2.033 -0.208
## Ba Ba 3.392 3.392
## Fe Fe 1.742 1.742
The Yeo-Johnson transformation produced values similar to Box-Cox, but it handles zero and negative values directly, without the small offset added above. It almost completely corrected the skew for K, Ca, Al, and Na. RI, Si, Ba, and Fe were left unchanged, likely because no suitable lambda could be estimated for them.
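A natural next step, sketched here as an illustrative extension rather than part of the original analysis, is to chain Yeo-Johnson with centering, scaling, and a spatial-sign transform to blunt the outliers that remain in Ba and Fe; caret orders these operations automatically.
# Hedged sketch: combined preprocessing pipeline (not tuned).
pp_full <- preProcess(Glass[, predictors],
                      method = c("YeoJohnson", "center", "scale", "spatialSign"))
Glass_full <- predict(pp_full, Glass[, predictors])
round(sapply(Glass_full, skewness), 3)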
set.seed(231)
sigDist <- sigest(Type ~ ., data = Glass, frac = 1)
sigDist
## 90% 50% 10%
## 0.03407935 0.11297847 0.62767315
svmTuneGrid <- data.frame(
  sigma = as.vector(sigDist)[1],
  C = 2^(-2:10))
svmTuneGrid
## sigma C
## 1 0.03407935 0.25
## 2 0.03407935 0.50
## 3 0.03407935 1.00
## 4 0.03407935 2.00
## 5 0.03407935 4.00
## 6 0.03407935 8.00
## 7 0.03407935 16.00
## 8 0.03407935 32.00
## 9 0.03407935 64.00
## 10 0.03407935 128.00
## 11 0.03407935 256.00
## 12 0.03407935 512.00
## 13 0.03407935 1024.00
set.seed(1056)
svmFit <- train(Type ~ .,
data = Glass,
method = "svmRadial",
preProc = c("center", "scale"),
tuneGrid = svmTuneGrid,
trControl = trainControl(
method = "repeatedcv",
repeats = 5))
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 214 samples
## 9 predictor
## 6 classes: '1', '2', '3', '5', '6', '7'
##
## Pre-processing: centered (9), scaled (9)
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 194, 191, 194, 192, 193, 194, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.5462532 0.3286847
## 0.50 0.5721161 0.3689913
## 1.00 0.6364625 0.4706664
## 2.00 0.6832063 0.5506030
## 4.00 0.7028410 0.5801977
## 8.00 0.6980384 0.5744221
## 16.00 0.7015910 0.5810250
## 32.00 0.7014216 0.5834227
## 64.00 0.7121792 0.6028075
## 128.00 0.6985407 0.5863168
## 256.00 0.7029157 0.5932771
## 512.00 0.7084049 0.6017276
## 1024.00 0.7059784 0.5981677
##
## Tuning parameter 'sigma' was held constant at a value of 0.03407935
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.03407935 and C = 64.
svmFit$bestTune
## sigma C
## 9 0.03407935 64
max(svmFit$results$Accuracy)
## [1] 0.7121792
svmFit$results
## sigma C Accuracy Kappa AccuracySD KappaSD
## 1 0.03407935 0.25 0.5462532 0.3286847 0.06892510 0.1015915
## 2 0.03407935 0.50 0.5721161 0.3689913 0.10043982 0.1503148
## 3 0.03407935 1.00 0.6364625 0.4706664 0.09011066 0.1339674
## 4 0.03407935 2.00 0.6832063 0.5506030 0.08655001 0.1236686
## 5 0.03407935 4.00 0.7028410 0.5801977 0.08744430 0.1259908
## 6 0.03407935 8.00 0.6980384 0.5744221 0.08565013 0.1237030
## 7 0.03407935 16.00 0.7015910 0.5810250 0.08369320 0.1180196
## 8 0.03407935 32.00 0.7014216 0.5834227 0.08904774 0.1254737
## 9 0.03407935 64.00 0.7121792 0.6028075 0.09893126 0.1392120
## 10 0.03407935 128.00 0.6985407 0.5863168 0.09909836 0.1366574
## 11 0.03407935 256.00 0.7029157 0.5932771 0.10532758 0.1450300
## 12 0.03407935 512.00 0.7084049 0.6017276 0.10654373 0.1466388
## 13 0.03407935 1024.00 0.7059784 0.5981677 0.10801890 0.1494817
A small sigma combined with a fairly large cost (C = 64) gives the best cross-validated accuracy (about 0.712), although performance plateaus once C reaches roughly 4.
plot(svmFit, scales = list(x = list(log = 2)))
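As a sanity check before the resubstitution matrix below, confusionMatrix() applied directly to a train object aggregates the held-out fold predictions, giving a resampling-based view of per-class performance.
# Hedged sketch: cross-validated confusion matrix (cell percentages
# averaged over the repeated CV folds).
confusionMatrix(svmFit)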
pred_train <- predict(svmFit, Glass)
confusionMatrix(pred_train, Glass$Type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 5 6 7
## 1 65 17 3 0 0 1
## 2 3 58 3 0 0 0
## 3 2 1 11 0 0 0
## 5 0 0 0 13 0 0
## 6 0 0 0 0 9 0
## 7 0 0 0 0 0 28
##
## Overall Statistics
##
## Accuracy : 0.8598
## 95% CI : (0.806, 0.9034)
## No Information Rate : 0.3551
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.809
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 5 Class: 6 Class: 7
## Sensitivity 0.9286 0.7632 0.64706 1.00000 1.00000 0.9655
## Specificity 0.8542 0.9565 0.98477 1.00000 1.00000 1.0000
## Pos Pred Value 0.7558 0.9062 0.78571 1.00000 1.00000 1.0000
## Neg Pred Value 0.9609 0.8800 0.97000 1.00000 1.00000 0.9946
## Prevalence 0.3271 0.3551 0.07944 0.06075 0.04206 0.1355
## Detection Rate 0.3037 0.2710 0.05140 0.06075 0.04206 0.1308
## Detection Prevalence 0.4019 0.2991 0.06542 0.06075 0.04206 0.1308
## Balanced Accuracy 0.8914 0.8598 0.81592 1.00000 1.00000 0.9828
The final SVM achieved a training-set accuracy of approximately 0.86, which is optimistic relative to the cross-validated estimate of about 0.71 because it is computed on the same data used for fitting. Class 3 has the lowest sensitivity, while classes 5 and 6 are classified perfectly and class 7 nearly so. Balanced accuracy is high across all classes, showing that the model handles the imbalanced classes fairly well.
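Since the matrix above is computed on the data the model was fit to, a held-out estimate would be more honest. The sketch below uses an 80/20 stratified split; the seed and split proportion are illustrative assumptions, not part of the original analysis.
# Hedged sketch: stratified holdout evaluation (seed and proportion
# are assumptions chosen for illustration).
set.seed(123)
train_idx <- createDataPartition(Glass$Type, p = 0.8, list = FALSE)
svmHold <- train(Type ~ ., data = Glass[train_idx, ],
                 method = "svmRadial",
                 preProc = c("center", "scale"),
                 tuneGrid = svmTuneGrid,
                 trControl = trainControl(method = "repeatedcv", repeats = 5))
confusionMatrix(predict(svmHold, Glass[-train_idx, ]), Glass$Type[-train_idx])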