# install.packages('mlbench')
library(mlbench)
data(Glass)
str(Glass)
## 'data.frame': 214 obs. of 10 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
# Histograms
par(mfrow=c(3,3))
hist(Glass$RI)
hist(Glass$Na)
hist(Glass$Mg)
hist(Glass$Al)
hist(Glass$Si)
hist(Glass$K)
hist(Glass$Ca)
hist(Glass$Ba)
hist(Glass$Fe)
# Boxplots
boxplot(Glass[1], xlab = "RI")
boxplot(Glass[2:9], xlab = "Elements")
Outliers are present in all predictors except Mg. K, Ba, and Fe show strong right skew; Al and Ca are moderately right-skewed, and RI is slightly right-skewed. Si appears approximately symmetric, while Mg displays a bimodal distribution. Skewness is quantified in the sketch below.
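To put numbers on these visual impressions, the sample skewness of each predictor can be computed, for example with skewness() from the e1071 package (an extra dependency not used elsewhere in this write-up):
# install.packages('e1071')
library(e1071)
# Sample skewness per predictor; values well above 0 indicate right skew
apply(Glass[, 1:9], 2, skewness)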
# Resolve skewness via Box-Cox transformation (K, Ba, Fe)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
KBoxCox = BoxCoxTrans(Glass$K)
BaBoxCox = BoxCoxTrans(Glass$Ba)
FeBoxCox = BoxCoxTrans(Glass$Fe)
KBoxCox$lambda
## [1] NA
BaBoxCox$lambda
## [1] NA
FeBoxCox$lambda
## [1] NA
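The NA estimates are expected: the Box-Cox family is defined only for strictly positive data, and a quick check (a minimal sketch using base R) shows how many zero values each of these predictors contains.
# Count the zero values that prevent a Box-Cox lambda estimate
colSums(Glass[, c("K", "Ba", "Fe")] == 0)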
# Center & scale
GlassCS = scale(Glass[1:9], center = TRUE, scale = TRUE)
colMeans(GlassCS)
## RI Na Mg Al Si
## 2.637535e-13 -4.240637e-15 -4.233374e-16 -7.896072e-16 9.426520e-16
## K Ca Ba Fe
## 3.413676e-16 -3.134824e-16 -2.490220e-16 3.206158e-16
covariance = cov(GlassCS)
diag(covariance)
## RI Na Mg Al Si K Ca Ba Fe
## 1 1 1 1 1 1 1 1 1
# Resolve outliers via spatial sign
GlassSS = spatialSign(GlassCS)  # project each centered, scaled observation onto the unit sphere
A Box-Cox transformation may improve skewness in the three strongly right-skewed predictors (K, Ba, Fe). However, the Box-Cox family is defined only for strictly positive data; because these predictors contain many zero values, BoxCoxTrans estimates λ as NA and leaves the original distributions essentially unchanged. All predictors were then centered and scaled to mean 0 and variance 1, and a spatial sign transformation was applied to lessen the influence of outliers. Overall, standardization and the spatial sign transformation are expected to improve the performance of the classification model.
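The same preprocessing can be bundled into a single caret preProcess object, which is convenient when identical steps must later be applied to new data. This is a minimal sketch using the caret package already loaded above; the object names glassPP and glassTrans are illustrative.
# Combine centering, scaling, and the spatial sign projection in one object
glassPP <- preProcess(Glass[, 1:9], method = c("center", "scale", "spatialSign"))
glassTrans <- predict(glassPP, Glass[, 1:9])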
# install.packages('kernlab')
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
set.seed(231)
sigDist <- sigest(Type ~ ., data = Glass, frac = 1)
sigDist
## 90% 50% 10%
## 0.03407935 0.11297847 0.62767315
svmTuneGrid <- data.frame(sigma = as.vector(sigDist)[1], C = 2^(-2:10))
svmTuneGrid
## sigma C
## 1 0.03407935 0.25
## 2 0.03407935 0.50
## 3 0.03407935 1.00
## 4 0.03407935 2.00
## 5 0.03407935 4.00
## 6 0.03407935 8.00
## 7 0.03407935 16.00
## 8 0.03407935 32.00
## 9 0.03407935 64.00
## 10 0.03407935 128.00
## 11 0.03407935 256.00
## 12 0.03407935 512.00
## 13 0.03407935 1024.00
# install.packages('AppliedPredictiveModeling')
library(AppliedPredictiveModeling)
library(caret)
set.seed(1056)
svmFit <- train(Type ~ ., data = Glass, method = "svmRadial",
                preProc = c("center", "scale"), tuneGrid = svmTuneGrid,
                trControl = trainControl(method = "repeatedcv", repeats = 5))
plot(svmFit, scales = list(x = list(log = 2)))
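Once the resampling finishes, the tuned model can be inspected directly; a minimal sketch (no output shown):
# Best (sigma, C) pair chosen by repeated cross-validation
svmFit$bestTune
# Resampled accuracy and kappa for every candidate value of C
svmFit$results
# Cross-validated confusion matrix for the final model
confusionMatrix(svmFit)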