Load packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(NeuralNetTools)
## Warning: package 'NeuralNetTools' was built under R version 4.1.2
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(mlr3pipelines)
library(mlr3learners)
## Warning: package 'mlr3learners' was built under R version 4.1.2
## Loading required package: mlr3
## Warning: package 'mlr3' was built under R version 4.1.2
library(mlr3tuning)
## Warning: package 'mlr3tuning' was built under R version 4.1.2
## Loading required package: paradox
## Warning: package 'paradox' was built under R version 4.1.2
library(mlr3)
library(paradox)
library(igraph)
## Warning: package 'igraph' was built under R version 4.1.2
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
Read CSV
# Load the sonar data set; the path is relative to the working directory.
sonar <- read.csv(file = "Lab07/Lab07_Eastburn/Sonar.csv")
Class to factor
# Recode the target: "M" becomes 1 and every other label becomes 0, then
# store the result as a factor so it can be used as a classification target.
sonar <- sonar %>%
  mutate(Class = as.factor(ifelse(Class == "M", 1, 0)))
Create task
# Wrap the data frame in a classification task with `Class` as the target.
task_sonar <- TaskClassif$new(
  id = "sonar_class",
  backend = sonar,
  target = "Class"
)
PCA
# Preprocessing operators: PCA, followed by a variance filter configured
# with filter.frac = 0.4.
pca <- po("pca")
filter <- po(
  "filter",
  filter = mlr3filters::flt("variance"),
  filter.frac = 0.4
)

# Chain the two operators into a preprocessing-only graph.
graph <- pca %>>% filter
Transformed data
# Train the preprocessing graph on the task and print the data of its first
# output, i.e. the principal components that remain after the filter.
graph$train(task_sonar)[[1]]$data()
## Class PC1 PC2 PC3 PC4 PC5
## 1: 0 -0.57609252 -0.3193929 -0.387290866 0.37800946 0.24378042
## 2: 0 0.97066736 -1.0180392 0.834155860 -0.10742341 -0.28607040
## 3: 0 0.32588679 -0.8742086 0.246015217 -0.86303144 -0.06910951
## 4: 0 -0.85203524 -0.6908625 -0.013390104 0.11025671 0.55405014
## 5: 0 0.06475555 -0.2229678 0.638601229 -0.14043222 -0.13585413
## ---
## 204: 1 -0.04178149 1.0316439 0.265669389 -0.02151569 0.01543950
## 205: 1 -0.21914886 1.1538075 0.203619433 -0.04583609 -0.18327157
## 206: 1 -0.29773311 1.1518599 0.250095569 0.05770218 -0.16356592
## 207: 1 0.04576078 0.9959696 0.199677073 0.06651565 0.14722654
## 208: 1 0.05111456 0.9125263 0.008909656 0.04438699 0.02791537
## PC6 PC7 PC8 PC9 PC10 PC11
## 1: 0.15624322 -0.42773627 -0.07483224 -0.024347304 -0.31805393 0.11554577
## 2: -0.17868417 -0.26033173 -0.38560046 -0.004029693 0.07252885 0.19446114
## 3: 0.30083262 -0.41280706 -0.07532083 -0.010855279 0.43855925 0.15405372
## 4: -0.39796815 0.49256917 0.22045967 0.537002842 0.43679464 -0.33688182
## 5: -0.05288075 -0.38655829 -0.06654869 -0.117005592 0.41927727 0.14558550
## ---
## 204: -0.09730398 0.00378388 -0.17792649 -0.027907579 0.26417924 -0.09082759
## 205: -0.01277022 0.07850652 -0.26483077 0.070749216 0.30329559 -0.03715553
## 206: -0.01527529 0.04579153 -0.22590366 0.072129783 0.31304369 -0.18514363
## 207: 0.08460050 0.03103798 -0.06744400 0.025770859 0.17111974 -0.26102334
## 208: 0.02994945 0.03310736 0.02086840 -0.118991791 0.14413420 -0.19187219
## PC12 PC13 PC14 PC15 PC16 PC17
## 1: 0.19140774 -0.2356098019 0.17681388 0.22922764 0.04534951 0.09690196
## 2: 0.18852909 -0.0550196869 0.09983914 -0.13093877 -0.06903809 -0.05301938
## 3: 0.01702367 -0.0766250011 -0.29240080 -0.15248959 -0.01643511 -0.32268681
## 4: -0.24195040 0.0749765750 0.10852714 0.13255416 0.00481413 -0.10474015
## 5: 0.39449731 0.1861964629 -0.02214001 0.12031826 0.22053886 -0.05373408
## ---
## 204: 0.13519176 -0.0004123724 -0.09017845 0.17297217 0.03139786 0.04973799
## 205: 0.11266160 -0.0429713650 0.01903037 -0.01867625 -0.01434417 0.01398599
## 206: 0.11255586 -0.0485276815 -0.03988695 0.08318128 0.01267779 -0.01091284
## 207: 0.13640168 -0.1304645385 -0.18682282 -0.06272755 -0.09487276 -0.06794386
## 208: 0.12282139 -0.1823935582 -0.20947555 -0.17146532 -0.16300751 -0.10407182
## PC18 PC19 PC20 PC21 PC22
## 1: -0.009306475 0.0800119288 0.078829365 -0.108573434 -0.021275222
## 2: -0.064453681 0.1259679414 -0.175313764 0.015216272 -0.106034246
## 3: 0.156025778 -0.0932083348 0.085140164 0.064640134 0.014502654
## 4: -0.144403316 -0.1525580433 -0.005591952 0.072861828 0.020617599
## 5: 0.158596060 0.1577441540 0.059632708 0.059224937 0.053927277
## ---
## 204: 0.084613761 0.0098530919 0.023705645 -0.072467542 0.024198927
## 205: 0.055174458 0.0017489805 -0.002011512 -0.030586020 -0.003070949
## 206: 0.069247335 -0.0009737085 -0.040734865 -0.076561611 0.020755260
## 207: 0.046964128 -0.0600639129 0.063445419 0.004248056 -0.008983174
## 208: -0.067024830 -0.0725484188 0.121567743 0.090500002 0.020271051
## PC23 PC24
## 1: -0.02260075 0.07081699
## 2: -0.08091657 0.04644319
## 3: 0.09943128 0.17545116
## 4: 0.01380689 0.08069620
## 5: 0.10177508 -0.12774204
## ---
## 204: -0.08355416 0.11026363
## 205: -0.05545056 -0.02863766
## 206: -0.04212355 0.01063477
## 207: -0.09803539 0.03710139
## 208: -0.04994241 0.01746676
Create learner
# Neural-network classifier with 6 hidden units, at most 500 iterations and
# a weight limit of 1500. The pipeline is scored with AUC further down, which
# needs class probabilities, so request them explicitly instead of relying
# on the learner's default predict type.
lrn_nn <- lrn(
  "classif.nnet",
  size = 6,
  maxit = 500,
  MaxNWts = 1500,
  predict_type = "prob"
)
Connect pipeline to learner and graph
# Rebuild the graph with the neural network appended after the two
# preprocessing steps, then draw it.
graph <- pca %>>% filter %>>% lrn_nn
plot(graph)
Train neural network
# Fit the whole pipeline (PCA, filter and network) on the full task.
graph$train(task_sonar)
## # weights: 157
## initial value 145.228091
## iter 10 value 72.605135
## iter 20 value 14.097459
## iter 30 value 4.763146
## iter 40 value 4.519882
## iter 50 value 4.497804
## iter 60 value 4.221372
## iter 70 value 4.086441
## iter 80 value 3.383914
## iter 90 value 3.365346
## iter 100 value 3.364941
## iter 110 value 3.355866
## iter 120 value 2.776052
## iter 130 value 2.773813
## iter 140 value 2.770312
## iter 150 value 2.761019
## iter 160 value 1.910216
## iter 170 value 1.909825
## iter 180 value 1.909642
## iter 190 value 1.909616
## final value 1.909606
## converged
## $classif.nnet.output
## NULL
Create new graphlearner
# Wrap the graph in a GraphLearner so it can be passed to resample() and
# AutoTuner like any other learner.
glrn <- GraphLearner$new(graph)
Cross Validate
# Fix the RNG state so the fold assignment and the network's random starting
# weights are the same on every run; without a seed the reported AUC changes
# from run to run.
set.seed(123)

# 5-fold cross-validation of the graph learner, keeping the fitted models.
resampling <- rsmp("cv", folds = 5)
resampling$instantiate(task_sonar)
measure <- msr("classif.auc")
rr <- resample(task_sonar, glrn, resampling, store_models = TRUE)
## INFO [17:02:00.610] [mlr3] Applying learner 'pca.variance.classif.nnet' on task 'sonar_class' (iter 3/5)
## # weights: 157
## initial value 114.933197
## iter 10 value 41.850504
## iter 20 value 2.378639
## iter 30 value 0.051092
## iter 40 value 0.002057
## iter 50 value 0.000336
## final value 0.000069
## converged
## INFO [17:02:00.872] [mlr3] Applying learner 'pca.variance.classif.nnet' on task 'sonar_class' (iter 1/5)
## # weights: 157
## initial value 119.256138
## iter 10 value 60.047940
## iter 20 value 14.254191
## iter 30 value 0.984352
## iter 40 value 0.005461
## iter 50 value 0.000132
## final value 0.000091
## converged
## INFO [17:02:01.046] [mlr3] Applying learner 'pca.variance.classif.nnet' on task 'sonar_class' (iter 4/5)
## # weights: 157
## initial value 145.182336
## iter 10 value 61.309746
## iter 20 value 6.156296
## iter 30 value 0.028356
## iter 40 value 0.000822
## final value 0.000083
## converged
## INFO [17:02:01.210] [mlr3] Applying learner 'pca.variance.classif.nnet' on task 'sonar_class' (iter 5/5)
## # weights: 157
## initial value 115.563768
## iter 10 value 55.199531
## iter 20 value 5.917959
## iter 30 value 4.678160
## iter 40 value 4.603091
## iter 50 value 4.572153
## iter 60 value 4.530350
## iter 70 value 4.429892
## iter 80 value 4.389668
## iter 90 value 4.343482
## iter 100 value 4.173348
## iter 110 value 4.123947
## iter 120 value 4.079041
## iter 130 value 4.061644
## iter 140 value 4.022738
## iter 150 value 4.006895
## iter 160 value 3.811688
## iter 170 value 3.789092
## iter 180 value 3.701669
## iter 190 value 3.494150
## iter 200 value 3.375171
## iter 210 value 3.264555
## iter 220 value 3.216994
## iter 230 value 3.113340
## iter 240 value 3.049055
## iter 250 value 3.010534
## iter 260 value 2.880643
## iter 270 value 2.834491
## iter 280 value 2.552885
## iter 290 value 2.293533
## iter 300 value 2.102307
## iter 310 value 0.994084
## iter 320 value 0.223189
## iter 330 value 0.058476
## iter 340 value 0.024197
## iter 350 value 0.008759
## iter 360 value 0.007020
## iter 370 value 0.005950
## iter 380 value 0.003237
## iter 390 value 0.002103
## iter 400 value 0.001770
## iter 410 value 0.001479
## iter 420 value 0.001303
## iter 430 value 0.001165
## iter 440 value 0.001035
## iter 450 value 0.000798
## iter 460 value 0.000778
## iter 470 value 0.000718
## iter 480 value 0.000591
## iter 490 value 0.000512
## iter 500 value 0.000379
## final value 0.000379
## stopped after 500 iterations
## INFO [17:02:01.513] [mlr3] Applying learner 'pca.variance.classif.nnet' on task 'sonar_class' (iter 2/5)
## # weights: 157
## initial value 133.811140
## iter 10 value 44.291019
## iter 20 value 9.831256
## iter 30 value 4.509956
## iter 40 value 4.203635
## iter 50 value 4.177463
## iter 60 value 3.941032
## iter 70 value 2.845109
## iter 80 value 1.970280
## iter 90 value 1.928737
## iter 100 value 1.922016
## iter 110 value 1.914690
## iter 120 value 1.911781
## iter 130 value 1.911224
## iter 140 value 1.909550
## iter 150 value 1.905501
## iter 160 value 1.797682
## iter 170 value 0.919090
## iter 180 value 0.021473
## iter 190 value 0.002332
## iter 200 value 0.001106
## iter 210 value 0.000767
## iter 220 value 0.000518
## iter 230 value 0.000359
## iter 240 value 0.000257
## iter 250 value 0.000223
## iter 260 value 0.000141
## iter 270 value 0.000129
## iter 280 value 0.000129
## iter 290 value 0.000129
## iter 300 value 0.000128
## final value 0.000092
## converged
# AUC aggregated across the resampling iterations.
rr$aggregate(measure)
## classif.auc
## 0.8062249
Tune Model
# Search space for tuning: the fraction of features kept by the variance
# filter and the number of hidden units in the network. Built with ps() /
# p_dbl() / p_int() because the ParamDbl$new() / ParamInt$new() constructors
# were removed in paradox 1.0.0.
tune_ps <- ps(
  variance.filter.frac = p_dbl(lower = 0.05, upper = 0.95),
  classif.nnet.size = p_int(lower = 2, upper = 20)
)
tune_ps
## <ParamSet>
## id class lower upper nlevels default value
## 1: variance.filter.frac ParamDbl 0.05 0.95 Inf <NoDefault[3]>
## 2: classif.nnet.size ParamInt 2.00 20.00 19 <NoDefault[3]>
# Random search draws configurations at random and every network starts from
# random weights, so fix the seed to make the selected configuration and its
# AUC reproducible.
set.seed(123)

# Stop after 50 evaluated configurations.
evals <- trm("evals", n_evals = 50)
tuner <- tnr("random_search")
measure <- msr("classif.auc")

# Tune the graph learner over `tune_ps`, scoring each configuration by AUC
# on a holdout split, then fit the tuner on the task.
at_nn <- AutoTuner$new(
  learner = glrn,
  resampling = rsmp("holdout"),
  measure = measure,
  search_space = tune_ps,
  terminator = evals,
  tuner = tuner
)
at_nn$train(task_sonar)
Output
# Show the tuned learner together with the parameter values it was set to.
at_nn$learner
## <GraphLearner:pca.variance.classif.nnet>
## * Model: list
## * Parameters: variance.filter.frac=0.2542, classif.nnet.MaxNWts=1500,
## classif.nnet.maxit=500, classif.nnet.size=17
## * Packages: mlr3, mlr3pipelines
## * Predict Type: prob
## * Feature types: logical, integer, numeric, character, factor, ordered,
## POSIXct
## * Properties: featureless, hotstart_backward, hotstart_forward,
## importance, loglik, missings, multiclass, oob_error,
## selected_features, twoclass, weights
Result and AUC
# Best configuration found by the tuner and the AUC it achieved.
at_nn$tuning_result
## variance.filter.frac classif.nnet.size learner_param_vals x_domain
## 1: 0.2541689 17 <list[4]> <list[2]>
## classif.auc
## 1: 0.9194251
Pipeline
# Draw the pipeline. This plots the `graph` object built earlier, not the
# tuned learner stored in `at_nn`.
plot(graph)
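The pipeline starts with my input, the sonar CSV. A PCA transformation then creates new features from the original ones, and a variance filter with a fraction of 0.4 keeps the 24 principal components shown above. The classification learner (a neural network) is applied to those components, initially with 6 nodes in the hidden layer; this untuned pipeline had a cross-validated AUC of 0.81. Random-search tuning of the filter fraction and the hidden-layer size then selected a fraction of about 0.254 and 17 hidden nodes, with a holdout AUC of 0.919 in the run shown above. Because the search is random, the result varies between runs; the highest AUC I could achieve was 0.928.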