This material is for practising machine learning with the mlr package. If you want to learn more about it, please see Machine Learning with R, the tidyverse, and mlr (https://www.manning.com/books/machine-learning-with-r-the-tidyverse-and-mlr).

In this tutorial, a Discriminant Analysis classification model is built to predict three classes using the wine dataset from the HDclassif package. Let's get started.

Load required libraries and dataset

# Predicting wine classes 

library(HDclassif) # Wine data come from this package

library(tibble)

library(mlr) # Machine learning framework for R

data(wine, package = "HDclassif")

wine_tibble<-as_tibble(wine)

head(wine_tibble) # Look at first few rows 

Data cleaning

# Rename the wine_tibble data

names(wine_tibble) <- c("Class", "Alco", "Malic", "Ash", "Alk", "Mag",
                    "Phe", "Flav", "Non_flav", "Proan", "Col", "Hue",
                    "OD", "Prol")

head(wine_tibble)

This dataset has 14 columns and 178 rows (cases). Now let's reshape the data a little before modelling.

Data visualization using boxplot
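
Convert Class to a factor (mlr's classification task requires a factor target), gather the predictors into long format, and draw one boxplot panel per variable:

# Convert Class to factor 

wine_tibble$Class<-as.factor(wine_tibble$Class)

# Gather all columns, except Class

library(tidyr); library(dplyr)

wine1<- wine_tibble %>% gather(key="Variable", value="Values", -Class)

# Plotting the boxplot

library(ggplot2)

ggplot(wine1, aes(x=Class, y=Values)) + facet_wrap(~Variable, scales = "free_y") + geom_boxplot(aes(fill=Class)) + theme_bw()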

Building the Linear Discriminant Analysis model

# Create a task, learner and train the model

LDA_task<-makeClassifTask(data=wine_tibble, target = "Class")
Provided data is not a pure data.frame but from class tbl_df, hence it will be converted.
LDA_learner<-makeLearner("classif.lda")

train_LDA<-train(LDA_learner,LDA_task)
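
As an extra check that is not part of the original run, mlr's own predict() and performance() can score the wrapped model; a minimal sketch on the training data (so the accuracy will be optimistic):

pred_train<-predict(train_LDA, newdata = wine_tibble) # Predict with the wrapped mlr model

performance(pred_train, measures = list(mmce, acc)) # Misclassification error and accuracy on the training set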

Extract the underlying model from the trained learner

get_learner1<-getLearnerModel(train_LDA)

get_learner1
Call:
lda(f, data = getTaskData(.task, .subset))

Prior probabilities of groups:
        1         2         3 
0.3314607 0.3988764 0.2696629 

Group means:
      Alco    Malic      Ash      Alk      Mag      Phe      Flav Non_flav    Proan      Col       Hue       OD
1 13.74475 2.010678 2.455593 17.03729 106.3390 2.840169 2.9823729 0.290000 1.899322 5.528305 1.0620339 3.157797
2 12.27873 1.932676 2.244789 20.23803  94.5493 2.258873 2.0808451 0.363662 1.630282 3.086620 1.0562817 2.785352
3 13.15375 3.333750 2.437083 21.41667  99.3125 1.678750 0.7814583 0.447500 1.153542 7.396250 0.6827083 1.683542
       Prol
1 1115.7119
2  519.5070
3  629.8958

Coefficients of linear discriminants:
                  LD1           LD2
Alco     -0.403399781  0.8717930699
Malic     0.165254596  0.3053797325
Ash      -0.369075256  2.3458497486
Alk       0.154797889 -0.1463807654
Mag      -0.002163496 -0.0004627565
Phe       0.618052068 -0.0322128171
Flav     -1.661191235 -0.4919980543
Non_flav -1.495818440 -1.6309537953
Proan     0.134092628 -0.3070875776
Col       0.355055710  0.2532306865
Hue      -0.818036073 -1.5156344987
OD       -1.157559376  0.0511839665
Prol     -0.002691206  0.0028529846

Proportion of trace:
   LD1    LD2 
0.6875 0.3125 
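
The proportion of trace shows that the first discriminant function captures about 69% of the between-class variance, and the second the remaining 31%.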

Get discriminant function values

pred_LDA<-predict(get_learner1)

head(pred_LDA$x)
        LD1       LD2
1 -4.700244 1.9791383
2 -4.301958 1.1704129
3 -3.420720 1.4291014
4 -4.205754 4.0028715
5 -1.509982 0.4512239
6 -4.518689 3.2131376
Plotting the discriminant analysis functions

wine_tibble %>%
  mutate(LD1=pred_LDA$x[,1], LD2=pred_LDA$x[,2]) %>%
  ggplot(aes(x=LD1, y=LD2, col=Class)) +
  geom_point(size=2) +
  stat_ellipse() +
  theme_bw()
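
If the model separates the classes well, each class should form its own cluster in discriminant space, with only slight overlap between the ellipses; the near-perfect cross-validation results below are consistent with this.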

Cross-validating the Linear Discriminant Analysis

Kfold<-makeResampleDesc(method = "RepCV", folds=10, reps=5, stratify = TRUE) # 10-fold CV, repeated 5 times, stratified by class

LDA_kfold<-resample(LDA_learner,LDA_task,resampling = Kfold ,measures = list(mmce,acc))
Resampling: repeated cross-validation
Measures:             mmce      acc       
[Resample] iter 1:    0.0000000 1.0000000 
[Resample] iter 2:    0.0000000 1.0000000 
[Resample] iter 3:    0.0000000 1.0000000 
[Resample] iter 4:    0.0000000 1.0000000 
[Resample] iter 5:    0.0555556 0.9444444 
[Resample] iter 6:    0.0000000 1.0000000 
[Resample] iter 7:    0.0000000 1.0000000 
[Resample] iter 8:    0.0555556 0.9444444 
[Resample] iter 9:    0.0000000 1.0000000 
[Resample] iter 10:   0.0000000 1.0000000 
[Resample] iter 11:   0.0000000 1.0000000 
[Resample] iter 12:   0.0555556 0.9444444 
[Resample] iter 13:   0.0000000 1.0000000 
[Resample] iter 14:   0.0000000 1.0000000 
[Resample] iter 15:   0.0000000 1.0000000 
[Resample] iter 16:   0.0588235 0.9411765 
[Resample] iter 17:   0.0555556 0.9444444 
[Resample] iter 18:   0.0000000 1.0000000 
[Resample] iter 19:   0.0526316 0.9473684 
[Resample] iter 20:   0.0000000 1.0000000 
[Resample] iter 21:   0.0000000 1.0000000 
[Resample] iter 22:   0.0000000 1.0000000 
[Resample] iter 23:   0.0555556 0.9444444 
[Resample] iter 24:   0.0000000 1.0000000 
[Resample] iter 25:   0.0000000 1.0000000 
[Resample] iter 26:   0.0000000 1.0000000 
[Resample] iter 27:   0.0555556 0.9444444 
[Resample] iter 28:   0.0555556 0.9444444 
[Resample] iter 29:   0.0000000 1.0000000 
[Resample] iter 30:   0.0000000 1.0000000 
[Resample] iter 31:   0.0555556 0.9444444 
[Resample] iter 32:   0.0000000 1.0000000 
[Resample] iter 33:   0.0000000 1.0000000 
[Resample] iter 34:   0.0000000 1.0000000 
[Resample] iter 35:   0.0000000 1.0000000 
[Resample] iter 36:   0.0000000 1.0000000 
[Resample] iter 37:   0.0555556 0.9444444 
[Resample] iter 38:   0.0000000 1.0000000 
[Resample] iter 39:   0.0555556 0.9444444 
[Resample] iter 40:   0.0000000 1.0000000 
[Resample] iter 41:   0.0000000 1.0000000 
[Resample] iter 42:   0.0000000 1.0000000 
[Resample] iter 43:   0.0000000 1.0000000 
[Resample] iter 44:   0.0555556 0.9444444 
[Resample] iter 45:   0.0000000 1.0000000 
[Resample] iter 46:   0.0000000 1.0000000 
[Resample] iter 47:   0.0000000 1.0000000 
[Resample] iter 48:   0.0000000 1.0000000 
[Resample] iter 49:   0.0000000 1.0000000 
[Resample] iter 50:   0.0588235 0.9411765 


Aggregated Result: mmce.test.mean=0.0156278,acc.test.mean=0.9843722
LDA_kfold$aggr
mmce.test.mean  acc.test.mean 
    0.01562779     0.98437221 
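
To see where the few misclassifications fall, the resampled predictions can be passed to mlr's calculateConfusionMatrix(); this step is not in the original output, but a minimal sketch would be:

calculateConfusionMatrix(LDA_kfold$pred) # Confusion matrix aggregated over all CV iterations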

Similar to Linear Discriminant Analysis, we fit a Quadratic Discriminant Analysis

QDA_learner<-makeLearner("classif.qda") # Set the learner 

train_QDA<-train(QDA_learner,LDA_task) # Train the quadratic discriminant analysis 

QDA_check<-resample(QDA_learner, LDA_task, resampling = Kfold, measures = list(mmce, acc))
Resampling: repeated cross-validation
Measures:             mmce      acc       
[Resample] iter 1:    0.0000000 1.0000000 
[Resample] iter 2:    0.0000000 1.0000000 
[Resample] iter 3:    0.0000000 1.0000000 
[Resample] iter 4:    0.0555556 0.9444444 
[Resample] iter 5:    0.0000000 1.0000000 
[Resample] iter 6:    0.0588235 0.9411765 
[Resample] iter 7:    0.0000000 1.0000000 
[Resample] iter 8:    0.0000000 1.0000000 
[Resample] iter 9:    0.0000000 1.0000000 
[Resample] iter 10:   0.0000000 1.0000000 
[Resample] iter 11:   0.0000000 1.0000000 
[Resample] iter 12:   0.0555556 0.9444444 
[Resample] iter 13:   0.0000000 1.0000000 
[Resample] iter 14:   0.0000000 1.0000000 
[Resample] iter 15:   0.0000000 1.0000000 
[Resample] iter 16:   0.0000000 1.0000000 
[Resample] iter 17:   0.0000000 1.0000000 
[Resample] iter 18:   0.0000000 1.0000000 
[Resample] iter 19:   0.0000000 1.0000000 
[Resample] iter 20:   0.0555556 0.9444444 
[Resample] iter 21:   0.0000000 1.0000000 
[Resample] iter 22:   0.0000000 1.0000000 
[Resample] iter 23:   0.0000000 1.0000000 
[Resample] iter 24:   0.0000000 1.0000000 
[Resample] iter 25:   0.0555556 0.9444444 
[Resample] iter 26:   0.0000000 1.0000000 
[Resample] iter 27:   0.0000000 1.0000000 
[Resample] iter 28:   0.0000000 1.0000000 
[Resample] iter 29:   0.0000000 1.0000000 
[Resample] iter 30:   0.0000000 1.0000000 
[Resample] iter 31:   0.0000000 1.0000000 
[Resample] iter 32:   0.0000000 1.0000000 
[Resample] iter 33:   0.0000000 1.0000000 
[Resample] iter 34:   0.0555556 0.9444444 
[Resample] iter 35:   0.0000000 1.0000000 
[Resample] iter 36:   0.0588235 0.9411765 
[Resample] iter 37:   0.0000000 1.0000000 
[Resample] iter 38:   0.0526316 0.9473684 
[Resample] iter 39:   0.0000000 1.0000000 
[Resample] iter 40:   0.0000000 1.0000000 
[Resample] iter 41:   0.0000000 1.0000000 
[Resample] iter 42:   0.0000000 1.0000000 
[Resample] iter 43:   0.0000000 1.0000000 
[Resample] iter 44:   0.0000000 1.0000000 
[Resample] iter 45:   0.0000000 1.0000000 
[Resample] iter 46:   0.0000000 1.0000000 
[Resample] iter 47:   0.0000000 1.0000000 
[Resample] iter 48:   0.0555556 0.9444444 
[Resample] iter 49:   0.0000000 1.0000000 
[Resample] iter 50:   0.0000000 1.0000000 


Aggregated Result: mmce.test.mean=0.0100722,acc.test.mean=0.9899278
QDA_check$aggr
mmce.test.mean  acc.test.mean 
    0.01007224     0.98992776 
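
With a mean cross-validated accuracy of about 0.990 against 0.984 for LDA, QDA performs marginally better on this dataset, which suggests LDA's equal-covariance assumption may be slightly restrictive here, though the difference is small.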