if(!require(pacman)){install.packages("pacman"); require(pacman)}
## Loading required package: pacman
## Warning: package 'pacman' was built under R version 3.6.2
packages <- c('nnet', 'glue', 'broom', 'MASS', 'caret', 'InformationValue', 'Hmisc', 'kableExtra', 'corrplot', 'tidyverse', 'ROCR', 'palmerpenguins', 'mice')
pacman::p_load(char = packages)
df = penguins
df %>% head() %>% kableExtra::kable()
| species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year |
|---|---|---|---|---|---|---|---|
| Adelie | Torgersen | 39.1 | 18.7 | 181 | 3750 | male | 2007 |
| Adelie | Torgersen | 39.5 | 17.4 | 186 | 3800 | female | 2007 |
| Adelie | Torgersen | 40.3 | 18.0 | 195 | 3250 | female | 2007 |
| Adelie | Torgersen | NA | NA | NA | NA | NA | 2007 |
| Adelie | Torgersen | 36.7 | 19.3 | 193 | 3450 | female | 2007 |
| Adelie | Torgersen | 39.3 | 20.6 | 190 | 3650 | male | 2007 |
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 344 obs. of 8 variables:
## $ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ bill_length_mm : num 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: int 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : int 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...
## $ year : int 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
df %>% group_by(species, island) %>%
summarise(n = n())
## # A tibble: 5 x 3
## # Groups: species [3]
## species island n
## <fct> <fct> <int>
## 1 Adelie Biscoe 44
## 2 Adelie Dream 56
## 3 Adelie Torgersen 52
## 4 Chinstrap Dream 68
## 5 Gentoo Biscoe 124
with(df, ftable(species, year) %>% prop.table(., margin = 1) %>% round(., 2))
## year 2007 2008 2009
## species
## Adelie 0.33 0.33 0.34
## Chinstrap 0.38 0.26 0.35
## Gentoo 0.27 0.37 0.35
with(df, ftable(species, sex) %>% prop.table(., margin = 1) %>% round(., 2))
## sex female male
## species
## Adelie 0.50 0.50
## Chinstrap 0.50 0.50
## Gentoo 0.49 0.51
metadata = data.frame(colName = names(df)) %>%
dplyr::mutate(dtype = sapply(df, class),
missing = colSums(is.na(df)),
unique = sapply(df, function(x) length(unique(x))))
metadata %>% kableExtra::kable()
| colName | dtype | missing | unique |
|---|---|---|---|
| species | factor | 0 | 3 |
| island | factor | 0 | 3 |
| bill_length_mm | numeric | 2 | 165 |
| bill_depth_mm | numeric | 2 | 81 |
| flipper_length_mm | integer | 2 | 56 |
| body_mass_g | integer | 2 | 95 |
| sex | factor | 11 | 3 |
| year | integer | 0 | 3 |
The categorical variables “sex” and “year” do not help to classify the penguins: each species is split almost evenly across both, as the tables above show. There are 3 distinct species. Let’s focus on the four main physiological measures, i.e. “bill_length_mm”, “bill_depth_mm”, “flipper_length_mm”, and “body_mass_g”, and see how they differ.
bodyVars = metadata %>% dplyr::filter(unique >3) %>% .$colName %>% as.character
dfGather <- df %>%
dplyr::select(all_of(c("species", bodyVars))) %>%
dplyr::mutate(species = as.character(species)) %>%
tidyr::gather(key, value, -species)
ggplot(dfGather, aes(value, color = species)) +
geom_density() +
theme(legend.position = "top") +
geom_vline(data = aggregate(value ~ species + key, dfGather, median),
aes(xintercept = value,
color = species),
linetype = "dashed") +
facet_wrap(~ key, nrow = 5, scales = "free")
## Warning: Removed 8 rows containing non-finite values (stat_density).
Since Adelie and Chinstrap are similar to each other in terms of “bill_depth_mm”, “body_mass_g” and “flipper_length_mm” (the dashed lines in the chart above mark the medians), let’s group them into one category (“AC”). Besides, Chinstrap resides only on “Dream” and Gentoo only on “Biscoe”, while Adelie resides on all three islands. Grouping Adelie and Chinstrap together will make it easier to classify the penguins: e.g., knowing a penguin comes from either “Torgersen” or “Dream” immediately classifies it as an “AC” penguin.
Let’s fix the missing values and come up with a new binary target variable.
# impute missing values with mice: a dry run (maxit = 0) grabs the default settings
init = mice(df, maxit = 0)
meth = init$method
predM = init$predictorMatrix
# numeric measures: Bayesian linear regression ("norm"); sex: logistic regression ("logreg")
meth[c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g")] = "norm"
meth[c("sex")] = "logreg"
set.seed(1234)
imputed <- mice(df, method = meth, predictorMatrix = predM, m = 5)
##
## iter imp variable
## 1 1 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 1 2 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 1 3 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 1 4 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 1 5 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 2 1 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 2 2 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 2 3 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 2 4 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 2 5 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 3 1 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 3 2 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 3 3 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 3 4 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 3 5 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 4 1 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 4 2 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 4 3 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 4 4 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 4 5 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 5 1 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 5 2 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 5 3 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 5 4 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 5 5 bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
df2 <- complete(imputed) %>%
dplyr::mutate(target_flag = dplyr::case_when(species %in% c("Adelie", "Chinstrap") ~ "AC", TRUE ~ "G") %>%
factor(., levels = c("AC", "G"), labels = c("Adelie, Chinstrap", "Gentoo"))) %>%
dplyr::select(-species)
# double check and make sure no more missing values
colSums(is.na(df2))
## island bill_length_mm bill_depth_mm flipper_length_mm
## 0 0 0 0
## body_mass_g sex year target_flag
## 0 0 0 0
# df2 - ready to build model
df2 %>% head()
## island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 1 Torgersen 39.10000 18.70000 181.000 3750.000 male
## 2 Torgersen 39.50000 17.40000 186.000 3800.000 female
## 3 Torgersen 40.30000 18.00000 195.000 3250.000 female
## 4 Torgersen 44.08715 20.98134 191.335 4194.923 male
## 5 Torgersen 36.70000 19.30000 193.000 3450.000 female
## 6 Torgersen 39.30000 20.60000 190.000 3650.000 male
## year target_flag
## 1 2007 Adelie, Chinstrap
## 2 2007 Adelie, Chinstrap
## 3 2007 Adelie, Chinstrap
## 4 2007 Adelie, Chinstrap
## 5 2007 Adelie, Chinstrap
## 6 2007 Adelie, Chinstrap
# let's take a look at the correlations among the bodyVars after completing the whole data set
df2 %>%
dplyr::select(all_of(bodyVars)) %>%
cor() %>%
corrplot(method = "number", type = "upper", order = "hclust")
Pre-process the data (center and scale the physiological measures) and then split it into training and test sets.
preProcValues <- caret::preProcess(df2[, bodyVars], method = c("center", "scale"))
df3 <- predict(preProcValues, df2)
set.seed(1234)
index = sample(nrow(df3), nrow(df3) * 0.7)
dfTrain <- df3[index, ]
dfTest <- df3[-index, ]
dim(dfTrain); dim(dfTest)
## [1] 240 8
## [1] 104 8
head(dfTrain)
## island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
## 284 Dream 1.3485407 0.5255864 -0.2805527 -0.5674517 male
## 336 Dream 0.3038344 1.1313838 -0.4936580 -0.8479145 female
## 101 Biscoe -1.6389528 0.3741371 -0.6357281 -0.5986142 female
## 111 Biscoe -1.0707792 -0.3326265 -0.2095176 -0.4739641 female
## 133 Dream -1.3090456 0.6770358 -0.5646930 -0.8790770 female
## 98 Dream -0.6675592 0.6770358 -0.3515878 0.1804490 male
## year target_flag
## 284 2007 Adelie, Chinstrap
## 336 2009 Adelie, Chinstrap
## 101 2009 Adelie, Chinstrap
## 111 2009 Adelie, Chinstrap
## 133 2009 Adelie, Chinstrap
## 98 2008 Adelie, Chinstrap
# full model
fullModel <- glm(target_flag ~ ., data = dfTrain, family = binomial(link = "logit"))
summary(fullModel)
##
## Call:
## glm(formula = target_flag ~ ., family = binomial(link = "logit"),
## data = dfTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.694e-05 -2.100e-08 -2.100e-08 2.100e-08 2.595e-05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.199e+03 6.805e+07 0 1
## islandDream -1.028e+00 8.644e+04 0 1
## islandTorgersen -5.117e+00 8.404e+04 0 1
## bill_length_mm 2.910e+00 3.207e+04 0 1
## bill_depth_mm -2.022e+01 6.208e+04 0 1
## flipper_length_mm 1.262e+01 7.301e+04 0 1
## body_mass_g 1.296e+01 5.904e+04 0 1
## sexmale -2.121e+00 6.838e+04 0 1
## year -2.098e+00 3.387e+04 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3.0412e+02 on 239 degrees of freedom
## Residual deviance: 4.1174e-09 on 231 degrees of freedom
## AIC: 18
##
## Number of Fisher Scoring iterations: 25
# stepwise model (direction defaults to both)
step <- MASS::stepAIC(fullModel, trace = FALSE)
step$anova
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## target_flag ~ island + bill_length_mm + bill_depth_mm + flipper_length_mm +
## body_mass_g + sex + year
##
## Final Model:
## target_flag ~ bill_depth_mm + body_mass_g
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 231 4.117427e-09 18
## 2 - island 2 1.259859e-10 233 4.243413e-09 14
## 3 - year 1 8.010748e-11 234 4.323520e-09 12
## 4 - sex 1 5.598544e-11 235 4.379506e-09 10
## 5 - bill_length_mm 1 3.095555e-10 236 4.689061e-09 8
## 6 - flipper_length_mm 1 5.184175e-09 237 9.873236e-09 6
stepModel <- glm(target_flag ~ bill_depth_mm + body_mass_g, data = dfTrain, family = binomial(link = "logit"))
summary(stepModel)
##
## Call:
## glm(formula = target_flag ~ bill_depth_mm + body_mass_g, family = binomial(link = "logit"),
## data = dfTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.374e-05 -2.100e-08 -2.100e-08 2.100e-08 6.275e-05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -31.62 18951.68 -0.002 0.999
## bill_depth_mm -40.41 25398.54 -0.002 0.999
## body_mass_g 33.89 18107.24 0.002 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3.0412e+02 on 239 degrees of freedom
## Residual deviance: 9.8732e-09 on 237 degrees of freedom
## AIC: 6
##
## Number of Fisher Scoring iterations: 25
# repeated, k-fold cross-validation
set.seed(1234)
train.control <- caret::trainControl(method = "repeatedcv", number = 10, repeats = 10)
kFoldModel <- caret::train(target_flag ~ bill_depth_mm + body_mass_g,
data = dfTrain, method = "glm", trControl = train.control)
summary(kFoldModel)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.374e-05 -2.100e-08 -2.100e-08 2.100e-08 6.275e-05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -31.62 18951.68 -0.002 0.999
## bill_depth_mm -40.41 25398.54 -0.002 0.999
## body_mass_g 33.89 18107.24 0.002 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3.0412e+02 on 239 degrees of freedom
## Residual deviance: 9.8732e-09 on 237 degrees of freedom
## AIC: 6
##
## Number of Fisher Scoring iterations: 25
We built 3 logistic regression models using different approaches. First, we built a full model using all predictors on the training set. Next, we applied a stepwise approach that searches in both directions (forward and backward). Finally, taking the formula the stepwise search settled on (target_flag ~ bill_depth_mm + body_mass_g), we refit it with repeated k-fold cross-validation (10-fold, repeated 10 times). We compared the three models using the Akaike Information Criterion (AIC). We achieved the lowest AIC of 6 (down from 18 for the full model), and the result looks reasonable: knowing bill_depth_mm and body_mass_g is enough to guess whether a penguin belongs to the AC or G group. Adelie and Chinstrap have almost the same distributions of bill_depth_mm and body_mass_g, and both measures differ markedly from Gentoo (see the ggplot chart above). Meanwhile, sex and year do not appear to have any meaningful predictive power. Note also that the near-zero residual deviance and the enormous standard errors in the summaries above are symptoms of complete separation: the two groups can be split perfectly in the training data, so the coefficient estimates themselves should not be over-interpreted.
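To see the comparison in one place, note that AIC() accepts several fitted models at once; a quick sketch (the cross-validated fit stores its final glm in $finalModel):
# compare AIC across the three fits in a single table
AIC(fullModel, stepModel, kFoldModel$finalModel)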
# prediction for each model
predicted_1 <- predict(fullModel, dfTest, type = "response")
predicted_2 <- predict(stepModel, dfTest, type = "response")
predicted_3 <- predict(kFoldModel, dfTest, type = "raw")
dfTest2 <- dfTest %>%
dplyr::mutate(predicted_1 = dplyr::case_when(predicted_1 <0.5 ~ "AC", TRUE ~ "G") %>%
factor(., levels = c("AC", "G"), labels = c("Adelie, Chinstrap", "Gentoo")),
predicted_2 = dplyr::case_when(predicted_2 <0.5 ~ "AC", TRUE ~ "G") %>%
factor(., levels = c("AC", "G"), labels = c("Adelie, Chinstrap", "Gentoo")),
predicted_3 = predicted_3)
# diagnostic - confusion matrix: columns are actuals, rows are predicted classes
optCutOff = 0.5
cm_1 <- InformationValue::confusionMatrix(dfTest2$target_flag, predicted = predicted_1, threshold = optCutOff)
cm_2 <- InformationValue::confusionMatrix(dfTest2$target_flag, predicted = predicted_2, threshold = optCutOff)
cm_3 <- ftable(predicted_3, dfTest2$target_flag)
# print confusion matrix
cm_1
## Adelie, Chinstrap Gentoo
## 0 59 0
## 1 0 45
cm_2
## Adelie, Chinstrap Gentoo
## 0 59 0
## 1 0 45
cm_3
## Adelie, Chinstrap Gentoo
## predicted_3
## Adelie, Chinstrap 59 0
## Gentoo 0 45
# print tidy output for model evaluation
broom::tidy(caret::confusionMatrix(dfTest2$target_flag, dfTest2$predicted_2)) %>% dplyr::select(term, estimate)
## # A tibble: 13 x 2
## term estimate
## <chr> <dbl>
## 1 accuracy 1
## 2 kappa 1
## 3 sensitivity 1
## 4 specificity 1
## 5 pos_pred_value 1
## 6 neg_pred_value 1
## 7 precision 1
## 8 recall 1
## 9 f1 1
## 10 prevalence 0.567
## 11 detection_rate 0.567
## 12 detection_prevalence 0.567
## 13 balanced_accuracy 1
# ROC curves
par(mfrow = c(1, 2))
pred <- prediction(predicted_1, dfTest2$target_flag)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize = TRUE, main = "Full Model")
pred <- prediction(predicted_2, dfTest2$target_flag)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize = TRUE, main = "Step Model")
# Final print of the stepModel
tidy(stepModel)
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -31.6 18952. -0.00167 0.999
## 2 bill_depth_mm -40.4 25399. -0.00159 0.999
## 3 body_mass_g 33.9 18107. 0.00187 0.999
Surprisingly, the full and stepwise models demonstrate perfect classification on the test set. We achieved 100% accuracy, and the only concern is overfitting, i.e. whether the model generalizes to a different data set. For the sake of simplicity and parsimony, we should go with the simpler model suggested by the stepwise approach. In this case, less is more: the two physiological measures are enough to classify the penguins.
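As a quick check on the overfitting concern, we can also look at the resampled (out-of-fold) performance that the repeated cross-validation above already computed; a minimal sketch:
# out-of-fold accuracy and kappa from the 10x repeated 10-fold CV
kFoldModel$results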
pred <- prediction(predicted_2, dfTest2$target_flag)
auc = performance(pred, "auc")
auc = auc@y.values[[1]]
cm <- InformationValue::confusionMatrix(dfTest2$target_flag, predicted_2)
TN <- cm[1, 1]
FN <- cm[1, 2]
FP <- cm[2, 1]
TP <- cm[2, 2]
accuracy = (TP + TN) / (TN + FN + FP + TP)
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)
FNR = FN / (FN + TP)
# print the metrics
print(paste0("The AUC is ", auc * 100, "%"))
## [1] "The AUC is 100%"
print(paste0("The Accuracy is ", accuracy * 100, "%"))
## [1] "The Accuracy is 100%"
print(paste0("The TPR is ", TPR * 100, "%"))
## [1] "The TPR is 100%"
print(paste0("The FPR is ", FPR * 100, "%"))
## [1] "The FPR is 0%"
print(paste0("The TNR is ", TNR * 100, "%"))
## [1] "The TNR is 100%"
print(paste0("The FNR is ", FNR * 100, "%"))
## [1] "The FNR is 0%"
Let’s set “Adelie” as the baseline reference level. The multinomial model’s coefficients will then compare each other species against this baseline category.
# use the original "species" column for target variable
df4 <- cbind(species = df$species, df3 %>% dplyr::select(-target_flag))
# split the data set again
train <- df4[index, ]
test <- df4[-index, ]
# set reference
train$species <- relevel(train$species, ref = "Adelie")
# build a multinomial model
multinom_model <- nnet::multinom(species ~ ., data = train)
## # weights: 30 (18 variable)
## initial value 263.666949
## iter 10 value 0.431645
## iter 20 value 0.118350
## iter 30 value 0.066545
## iter 40 value 0.001738
## iter 50 value 0.001184
## iter 60 value 0.001128
## iter 70 value 0.000350
## iter 80 value 0.000203
## iter 90 value 0.000168
## iter 100 value 0.000108
## final value 0.000108
## stopped after 100 iterations
# checking the model
summary(multinom_model)
## Call:
## nnet::multinom(formula = species ~ ., data = train)
##
## Coefficients:
## (Intercept) islandDream islandTorgersen bill_length_mm bill_depth_mm
## Chinstrap -0.009096519 30.15339 -8.398646 55.21404 -29.05699
## Gentoo 0.020205059 10.96439 -39.558414 30.52027 -42.60795
## flipper_length_mm body_mass_g sexmale year
## Chinstrap 12.99986 -1.689107 -21.68769 0.007679352
## Gentoo 12.57672 29.628495 -16.38241 0.011278243
##
## Std. Errors:
## (Intercept) islandDream islandTorgersen bill_length_mm bill_depth_mm
## Chinstrap 0.04593618 23.56378 1.216616e+00 85.03581 246.2632
## Gentoo 0.08297213 182.22304 1.120212e-09 85.52528 156.0239
## flipper_length_mm body_mass_g sexmale year
## Chinstrap 91.85039 86.82316 21.59739 0.1257196
## Gentoo 79.53957 95.96933 177.23002 0.1786416
##
## Residual Deviance: 0.0002168773
## AIC: 36.00022
# convert the coefficients to odds ratios by exponentiating them
options(scipen = 999) # turn off scientific notation in the printout
exp(coef(multinom_model)) %>% round(3)
## (Intercept) islandDream islandTorgersen
## Chinstrap 0.991 12458078068075.14 0
## Gentoo 1.020 57779.79 0
## bill_length_mm bill_depth_mm flipper_length_mm
## Chinstrap 953128158450511633206206 0 442351.7
## Gentoo 17979766914983 0 289733.8
## body_mass_g sexmale year
## Chinstrap 0.185 0 1.008
## Gentoo 7370413889469.455 0 1.011
# take a look at the predicted probabilities, comparing them to the original label (or target)
round(fitted(multinom_model), 2) %>% as.data.frame() %>% dplyr::mutate(species = train$species) %>%
dplyr::select(true_label = species, everything()) %>%
head(10) %>%
kableExtra::kable()
| true_label | Adelie | Chinstrap | Gentoo |
|---|---|---|---|
| Chinstrap | 0 | 1 | 0 |
| Chinstrap | 0 | 1 | 0 |
| Adelie | 1 | 0 | 0 |
| Adelie | 1 | 0 | 0 |
| Adelie | 1 | 0 | 0 |
| Adelie | 1 | 0 | 0 |
| Adelie | 1 | 0 | 0 |
| Gentoo | 0 | 0 | 1 |
| Adelie | 1 | 0 | 0 |
| Chinstrap | 0 | 1 | 0 |
We converted the coefficients (log-odds) to odds ratios by exponentiating them. Before conversion, a one-unit increase in bill_length_mm (one standard deviation, since the measures were centered and scaled) is associated with an increase of 55.21 in the log odds of being a Chinstrap vs. Adelie. After conversion, keeping all other variables constant, the odds ratio for a one-unit increase in the same variable is about 9.53E+23. We can easily identify a few key variables that help classify the penguins. For instance, the odds ratio for a one-unit increase in flipper_length_mm is about 2.9E+05 for being a Gentoo vs. Adelie penguin. Similarly, the odds ratio for a one-unit increase in body_mass_g is about 7.37E+12 for being a Gentoo vs. Adelie penguin. (As with the binary models, these extreme values reflect the near-perfect separability of the species.) The results of this model pick out the important physiological features that were displayed in the ggplot chart above.
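As a sanity check on the conversion arithmetic, we can exponentiate one coefficient by hand (the value is copied from the summary above):
# log-odds -> odds ratio for bill_length_mm, Chinstrap vs. Adelie
b <- 55.21404
exp(b) # ~9.53e+23, matching the exp(coef()) table above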
# model evaluation using the test set
test$speciesPredicted <- predict(multinom_model, newdata = test, "class")
# classification table
tab <- table(test$species, test$speciesPredicted)
tab
##
## Adelie Chinstrap Gentoo
## Adelie 40 1 0
## Chinstrap 0 18 0
## Gentoo 0 0 45
# calculate accuracy - sum of diagonal elements divided by total obs
print(paste0("The model accuracy is ", round((sum(diag(tab))/sum(tab))*100,2), "%"))
## [1] "The model accuracy is 99.04%"
We built an almost perfect model, achieving 99% accuracy. The most convenient fit statistic for evaluating this model is accuracy, as it is simple and easy to calculate. The classification table is also important in showing where the model errs (here, a single Adelie misclassified as Chinstrap). Relative risk ratios (the exponentiated coefficients) are also useful for interpreting the model to people with no statistical background, e.g.: keeping all other variables constant, if your X score increases by 1 unit, you are 0.85 times as likely to be in category Z as in the reference group (the odds are 15% lower). Last but not least, we can perform a chi-square goodness-of-fit test to compare across models.
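For instance, a likelihood-ratio (chi-square) test between the full multinomial model and a reduced, nested one could look like the sketch below; the reduced formula is only an illustration, not a model we fit above.
# likelihood-ratio test between nested multinomial models (illustrative)
reducedModel <- nnet::multinom(species ~ bill_depth_mm + body_mass_g, data = train, trace = FALSE)
anova(reducedModel, multinom_model) # chi-square test on the deviance difference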