#RPubs : (https://rpubs.com/PanefiDwi/1306959)
# Download the glass data set from the UCI repository as a CSV file
url <- "https://archive.ics.uci.edu/static/public/42/data.csv"
glass <- read.csv(file = url, header = TRUE)
# Inspect column names and types to confirm the import looks as expected
str(object = glass)
## 'data.frame': 214 obs. of 11 variables:
## $ Id_number : int 1 2 3 4 5 6 7 8 9 10 ...
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type_of_glass: int 1 1 1 1 1 1 1 1 1 1 ...
# The row identifier is not a predictor, so remove it before modelling
glass[["Id_number"]] <- NULL
# Recode the integer class codes as a labelled factor so the response is
# treated as a multinomial outcome. levels and labels are matched by
# position; the listed codes skip 4.
glass[["Type_of_glass"]] <- factor(
  glass[["Type_of_glass"]],
  levels = c(1, 2, 3, 5, 6, 7),
  labels = c(
    "building_float", "building_non_float", "vehicle_float",
    "containers", "tableware", "headlamps"
  )
)
# Class frequencies after recoding
table(glass[["Type_of_glass"]])
##
## building_float building_non_float vehicle_float containers
## 70 76 17 13
## tableware headlamps
## 9 29
#load package nnet
library(nnet)
## Warning: package 'nnet' was built under R version 4.4.3
# Build multinomial logistic regression model (all remaining columns are
# predictors of Type_of_glass).
# The recorded run with nnet's default cap of 100 optimizer iterations ended
# with "stopped after 100 iterations", i.e. the fit had not converged. Raise
# the cap and check the convergence flag explicitly.
# NOTE: the console transcript pasted below this call was recorded with the
# default maxit = 100; re-run the script to refresh it.
model <- multinom(Type_of_glass ~ ., data = glass, maxit = 1000)
# nnet sets `convergence` to 1 when the iteration cap was reached, 0 otherwise
if (!identical(as.integer(model$convergence), 0L)) {
  warning(
    "multinom() did not converge within maxit iterations; ",
    "coefficients and standard errors may be unreliable.",
    call. = FALSE
  )
}
## # weights: 66 (50 variable)
## initial value 383.436526
## iter 10 value 257.359885
## iter 20 value 181.634208
## iter 30 value 161.554088
## iter 40 value 157.912577
## iter 50 value 154.889493
## iter 60 value 153.706333
## iter 70 value 153.334999
## iter 80 value 152.219340
## iter 90 value 149.994098
## iter 100 value 149.743843
## final value 149.743843
## stopped after 100 iterations
# Show the fitted coefficients, their standard errors, the residual deviance
# and the AIC (output appears via top-level auto-printing).
summary(model)
## Call:
## multinom(formula = Type_of_glass ~ ., data = glass)
##
## Coefficients:
## (Intercept) RI Na Mg Al
## building_non_float 114.01139 210.99092 -3.5715880 -6.14888398 -0.0777839
## vehicle_float 46.69565 -61.97027 1.6471464 -0.01788714 2.5121161
## containers 19.54782 14.22700 -0.4893655 -3.69586811 10.1611011
## tableware -14.59763 -21.52840 10.7663636 -7.48120815 34.9748591
## headlamps -33.83528 22.99089 2.4341715 -5.00880431 6.2849258
## Si K Ca Ba Fe
## building_non_float -4.4509190 -3.70543961 -4.6895169 -5.757871 2.2610525
## vehicle_float 0.2207149 -0.67459086 0.6082768 -2.208131 1.5301451
## containers -0.5204113 0.62817476 -0.4292740 -3.450644 -0.6424633
## tableware -0.9212133 -197.82120395 -4.7069924 -149.906448 -407.9088594
## headlamps -0.1495441 -0.06454676 -2.2076868 -2.475847 -15.9357312
##
## Std. Errors:
## (Intercept) RI Na Mg Al
## building_non_float 0.12813868 0.32065233 0.5037905 0.7083486 1.1946519
## vehicle_float 0.05427163 0.09037508 0.6844281 0.9200780 1.4319207
## containers 0.05937308 0.08790452 0.7494205 1.0489632 2.3181174
## tableware 0.07611198 0.12924244 7.2730426 2.0108392 0.7110474
## headlamps 0.15557299 0.38577177 1.0143998 1.1443317 2.0280315
## Si K Ca Ba Fe
## building_non_float 0.1279720 1.7656501 0.4544060 2.468525 2.050503e+00
## vehicle_float 0.1688232 2.1932557 0.5593086 4.500601 3.130438e+00
## containers 0.2053111 2.3195447 0.7324720 2.628137 4.489458e+00
## tableware 1.8953165 0.1117598 3.5480224 0.024365 7.124074e-13
## headlamps 0.2163546 2.3189653 0.9487705 2.840221 1.865328e+01
##
## Residual Deviance: 299.4877
## AIC: 399.4877
# Compute two-sided Wald p-values: z = coefficient / standard error.
# summary() is evaluated once and reused instead of being recomputed for
# every component that is extracted from it.
model_summary <- summary(model)
z <- model_summary$coefficients / model_summary$standard.errors
# lower.tail = FALSE avoids the precision loss of 1 - pnorm(...) for large |z|
p <- 2 * pnorm(abs(z), lower.tail = FALSE)
# NOTE(review): the recorded summary shows very large 'tableware' coefficients
# with near-zero standard errors, which looks like (quasi-)complete
# separation; treat the Wald p-values for those terms with caution.
# Merge coefficients and p-values. cbind() takes column names from the
# matrices themselves, so the p-value columns are renamed explicitly;
# otherwise they would repeat the coefficient column names.
p_rounded <- round(p, 4)
colnames(p_rounded) <- paste0(colnames(p_rounded), " p-value")
coef_table <- cbind(model_summary$coefficients, p_rounded)
print(coef_table)
## (Intercept) RI Na Mg Al
## building_non_float 114.01139 210.99092 -3.5715880 -6.14888398 -0.0777839
## vehicle_float 46.69565 -61.97027 1.6471464 -0.01788714 2.5121161
## containers 19.54782 14.22700 -0.4893655 -3.69586811 10.1611011
## tableware -14.59763 -21.52840 10.7663636 -7.48120815 34.9748591
## headlamps -33.83528 22.99089 2.4341715 -5.00880431 6.2849258
## Si K Ca Ba Fe
## building_non_float -4.4509190 -3.70543961 -4.6895169 -5.757871 2.2610525
## vehicle_float 0.2207149 -0.67459086 0.6082768 -2.208131 1.5301451
## containers -0.5204113 0.62817476 -0.4292740 -3.450644 -0.6424633
## tableware -0.9212133 -197.82120395 -4.7069924 -149.906448 -407.9088594
## headlamps -0.1495441 -0.06454676 -2.2076868 -2.475847 -15.9357312
## (Intercept) RI Na Mg Al Si K Ca
## building_non_float 0 0 0.0000 0.0000 0.9481 0.0000 0.0358 0.0000
## vehicle_float 0 0 0.0161 0.9845 0.0794 0.1911 0.7584 0.2768
## containers 0 0 0.5138 0.0004 0.0000 0.0113 0.7865 0.5578
## tableware 0 0 0.1388 0.0002 0.0000 0.6269 0.0000 0.1846
## headlamps 0 0 0.0164 0.0000 0.0019 0.4894 0.9778 0.0200
## Ba Fe
## building_non_float 0.0197 0.2702
## vehicle_float 0.6237 0.6250
## containers 0.1892 0.8862
## tableware 0.0000 0.0000
## headlamps 0.3834 0.3929
# Predict the most likely class for every row of the data the model was
# fitted on (in-sample prediction)
pred <- predict(model, newdata = glass, type = "class")
# Cross-tabulate predicted (rows) against actual (columns) classes
conf_mat <- table(
  pred,
  glass$Type_of_glass,
  dnn = c("Predicted", "Actual")
)
print(conf_mat)
## Actual
## Predicted building_float building_non_float vehicle_float containers
## building_float 52 19 10 0
## building_non_float 18 54 7 3
## vehicle_float 0 0 0 0
## containers 0 1 0 9
## tableware 0 0 0 0
## headlamps 0 2 0 1
## Actual
## Predicted tableware headlamps
## building_float 0 0
## building_non_float 0 2
## vehicle_float 0 0
## containers 0 0
## tableware 9 0
## headlamps 0 27
# Overall accuracy: correctly classified observations (the diagonal of the
# confusion matrix) divided by all observations
n_correct <- sum(diag(conf_mat))
n_total <- sum(conf_mat)
accuracy <- n_correct / n_total
accuracy_pct <- round(accuracy * 100, 2)
print(paste("Accuracy:", accuracy_pct, "%"))
## [1] "Accuracy: 70.56 %"
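# The multinomial regression model built to predict glass types based on chemical features achieved an accuracy of 70.56% on the training data (the model was evaluated on the same observations it was fitted to, so this is an in-sample estimate).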
# The RI feature is the most consistent and has the greatest influence, followed by Na, Mg, Al, and Ca, which contribute to the differentiation between glass types, while the rest have a smaller impact.
# The model predicts the 'tableware' (9/9) and 'headlamps' (27/29) classes very well and the 'building_float' (52/70), 'building_non_float' (54/76), and 'containers' (9/13) classes reasonably well, but it never predicts 'vehicle_float': all 17 vehicle_float samples are assigned to one of the two building classes.
# Classification errors are most frequent among the 'building_float', 'building_non_float', and 'vehicle_float' classes, which is likely due to the imbalanced data distribution or less representative features.