#RPubs : (https://rpubs.com/PanefiDwi/1306959)
# Read the glass data set straight from the UCI archive (CSV with a header row)
url <- "https://archive.ics.uci.edu/static/public/42/data.csv"
glass <- read.csv(file = url, header = TRUE)

# Inspect column names and storage types before any modelling
utils::str(object = glass)
## 'data.frame':    214 obs. of  11 variables:
##  $ Id_number    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ RI           : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na           : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg           : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al           : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si           : num  71.8 72.7 73 72.6 73.1 ...
##  $ K            : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca           : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe           : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type_of_glass: int  1 1 1 1 1 1 1 1 1 1 ...
# Remove the row identifier so the `~ .` model formula used later does not
# pick it up as a predictor
glass[["Id_number"]] <- NULL

# Recode the integer class codes as a labelled factor so the response is
# treated as categorical (multinomial classification). Only the codes listed
# in `levels` are mapped; any other value would become NA.
glass[["Type_of_glass"]] <- factor(
  glass[["Type_of_glass"]],
  levels = c(1, 2, 3, 5, 6, 7),
  labels = c(
    "building_float",
    "building_non_float",
    "vehicle_float",
    "containers",
    "tableware",
    "headlamps"
  )
)
# Class frequencies after recoding
table(glass[["Type_of_glass"]])
## 
##     building_float building_non_float      vehicle_float         containers 
##                 70                 76                 17                 13 
##          tableware          headlamps 
##                  9                 29
#load package nnet
library(nnet)
## Warning: package 'nnet' was built under R version 4.4.3
# Fit a multinomial logistic regression of glass type on all remaining columns.
# multinom() stops after 100 iterations by default, and the recorded run of
# this script ended with "stopped after 100 iterations", i.e. the optimiser hit
# the limit before converging. Raise the limit and check the convergence flag
# so a non-converged fit is reported instead of being used silently.
model <- multinom(Type_of_glass ~ ., data = glass, maxit = 1000)
# `convergence` is 0 on success and 1 when the iteration limit was reached
if (!isTRUE(model$convergence == 0)) {
  warning(
    "multinom() did not converge within maxit iterations; ",
    "coefficients and standard errors may be unreliable",
    call. = FALSE
  )
}
## # weights:  66 (50 variable)
## initial  value 383.436526 
## iter  10 value 257.359885
## iter  20 value 181.634208
## iter  30 value 161.554088
## iter  40 value 157.912577
## iter  50 value 154.889493
## iter  60 value 153.706333
## iter  70 value 153.334999
## iter  80 value 152.219340
## iter  90 value 149.994098
## iter 100 value 149.743843
## final  value 149.743843 
## stopped after 100 iterations
# Show the fitted coefficients, their standard errors, deviance and AIC
summary(object = model)
## Call:
## multinom(formula = Type_of_glass ~ ., data = glass)
## 
## Coefficients:
##                    (Intercept)        RI         Na          Mg         Al
## building_non_float   114.01139 210.99092 -3.5715880 -6.14888398 -0.0777839
## vehicle_float         46.69565 -61.97027  1.6471464 -0.01788714  2.5121161
## containers            19.54782  14.22700 -0.4893655 -3.69586811 10.1611011
## tableware            -14.59763 -21.52840 10.7663636 -7.48120815 34.9748591
## headlamps            -33.83528  22.99089  2.4341715 -5.00880431  6.2849258
##                            Si             K         Ca          Ba           Fe
## building_non_float -4.4509190   -3.70543961 -4.6895169   -5.757871    2.2610525
## vehicle_float       0.2207149   -0.67459086  0.6082768   -2.208131    1.5301451
## containers         -0.5204113    0.62817476 -0.4292740   -3.450644   -0.6424633
## tableware          -0.9212133 -197.82120395 -4.7069924 -149.906448 -407.9088594
## headlamps          -0.1495441   -0.06454676 -2.2076868   -2.475847  -15.9357312
## 
## Std. Errors:
##                    (Intercept)         RI        Na        Mg        Al
## building_non_float  0.12813868 0.32065233 0.5037905 0.7083486 1.1946519
## vehicle_float       0.05427163 0.09037508 0.6844281 0.9200780 1.4319207
## containers          0.05937308 0.08790452 0.7494205 1.0489632 2.3181174
## tableware           0.07611198 0.12924244 7.2730426 2.0108392 0.7110474
## headlamps           0.15557299 0.38577177 1.0143998 1.1443317 2.0280315
##                           Si         K        Ca       Ba           Fe
## building_non_float 0.1279720 1.7656501 0.4544060 2.468525 2.050503e+00
## vehicle_float      0.1688232 2.1932557 0.5593086 4.500601 3.130438e+00
## containers         0.2053111 2.3195447 0.7324720 2.628137 4.489458e+00
## tableware          1.8953165 0.1117598 3.5480224 0.024365 7.124074e-13
## headlamps          0.2163546 2.3189653 0.9487705 2.840221 1.865328e+01
## 
## Residual Deviance: 299.4877 
## AIC: 399.4877
# Wald z-statistics and two-sided p-values for every coefficient.
# Each summary() call on the fit recomputes the standard errors, so it is
# evaluated once and reused.
model_summary <- summary(model)
z <- model_summary$coefficients / model_summary$standard.errors
# Upper-tail call avoids the cancellation in 1 - pnorm() for large |z|
p <- 2 * pnorm(abs(z), lower.tail = FALSE)

# Merge coefficients and p-values into one table. cbind() takes column names
# from matrix arguments and ignores the argument tag, so the p-value columns
# are renamed explicitly to keep them distinguishable from the coefficients.
p_rounded <- round(p, 4)
colnames(p_rounded) <- paste0("p_", colnames(p_rounded))
coef_table <- cbind(model_summary$coefficients, p_rounded)
print(coef_table)
##                    (Intercept)        RI         Na          Mg         Al
## building_non_float   114.01139 210.99092 -3.5715880 -6.14888398 -0.0777839
## vehicle_float         46.69565 -61.97027  1.6471464 -0.01788714  2.5121161
## containers            19.54782  14.22700 -0.4893655 -3.69586811 10.1611011
## tableware            -14.59763 -21.52840 10.7663636 -7.48120815 34.9748591
## headlamps            -33.83528  22.99089  2.4341715 -5.00880431  6.2849258
##                            Si             K         Ca          Ba           Fe
## building_non_float -4.4509190   -3.70543961 -4.6895169   -5.757871    2.2610525
## vehicle_float       0.2207149   -0.67459086  0.6082768   -2.208131    1.5301451
## containers         -0.5204113    0.62817476 -0.4292740   -3.450644   -0.6424633
## tableware          -0.9212133 -197.82120395 -4.7069924 -149.906448 -407.9088594
## headlamps          -0.1495441   -0.06454676 -2.2076868   -2.475847  -15.9357312
##                    (Intercept) RI     Na     Mg     Al     Si      K     Ca
## building_non_float           0  0 0.0000 0.0000 0.9481 0.0000 0.0358 0.0000
## vehicle_float                0  0 0.0161 0.9845 0.0794 0.1911 0.7584 0.2768
## containers                   0  0 0.5138 0.0004 0.0000 0.0113 0.7865 0.5578
## tableware                    0  0 0.1388 0.0002 0.0000 0.6269 0.0000 0.1846
## headlamps                    0  0 0.0164 0.0000 0.0019 0.4894 0.9778 0.0200
##                        Ba     Fe
## building_non_float 0.0197 0.2702
## vehicle_float      0.6237 0.6250
## containers         0.1892 0.8862
## tableware          0.0000 0.0000
## headlamps          0.3834 0.3929
# Predicted class label for every row of the data the model was fitted on
# (in-sample predictions; "class" is the default prediction type)
pred <- predict(model, newdata = glass, type = "class")

# Cross-tabulate predictions (rows) against the true labels (columns)
conf_mat <- table(pred, glass$Type_of_glass, dnn = c("Predicted", "Actual"))
print(conf_mat)
##                     Actual
## Predicted            building_float building_non_float vehicle_float containers
##   building_float                 52                 19            10          0
##   building_non_float             18                 54             7          3
##   vehicle_float                   0                  0             0          0
##   containers                      0                  1             0          9
##   tableware                       0                  0             0          0
##   headlamps                       0                  2             0          1
##                     Actual
## Predicted            tableware headlamps
##   building_float             0         0
##   building_non_float         0         2
##   vehicle_float              0         0
##   containers                 0         0
##   tableware                  9         0
##   headlamps                  0        27
# Overall accuracy: correctly classified observations (the diagonal of the
# confusion matrix) as a share of all observations
accuracy <- sum(diag(conf_mat)) / sum(conf_mat)
print(
  paste("Accuracy:", round(100 * accuracy, digits = 2), "%", sep = " ")
)
## [1] "Accuracy: 70.56 %"
# The multinomial regression model built to predict glass types from chemical features achieved an accuracy of 70.56% on the same data it was trained on (in-sample accuracy, not a held-out estimate).
# The RI feature is the most consistent and has the greatest influence, followed by Na, Mg, Al, and Ca, which contribute to the differentiation between glass types, while the rest have a smaller impact. 
# The model predicts 'tableware' (9 of 9) and 'headlamps' (27 of 29) well and is moderately accurate for 'building_float' (52 of 70), 'building_non_float' (54 of 76), and 'containers' (9 of 13), but it never predicts 'vehicle_float' (0 of 17).
# Classification errors are concentrated in 'vehicle_float', whose observations are all assigned to the two building classes, and in the confusion between 'building_float' and 'building_non_float'; this is likely due to the imbalanced data distribution or less representative features.