Multinomial regression example using Iris dataset. Objective is to identify to which species, a flower belongs for given value of Petal, Sepal length and width.
This is achieved using nnet package and results are verified using clasiification table
library(datasets)
library(ggplot2)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
dim(iris)
## [1] 150 5
iris[!complete.cases(iris),]
## [1] Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <0 rows> (or 0-length row.names)
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
From results below, its obvious that log of odds of a flower to be either virginia/veriscolor increases with per unit increase in Petal Length. Similary the log od odds of a flower to be a virginia/veriscolor decreases with per unit increase in Sepal Length.
irisCopy<-iris
irisCopy$Species<-relevel(irisCopy$Species,ref = "setosa")
library(nnet)
irisModel<-multinom(Species~Sepal.Length+Sepal.Width+Petal.Length+Petal.Width,data = irisCopy)
## # weights: 18 (10 variable)
## initial value 164.791843
## iter 10 value 16.177348
## iter 20 value 7.111438
## iter 30 value 6.182999
## iter 40 value 5.984028
## iter 50 value 5.961278
## iter 60 value 5.954900
## iter 70 value 5.951851
## iter 80 value 5.950343
## iter 90 value 5.949904
## iter 100 value 5.949867
## final value 5.949867
## stopped after 100 iterations
m<-summary(irisModel)
m
## Call:
## multinom(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length +
## Petal.Width, data = irisCopy)
##
## Coefficients:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 18.69037 -5.458424 -8.707401 14.24477 -3.097684
## virginica -23.83628 -7.923634 -15.370769 23.65978 15.135301
##
## Std. Errors:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 34.97116 89.89215 157.0415 60.19170 45.48852
## virginica 35.76649 89.91153 157.1196 60.46753 45.93406
##
## Residual Deviance: 11.89973
## AIC: 31.89973
ggplot(iris,aes(x=Petal.Length,y=Sepal.Length))+
geom_point(aes(color=Species))
# library(lmtest)
# null<-multinom(Species~.,irisCopy)
# lrtest(null,irisModel)
z <- m$coefficients/m$standard.errors
#table of pvalues
pvalue <-1 - pchisq(z^2,df=1)
pvalue
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 0.5930295 0.9515807 0.9557828 0.8129231 0.9457075
## virginica 0.5051288 0.9297757 0.9220685 0.6955898 0.7417773
irisCopy$predicted<-predict(irisModel,irisCopy,type = "class")
head(irisCopy)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species predicted
## 1 5.1 3.5 1.4 0.2 setosa setosa
## 2 4.9 3.0 1.4 0.2 setosa setosa
## 3 4.7 3.2 1.3 0.2 setosa setosa
## 4 4.6 3.1 1.5 0.2 setosa setosa
## 5 5.0 3.6 1.4 0.2 setosa setosa
## 6 5.4 3.9 1.7 0.4 setosa setosa
table(irisCopy$Species,irisCopy$predicted)
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 1
## virginica 0 1 49
library(caret)
## Loading required package: lattice
index<-createDataPartition(iris$Species,p=0.7,list=F)
dim(index)
## [1] 105 1
irisTrain<-iris[index,]
irisTest<-iris[-index,]
head(irisTest)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 4 4.6 3.1 1.5 0.2 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
irisModel2<-multinom(Species~Sepal.Length+Sepal.Width+Petal.Length+Petal.Width,data = iris)
## # weights: 18 (10 variable)
## initial value 164.791843
## iter 10 value 16.177348
## iter 20 value 7.111438
## iter 30 value 6.182999
## iter 40 value 5.984028
## iter 50 value 5.961278
## iter 60 value 5.954900
## iter 70 value 5.951851
## iter 80 value 5.950343
## iter 90 value 5.949904
## iter 100 value 5.949867
## final value 5.949867
## stopped after 100 iterations
summary(irisModel2)
## Call:
## multinom(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length +
## Petal.Width, data = iris)
##
## Coefficients:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 18.69037 -5.458424 -8.707401 14.24477 -3.097684
## virginica -23.83628 -7.923634 -15.370769 23.65978 15.135301
##
## Std. Errors:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 34.97116 89.89215 157.0415 60.19170 45.48852
## virginica 35.76649 89.91153 157.1196 60.46753 45.93406
##
## Residual Deviance: 11.89973
## AIC: 31.89973
irisTest$predicted<-predict(irisModel2,irisTest,type = "class")
table(irisTest$Species,irisTest$predicted)
##
## setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 14 1
## virginica 0 0 15