#DATA
data("iris")
library(psych)
## Warning: package 'psych' was built under R version 3.5.2
pairs.panels(iris[1:4],gap=0,bg=c("red","green","blue")[iris$Species],pch=21)
#data partition
ind <- sample(2,nrow(iris),replace = TRUE,prob=c(0.6,0.4))
training <- iris[ind==1,]
testing <- iris[ind==2,]
pc <- prcomp(training[,-5],center = TRUE,scale. = TRUE,)
attributes(pc)
## $names
## [1] "sdev" "rotation" "center" "scale" "x"
##
## $class
## [1] "prcomp"
pc$center
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.898925 3.026882 3.915054 1.264516
pc$scale
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0.8428078 0.4462744 1.7728598 0.7557957
print(pc)
## Standard deviations (1, .., p=4):
## [1] 1.6812938 0.9944528 0.4064457 0.1382637
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## Sepal.Length 0.5289871 -0.35163480 0.7180344 0.2845210
## Sepal.Width -0.2096409 -0.93535752 -0.2482228 -0.1397942
## Petal.Length 0.5904559 0.01638167 -0.1102381 -0.7993379
## Petal.Width 0.5723505 -0.03450961 -0.6408273 0.5104550
summary(pc)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.6813 0.9945 0.4064 0.13826
## Proportion of Variance 0.7067 0.2472 0.0413 0.00478
## Cumulative Proportion 0.7067 0.9539 0.9952 1.00000
pairs.panels(pc$x,gap=0,bg=c("red","green","blue")[iris$Species],pch=21)
#bi plot
library(ggbiplot)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Loading required package: plyr
## Loading required package: scales
## Warning: package 'scales' was built under R version 3.5.2
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
## Loading required package: grid
g <-ggbiplot(pc,obs.scale = 1,var.scale = 1,groups = training$Species,ellipse = TRUE,circle = TRUE,ellipse.prob = 0.68)
g <- g+scale_color_discrete(name="")
g<-g+theme(legend.direction = "horizontal",legend.position = "top")
g
trg <- predict(pc,training)
tst <- predict(pc,testing)
trg <- data.frame(trg,training[5])
tst <- data.frame(tst,testing$Species)
#multinomial logistic regression with first to pcs
library(nnet)
trg$Species <- relevel(trg$Species,ref = "setosa")
mymodel <- multinom(Species~PC1+PC2,data=trg)
## # weights: 12 (6 variable)
## initial value 102.170943
## iter 10 value 16.240829
## iter 20 value 15.489293
## iter 30 value 15.404099
## final value 15.398235
## converged
summary(mymodel)
## Call:
## multinom(formula = Species ~ PC1 + PC2, data = trg)
##
## Coefficients:
## (Intercept) PC1 PC2
## versicolor 8.371648 9.723546 5.942224
## virginica 3.211335 15.338206 6.939281
##
## Std. Errors:
## (Intercept) PC1 PC2
## versicolor 57.55209 51.90760 107.1832
## virginica 57.57024 51.92742 107.1856
##
## Residual Deviance: 30.79647
## AIC: 42.79647
#confusion mattrics
p<-predict(mymodel,trg)
tab <- table(p,trg$Species)
tab
##
## p setosa versicolor virginica
## setosa 27 0 0
## versicolor 0 27 3
## virginica 0 4 32
#misclasification`
1-sum(diag(tab))/sum(tab)
## [1] 0.07526882
p<-predict(mymodel,tst)
tab <- table(p,tst$testing.Species)
tab
##
## p setosa versicolor virginica
## setosa 22 0 0
## versicolor 1 15 2
## virginica 0 4 13
1-sum(diag(tab))/sum(tab)
## [1] 0.122807