PCA_MARKDOWN

#DATA

data("iris")
library(psych)

## Warning: package 'psych' was built under R version 3.5.2

pairs.panels(iris[1:4],gap=0,bg=c("red","green","blue")[iris$Species],pch=21)

#data partition

ind <- sample(2,nrow(iris),replace = TRUE,prob=c(0.6,0.4))
training <- iris[ind==1,]
testing <- iris[ind==2,]
pc <- prcomp(training[,-5],center = TRUE,scale. = TRUE,)
attributes(pc)

## $names
## [1] "sdev"     "rotation" "center"   "scale"    "x"       
## 
## $class
## [1] "prcomp"

pc$center

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.898925     3.026882     3.915054     1.264516

pc$scale

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##    0.8428078    0.4462744    1.7728598    0.7557957

print(pc)

## Standard deviations (1, .., p=4):
## [1] 1.6812938 0.9944528 0.4064457 0.1382637
## 
## Rotation (n x k) = (4 x 4):
##                     PC1         PC2        PC3        PC4
## Sepal.Length  0.5289871 -0.35163480  0.7180344  0.2845210
## Sepal.Width  -0.2096409 -0.93535752 -0.2482228 -0.1397942
## Petal.Length  0.5904559  0.01638167 -0.1102381 -0.7993379
## Petal.Width   0.5723505 -0.03450961 -0.6408273  0.5104550

summary(pc)

## Importance of components:
##                           PC1    PC2    PC3     PC4
## Standard deviation     1.6813 0.9945 0.4064 0.13826
## Proportion of Variance 0.7067 0.2472 0.0413 0.00478
## Cumulative Proportion  0.7067 0.9539 0.9952 1.00000

pairs.panels(pc$x,gap=0,bg=c("red","green","blue")[iris$Species],pch=21)

#bi plot

library(ggbiplot)

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.5.3

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Loading required package: plyr

## Loading required package: scales

## Warning: package 'scales' was built under R version 3.5.2

## 
## Attaching package: 'scales'

## The following objects are masked from 'package:psych':
## 
##     alpha, rescale

## Loading required package: grid

g <-ggbiplot(pc,obs.scale = 1,var.scale = 1,groups = training$Species,ellipse = TRUE,circle = TRUE,ellipse.prob = 0.68)
g <- g+scale_color_discrete(name="")
g<-g+theme(legend.direction = "horizontal",legend.position = "top")
g

trg <- predict(pc,training)
tst <- predict(pc,testing)
trg <- data.frame(trg,training[5])
tst <- data.frame(tst,testing$Species)

#multinomial logistic regression with first to pcs

library(nnet)
trg$Species <- relevel(trg$Species,ref = "setosa")
mymodel <- multinom(Species~PC1+PC2,data=trg)

## # weights:  12 (6 variable)
## initial  value 102.170943 
## iter  10 value 16.240829
## iter  20 value 15.489293
## iter  30 value 15.404099
## final  value 15.398235 
## converged

summary(mymodel)

## Call:
## multinom(formula = Species ~ PC1 + PC2, data = trg)
## 
## Coefficients:
##            (Intercept)       PC1      PC2
## versicolor    8.371648  9.723546 5.942224
## virginica     3.211335 15.338206 6.939281
## 
## Std. Errors:
##            (Intercept)      PC1      PC2
## versicolor    57.55209 51.90760 107.1832
## virginica     57.57024 51.92742 107.1856
## 
## Residual Deviance: 30.79647 
## AIC: 42.79647

#confusion mattrics

p<-predict(mymodel,trg)
tab <- table(p,trg$Species)
tab

##             
## p            setosa versicolor virginica
##   setosa         27          0         0
##   versicolor      0         27         3
##   virginica       0          4        32

#misclasification`

1-sum(diag(tab))/sum(tab)

## [1] 0.07526882

for testing data

p<-predict(mymodel,tst)
tab <- table(p,tst$testing.Species)
tab

##             
## p            setosa versicolor virginica
##   setosa         22          0         0
##   versicolor      1         15         2
##   virginica       0          4        13

1-sum(diag(tab))/sum(tab)

## [1] 0.122807

PCA_MARKDOWN

suman

8/10/2019

for testing data