Conclusions

Set-up

library(svd)
library(dplyr)
library(ggplot2)
dt <- iris
rownames(dt) <- paste0('id', seq_len(nrow(dt)))
iris_type <- factor(dt$Species)
dt <- dt[, -5]
knitr::kable(head(dt))
Sepal.Length Sepal.Width Petal.Length Petal.Width
id1 5.1 3.5 1.4 0.2
id2 4.9 3.0 1.4 0.2
id3 4.7 3.2 1.3 0.2
id4 4.6 3.1 1.5 0.2
id5 5.0 3.6 1.4 0.2
id6 5.4 3.9 1.7 0.4

De-mean and Unit variance

dt_normed <- scale(dt, center = T, scale = T)

PCA

Demonstrate that prcomp(dataset, scale=TRUE) v.s. prcomp(normed_dataset, scale=FALSE). They will have same results.

res.pca <- prcomp(dt, scale = TRUE)
res.pca2 <- prcomp(dt_normed, scale=FALSE)
names(res.pca)
## [1] "sdev"     "rotation" "center"   "scale"    "x"

Projected on PC space

prcomp(dataset, scale=TRUE)

head(unclass(res.pca$x)[, 1:4])
##           PC1        PC2         PC3          PC4
## id1 -2.257141 -0.4784238  0.12727962  0.024087508
## id2 -2.074013  0.6718827  0.23382552  0.102662845
## id3 -2.356335  0.3407664 -0.04405390  0.028282305
## id4 -2.291707  0.5953999 -0.09098530 -0.065735340
## id5 -2.381863 -0.6446757 -0.01568565 -0.035802870
## id6 -2.068701 -1.4842053 -0.02687825  0.006586116

prcomp(normed_dataset, scale=FALSE)

head(unclass(res.pca2$x)[, 1:4])
##           PC1        PC2         PC3          PC4
## id1 -2.257141 -0.4784238  0.12727962  0.024087508
## id2 -2.074013  0.6718827  0.23382552  0.102662845
## id3 -2.356335  0.3407664 -0.04405390  0.028282305
## id4 -2.291707  0.5953999 -0.09098530 -0.065735340
## id5 -2.381863 -0.6446757 -0.01568565 -0.035802870
## id6 -2.068701 -1.4842053 -0.02687825  0.006586116

SVD

Run propack.svd v.s. svd on normalized dataset.

ppk <- propack.svd(dt_normed)
names(ppk)
## [1] "d" "u" "v"
svd_res <- svd(dt_normed)
names(svd_res)
## [1] "d" "u" "v"
cat('Length of d:', length(ppk$d), '\n')
## Length of d: 4
cat('Dim of U:', dim(ppk$u), '\n')
## Dim of U: 150 4
cat('Dim of V:', dim(ppk$v), '\n')
## Dim of V: 4 4

D

propack.svd

head(ppk$d)
## [1] 20.853205 11.670070  4.676192  1.756847

svd

head(svd_res$d)
## [1] 20.853205 11.670070  4.676192  1.756847

U matrix

propack.svd

head(ppk$u)
##            [,1]        [,2]         [,3]         [,4]
## [1,] 0.10823953 -0.04099580  0.027218646 -0.013710648
## [2,] 0.09945776  0.05757315  0.050003401 -0.058435855
## [3,] 0.11299630  0.02920003 -0.009420891 -0.016098333
## [4,] 0.10989710  0.05101939 -0.019457133  0.037416661
## [5,] 0.11422046 -0.05524180 -0.003354363  0.020379051
## [6,] 0.09920300 -0.12718049 -0.005747892 -0.003748828

svd

head(svd_res$u)
##             [,1]        [,2]         [,3]         [,4]
## [1,] -0.10823953 -0.04099580  0.027218646  0.013710648
## [2,] -0.09945776  0.05757315  0.050003401  0.058435855
## [3,] -0.11299630  0.02920003 -0.009420891  0.016098333
## [4,] -0.10989710  0.05101939 -0.019457133 -0.037416661
## [5,] -0.11422046 -0.05524180 -0.003354363 -0.020379051
## [6,] -0.09920300 -0.12718049 -0.005747892  0.003748828

V matrix

propack.svd

head(ppk$v)
##            [,1]        [,2]       [,3]       [,4]
## [1,] -0.5210659 -0.37741762  0.7195664 -0.2612863
## [2,]  0.2693474 -0.92329566 -0.2443818  0.1235096
## [3,] -0.5804131 -0.02449161 -0.1421264  0.8014492
## [4,] -0.5648565 -0.06694199 -0.6342727 -0.5235971

svd

head(svd_res$v)
##            [,1]        [,2]       [,3]       [,4]
## [1,]  0.5210659 -0.37741762  0.7195664  0.2612863
## [2,] -0.2693474 -0.92329566 -0.2443818 -0.1235096
## [3,]  0.5804131 -0.02449161 -0.1421264 -0.8014492
## [4,]  0.5648565 -0.06694199 -0.6342727  0.5235971

PCA v.s. SVD

Projected to 2D space

prcomp

head(res.pca2$x[, 1:2])
##           PC1        PC2
## id1 -2.257141 -0.4784238
## id2 -2.074013  0.6718827
## id3 -2.356335  0.3407664
## id4 -2.291707  0.5953999
## id5 -2.381863 -0.6446757
## id6 -2.068701 -1.4842053
qplot(res.pca2$x[, 1], res.pca2$x[, 2], color = iris_type,
      xlab='PC1', ylab='PC2')

propack.svd

ppk_pca <- t(ppk$d * t(ppk$u))
head(ppk_pca[, 1:2])
##          [,1]       [,2]
## [1,] 2.257141 -0.4784238
## [2,] 2.074013  0.6718827
## [3,] 2.356335  0.3407664
## [4,] 2.291707  0.5953999
## [5,] 2.381863 -0.6446757
## [6,] 2.068701 -1.4842053
qplot(ppk_pca[, 1], ppk_pca[, 2], color = iris_type,
      xlab='Calculated PC1', ylab='Calculated PC2')

propack.svd + scale_x_reverse()

ppk_pca <- t(ppk$d * t(ppk$u))
head(ppk_pca[, 1:2])
##          [,1]       [,2]
## [1,] 2.257141 -0.4784238
## [2,] 2.074013  0.6718827
## [3,] 2.356335  0.3407664
## [4,] 2.291707  0.5953999
## [5,] 2.381863 -0.6446757
## [6,] 2.068701 -1.4842053
qplot(ppk_pca[, 1], ppk_pca[, 2], color = iris_type,
      xlab='Calculated PC1', ylab='Calculated PC2') +
  scale_x_reverse()

svd

svd_pca <- t(svd_res$d * t(svd_res$u))
head(svd_pca[, 1:2])
##           [,1]       [,2]
## [1,] -2.257141 -0.4784238
## [2,] -2.074013  0.6718827
## [3,] -2.356335  0.3407664
## [4,] -2.291707  0.5953999
## [5,] -2.381863 -0.6446757
## [6,] -2.068701 -1.4842053
qplot(svd_pca[, 1], svd_pca[, 2], color = iris_type,
      xlab='Calculated PC1', ylab='Calculated PC2')

Variance explained

prcomp

res.pca2$sdev^2
## [1] 2.91849782 0.91403047 0.14675688 0.02071484

propack.svd

ppk$d^2 / (nrow(dt)-1)
## [1] 2.91849782 0.91403047 0.14675688 0.02071484

svd

svd_res$d^2 / (nrow(dt)-1)
## [1] 2.91849782 0.91403047 0.14675688 0.02071484

PCA Loadings

prcomp

head(unclass(res.pca2$rotation)[, 1:2])
##                     PC1         PC2
## Sepal.Length  0.5210659 -0.37741762
## Sepal.Width  -0.2693474 -0.92329566
## Petal.Length  0.5804131 -0.02449161
## Petal.Width   0.5648565 -0.06694199

propack.svd

ppk$v[, 1:2]
##            [,1]        [,2]
## [1,] -0.5210659 -0.37741762
## [2,]  0.2693474 -0.92329566
## [3,] -0.5804131 -0.02449161
## [4,] -0.5648565 -0.06694199

svd

svd_res$v[, 1:2]
##            [,1]        [,2]
## [1,]  0.5210659 -0.37741762
## [2,] -0.2693474 -0.92329566
## [3,]  0.5804131 -0.02449161
## [4,]  0.5648565 -0.06694199