prcomp(dataset, scale=TRUE) is same as prcomp(normed_dataset, scale=FALSE), where normed_dataset is features are de-meaned and have unit-variance.svd::propack.svd v.s. base::svd.library(svd)
library(dplyr)
library(ggplot2)
dt <- iris
rownames(dt) <- paste0('id', seq_len(nrow(dt)))
iris_type <- factor(dt$Species)
dt <- dt[, -5]
knitr::kable(head(dt))
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | |
|---|---|---|---|---|
| id1 | 5.1 | 3.5 | 1.4 | 0.2 |
| id2 | 4.9 | 3.0 | 1.4 | 0.2 |
| id3 | 4.7 | 3.2 | 1.3 | 0.2 |
| id4 | 4.6 | 3.1 | 1.5 | 0.2 |
| id5 | 5.0 | 3.6 | 1.4 | 0.2 |
| id6 | 5.4 | 3.9 | 1.7 | 0.4 |
dt_normed <- scale(dt, center = T, scale = T)
Demonstrate that prcomp(dataset, scale=TRUE) v.s. prcomp(normed_dataset, scale=FALSE). They will have same results.
res.pca <- prcomp(dt, scale = TRUE)
res.pca2 <- prcomp(dt_normed, scale=FALSE)
names(res.pca)
## [1] "sdev" "rotation" "center" "scale" "x"
prcomp(dataset, scale=TRUE)head(unclass(res.pca$x)[, 1:4])
## PC1 PC2 PC3 PC4
## id1 -2.257141 -0.4784238 0.12727962 0.024087508
## id2 -2.074013 0.6718827 0.23382552 0.102662845
## id3 -2.356335 0.3407664 -0.04405390 0.028282305
## id4 -2.291707 0.5953999 -0.09098530 -0.065735340
## id5 -2.381863 -0.6446757 -0.01568565 -0.035802870
## id6 -2.068701 -1.4842053 -0.02687825 0.006586116
prcomp(normed_dataset, scale=FALSE)head(unclass(res.pca2$x)[, 1:4])
## PC1 PC2 PC3 PC4
## id1 -2.257141 -0.4784238 0.12727962 0.024087508
## id2 -2.074013 0.6718827 0.23382552 0.102662845
## id3 -2.356335 0.3407664 -0.04405390 0.028282305
## id4 -2.291707 0.5953999 -0.09098530 -0.065735340
## id5 -2.381863 -0.6446757 -0.01568565 -0.035802870
## id6 -2.068701 -1.4842053 -0.02687825 0.006586116
Run propack.svd v.s. svd on normalized dataset.
ppk <- propack.svd(dt_normed)
names(ppk)
## [1] "d" "u" "v"
svd_res <- svd(dt_normed)
names(svd_res)
## [1] "d" "u" "v"
cat('Length of d:', length(ppk$d), '\n')
## Length of d: 4
cat('Dim of U:', dim(ppk$u), '\n')
## Dim of U: 150 4
cat('Dim of V:', dim(ppk$v), '\n')
## Dim of V: 4 4
propack.svdhead(ppk$d)
## [1] 20.853205 11.670070 4.676192 1.756847
svdhead(svd_res$d)
## [1] 20.853205 11.670070 4.676192 1.756847
propack.svdhead(ppk$u)
## [,1] [,2] [,3] [,4]
## [1,] 0.10823953 -0.04099580 0.027218646 -0.013710648
## [2,] 0.09945776 0.05757315 0.050003401 -0.058435855
## [3,] 0.11299630 0.02920003 -0.009420891 -0.016098333
## [4,] 0.10989710 0.05101939 -0.019457133 0.037416661
## [5,] 0.11422046 -0.05524180 -0.003354363 0.020379051
## [6,] 0.09920300 -0.12718049 -0.005747892 -0.003748828
svdhead(svd_res$u)
## [,1] [,2] [,3] [,4]
## [1,] -0.10823953 -0.04099580 0.027218646 0.013710648
## [2,] -0.09945776 0.05757315 0.050003401 0.058435855
## [3,] -0.11299630 0.02920003 -0.009420891 0.016098333
## [4,] -0.10989710 0.05101939 -0.019457133 -0.037416661
## [5,] -0.11422046 -0.05524180 -0.003354363 -0.020379051
## [6,] -0.09920300 -0.12718049 -0.005747892 0.003748828
propack.svdhead(ppk$v)
## [,1] [,2] [,3] [,4]
## [1,] -0.5210659 -0.37741762 0.7195664 -0.2612863
## [2,] 0.2693474 -0.92329566 -0.2443818 0.1235096
## [3,] -0.5804131 -0.02449161 -0.1421264 0.8014492
## [4,] -0.5648565 -0.06694199 -0.6342727 -0.5235971
svdhead(svd_res$v)
## [,1] [,2] [,3] [,4]
## [1,] 0.5210659 -0.37741762 0.7195664 0.2612863
## [2,] -0.2693474 -0.92329566 -0.2443818 -0.1235096
## [3,] 0.5804131 -0.02449161 -0.1421264 -0.8014492
## [4,] 0.5648565 -0.06694199 -0.6342727 0.5235971
prcomphead(res.pca2$x[, 1:2])
## PC1 PC2
## id1 -2.257141 -0.4784238
## id2 -2.074013 0.6718827
## id3 -2.356335 0.3407664
## id4 -2.291707 0.5953999
## id5 -2.381863 -0.6446757
## id6 -2.068701 -1.4842053
qplot(res.pca2$x[, 1], res.pca2$x[, 2], color = iris_type,
xlab='PC1', ylab='PC2')
propack.svdppk_pca <- t(ppk$d * t(ppk$u))
head(ppk_pca[, 1:2])
## [,1] [,2]
## [1,] 2.257141 -0.4784238
## [2,] 2.074013 0.6718827
## [3,] 2.356335 0.3407664
## [4,] 2.291707 0.5953999
## [5,] 2.381863 -0.6446757
## [6,] 2.068701 -1.4842053
qplot(ppk_pca[, 1], ppk_pca[, 2], color = iris_type,
xlab='Calculated PC1', ylab='Calculated PC2')
propack.svd + scale_x_reverse()ppk_pca <- t(ppk$d * t(ppk$u))
head(ppk_pca[, 1:2])
## [,1] [,2]
## [1,] 2.257141 -0.4784238
## [2,] 2.074013 0.6718827
## [3,] 2.356335 0.3407664
## [4,] 2.291707 0.5953999
## [5,] 2.381863 -0.6446757
## [6,] 2.068701 -1.4842053
qplot(ppk_pca[, 1], ppk_pca[, 2], color = iris_type,
xlab='Calculated PC1', ylab='Calculated PC2') +
scale_x_reverse()
svdsvd_pca <- t(svd_res$d * t(svd_res$u))
head(svd_pca[, 1:2])
## [,1] [,2]
## [1,] -2.257141 -0.4784238
## [2,] -2.074013 0.6718827
## [3,] -2.356335 0.3407664
## [4,] -2.291707 0.5953999
## [5,] -2.381863 -0.6446757
## [6,] -2.068701 -1.4842053
qplot(svd_pca[, 1], svd_pca[, 2], color = iris_type,
xlab='Calculated PC1', ylab='Calculated PC2')
prcompres.pca2$sdev^2
## [1] 2.91849782 0.91403047 0.14675688 0.02071484
propack.svdppk$d^2 / (nrow(dt)-1)
## [1] 2.91849782 0.91403047 0.14675688 0.02071484
svdsvd_res$d^2 / (nrow(dt)-1)
## [1] 2.91849782 0.91403047 0.14675688 0.02071484
prcomphead(unclass(res.pca2$rotation)[, 1:2])
## PC1 PC2
## Sepal.Length 0.5210659 -0.37741762
## Sepal.Width -0.2693474 -0.92329566
## Petal.Length 0.5804131 -0.02449161
## Petal.Width 0.5648565 -0.06694199
propack.svdppk$v[, 1:2]
## [,1] [,2]
## [1,] -0.5210659 -0.37741762
## [2,] 0.2693474 -0.92329566
## [3,] -0.5804131 -0.02449161
## [4,] -0.5648565 -0.06694199
svdsvd_res$v[, 1:2]
## [,1] [,2]
## [1,] 0.5210659 -0.37741762
## [2,] -0.2693474 -0.92329566
## [3,] 0.5804131 -0.02449161
## [4,] 0.5648565 -0.06694199