#Singular Value Decomposition SVD
# http://www.ats.ucla.edu/stat/r/pages/svd_demos.htm
library(foreign)
data(mtcars)
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
pca1 <- prcomp(~mpg + cyl + hp + gear, data = mtcars,
scale = TRUE)
screeplot(pca1)

xvars <- with(mtcars, cbind(mpg, cyl, hp, gear))
corr <- cor(xvars)
a <- eigen(corr)
(std <- sqrt(a$values))
## [1] 1.6919113 0.9439046 0.4036615 0.2890291
(rotation <- a$vectors)
## [,1] [,2] [,3] [,4]
## [1,] 0.5568211 0.02371158 0.8274920 -0.06815416
## [2,] -0.5688619 -0.04596181 0.4411482 0.69258354
## [3,] -0.5084457 -0.48604135 0.3031075 -0.64294058
## [4,] 0.3283738 -0.87240420 -0.1696205 0.31986007
# svd approach
df <- nrow(xvars) - 1
zvars <- scale(xvars)
z.svd <- svd(zvars)
z.svd$d/sqrt(df)
## [1] 1.6919113 0.9439046 0.4036615 0.2890291
z.svd$v
## [,1] [,2] [,3] [,4]
## [1,] -0.5568211 0.02371158 0.8274920 -0.06815416
## [2,] 0.5688619 -0.04596181 0.4411482 0.69258354
## [3,] 0.5084457 -0.48604135 0.3031075 -0.64294058
## [4,] -0.3283738 -0.87240420 -0.1696205 0.31986007
#MDS
cnut <- read.dta("http://statistics.ats.ucla.edu/stat/data/cerealnut.dta")
# centering the variables
mds.data <- as.matrix(sweep(cnut[, -1], 2, colMeans(cnut[, -1])))
dismat <- dist(mds.data)
mds.m1 <- cmdscale(dismat, k = 8, eig = TRUE)
mds.m1$eig
## [1] 1.584379e+05 1.087288e+05 1.056264e+04 3.826785e+02 6.976171e+01
## [6] 1.252082e+01 5.755998e+00 2.224324e+00 4.513969e-12 4.508111e-12
## [11] 4.121611e-12 3.188527e-12 3.150030e-12 2.297497e-12 2.091059e-12
## [16] 1.246190e-12 1.131813e-12 8.794901e-13 2.967892e-13 -1.382636e-12
## [21] -1.452732e-12 -1.574794e-12 -1.876268e-12 -5.916330e-12 -2.520931e-11
mds.m1 <- cmdscale(dismat, k = 2, eig = TRUE)
x <- mds.m1$points[, 1]
y <- mds.m1$points[, 2]
plot(x, y)
text(x + 20, y, label = cnut$brand)

# eigenvalues
xx <- svd(mds.data %*% t(mds.data))
xx$d
## [1] 1.584379e+05 1.087288e+05 1.056264e+04 3.826785e+02 6.976171e+01
## [6] 1.252082e+01 5.755998e+00 2.224324e+00 1.576321e-11 9.903742e-12
## [11] 7.190968e-12 4.712199e-12 4.152571e-12 3.030837e-12 2.767589e-12
## [16] 2.082324e-12 1.971417e-12 1.496531e-12 1.258080e-12 1.045736e-12
## [21] 7.934340e-13 7.346559e-13 2.088189e-13 1.653877e-13 8.383459e-14
# coordinates
xxd <- xx$v %*% sqrt(diag(xx$d))
x1 <- xxd[, 1]
y1 <- xxd[, 2]
plot(x1, y1)
text(x1 + 20, y1, label = cnut$brand)

# http://genomicsclass.github.io/book/pages/pca_svd.html
#https://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Dimensionality_Reduction/Singular_Value_Decomposition#Input_Data