Multidimensional data: projections:
Find principal components of your data (use: Matlab: pca, R: princomp, or other).
Make visualizations in a two-dimensional projection.
Make the attribute axis representation.
Select the most informative and the non-informative attributes according to the lengths of their axes (shortest / longest).
Create different subsets of attributes (informative only, non-informative only, all except the non-informative attributes, etc.) and visualize the data using the nonlinear projection method MDS.
Present different visualizations, comment on the result.
library(stats)
library(pracma)
## Warning: paketas 'pracma' buvo sukurtas pagal R versiją 4.1.3
library(ggplot2)
library(plot3D)
library(rgl)
library(viscomplexr)
library(philentropy)
## Warning: paketas 'philentropy' buvo sukurtas pagal R versiją 4.1.3
library(factoextra)
## Warning: paketas 'factoextra' buvo sukurtas pagal R versiją 4.1.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
library(RColorBrewer)
library(PerformanceAnalytics)
## Warning: paketas 'PerformanceAnalytics' buvo sukurtas pagal R versiją 4.1.3
## Įkeliamas reikalingas paketas: xts
## Įkeliamas reikalingas paketas: zoo
##
## Pridedamas paketas: 'zoo'
## Šie objektai yra užmaskuoti nuo 'package:base':
##
## as.Date, as.Date.numeric
##
## Pridedamas paketas: 'PerformanceAnalytics'
## Šis objektas yra užmaskuotas nuo 'package:graphics':
##
## legend
# Sample the Riemann zeta function at 100 points on the straight line from
# -40 - 10i to 40 + 10i in the complex plane.
# NOTE(review): Re(s) and Im(s) are both linear in the sample index, so they
# are perfectly correlated (the correlation matrix printed further down shows
# 1.0) and one principal component is degenerate. Sample a 2-D grid instead if
# the two parts are meant to vary independently.
x <- seq(-40, 40, length.out = 100)
y <- seq(-10, 10, length.out = 100)
s <- complex(real = x, imaginary = y)
z <- zeta(s)
m <- Mod(z)
# Take the real part of zeta explicitly: as.numeric() on a complex vector
# returns the same values but discards the imaginary parts with a warning.
# The column names are spelled out so they stay identical to the ones
# data.frame() previously derived from the argument expressions.
dff <- data.frame(
  Re.s. = Re(s),
  as.numeric.Im.s.. = Im(s),
  as.numeric.z. = Re(z),
  m = m
)
head(dff)
## Re.s. as.numeric.Im.s.. as.numeric.z. m
## 1 -40.00000 -10.000000 -3.451914e+20 3.001451e+21
## 2 -39.19192 -9.797980 2.738126e+20 5.011015e+20
## 3 -38.38384 -9.595960 8.136942e+19 8.504820e+19
## 4 -37.57576 -9.393939 1.348245e+19 1.467905e+19
## 5 -36.76768 -9.191919 1.139870e+18 2.577382e+18
## 6 -35.95960 -8.989899 -1.150264e+17 4.605423e+17
# PCA on all four attributes, each scaled to unit variance before the
# decomposition. The argument is spelled `scale.` in full; the shorter
# `scale` only worked through partial argument matching.
PC <- prcomp(dff, scale. = TRUE)
PC
## Standard deviations (1, .., p=4):
## [1] 1.453075e+00 1.254517e+00 5.610359e-01 7.788648e-17
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## Re.s. 0.6440319 -0.2781415 0.0886577 7.071068e-01
## as.numeric.Im.s.. 0.6440319 -0.2781415 0.0886577 -7.071068e-01
## as.numeric.z. 0.2056652 0.6956227 0.6883393 -1.543904e-16
## m -0.3579770 -0.6011520 0.7144709 5.551115e-16
PC$sdev
## [1] 1.453075e+00 1.254517e+00 5.610359e-01 7.788648e-17
PC$rotation
## PC1 PC2 PC3 PC4
## Re.s. 0.6440319 -0.2781415 0.0886577 7.071068e-01
## as.numeric.Im.s.. 0.6440319 -0.2781415 0.0886577 -7.071068e-01
## as.numeric.z. 0.2056652 0.6956227 0.6883393 -1.543904e-16
## m -0.3579770 -0.6011520 0.7144709 5.551115e-16
PC$center
## Re.s. as.numeric.Im.s.. as.numeric.z. m
## -5.329071e-16 -1.332268e-16 2.441164e+17 3.605420e+19
PC$scale
## Re.s. as.numeric.Im.s.. as.numeric.z. m
## 2.344363e+01 5.860907e+00 4.505088e+19 3.038021e+20
head(PC$x)
## PC1 PC2 PC3 PC4
## [1,] -7.268890 -10.2524823 1.3934124 7.934498e-15
## [2,] -1.452410 4.2338715 4.9771493 1.022439e-15
## [3,] -1.796304 2.0664863 1.0644330 9.217273e-16
## [4,] -1.978904 1.1383247 -0.1322022 1.025798e-15
## [5,] -1.976592 0.9525168 -0.3431348 8.239393e-16
## [6,] -1.935428 0.9181543 -0.3611749 8.243720e-16
# Eigenvalue table of the four-attribute PCA, including the percentage of
# variance per component; the parentheses make the assigned value print.
(eig.val <- get_eigenvalue(PC))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.111427e+00 5.278567e+01 52.78567
## Dim.2 1.573812e+00 3.934529e+01 92.13097
## Dim.3 3.147613e-01 7.869032e+00 100.00000
## Dim.4 6.066303e-33 1.516576e-31 100.00000
# Variable-level results of the four-attribute PCA; start with the
# coordinates of each variable.
res.var <- get_pca_var(PC)
res.var[["coord"]]
## Dim.1 Dim.2 Dim.3 Dim.4
## Re.s. 0.9358267 -0.3489331 0.04974015 5.507406e-17
## as.numeric.Im.s.. 0.9358267 -0.3489331 0.04974015 -5.507406e-17
## as.numeric.z. 0.2988469 0.8726701 0.38618305 -1.202492e-32
## m -0.5201675 -0.7541552 0.40084384 4.323568e-32
res.var$contrib # Contributions to the PCs
## Dim.1 Dim.2 Dim.3 Dim.4
## Re.s. 41.477714 7.736267 0.7860188 5.000000e+01
## as.numeric.Im.s.. 41.477714 7.736267 0.7860188 5.000000e+01
## as.numeric.z. 4.229816 48.389090 47.3810942 2.383639e-30
## m 12.814756 36.138376 51.0468683 3.081488e-29
res.var$cos2 # Quality of representation
## Dim.1 Dim.2 Dim.3 Dim.4
## Re.s. 0.87577164 0.1217543 0.002474083 3.033152e-33
## as.numeric.Im.s.. 0.87577164 0.1217543 0.002474083 3.033152e-33
## as.numeric.z. 0.08930947 0.7615532 0.149137345 1.445988e-64
## m 0.27057422 0.5687500 0.160675783 1.869324e-63
# Individual-level (per-observation) results of the four-attribute PCA;
# preview the coordinates of the first rows.
res.ind <- get_pca_ind(PC)
head(res.ind[["coord"]])
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 -7.268890 -10.2524823 1.3934124 7.934498e-15
## 2 -1.452410 4.2338715 4.9771493 1.022439e-15
## 3 -1.796304 2.0664863 1.0644330 9.217273e-16
## 4 -1.978904 1.1383247 -0.1322022 1.025798e-15
## 5 -1.976592 0.9525168 -0.3431348 8.239393e-16
## 6 -1.935428 0.9181543 -0.3611749 8.243720e-16
head(res.ind$contrib) # Contributions to the PCs
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 25.0241982 66.7890511 6.16847827 10378.0275
## 2 0.9990855 11.3899696 78.70095735 172.3259
## 3 1.5282114 2.7133902 3.59960902 140.0492
## 4 1.8546979 0.8233407 0.05552595 173.4601
## 5 1.8503666 0.5764910 0.37406595 111.9093
## 6 1.7740988 0.5356469 0.41443260 112.0269
head(res.ind$cos2) # Quality of representation
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 0.33045335 0.6574034 0.012143204 3.937430e-31
## 2 0.04707942 0.4000624 0.552858171 2.333066e-32
## 3 0.37389028 0.4948228 0.131286885 9.844407e-32
## 4 0.74886588 0.2477919 0.003342198 2.012234e-31
## 5 0.79216515 0.1839616 0.023873241 1.376488e-31
## 6 0.79373051 0.1786284 0.027641045 1.440010e-31
Visualize eigenvalues (scree plot). Show the percentage of variances explained by each principal component.
fviz_eig(PC)
Graph of individuals. Individuals with a similar profile are grouped together.
# Graph of individuals, coloured by quality of representation (cos2).
# Only points are drawn: the text labels were bare row indices, and with 100
# observations ggrepel left 97 of them unlabeled anyway.
fviz_pca_ind(
  PC,
  geom = "point",
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
Graph of variables. Positively correlated variables point to the same side of the plot. Negatively correlated variables point to opposite sides of the graph.
# Graph of the variables (attributes) of the four-attribute PCA.
fviz_pca_var(
  PC,
  repel = TRUE, # keep the labels from overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contribution to the PCs
)
Biplot of individuals and variables
# Biplot of individuals and variables for the four-attribute PCA.
# Only the variables are labelled: the individual labels were bare row
# indices, and ggrepel left 97 of the data points unlabeled anyway.
fviz_pca_biplot(
  PC,
  label = "var",
  repel = TRUE, # Avoid overlapping variable labels
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)
Correlogram
M <- cor(dff)
M
## Re.s. as.numeric.Im.s.. as.numeric.z. m
## Re.s. 1.000000000 1.000000000 -0.005625753 -0.2036989
## as.numeric.Im.s.. 1.000000000 1.000000000 -0.005625753 -0.2036989
## as.numeric.z. -0.005625753 -0.005625753 1.000000000 -0.6587800
## m -0.203698929 -0.203698929 -0.658780038 1.0000000
# Correlogram of M: upper triangle only, attributes ordered with the
# "hclust" method, coloured with an 8-step RdYlBu palette.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  col = brewer.pal(n = 8, name = "RdYlBu")
)
# PCA restricted to the first three attributes (the modulus column m is left
# out), each scaled to unit variance. `scale.` is spelled in full instead of
# relying on partial argument matching of `scale`.
PCC <- prcomp(dff[, 1:3], scale. = TRUE)
PCC
## Standard deviations (1, .., p=3):
## [1] 1.414236e+00 9.999684e-01 4.080012e-16
##
## Rotation (n x k) = (3 x 3):
## PC1 PC2 PC3
## Re.s. 0.707084406 0.005625219 7.071068e-01
## as.numeric.Im.s.. 0.707084406 0.005625219 -7.071068e-01
## as.numeric.z. -0.007955261 0.999968356 -5.100087e-16
PCC$sdev
## [1] 1.414236e+00 9.999684e-01 4.080012e-16
PCC$rotation
## PC1 PC2 PC3
## Re.s. 0.707084406 0.005625219 7.071068e-01
## as.numeric.Im.s.. 0.707084406 0.005625219 -7.071068e-01
## as.numeric.z. -0.007955261 0.999968356 -5.100087e-16
PCC$center
## Re.s. as.numeric.Im.s.. as.numeric.z.
## -5.329071e-16 -1.332268e-16 2.441164e+17
PCC$scale
## Re.s. as.numeric.Im.s.. as.numeric.z.
## 2.344363e+01 5.860907e+00 4.505088e+19
head(PCC$x)
## PC1 PC2 PC3
## [1,] -2.351885 -7.686629210 5.020805e-15
## [2,] -2.412446 6.053433982 -1.986771e-15
## [3,] -2.329719 1.782271526 1.918255e-16
## [4,] -2.268986 0.275811386 7.383108e-16
## [5,] -2.218061 0.002237966 8.780378e-16
## [6,] -2.169095 -0.025228451 1.114289e-15
# Eigenvalue table of the three-attribute PCA, including the percentage of
# variance per component; the parentheses make the assigned value print.
(eig.val <- get_eigenvalue(PCC))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.000063e+00 6.666878e+01 66.66878
## Dim.2 9.999367e-01 3.333122e+01 100.00000
## Dim.3 1.664650e-31 5.548833e-30 100.00000
# Variable-level results of the three-attribute PCA; start with the
# coordinates of each variable.
res.var <- get_pca_var(PCC)
res.var[["coord"]]
## Dim.1 Dim.2 Dim.3
## Re.s. 0.99998418 0.005625041 2.885004e-16
## as.numeric.Im.s.. 0.99998418 0.005625041 -2.885004e-16
## as.numeric.z. -0.01125062 0.999936710 -2.080842e-31
res.var$contrib # Contributions to the PCs
## Dim.1 Dim.2 Dim.3
## Re.s. 49.996835691 0.003164309 5.000000e+01
## as.numeric.Im.s.. 49.996835691 0.003164309 5.000000e+01
## as.numeric.z. 0.006328619 99.993671381 2.601089e-29
res.var$cos2 # Quality of representation
## Dim.1 Dim.2 Dim.3
## Re.s. 0.9999683589 3.164109e-05 8.323250e-32
## as.numeric.Im.s.. 0.9999683589 3.164109e-05 8.323250e-32
## as.numeric.z. 0.0001265764 9.998734e-01 4.329902e-62
# Individual-level (per-observation) results of the three-attribute PCA;
# preview the coordinates of the first rows.
res.ind <- get_pca_ind(PCC)
head(res.ind[["coord"]])
## Dim.1 Dim.2 Dim.3
## 1 -2.351885 -7.686629210 5.020805e-15
## 2 -2.412446 6.053433982 -1.986771e-15
## 3 -2.329719 1.782271526 1.918255e-16
## 4 -2.268986 0.275811386 7.383108e-16
## 5 -2.218061 0.002237966 8.780378e-16
## 6 -2.169095 -0.025228451 1.114289e-15
head(res.ind$contrib) # Contributions to the PCs
## Dim.1 Dim.2 Dim.3
## 1 2.765595 5.908801e+01 151.4341054
## 2 2.909857 3.664638e+01 23.7122503
## 3 2.713709 3.176693e+00 0.2210497
## 4 2.574067 7.607674e-02 3.2745794
## 5 2.459820 5.008811e-06 4.6313065
## 6 2.352411 6.365150e-04 7.4588621
head(res.ind$cos2) # Quality of representation
## Dim.1 Dim.2 Dim.3
## 1 0.08560412 9.143959e-01 3.901297e-31
## 2 0.13705499 8.629450e-01 9.295553e-32
## 3 0.63081572 3.691843e-01 4.276696e-33
## 4 0.98543902 1.456098e-02 1.043385e-31
## 5 0.99999898 1.018028e-06 1.567036e-31
## 6 0.99986474 1.352588e-04 2.638638e-31
Visualize eigenvalues (scree plot). Show the percentage of variances explained by each principal component.
fviz_eig(PCC)
Graph of individuals. Individuals with a similar profile are grouped together.
# Graph of individuals for the three-attribute PCA, coloured by quality of
# representation (cos2). Only points are drawn: the text labels were bare row
# indices, and with 100 observations ggrepel left 93 of them unlabeled anyway.
fviz_pca_ind(
  PCC,
  geom = "point",
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
Graph of variables. Positively correlated variables point to the same side of the plot. Negatively correlated variables point to opposite sides of the graph.
# Graph of the variables (attributes) of the three-attribute PCA.
fviz_pca_var(
  PCC,
  repel = TRUE, # keep the labels from overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contribution to the PCs
)
Biplot of individuals and variables
# Biplot of individuals and variables for the three-attribute PCA.
# Only the variables are labelled: the individual labels were bare row
# indices, and ggrepel left 95 of the data points unlabeled anyway.
fviz_pca_biplot(
  PCC,
  label = "var",
  repel = TRUE, # Avoid overlapping variable labels
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)
Correlogram
M <- cor(dff[,1:3])
M
## Re.s. as.numeric.Im.s.. as.numeric.z.
## Re.s. 1.000000000 1.000000000 -0.005625753
## as.numeric.Im.s.. 1.000000000 1.000000000 -0.005625753
## as.numeric.z. -0.005625753 -0.005625753 1.000000000
# Correlogram of the three-attribute correlation matrix M: upper triangle
# only, attributes ordered with the "hclust" method, 8-step RdYlBu palette.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  col = brewer.pal(n = 8, name = "RdYlBu")
)
We can see that, by excluding the non-informative attribute, we get the most accurate representation of the data using only the real part, the imaginary part and the zeta function values. This means that the modulus $|z|$ is not informative for the PCA. With this subset alone, PC1 explains more than 66 % of the variance and PC2 more than 33 %.