data of distances between 8 city in Australia, from “http://rosetta.reltech.org/TC/v15/Mapping/data/dist-Aus.csv”
## data: data of distances between 8 city in Australia
dist.au <- read.csv("http://rosetta.reltech.org/TC/v15/Mapping/data/dist-Aus.csv")
row.names(dist.au) <- dist.au[, 1]
dist.au <- dist.au[, -1]
dist.au
## A AS B D H M P S
## A 0 1328 1600 2616 1161 653 2130 1161
## AS 1328 0 1962 1289 2463 1889 1991 2026
## B 1600 1962 0 2846 1788 1374 3604 732
## D 2616 1289 2846 0 3734 3146 2652 3146
## H 1161 2463 1788 3734 0 598 3008 1057
## M 653 1889 1374 3146 598 0 2720 713
## P 2130 1991 3604 2652 3008 2720 0 3288
## S 1161 2026 732 3146 1057 713 3288 0
Do PCA with prcomp R function
dist.au.pca <- prcomp(dist.au, center = TRUE, scale.= TRUE)
print(dist.au.pca)
## Standard deviations (1, .., p=8):
## [1] 2.278072e+00 1.170197e+00 1.025433e+00 4.821009e-01 3.177163e-01
## [6] 1.956843e-01 1.336381e-01 1.172616e-17
##
## Rotation (n x k) = (8 x 8):
## PC1 PC2 PC3 PC4 PC5 PC6
## A 0.3329843 -0.03401098 0.60167533 0.09676914 0.59777557 -0.27359873
## AS -0.2006934 0.50316854 0.62552813 -0.15936130 -0.51614876 -0.03253025
## B 0.3502661 0.42395457 -0.17796301 -0.57167815 0.14873393 -0.33183361
## D -0.3530185 0.46121055 0.05690583 0.43515809 0.34998025 0.23292248
## H 0.4131902 -0.17995466 0.09619134 0.38077720 -0.47728255 -0.26597247
## M 0.4206411 -0.05500333 0.21934399 0.22433591 -0.02592846 0.36466405
## P -0.2769171 -0.53380470 0.39110908 -0.45749901 0.05261388 0.21862666
## S 0.4209359 0.18126094 -0.03822802 -0.21080327 -0.02019242 0.71288165
## PC7 PC8
## A -0.26185622 0.12580404
## AS -0.01491579 0.14890228
## B 0.28685284 -0.35294418
## D 0.08106832 -0.53549115
## H -0.17074407 -0.56120636
## M 0.75446928 0.13682553
## P 0.11739222 -0.46016417
## S -0.47978521 -0.07249984
plot(dist.au.pca, type="l")
summary(dist.au.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.2781 1.1702 1.0254 0.48210 0.31772 0.19568
## Proportion of Variance 0.6487 0.1712 0.1314 0.02905 0.01262 0.00479
## Cumulative Proportion 0.6487 0.8199 0.9513 0.98036 0.99298 0.99777
## PC7 PC8
## Standard deviation 0.13364 1.173e-17
## Proportion of Variance 0.00223 0.000e+00
## Cumulative Proportion 1.00000 1.000e+00
predict(dist.au.pca, newdata=tail(dist.au, 2))
## PC1 PC2 PC3 PC4 PC5 PC6
## P 2.888134 2.1766506 0.07023146 0.4851566 -0.015396598 -0.03426793
## S -1.920671 -0.3395569 0.56979580 0.2396121 0.007450299 -0.34420612
## PC7 PC8
## P -0.01149583 3.013612e-16
## S 0.17319365 -1.134936e-16
dist.au.pca$x <- -dist.au.pca$x ## change direction
city.names <- c("Adelaide", "Alice Springs", "Brisbane", "Darwin", "Hobart",
"Melbourne", "Perth", "Sydney")
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
g <- ggbiplot(dist.au.pca, obs.scale = 1, var.scale = 1,
ellipse = TRUE, circle = TRUE, labels=city.names)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal',
legend.position = 'top')
print(g)
Do MDS with cmdscale R function
fit <- cmdscale(dist.au, eig = TRUE, k = 2)
x <- fit$points[, 1]
y <- fit$points[, 2]
plot(x, y, pch = 19, xlim = range(x) + c(0, 600))
city.names <- c("Adelaide", "Alice Springs", "Brisbane", "Darwin", "Hobart",
"Melbourne", "Perth", "Sydney")
text(x, y, pos = 4, labels = city.names)
x <- 0 - x
y <- 0 - y
plot(x, y, pch = 19, xlim = range(x) + c(0, 600))
text(x, y, pos = 4, labels = city.names)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
g <- graph.full(nrow(dist.au))
V(g)$label <- city.names
layout <- layout.mds(g, dist = as.matrix(dist.au))
plot(g, layout = layout, vertex.size = 3)
References 1. https://www.r-bloggers.com/computing-and-visualizing-pca-in-r/ 2. https://www.r-bloggers.com/multidimensional-scaling-mds-with-r/