Load Data

data of distances between 8 city in Australia, from “http://rosetta.reltech.org/TC/v15/Mapping/data/dist-Aus.csv

## data: data of distances between 8 city in Australia
dist.au <- read.csv("http://rosetta.reltech.org/TC/v15/Mapping/data/dist-Aus.csv")
row.names(dist.au) <- dist.au[, 1]
dist.au <- dist.au[, -1]

dist.au
##       A   AS    B    D    H    M    P    S
## A     0 1328 1600 2616 1161  653 2130 1161
## AS 1328    0 1962 1289 2463 1889 1991 2026
## B  1600 1962    0 2846 1788 1374 3604  732
## D  2616 1289 2846    0 3734 3146 2652 3146
## H  1161 2463 1788 3734    0  598 3008 1057
## M   653 1889 1374 3146  598    0 2720  713
## P  2130 1991 3604 2652 3008 2720    0 3288
## S  1161 2026  732 3146 1057  713 3288    0

PCA

Do PCA with prcomp R function

dist.au.pca <- prcomp(dist.au, center = TRUE, scale.= TRUE)
print(dist.au.pca)
## Standard deviations (1, .., p=8):
## [1] 2.278072e+00 1.170197e+00 1.025433e+00 4.821009e-01 3.177163e-01
## [6] 1.956843e-01 1.336381e-01 1.172616e-17
## 
## Rotation (n x k) = (8 x 8):
##           PC1         PC2         PC3         PC4         PC5         PC6
## A   0.3329843 -0.03401098  0.60167533  0.09676914  0.59777557 -0.27359873
## AS -0.2006934  0.50316854  0.62552813 -0.15936130 -0.51614876 -0.03253025
## B   0.3502661  0.42395457 -0.17796301 -0.57167815  0.14873393 -0.33183361
## D  -0.3530185  0.46121055  0.05690583  0.43515809  0.34998025  0.23292248
## H   0.4131902 -0.17995466  0.09619134  0.38077720 -0.47728255 -0.26597247
## M   0.4206411 -0.05500333  0.21934399  0.22433591 -0.02592846  0.36466405
## P  -0.2769171 -0.53380470  0.39110908 -0.45749901  0.05261388  0.21862666
## S   0.4209359  0.18126094 -0.03822802 -0.21080327 -0.02019242  0.71288165
##            PC7         PC8
## A  -0.26185622  0.12580404
## AS -0.01491579  0.14890228
## B   0.28685284 -0.35294418
## D   0.08106832 -0.53549115
## H  -0.17074407 -0.56120636
## M   0.75446928  0.13682553
## P   0.11739222 -0.46016417
## S  -0.47978521 -0.07249984
plot(dist.au.pca, type="l")
summary(dist.au.pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     2.2781 1.1702 1.0254 0.48210 0.31772 0.19568
## Proportion of Variance 0.6487 0.1712 0.1314 0.02905 0.01262 0.00479
## Cumulative Proportion  0.6487 0.8199 0.9513 0.98036 0.99298 0.99777
##                            PC7       PC8
## Standard deviation     0.13364 1.173e-17
## Proportion of Variance 0.00223 0.000e+00
## Cumulative Proportion  1.00000 1.000e+00
predict(dist.au.pca, newdata=tail(dist.au, 2))
##         PC1        PC2        PC3       PC4          PC5         PC6
## P  2.888134  2.1766506 0.07023146 0.4851566 -0.015396598 -0.03426793
## S -1.920671 -0.3395569 0.56979580 0.2396121  0.007450299 -0.34420612
##           PC7           PC8
## P -0.01149583  3.013612e-16
## S  0.17319365 -1.134936e-16
dist.au.pca$x <- -dist.au.pca$x ## change direction
city.names <- c("Adelaide", "Alice Springs", "Brisbane", "Darwin", "Hobart", 
                "Melbourne", "Perth", "Sydney")


library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid

g <- ggbiplot(dist.au.pca, obs.scale = 1, var.scale = 1, 
              ellipse = TRUE, circle = TRUE, labels=city.names)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal', 
               legend.position = 'top')

print(g)

MDS

Do MDS with cmdscale R function

fit <- cmdscale(dist.au, eig = TRUE, k = 2)
x <- fit$points[, 1]
y <- fit$points[, 2]

plot(x, y, pch = 19, xlim = range(x) + c(0, 600))
city.names <- c("Adelaide", "Alice Springs", "Brisbane", "Darwin", "Hobart", 
                "Melbourne", "Perth", "Sydney")
text(x, y, pos = 4, labels = city.names)

x <- 0 - x
y <- 0 - y
plot(x, y, pch = 19, xlim = range(x) + c(0, 600))
text(x, y, pos = 4, labels = city.names)

library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
g <- graph.full(nrow(dist.au))
V(g)$label <- city.names
layout <- layout.mds(g, dist = as.matrix(dist.au))
plot(g, layout = layout, vertex.size = 3)

References 1. https://www.r-bloggers.com/computing-and-visualizing-pca-in-r/ 2. https://www.r-bloggers.com/multidimensional-scaling-mds-with-r/