Let’s consider two points P and Q with 5 coordinates.
P <- c(9, 2, 6, 5, 8)
P
[1] 9 2 6 5 8
length(P)
[1] 5
Q <- c(12, 8, 6, 4, 10)
O <- rep(0, 5) # Origin with 5 coordinates
# ? operator in R provides access to the documentation page of a R function.
?dist
starting httpd help server ... done
d_OP <- dist(rbind(O, P), method="euclidean")
d_OP
O
P 14.49138
d_PQ <- dist(rbind(P, Q), method="euclidean")
d_PQ
P
Q 7.071068
Let’s randomly generate 5 data points which are normally distributed.
set.seed(1)
x1 <- rnorm(5, mean=0, sd=10)
x1
[1] -6.264538 1.836433 -8.356286 15.952808 3.295078
#mean(x1)
#sd(x1)
x2 <- rnorm(5, mean=0, sd=1)
x2
[1] -0.8204684 0.4874291 0.7383247 0.5757814 -0.3053884
plot(x1, x2, pch=19) # Scatterplot of x1 and x2
abline(h=0, v=0, col="red")
d_PQ <- dist(cbind(x1, x2), method="euclidean")
d_PQ
1 2 3 4
2 8.205872
3 2.608687 10.195807
4 22.261177 14.116651 24.309638
5 9.573482 1.660182 11.698018 12.688365
Euclidean distance from 1 to 2 = 8.205872.
Euclidean distance from 1 to 3 = 2.608687.
Euclidean distance from 2 to 3 = 10.195807, etc.
maha1 <- sqrt((x1^2/var(x1))+(x2^2/var(x2))) # Method 1
maha1
[1] 1.3891495 0.7534099 1.4052152 1.8699059 0.5709981
# Calculate the var-cov matrix of two variables (independent case)
S <- matrix(diag(c(var(x1), var(x2))), nrow=2, ncol=2)
S
[,1] [,2]
[1,] 92.35968 0.0000000
[2,] 0.00000 0.4473392
# Inverse of the matrix S
solve(S)
[,1] [,2]
[1,] 0.01082724 0.00000
[2,] 0.00000000 2.23544
# Data matrix
dmat <- cbind(x1, x2)
dmat
x1 x2
[1,] -6.264538 -0.8204684
[2,] 1.836433 0.4874291
[3,] -8.356286 0.7383247
[4,] 15.952808 0.5757814
[5,] 3.295078 -0.3053884
dim(dmat)
[1] 5 2
# Mean matrix
mu <- matrix(rep(0, 10), ncol=2, nrow=5)
mu
[,1] [,2]
[1,] 0 0
[2,] 0 0
[3,] 0 0
[4,] 0 0
[5,] 0 0
maha2 <- sqrt(diag((dmat-mu)%*%solve(S)%*%t(dmat-mu))) # Method 2
maha2
[1] 1.3891495 0.7534099 1.4052152 1.8699059 0.5709981
?mahalanobis
# Calculate Mahalanobis distance
maha3 <- sqrt(mahalanobis(dmat, center=c(0, 0), cov=S)) # Method 3
maha3
[1] 1.3891495 0.7534099 1.4052152 1.8699059 0.5709981
P <- dmat[1, ]
Q <- dmat[2, ]
maha1 <- sqrt((((P[1]-Q[1])^2)/var(x1)) + (((P[2]-Q[2])^2)/var(x2))) # Method 1
maha1
x1
2.129432
maha2 <- sqrt(t(as.matrix(P-Q))%*%solve(S)%*%as.matrix(P-Q)) # Method 2
maha2
[,1]
[1,] 2.129432
maha3 <- sqrt(mahalanobis(P, center=Q, cov=S)) # Method 3
maha3
[1] 2.129432
# Define vector x
x <- c(3, 4, 10)
x
[1] 3 4 10
# Size of vector x
length(x)
[1] 3
# Convert a vector into a matrix object
xmat <- as.matrix(x)
xmat
[,1]
[1,] 3
[2,] 4
[3,] 10
dim(xmat)
[1] 3 1
# Transpose of a vector
t(x)
[,1] [,2] [,3]
[1,] 3 4 10
length(t(x))
[1] 3
# Transpose of a matrix
t(xmat)
[,1] [,2] [,3]
[1,] 3 4 10
dim(t(xmat))
[1] 1 3
# Multiplication by a scalar
3 * x
[1] 9 12 30
# Vector addition and and subtraction
x <- c(4, 1, 3, 2)
y <- c(1, -1, 3, 0)
x + y
[1] 5 0 6 2
x - y
[1] 3 2 0 2
# Inner product of x and y
t(x) %*% y
[,1]
[1,] 12
crossprod(x, y)
[,1]
[1,] 12
# Component-wise product of vectors
x * y
[1] 4 -1 9 0
# Normalized vector: a vector of unit length
z <- x / sqrt(c(t(x)%*%x)) # x / sqrt(c(crossprod(x)))
z
[1] 0.7302967 0.1825742 0.5477226 0.3651484
# Check the length of the normalized vector
crossprod(z)
[,1]
[1,] 1
data <- c(1:6)
data
[1] 1 2 3 4 5 6
# Define matrix A
A <- matrix(data, nrow=3, ncol=2)
A
[,1] [,2]
[1,] 1 4
[2,] 2 5
[3,] 3 6
# Define matrix B
B <- matrix(data, nrow = 3, ncol=2, byrow=TRUE)
B
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
# Multiplication by a scalar
3 * A
[,1] [,2]
[1,] 3 12
[2,] 6 15
[3,] 9 18
# Matrix addition and subtraction
A <- matrix(c(1:6), 2, 3)
A
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
B <- matrix(c(20:25), 2, 3)
B
[,1] [,2] [,3]
[1,] 20 22 24
[2,] 21 23 25
A + B
[,1] [,2] [,3]
[1,] 21 25 29
[2,] 23 27 31
A - B
[,1] [,2] [,3]
[1,] -19 -19 -19
[2,] -19 -19 -19
# Matrix multiplication
A %*% t(B)
[,1] [,2]
[1,] 206 215
[2,] 272 284
t(B)%*%A
[,1] [,2] [,3]
[1,] 62 144 226
[2,] 68 158 248
[3,] 74 172 270
# Multiply element-by-element
A * B
[,1] [,2] [,3]
[1,] 20 66 120
[2,] 42 92 150
# Special matrices
# Diagonal matrix
ddata <- diag(data)
ddata
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 0 0 0 0 0
[2,] 0 2 0 0 0 0
[3,] 0 0 3 0 0 0
[4,] 0 0 0 4 0 0
[5,] 0 0 0 0 5 0
[6,] 0 0 0 0 0 6
diag(ddata)
[1] 1 2 3 4 5 6
# Identity matrix
diag(rep(1,4))
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
[2,] 0 1 0 0
[3,] 0 0 1 0
[4,] 0 0 0 1
# Trace of a matrix: sum of its diagonal elements
A <- matrix(rnorm(25), 5, 5)
A
[,1] [,2] [,3] [,4] [,5]
[1,] 1.5117812 -0.04493361 0.91897737 -0.05612874 1.35867955
[2,] 0.3898432 -0.01619026 0.78213630 -0.15579551 -0.10278773
[3,] -0.6212406 0.94383621 0.07456498 -1.47075238 0.38767161
[4,] -2.2146999 0.82122120 -1.98935170 -0.47815006 -0.05380504
[5,] 1.1249309 0.59390132 0.61982575 0.41794156 -1.37705956
sum(diag(A))
[1] -0.2850537
# Determinant of a matrix
det(A)
[1] 1.332143
# Inverse of a matrix
solve(A)
[,1] [,2] [,3] [,4] [,5]
[1,] -0.2455356 -3.328855 0.8689000 -1.295708 0.3014574
[2,] 1.0825373 2.687555 -0.7137777 1.716740 0.5994606
[3,] 0.4521566 3.897085 -0.8458634 1.166644 -0.1284804
[4,] 1.0273684 3.673334 -1.6763979 1.915998 0.1926631
[5,] 0.7816277 1.308704 -0.4875499 0.788552 -0.2207416
# Check A.inv(A) = I
A %*% solve(A)
[,1] [,2] [,3] [,4] [,5]
[1,] 1.000000e+00 -2.220446e-16 1.110223e-16 4.440892e-16 0.000000e+00
[2,] 4.163336e-17 1.000000e+00 1.387779e-17 -1.387779e-17 3.816392e-17
[3,] -1.110223e-16 5.551115e-16 1.000000e+00 -1.110223e-16 6.938894e-17
[4,] -1.387779e-16 3.885781e-16 -1.457168e-16 1.000000e+00 -1.734723e-18
[5,] -2.220446e-16 -1.332268e-15 3.330669e-16 0.000000e+00 1.000000e+00
# Eigenvalues and eigenvectors
evs <- eigen(A)
evs$values
[1] 2.5070958+0.0000000i -2.4759155+0.0000000i -1.1194433+0.0000000i
[4] 0.4016046+0.1744205i 0.4016046-0.1744205i
evs$vectors
[,1] [,2] [,3] [,4]
[1,] 0.6215789+0i 0.06419107+0i -0.3781959+0i 0.54812264+0.00344903i
[2,] 0.2339175+0i -0.15866809+0i -0.1366923+0i -0.40145783+0.09570145i
[3,] 0.3458510+0i 0.52712185+0i 0.5549777+0i -0.59398737+0.00000000i
[4,] -0.6309227+0i 0.64718932+0i 0.6222747+0i -0.37695071+0.15068046i
[5,] 0.2030907+0i -0.52344291+0i 0.3782289+0i -0.07540428+0.07693685i
[,5]
[1,] 0.54812264-0.00344903i
[2,] -0.40145783-0.09570145i
[3,] -0.59398737+0.00000000i
[4,] -0.37695071-0.15068046i
[5,] -0.07540428-0.07693685i