Example matrices used
X = matrix(c(6,8,7,
5,7,6,
1,9,4), nrow=3, ncol=3) # filled column by column (default byrow=FALSE)
Y = matrix(c(7.3,6.8,8.5,
8.9,7.6,6.1,
9.4,8.0,7.2), nrow=3, ncol=3, byrow=TRUE)
list("Matriks X" = X, "Matriks Y" = Y)
## $`Matrix X`
## [,1] [,2] [,3]
## [1,] 6 5 1
## [2,] 8 7 9
## [3,] 7 6 4
##
## $`Matrix Y`
## [,1] [,2] [,3]
## [1,] 7.3 6.8 8.5
## [2,] 8.9 7.6 6.1
## [3,] 9.4 8.0 7.2
# Addition and subtraction operations
hasil <- list(
"X + Y" = X + Y,
"X - Y" = X - Y,
"Y - X" = Y - X
)
hasil
## $`X + Y`
## [,1] [,2] [,3]
## [1,] 13.3 11.8 9.5
## [2,] 16.9 14.6 15.1
## [3,] 16.4 14.0 11.2
##
## $`X - Y`
## [,1] [,2] [,3]
## [1,] -1.3 -1.8 -7.5
## [2,] -0.9 -0.6 2.9
## [3,] -2.4 -2.0 -3.2
##
## $`Y - X`
## [,1] [,2] [,3]
## [1,] 1.3 1.8 7.5
## [2,] 0.9 0.6 -2.9
## [3,] 2.4 2.0 3.2
Standard matrix multiplication (%*%)
Following the rules of linear algebra: the number of columns of the first matrix must equal the number of rows of the second.
# Standard matrix multiplication
perkalian_XY <- X %*% Y
perkalian_YX <- Y %*% X
list("X %*% Y" = perkalian_XY, "Y %*% X" = perkalian_YX)
## $`X %*% Y`
## [,1] [,2] [,3]
## [1,] 97.7 86.8 88.7
## [2,] 205.3 179.6 175.5
## [3,] 142.1 125.2 124.9
##
## $`Y %*% X`
## [,1] [,2] [,3]
## [1,] 157.7 135.1 102.5
## [2,] 156.9 134.3 101.7
## [3,] 170.8 146.2 110.2
X %*% Y and Y %*% X give different results (matrix multiplication is not commutative).
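As a quick check of this claim, a minimal sketch reusing X and Y from above: all.equal() reports the differences between the two products, so wrapping it in isTRUE() yields FALSE.
# Non-commutativity check: the two products differ, so this is FALSE.
isTRUE(all.equal(X %*% Y, Y %*% X))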
Element-wise multiplication (*)
Not a rule of linear algebra, but a direct operation between elements in the same positions.
# Element-wise multiplication
elemenwise_XY <- X * Y
kali_2X <- 2 * X
list(
"X * Y (elemen per elemen)" = elemenwise_XY,
"2 * X (skalar kali matriks)" = kali_2X
)
## $`X * Y (element-wise)`
## [,1] [,2] [,3]
## [1,] 43.8 34.0 8.5
## [2,] 71.2 53.2 54.9
## [3,] 65.8 48.0 28.8
##
## $`2 * X (scalar times matrix)`
## [,1] [,2] [,3]
## [1,] 12 10 2
## [2,] 16 14 18
## [3,] 14 12 8
X * Y multiplies the elements at matching row and column positions. 2 * X multiplies every entry of the matrix by the scalar 2.
Transpose (t())
Swaps rows into columns and columns into rows.
transX <- t(X)
transY <- t(Y)
list(
"Transpose X" = transX,
"Transpose Y" = transY
)
## $`Transpose X`
## [,1] [,2] [,3]
## [1,] 6 8 7
## [2,] 5 7 6
## [3,] 1 9 4
##
## $`Transpose Y`
## [,1] [,2] [,3]
## [1,] 7.3 8.9 9.4
## [2,] 6.8 7.6 8.0
## [3,] 8.5 6.1 7.2
The transpose is useful for examining symmetric relationships between variables.
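A classic property worth verifying, shown as a minimal sketch with the matrices above, is the reversal rule t(XY) = t(Y) t(X).
# Reversal rule of the transpose.
all.equal(t(X %*% Y), t(Y) %*% t(X)) # TRUE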
Matrix inverse (solve())
A matrix inverse exists only if the determinant ≠ 0 (the matrix is nonsingular). It is used to solve systems of linear equations AX = B.
inv_X <- solve(X)
inv_Y <- solve(Y)
list(
"Invers X" = inv_X,
"Invers Y" = inv_Y
)
## $`Inverse X`
## [,1] [,2] [,3]
## [1,] 13.0 7.0 -19
## [2,] -15.5 -8.5 23
## [3,] 0.5 0.5 -1
##
## $`Inverse Y`
## [,1] [,2] [,3]
## [1,] -1.27147766 -4.089347 4.965636
## [2,] 1.44759450 5.871993 -6.683849
## [3,] 0.05154639 -1.185567 1.082474
The inverse → a key ingredient in multivariate analysis (e.g., the inverse covariance matrix in the Mahalanobis distance).
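As a minimal sketch of solving AX = B with these objects, where b is a hypothetical right-hand side added for illustration: solve(X, b) solves the system directly (numerically preferable to multiplying by the inverse), and inv_X %*% X recovers the identity matrix.
b <- c(1, 2, 3)                     # hypothetical right-hand side
beta <- solve(X, b)                 # solves X %*% beta = b
all.equal(as.vector(X %*% beta), b) # TRUE: the solution reproduces b
round(inv_X %*% X, 10)              # numerically the 3 x 3 identity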
Example matrices used
A <- matrix(c(2,4,6,
1,3,5,
7,9,11), nrow=3, byrow=TRUE)
B <- matrix(c(1,0,2,
0,1,3,
4,0,5), nrow=3, byrow=TRUE)
C <- matrix(c(3,2,1,
6,5,4,
9,8,7), nrow=3, byrow=TRUE)
list("Matriks A" = A,
"Matriks B" = B,
"Matriks C" = C)
## $`Matrix A`
## [,1] [,2] [,3]
## [1,] 2 4 6
## [2,] 1 3 5
## [3,] 7 9 11
##
## $`Matrix B`
## [,1] [,2] [,3]
## [1,] 1 0 2
## [2,] 0 1 3
## [3,] 4 0 5
##
## $`Matrix C`
## [,1] [,2] [,3]
## [1,] 3 2 1
## [2,] 6 5 4
## [3,] 9 8 7
Indexing matrix elements (retrieving matrix entries)
# Extract the element in row 2, column 3 of A
elemen_A23 <- A[2,3]
# Extract the entire first row of B
baris1_B <- B[1,]
# Extract the entire second column of C
kolom2_C <- C[,2]
# Extract a submatrix of A (rows 1-2, columns 2-3)
sub_A <- A[1:2, 2:3]
list(
"A[2,3]" = elemen_A23,
"B[1,]" = baris1_B,
"C[,2]" = kolom2_C,
"A[1:2, 2:3]" = sub_A
)
## $`A[2,3]`
## [1] 5
##
## $`B[1,]`
## [1] 1 0 2
##
## $`C[,2]`
## [1] 2 5 8
##
## $`A[1:2, 2:3]`
## [,1] [,2]
## [1,] 4 6
## [2,] 3 5
A[2,3] → the element in row 2, column 3. B[1,] → all elements of the first row. C[,2] → all elements of the second column. A[1:2, 2:3] → a submatrix slice.
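A related detail worth knowing, shown as a minimal sketch that is not part of the original example: extracting a single row or column drops the result to a plain vector unless drop = FALSE is supplied.
A[1, ]               # numeric vector: 2 4 6
A[1, , drop = FALSE] # stays a 1 x 3 matrix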
Trace = the sum of the elements on the main diagonal of a matrix.
trace_X <- sum(diag(X))
trace_Y <- sum(diag(Y))
list(
"Trace X" = trace_X,
"Trace Y" = trace_Y
)
## $`Trace X`
## [1] 17
##
## $`Trace Y`
## [1] 22.1
Trace → the sum of the matrix's eigenvalues.
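A minimal sketch verifying this for X: the trace (17) matches the sum of the eigenvalues (computed in full in the eigen section below), up to floating-point error.
# The trace equals the sum of the eigenvalues.
all.equal(sum(diag(X)), sum(eigen(X)$values)) # TRUE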
Rank → how much unique information the matrix contains.
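As a minimal sketch, the rank can be read off base R's QR decomposition; X is nonsingular (its inverse was computed above), so its rank is full.
qr(X)$rank # 3: X is nonsingular, so the rank is full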
eigen_X <- eigen(X)
eigen_Y <- eigen(Y)
list(
"Eigen X" = eigen_X,
"Eigen Y" = eigen_Y
)
## $`Eigen X`
## eigen() decomposition
## $values
## [1] 17.3957872 -0.5904904 0.1947032
##
## $vectors
## [,1] [,2] [,3]
## [1,] 0.3782369 0.5818913 0.653408498
## [2,] 0.7549068 -0.7981770 -0.756958478
## [3,] 0.5357729 0.1559361 -0.008437875
##
##
## $`Eigen Y`
## eigen() decomposition
## $values
## [1] 23.230397 -1.286223 0.155826
##
## $vectors
## [,1] [,2] [,3]
## [1,] -0.5633011 -0.7710948 0.5522886
## [2,] -0.5584126 0.5270180 -0.8126545
## [3,] -0.6089887 0.3573023 0.1859299
Eigenvalue & eigenvector -> arah utama variasi data (fondasi PCA dan analisis multivariat).
SVD decomposes a matrix A into three matrices, A = U D V^T, where:
U = an orthogonal matrix (row directions)
D = singular values (the scale of each component)
V = an orthogonal matrix (column directions).
A <- matrix(c(5,-3,6,
2,-4,8,
-2,5,-1,
7,3,9), 4, 3, byrow=TRUE)
svd_result <- svd(A)
list("Singular Values"=svd_result$d,
"U"=svd_result$u,
"V"=svd_result$v)
## $`Singular Values`
## [1] 16.07076 7.41936 3.11187
##
## $U
## [,1] [,2] [,3]
## [1,] -0.5046975 0.2278362 -0.3742460
## [2,] -0.5178195 0.4138180 0.7413297
## [3,] 0.1646416 -0.6063789 0.5337354
## [4,] -0.6708477 -0.6396483 -0.1596770
##
## $V
## [,1] [,2] [,3]
## [1,] -0.5341591 -0.17494276 -0.8270847
## [2,] 0.1490928 -0.98251336 0.1115295
## [3,] -0.8321330 -0.06373793 0.5509011
Singular values indicate the “amount of information” in each dimension.
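As a minimal sketch, multiplying the three factors back together reconstructs A, confirming the decomposition.
# Reconstruct A from its factors: A = U %*% diag(d) %*% t(V).
all.equal(svd_result$u %*% diag(svd_result$d) %*% t(svd_result$v), A) # TRUE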
Preparation
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(StatMatch)
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Loading required package: lpSolve
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
set.seed(321)
ss <- sample(1:50, 15)
df <- USArrests[ss, ]
df.scaled <- scale(df)
The distance between points/objects can be computed in several ways, depending on the context:
# Euclidean
dist.eucl <- dist(df.scaled, method="euclidean")
fviz_dist(dist.eucl)
# Chebyshev
dist.cheb <- dist(df.scaled, method="maximum")
fviz_dist(dist.cheb)
# Manhattan
dist.man <- dist(df.scaled, method="manhattan")
fviz_dist(dist.man)
# Mahalanobis
dist.mah <- mahalanobis.dist(df.scaled)
as.matrix(dist.mah)[1:5,1:5] # show a subset
## Wyoming Illinois Mississippi Kansas New York
## Wyoming 0.000000 1.7186109 2.820779 1.419510 1.8695558
## Illinois 1.718611 0.0000000 3.658323 2.290525 0.4722069
## Mississippi 2.820779 3.6583235 0.000000 3.213907 3.6566922
## Kansas 1.419510 2.2905255 3.213907 0.000000 2.1522535
## New York 1.869556 0.4722069 3.656692 2.152253 0.0000000
# Minkowski
set.seed(123)
data <- matrix(runif(15,1,10), nrow=5, ncol=3)
colnames(data) <- c("X1","X2","X3")
p1 <- data[1,]; p2 <- data[2,]
minkowski_distance <- function(x, y, p){
sum(abs(x-y)^p)^(1/p)
}
list(
Manhattan=minkowski_distance(p1,p2,1),
Euclidean=minkowski_distance(p1,p2,2),
Minkowski_p3=minkowski_distance(p1,p2,3),
Chebyshev=max(abs(p1-p2))
)
## $Manhattan
## [1] 13.38098
##
## $Euclidean
## [1] 7.726871
##
## $Minkowski_p3
## [1] 6.435156
##
## $Chebyshev
## [1] 4.531493
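One useful connection, shown as a minimal sketch reusing p1 and p2 from above: as p grows, the Minkowski distance approaches the Chebyshev distance max(abs(p1 - p2)).
# For large p, the Minkowski distance converges to the Chebyshev value.
minkowski_distance(p1, p2, 50) # close to the Chebyshev result above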
Distances are used in clustering, outlier detection, and multivariate analysis. The choice of distance measure depends on the nature of the data and the goal of the analysis.
Preparation
BB = c(6.2,11.5,8.7,10.1,7.8,6.9,12.0,3.1,14.8,9.4)
PM = c(61,73,68,70,64,60,76,49,84,71)
RTB = c(115,138,127,123,131,120,143,95,160,128)
lizard <- as.matrix(cbind(BB,PM,RTB))
# Mean vector
colMeans(lizard)
## BB PM RTB
## 9.05 67.60 128.00
The mean marks the “center point” of the data.
# Covariance
cov(lizard)
## BB PM RTB
## BB 10.98056 31.80000 54.96667
## PM 31.80000 94.04444 160.22222
## RTB 54.96667 160.22222 300.66667
Large covariance → the variables tend to change together.
# Correlation
cor(lizard)
## BB PM RTB
## BB 1.0000000 0.9895743 0.9566313
## PM 0.9895743 1.0000000 0.9528259
## RTB 0.9566313 0.9528259 1.0000000
Correlation → indicates the strength of the relationship.
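A minimal sketch of the link between the two matrices: correlation is the covariance rescaled by the standard deviations, which stats::cov2cor() confirms.
# cov2cor() rescales a covariance matrix into a correlation matrix.
all.equal(cov2cor(cov(lizard)), cor(lizard)) # TRUE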
# Manual standardization
n <- nrow(lizard)
u <- matrix(1,n,1)                 # column vector of ones
xbar <- (1/n)*t(u)%*%lizard        # mean vector
D <- lizard - u %*% xbar           # deviations from the mean
S <- (1/(n-1))*t(D)%*%D            # covariance matrix
Ds <- diag(sqrt(diag(S)))          # diagonal matrix of standard deviations
R <- solve(Ds) %*% S %*% solve(Ds) # correlation matrix
list("Matriks Rata-rata"=xbar,
"Kovarians"=S,
"Korelasi"=R)
## $`Mean vector`
## BB PM RTB
## [1,] 9.05 67.6 128
##
## $Covariance
## BB PM RTB
## BB 10.98056 31.80000 54.96667
## PM 31.80000 94.04444 160.22222
## RTB 54.96667 160.22222 300.66667
##
## $Correlation
## [,1] [,2] [,3]
## [1,] 1.0000000 0.9895743 0.9566313
## [2,] 0.9895743 1.0000000 0.9528259
## [3,] 0.9566313 0.9528259 1.0000000
Standardization matters before multivariate analysis so that variables on large scales do not dominate the results.
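As a minimal sketch, the manual results above can be checked against R's built-in functions (check.attributes = FALSE because the manual R carries no dimnames).
all.equal(S, cov(lizard))                           # TRUE
all.equal(R, cor(lizard), check.attributes = FALSE) # TRUE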
Basic matrix operations such as addition, subtraction, multiplication, transpose, inverse, and determinant provide a solid linear-algebra foundation. Building on these, the concepts of eigenvalues, eigenvectors, and singular value decomposition (SVD) show how a matrix can be described through its principal components.
In addition, the various distance measures between objects (Euclidean, Manhattan, Chebyshev, Mahalanobis, and Minkowski) illustrate how the closeness of data points can be measured in different ways to suit the task at hand. Finally, computing the mean vector, covariance matrix, correlation matrix, and standardization shows how important it is to understand the structure of the data before further analysis, because that structure strongly influences the results.
Overall, these points confirm that matrix operations and the concepts derived from them are an essential foundation for multivariate analysis, since nearly all modern methods (such as PCA, clustering, and factor analysis) rest on these principles.