Dokumen ini merangkum praktikum pengantar analisis multivariat menggunakan R, meliputi:
pkgs <- c("MVN", "DescTools", "readr", "ggplot2")
to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(to_install) > 0) install.packages(to_install)
invisible(lapply(pkgs, library, character.only = TRUE))
2 + 3
## [1] 5
10 - 4
## [1] 6
3 * 7
## [1] 21
15 / 4
## [1] 3.75
10 / 3
## [1] 3.333333
10 %/% 3
## [1] 3
10 %% 3
## [1] 1
2^5
## [1] 32
sqrt(16)
## [1] 4
log(exp(1))
## [1] 1
log(100, 10)
## [1] 2
exp(1)
## [1] 2.718282
Pembahasan singkat:
%/% memberi hasil pembagian bulat.%% memberi sisa bagi.log(100, 10) berbeda dengan
log(100) karena basis ditentukan eksplisit.x <- 10
y <- 3.5
z <- x + y
nama <- "Salman"
cat("Halo,", nama, "! Nilai z =", z, "\n")
## Halo, Salman ! Nilai z = 13.5
v <- c(2, 5, 1, 3, 4)
v + 10
## [1] 12 15 11 13 14
v * 2
## [1] 4 10 2 6 8
v^2
## [1] 4 25 1 9 16
length(v)
## [1] 5
sum(v)
## [1] 15
mean(v)
## [1] 3
var(v)
## [1] 2.5
sd(v)
## [1] 1.581139
min(v); max(v)
## [1] 1
## [1] 5
Interpretasi:
var(v) menggunakan variansi sampel (pembagi
n-1), sesuai kebutuhan inferensi statistik.A <- matrix(c(1,2,3, 4,5,6, 7,8,9), nrow = 3, ncol = 3)
A
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
t(A)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
B <- matrix(c(1,0,0, 0,2,0, 0,0,3), nrow = 3)
A %*% B
## [,1] [,2] [,3]
## [1,] 1 8 21
## [2,] 2 10 24
## [3,] 3 12 27
C <- matrix(c(2,1, 1,3), nrow = 2)
C
## [,1] [,2]
## [1,] 2 1
## [2,] 1 3
solve(C)
## [,1] [,2]
## [1,] 0.6 -0.2
## [2,] -0.2 0.4
det(C)
## [1] 5
mahasiswa <- data.frame(
Nama = c("Andi", "Budi", "Cici", "Dani", "Edo"),
Nilai1 = c(75, 82, 90, 68, 45),
Nilai2 = c(80, 78, 95, 72, 89),
Nilai3 = c(70, 85, 88, 84, 95)
)
str(mahasiswa)
## 'data.frame': 5 obs. of 4 variables:
## $ Nama : chr "Andi" "Budi" "Cici" "Dani" ...
## $ Nilai1: num 75 82 90 68 45
## $ Nilai2: num 80 78 95 72 89
## $ Nilai3: num 70 85 88 84 95
head(mahasiswa)
Pembahasan:
* pada matriks berarti perkalian elemen, sedangkan
%*% adalah perkalian matriks.solve(C) hanya valid jika determinan matriks tidak
nol.data_ujian <- data.frame(
Aljabar = c(75, 82, 90, 68, 77, 85, 71, 93, 66, 80),
Kalkulus = c(70, 78, 88, 65, 73, 83, 69, 91, 63, 76),
Statistik = c(72, 80, 92, 67, 75, 81, 70, 89, 64, 78),
Komputer = c(68, 75, 85, 62, 70, 79, 65, 87, 60, 73)
)
data_ujian
x_bar <- colMeans(data_ujian)
S <- cov(data_ujian)
R <- cor(data_ujian)
x_bar
## Aljabar Kalkulus Statistik Komputer
## 78.7 75.6 76.8 72.4
round(S, 3)
## Aljabar Kalkulus Statistik Komputer
## Aljabar 81.789 84.978 80.600 83.133
## Kalkulus 84.978 89.378 84.133 86.956
## Statistik 80.600 84.133 82.400 82.533
## Komputer 83.133 86.956 82.533 84.933
round(R, 4)
## Aljabar Kalkulus Statistik Komputer
## Aljabar 1.0000 0.9939 0.9818 0.9974
## Kalkulus 0.9939 1.0000 0.9804 0.9980
## Statistik 0.9818 0.9804 1.0000 0.9866
## Komputer 0.9974 0.9980 0.9866 1.0000
pairs(
data_ujian,
main = "Scatterplot Matrix Skor Ujian",
pch = 19,
col = "steelblue"
)
Interpretasi:
c_vec <- c(0.4, 0.3, 0.2, 0.1)
EY <- t(c_vec) %*% x_bar
VarY <- t(c_vec) %*% S %*% c_vec
EY
## [,1]
## [1,] 76.76
VarY
## [,1]
## [1,] 83.83156
Pembahasan: kombinasi linier berguna untuk menyusun indeks performa gabungan dari beberapa komponen nilai.
X <- as.matrix(data_ujian)
n <- nrow(X)
p <- ncol(X)
d2 <- mahalanobis(X, center = colMeans(X), cov = cov(X))
head(d2)
## [1] 5.191184 2.075978 7.873216 1.974719 1.633245 3.117544
d2_sorted <- sort(d2)
probs <- (seq_len(n) - 0.5) / n
chi2_quant <- qchisq(probs, df = p)
plot(
chi2_quant, d2_sorted,
xlab = expression(paste("Kuantil ", chi[p]^2)),
ylab = expression(paste("Jarak Mahalanobis Terurut ", d[(j)]^2)),
main = paste0("Chi-square Q-Q Plot (p = ", p, ")"),
pch = 19, col = "darkorange"
)
abline(0, 1, col = "tomato", lty = 2, lwd = 2)
hasil_mardia <- mvn(data = data_ujian, mvnTest = "mardia", multivariatePlot = "qq")
hasil_hz <- mvn(data = data_ujian, mvnTest = "hz", multivariatePlot = "qq")
hasil_mardia$multivariateNormality
hasil_hz$multivariateNormality
Interpretasi umum:
Sigma_pop <- diag(c(70, 65, 75, 70))
mu_0 <- c(76, 74, 75, 71)
n <- nrow(data_ujian)
p <- ncol(data_ujian)
x_bar <- colMeans(data_ujian)
diff <- x_bar - mu_0
Z2 <- n * t(diff) %*% solve(Sigma_pop) %*% diff
p_val_z2 <- 1 - pchisq(Z2, df = p)
data.frame(
Statistik = c("Z2", "p-value"),
Nilai = c(round(Z2, 4), round(p_val_z2, 4))
)
S <- cov(data_ujian)
T2 <- n * t(diff) %*% solve(S) %*% diff
F_stat <- ((n - p) / (p * (n - 1))) * T2
p_val_t2 <- 1 - pf(F_stat, df1 = p, df2 = n - p)
data.frame(
Statistik = c("T2", "F", "p-value"),
Nilai = c(round(T2, 4), round(F_stat, 4), round(p_val_t2, 4))
)
HotellingsT2Test(as.matrix(data_ujian), mu = mu_0)
##
## Hotelling's one sample T2-test
##
## data: as.matrix(data_ujian)
## T.2 = 12.735, df1 = 4, df2 = 6, p-value = 0.00431
## alternative hypothesis: true location is not equal to c(76,74,75,71)
Interpretasi:
mu_0.Grup1 <- data.frame(
X1 = c(52, 48, 55, 60, 47, 53, 58, 50, 45, 56),
X2 = c(30, 28, 32, 35, 27, 31, 34, 29, 26, 33)
)
Grup2 <- data.frame(
X1 = c(58, 62, 55, 65, 60, 57, 63, 59, 61, 64, 56, 60),
X2 = c(34, 37, 33, 40, 36, 35, 38, 34, 36, 39, 33, 37)
)
hotelling_two_sample <- function(X1, X2) {
X1 <- as.matrix(X1); X2 <- as.matrix(X2)
n1 <- nrow(X1); n2 <- nrow(X2); p <- ncol(X1)
xbar1 <- colMeans(X1); xbar2 <- colMeans(X2)
S1 <- cov(X1); S2 <- cov(X2)
Sp <- ((n1 - 1) * S1 + (n2 - 1) * S2) / (n1 + n2 - 2)
d <- xbar1 - xbar2
T2 <- (n1 * n2 / (n1 + n2)) * t(d) %*% solve(Sp) %*% d
F_stat <- ((n1 + n2 - p - 1) / (p * (n1 + n2 - 2))) * T2
p_val <- 1 - pf(F_stat, p, n1 + n2 - p - 1)
c(T2 = T2, F = F_stat, p_value = p_val)
}
round(hotelling_two_sample(Grup1, Grup2), 4)
## T2 F p_value
## 26.3814 12.5312 0.0003
Sebelum <- data.frame(
Tinggi = c(165,170,158,172,163,168,175,160,167,173,162,170,164,166,171),
Berat = c(62,68,55,72,60,65,73,58,64,70,61,67,63,66,69)
)
Sesudah <- data.frame(
Tinggi = Sebelum$Tinggi + c(1,2,1,1,0,2,1,1,2,1,1,2,0,1,1),
Berat = Sebelum$Berat + c(-2,-3,-1,-3,-2,-2,-3,-1,-2,-3,-1,-2,-2,-2,-3)
)
D <- Sesudah - Sebelum
d_bar <- colMeans(D)
S_D <- cov(D)
n_d <- nrow(D)
p_d <- ncol(D)
T2_pair <- n_d * t(d_bar) %*% solve(S_D) %*% d_bar
F_pair <- ((n_d - p_d) / (p_d * (n_d - 1))) * T2_pair
pval_pair <- 1 - pf(F_pair, df1 = p_d, df2 = n_d - p_d)
data.frame(
Statistik = c("T2", "F", "p-value"),
Nilai = c(round(T2_pair, 4), round(F_pair, 4), round(pval_pair, 4))
)
set.seed(789)
n_per <- 10
Kelompok <- factor(rep(c("A", "B", "C"), each = n_per))
Y1 <- c(rnorm(n_per, 50, 5), rnorm(n_per, 55, 5), rnorm(n_per, 60, 5))
Y2 <- c(rnorm(n_per, 30, 4), rnorm(n_per, 33, 4), rnorm(n_per, 36, 4))
Y3 <- c(rnorm(n_per, 20, 3), rnorm(n_per, 22, 3), rnorm(n_per, 24, 3))
data_manova <- data.frame(Y1, Y2, Y3, Kelompok)
fit_manova <- manova(cbind(Y1, Y2, Y3) ~ Kelompok, data = data_manova)
summary(fit_manova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Kelompok 2 0.2834 7.3205 6 50 1.197e-05 ***
## Residuals 27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit_manova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Kelompok 2 0.75051 5.2057 6 52 0.0002936 ***
## Residuals 27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit_manova, test = "Hotelling")
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## Kelompok 2 2.409 9.6358 6 48 5.975e-07 ***
## Residuals 27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit_manova, test = "Roy")
## Df Roy approx F num Df den Df Pr(>F)
## Kelompok 2 2.3582 20.438 3 26 5.151e-07 ***
## Residuals 27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(fit_manova)
## Response Y1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Kelompok 2 567.35 283.673 21.163 2.96e-06 ***
## Residuals 27 361.91 13.404
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Y2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Kelompok 2 365.83 182.916 10.22 0.0004958 ***
## Residuals 27 483.22 17.897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Y3 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Kelompok 2 70.441 35.220 3.6418 0.03979 *
## Residuals 27 261.123 9.671
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pembahasan:
set.seed(2024)
n_sc <- 20
Sebelum_SC <- data.frame(
Aljabar = round(rnorm(n_sc, mean = 58, sd = 8)),
Kalkulus = round(rnorm(n_sc, mean = 55, sd = 9)),
Statistika = round(rnorm(n_sc, mean = 60, sd = 7))
)
Sesudah_SC <- data.frame(
Aljabar = round(Sebelum_SC$Aljabar + rnorm(n_sc, mean = 9, sd = 5)),
Kalkulus = round(Sebelum_SC$Kalkulus + rnorm(n_sc, mean = 8, sd = 6)),
Statistika = round(Sebelum_SC$Statistika + rnorm(n_sc, mean = 7, sd = 4))
)
colMeans(Sebelum_SC)
## Aljabar Kalkulus Statistika
## 55.65 54.15 60.20
colMeans(Sesudah_SC)
## Aljabar Kalkulus Statistika
## 65.30 60.90 67.65
pairs(
Sebelum_SC,
main = "Scatterplot Matrix Nilai Sebelum Remedial",
pch = 19,
col = "steelblue"
)
D_SC <- Sesudah_SC - Sebelum_SC
d_bar_SC <- colMeans(D_SC)
S_D_SC <- cov(D_SC)
n_SC <- nrow(D_SC)
p_SC <- ncol(D_SC)
T2_SC <- n_SC * t(d_bar_SC) %*% solve(S_D_SC) %*% d_bar_SC
F_SC <- ((n_SC - p_SC) / (p_SC * (n_SC - 1))) * T2_SC
pval_SC <- 1 - pf(F_SC, df1 = p_SC, df2 = n_SC - p_SC)
data.frame(
Statistik = c("T2", "F", "p-value"),
Nilai = c(round(T2_SC, 4), round(F_SC, 4), round(pval_SC, 4))
)
HotellingsT2Test(as.matrix(D_SC), mu = rep(0, p_SC))
##
## Hotelling's one sample T2-test
##
## data: as.matrix(D_SC)
## T.2 = 40.157, df1 = 3, df2 = 17, p-value = 6.225e-08
## alternative hypothesis: true location is not equal to c(0,0,0)
Interpretasi:
Kesimpulan praktikum (isi sesuai hasil knit):
x <- c(3,7,2,9,4,6,1,8,5,10)
n <- length(x)
mean_manual <- sum(x) / n
var_manual <- sum((x - mean_manual)^2) / (n - 1)
sd_manual <- sqrt(var_manual)
data.frame(
mean_builtin = mean(x),
mean_manual = mean_manual,
var_builtin = var(x),
var_manual = var_manual,
sd_builtin = sd(x),
sd_manual = sd_manual
)