Misal kita ingin membandingkan dua metode belajar (Metode A vs Metode B) pada dua mata kuliah (Matematika dan Statistika).
Sampel berpasangan → Nilai diukur pada siswa yang sama sebelum (A) dan sesudah (B).
Sampel bebas → Nilai diukur pada dua kelompok siswa yang berbeda (Kelompok A vs Kelompok B).
# Metode Belajar A
A <- data.frame(
Biologi = c(70, 65, 80, 75, 68),
Statistika = c(72, 60, 78, 74, 70)
)
# Metode Belajar B
B <- data.frame(
Biologi = c(75, 70, 85, 77, 73),
Statistika = c(76, 65, 82, 78, 74)
)
A
## Biologi Statistika
## 1 70 72
## 2 65 60
## 3 80 78
## 4 75 74
## 5 68 70
B
## Biologi Statistika
## 1 75 76
## 2 70 65
## 3 85 82
## 4 77 78
## 5 73 74
# Selisih (karena kasus berpasangan)
diff <- B - A
diff
## Biologi Statistika
## 1 5 4
## 2 5 5
## 3 5 4
## 4 2 4
## 5 5 4
# Vektor rataan (mean tiap peubah)
mean_vector <- colMeans(diff)
mean_vector
## Biologi Statistika
## 4.4 4.2
# Matriks kovarians
cov_matrix <- cov(diff)
cov_matrix
## Biologi Statistika
## Biologi 1.80 0.15
## Statistika 0.15 0.20
library(MVTests)
## Warning: package 'MVTests' was built under R version 4.4.3
##
## Attaching package: 'MVTests'
## The following object is masked from 'package:datasets':
##
## iris
X <- B - A
mean0 <- c(0,0)
result <- OneSampleHT2(X, mu0 = mean0, alpha = 0.05)
summary(result)
## One Sample Hotelling T Square Test
##
## Hotelling T Sqaure Statistic = 445.6296
## F value = 167.111 , df1 = 2 , df2 = 3 , p-value: 0.000839
##
## Descriptive Statistics
##
## Biologi Statistika
## N 5.000000 5.0000000
## Means 4.400000 4.2000000
## Sd 1.341641 0.4472136
##
##
## Detection important variable(s)
##
## Lower Upper Mu0 Important Variables?
## Biologi 1.371797 7.428203 0 *TRUE*
## Statistika 3.190599 5.209401 0 *TRUE*
result$CI
## Lower Upper Mu0 Important Variables?
## Biologi 1.371797 7.428203 0 *TRUE*
## Statistika 3.190599 5.209401 0 *TRUE*
bon <- function(mu, S, n, alpha, k){
p <- length(mu)
t_bon <- qt(1 - alpha/(2*p), df = n - 1)
se_j <- sqrt(S[k,k]/n)
lower <- mu[k] - t_bon * se_j
upper <- mu[k] + t_bon * se_j
c(lower = lower, mean = mu[k], upper = upper)
}
X <- B - A
n <- nrow(X)
mu_hat <- colMeans(X) # vektor mean
S <- cov(X) # matriks kovarians
# CI Bonferroni utk Biologi
bon(mu_hat, S, n, alpha = 0.05, k = 1)
## lower.Biologi mean.Biologi upper.Biologi
## 2.302756 4.400000 6.497244
# CI Bonferroni utk Statistika
bon(mu_hat, S, n, alpha = 0.05, k = 2)
## lower.Statistika mean.Statistika upper.Statistika
## 3.500919 4.200000 4.899081
mean1 <- colMeans(A)
mean1
## Biologi Statistika
## 71.6 70.8
mean2 <- colMeans(B)
mean2
## Biologi Statistika
## 76 75
S1 <- cov(A)
S1
## Biologi Statistika
## Biologi 35.3 35.9
## Statistika 35.9 45.2
S2 <- cov(B)
S2
## Biologi Statistika
## Biologi 32 32
## Statistika 32 40
n1 <- nrow(A)
n2 <- nrow(B)
p <- ncol(A) # harus sama dengan ncol(B)
# pooled covariance matrix
Sp <- ((n1 - 1) * S1 + (n2 - 1) * S2) / (n1 + n2 - 2)
Sp
## Biologi Statistika
## Biologi 33.65 33.95
## Statistika 33.95 42.60
library(Hotelling)
## Loading required package: corpcor
# T2 = (n1*n2)/(n1+n2) * (mean1-mean2)' %*% solve(Sp) %*% (mean1-mean2)
d <- mean1 - mean2
T2_ind <- (n1 * n2)/(n1 + n2) * drop( t(d) %*% solve(Sp) %*% d )
# konversi ke F untuk p, df1=p, df2 = n1 + n2 - p - 1
F_stat <- ( (n1 + n2 - p - 1) / (p * (n1 + n2 - 2)) ) * T2_ind
p_value_hotelling_ind <- 1 - pf(F_stat, df1 = p, df2 = n1 + n2 - p - 1)
list(T2 = T2_ind, F = F_stat, p.value = p_value_hotelling_ind)
## $T2
## [1] 1.455476
##
## $F
## [1] 0.6367707
##
## $p.value
## [1] 0.5570871
T.ci = function(mu1, mu2, S_pooled, n1, n2, avec, level = 0.95){
# mu1, mu2 = mean vector masing-masing grup
# S_pooled = pooled covariance matrix
# n1, n2 = ukuran sampel
# avec = arah vektor (misal c(1,0) untuk variabel pertama)
# level = tingkat kepercayaan (default 0.95)
p <- length(mu1)
mu_diff <- mu1 - mu2
# konstanta kritis Hotelling
cval <- qf(level, p, n1 + n2 - p - 1) * p * (n1 + n2 - 2) / (n1 + n2 - p - 1)
# estimasi proyeksi mean difference pada arah avec
zhat <- crossprod(avec, mu_diff)
# varians proyeksi
zvar <- crossprod(avec, S_pooled %*% avec) * (1/n1 + 1/n2)
const <- sqrt(cval * zvar)
c(lower = zhat - const, upper = zhat + const)
}
# Biologi
T.ci(mean1, mean2, Sp, n1, n2, avec = c(1,0), level = 0.95)
## lower upper
## -16.472694 7.672694
# Statistika
T.ci(mean1, mean2, Sp, n1, n2, avec = c(0,1), level = 0.95)
## lower upper
## -17.783649 9.383649
bon = function(mu1, mu2, S_pooled, n1, n2, alpha, k){
# mu1, mu2 = mean vector
# S_pooled = pooled covariance
# n1, n2 = ukuran sampel
# alpha = tingkat signifikansi (misal 0.05)
# k = indeks variabel (1 = Biologi, 2 = Statistika, dst.)
p <- length(mu1)
mu_diff <- mu1 - mu2
# t kritis Bonferroni
t_bon <- qt(1 - alpha/(2*p), df = n1 + n2 - 2)
# standard error utk variabel ke-k
se <- sqrt(S_pooled[k,k] * (1/n1 + 1/n2))
lower <- mu_diff[k] - t_bon * se
upper <- mu_diff[k] + t_bon * se
c(lower = lower, mean = mu_diff[k], upper = upper)
}
# Biologi
bon(mean1, mean2, Sp, n1, n2, alpha = 0.05, k = 1)
## lower.Biologi mean.Biologi upper.Biologi
## -14.494755 -4.400000 5.694755
# Statistika
bon(mean1, mean2, Sp, n1, n2, alpha = 0.05, k = 2)
## lower.Statistika mean.Statistika upper.Statistika
## -15.558161 -4.200000 7.158161
⚙️ Dibangkitkan dengan set.seed(84)
📊 Peubah: Produktivitas (jam kerja efektif per
minggu) dan Kepuasan Kerja (skala 1–100)
👥 Dua kelompok: Karyawan Tetap vs Karyawan Kontrak
library(MASS)
set.seed(84)
# ukuran sampel
n_tetap <- 22
n_kontrak <- 28
# Populasi 1: Karyawan Tetap
mu_tetap <- c(40, 75) # rata-rata jam kerja efektif, kepuasan kerja
Sigma1 <- matrix(c(30, 12,
12, 50), 2) # ragam dan kovarians
data_tetap <- mvrnorm(n_tetap, mu = mu_tetap, Sigma = Sigma1)
# Populasi 2: Karyawan Kontrak
mu_kontrak <- c(38, 70)
Sigma2 <- matrix(c(55, 20,
20, 60), 2) # ragam beda dari kelompok 1
data_kontrak <- mvrnorm(n_kontrak, mu = mu_kontrak, Sigma = Sigma2)
# satukan dataset
dataset <- data.frame(
Status = c(rep("Tetap", n_tetap), rep("Kontrak", n_kontrak)),
Produktivitas = c(data_tetap[,1], data_kontrak[,1]),
Kepuasan = c(data_tetap[,2], data_kontrak[,2])
)
dataset
## Status Produktivitas Kepuasan
## 1 Tetap 37.89063 81.91420
## 2 Tetap 47.23555 81.00409
## 3 Tetap 33.88615 71.72212
## 4 Tetap 39.00927 66.02279
## 5 Tetap 34.88215 79.50260
## 6 Tetap 41.48913 79.47379
## 7 Tetap 40.37668 84.13073
## 8 Tetap 32.28295 63.03346
## 9 Tetap 32.63173 67.51573
## 10 Tetap 34.92367 65.99525
## 11 Tetap 45.21832 60.57799
## 12 Tetap 38.09390 72.80438
## 13 Tetap 34.95312 81.55744
## 14 Tetap 44.12934 78.14431
## 15 Tetap 40.39197 80.08529
## 16 Tetap 35.51351 66.85782
## 17 Tetap 45.24835 78.25994
## 18 Tetap 44.21917 77.42775
## 19 Tetap 41.24146 72.21804
## 20 Tetap 32.34324 77.36221
## 21 Tetap 34.83962 71.09721
## 22 Tetap 37.17472 74.73478
## 23 Kontrak 39.21517 74.42643
## 24 Kontrak 40.51576 78.02271
## 25 Kontrak 37.49248 75.07235
## 26 Kontrak 42.44559 87.05263
## 27 Kontrak 34.75809 70.61743
## 28 Kontrak 38.90047 66.84761
## 29 Kontrak 36.64388 74.25127
## 30 Kontrak 32.79040 62.48740
## 31 Kontrak 41.59531 77.76176
## 32 Kontrak 37.68916 83.90343
## 33 Kontrak 39.24752 78.12855
## 34 Kontrak 40.30122 77.11150
## 35 Kontrak 41.36428 63.37444
## 36 Kontrak 39.07312 78.84347
## 37 Kontrak 48.23508 80.25021
## 38 Kontrak 39.06680 76.78949
## 39 Kontrak 26.23850 69.85246
## 40 Kontrak 43.76267 72.90826
## 41 Kontrak 30.77987 62.59005
## 42 Kontrak 36.54014 62.99529
## 43 Kontrak 32.80901 77.45746
## 44 Kontrak 39.07360 65.72756
## 45 Kontrak 33.87720 76.04894
## 46 Kontrak 42.72165 78.75156
## 47 Kontrak 38.43039 77.76899
## 48 Kontrak 38.20967 71.61813
## 49 Kontrak 25.51290 71.63719
## 50 Kontrak 36.85256 64.74270
# pisahkan kelompok
group_tetap <- subset(dataset, Status == "Tetap", select = c(Produktivitas, Kepuasan))
group_kontrak <- subset(dataset, Status == "Kontrak", select = c(Produktivitas, Kepuasan))
# vektor rataan
xbar1 <- colMeans(group_tetap)
xbar2 <- colMeans(group_kontrak)
# matriks kovarians
cov1 <- cov(group_tetap)
cov2 <- cov(group_kontrak)
xbar1; xbar2; cov1; cov2
## Produktivitas Kepuasan
## 38.54430 74.15645
## Produktivitas Kepuasan
## 37.64795 73.46569
## Produktivitas Kepuasan
## Produktivitas 21.452554 8.813284
## Kepuasan 8.813284 46.023417
## Produktivitas Kepuasan
## Produktivitas 24.20601 13.16718
## Kepuasan 13.16718 43.89861
library(Hotelling)
t2_unequal <- hotelling.test(group_tetap, group_kontrak, var.equal = FALSE)
t2_unequal
## Test stat: 0.45861
## Numerator df: 2
## Denominator df: 45.3151329567455
## P-value: 0.7998
T.ci = function(mu1, mu2, S1, S2, n1, n2, avec = rep(1, length(mu1)), level = 0.95){
p <- length(mu1)
mu_diff <- mu1 - mu2
# kritis chi-square
cval <- qchisq(level, p)
# proyeksi mean difference
zhat <- crossprod(avec, mu_diff)
# varian proyeksi
zvar <- crossprod(avec, S1 %*% avec)/n1 + crossprod(avec, S2 %*% avec)/n2
const <- sqrt(cval * zvar)
c(lower = zhat - const, upper = zhat + const)
}
# Produktivitas
T.ci(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, avec = c(1,0), level = 0.95)
## lower upper
## -2.423585 4.216293
# Kepuasan
T.ci(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, avec = c(0,1), level = 0.95)
## lower upper
## -3.991912 5.373438
bon = function(mu1, mu2, S1, S2, n1, n2, alpha, k){
p <- length(mu1)
mu_diff <- mu1 - mu2
# t kritis Bonferroni
t_bon <- qt(1 - alpha/(2*p), df = min(n1, n2) - 1) # konservatif
# standard error utk variabel ke-k
se <- sqrt(S1[k,k]/n1 + S2[k,k]/n2)
lower <- mu_diff[k] - t_bon * se
upper <- mu_diff[k] + t_bon * se
c(lower = lower, mean = mu_diff[k], upper = upper)
}
# Produktivitas
bon(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, alpha = 0.05, k = 1)
## lower.Produktivitas mean.Produktivitas upper.Produktivitas
## -2.3776031 0.8963542 4.1703116
# Kepuasan
bon(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, alpha = 0.05, k = 2)
## lower.Kepuasan mean.Kepuasan upper.Kepuasan
## -3.9270563 0.6907631 5.3085824