Studi Kasus Perbandingan Dua Metode Belajar (A dan B) pada Dua Mata Kuliah (Biologi dan Statistika)

Misal kita ingin membandingkan dua metode belajar (Metode A vs Metode B) pada dua mata kuliah (Matematika dan Statistika).

Input Data Bagian A dan B

# Metode Belajar A
A <- data.frame(
  Biologi    = c(70, 65, 80, 75, 68),
  Statistika = c(72, 60, 78, 74, 70)
)

# Metode Belajar B
B <- data.frame(
  Biologi    = c(75, 70, 85, 77, 73),
  Statistika = c(76, 65, 82, 78, 74)
)

A
##   Biologi Statistika
## 1      70         72
## 2      65         60
## 3      80         78
## 4      75         74
## 5      68         70
B
##   Biologi Statistika
## 1      75         76
## 2      70         65
## 3      85         82
## 4      77         78
## 5      73         74

BAGIAN A - Sampel Berpasangan

1. Vektor Rataan & Matriks Kovarians

# Selisih (karena kasus berpasangan)
diff <- B - A
diff
##   Biologi Statistika
## 1       5          4
## 2       5          5
## 3       5          4
## 4       2          4
## 5       5          4
# Vektor rataan (mean tiap peubah)
mean_vector <- colMeans(diff)
mean_vector
##    Biologi Statistika 
##        4.4        4.2
# Matriks kovarians
cov_matrix <- cov(diff)
cov_matrix
##            Biologi Statistika
## Biologi       1.80       0.15
## Statistika    0.15       0.20

2. Uji T2 Hotelling Dua Populasi Sampel Berpasangan

library(MVTests)
## Warning: package 'MVTests' was built under R version 4.4.3
## 
## Attaching package: 'MVTests'
## The following object is masked from 'package:datasets':
## 
##     iris
X <- B - A
mean0 <- c(0,0)
result <- OneSampleHT2(X, mu0 = mean0, alpha = 0.05)
summary(result)
##        One Sample Hotelling T Square Test 
## 
## Hotelling T Sqaure Statistic = 445.6296 
##  F value = 167.111 , df1 = 2 , df2 = 3 , p-value: 0.000839 
## 
##                    Descriptive Statistics
## 
##        Biologi Statistika
## N     5.000000  5.0000000
## Means 4.400000  4.2000000
## Sd    1.341641  0.4472136
## 
## 
##                  Detection important variable(s)
## 
##               Lower    Upper Mu0 Important Variables?
## Biologi    1.371797 7.428203   0               *TRUE*
## Statistika 3.190599 5.209401   0               *TRUE*

3. Selang Kepercayaan Simultan

result$CI
##               Lower    Upper Mu0 Important Variables?
## Biologi    1.371797 7.428203   0               *TRUE*
## Statistika 3.190599 5.209401   0               *TRUE*

4. Selang Kepercayaan Bonferroni

bon <- function(mu, S, n, alpha, k){
  p <- length(mu)
  t_bon <- qt(1 - alpha/(2*p), df = n - 1)
  
  se_j <- sqrt(S[k,k]/n)
  lower <- mu[k] - t_bon * se_j
  upper <- mu[k] + t_bon * se_j
  
  c(lower = lower, mean = mu[k], upper = upper)
}

X <- B - A
n <- nrow(X)
mu_hat <- colMeans(X)     # vektor mean
S <- cov(X)               # matriks kovarians
# CI Bonferroni utk Biologi
bon(mu_hat, S, n, alpha = 0.05, k = 1)
## lower.Biologi  mean.Biologi upper.Biologi 
##      2.302756      4.400000      6.497244
# CI Bonferroni utk Statistika
bon(mu_hat, S, n, alpha = 0.05, k = 2)
## lower.Statistika  mean.Statistika upper.Statistika 
##         3.500919         4.200000         4.899081

BAGIAN B - Sampel Saling Bebas Ragam Sama

1. Vektor Rataan & Matriks Kovarians

mean1 <- colMeans(A)
mean1
##    Biologi Statistika 
##       71.6       70.8
mean2 <- colMeans(B)
mean2
##    Biologi Statistika 
##         76         75
S1 <- cov(A)
S1
##            Biologi Statistika
## Biologi       35.3       35.9
## Statistika    35.9       45.2
S2 <- cov(B)
S2
##            Biologi Statistika
## Biologi         32         32
## Statistika      32         40
n1 <- nrow(A)
n2 <- nrow(B)
p  <- ncol(A)  # harus sama dengan ncol(B)

# pooled covariance  matrix
Sp <- ((n1 - 1) * S1 + (n2 - 1) * S2) / (n1 + n2 - 2)
Sp
##            Biologi Statistika
## Biologi      33.65      33.95
## Statistika   33.95      42.60

2. Uji T2 Hotelling Dua Populasi Sampel Saling Bebas Ragam Sama

library(Hotelling)
## Loading required package: corpcor
# T2 = (n1*n2)/(n1+n2) * (mean1-mean2)' %*% solve(Sp) %*% (mean1-mean2)
d <- mean1 - mean2
T2_ind <- (n1 * n2)/(n1 + n2) * drop( t(d) %*% solve(Sp) %*% d )

# konversi ke F untuk p, df1=p, df2 = n1 + n2 - p - 1
F_stat <- ( (n1 + n2 - p - 1) / (p * (n1 + n2 - 2)) ) * T2_ind
p_value_hotelling_ind <- 1 - pf(F_stat, df1 = p, df2 = n1 + n2 - p - 1)

list(T2 = T2_ind, F = F_stat, p.value = p_value_hotelling_ind)
## $T2
## [1] 1.455476
## 
## $F
## [1] 0.6367707
## 
## $p.value
## [1] 0.5570871

3. Selang Kepercayaan Simultan

T.ci = function(mu1, mu2, S_pooled, n1, n2, avec, level = 0.95){
  # mu1, mu2 = mean vector masing-masing grup
  # S_pooled  = pooled covariance matrix
  # n1, n2    = ukuran sampel
  # avec      = arah vektor (misal c(1,0) untuk variabel pertama)
  # level     = tingkat kepercayaan (default 0.95)

  p <- length(mu1)
  mu_diff <- mu1 - mu2
  
  # konstanta kritis Hotelling
  cval <- qf(level, p, n1 + n2 - p - 1) * p * (n1 + n2 - 2) / (n1 + n2 - p - 1)
  
  # estimasi proyeksi mean difference pada arah avec
  zhat <- crossprod(avec, mu_diff)
  
  # varians proyeksi
  zvar <- crossprod(avec, S_pooled %*% avec) * (1/n1 + 1/n2)
  
  const <- sqrt(cval * zvar)
  c(lower = zhat - const, upper = zhat + const)
}
# Biologi
T.ci(mean1, mean2, Sp, n1, n2, avec = c(1,0), level = 0.95)
##      lower      upper 
## -16.472694   7.672694
# Statistika
T.ci(mean1, mean2, Sp, n1, n2, avec = c(0,1), level = 0.95)
##      lower      upper 
## -17.783649   9.383649

4. Selang Kepercayaan Bonferroni

bon = function(mu1, mu2, S_pooled, n1, n2, alpha, k){
  # mu1, mu2 = mean vector
  # S_pooled = pooled covariance
  # n1, n2   = ukuran sampel
  # alpha    = tingkat signifikansi (misal 0.05)
  # k        = indeks variabel (1 = Biologi, 2 = Statistika, dst.)

  p <- length(mu1)
  mu_diff <- mu1 - mu2
  
  # t kritis Bonferroni
  t_bon <- qt(1 - alpha/(2*p), df = n1 + n2 - 2)
  
  # standard error utk variabel ke-k
  se <- sqrt(S_pooled[k,k] * (1/n1 + 1/n2))
  
  lower <- mu_diff[k] - t_bon * se
  upper <- mu_diff[k] + t_bon * se
  
  c(lower = lower, mean = mu_diff[k], upper = upper)
}
# Biologi
bon(mean1, mean2, Sp, n1, n2, alpha = 0.05, k = 1)
## lower.Biologi  mean.Biologi upper.Biologi 
##    -14.494755     -4.400000      5.694755
# Statistika
bon(mean1, mean2, Sp, n1, n2, alpha = 0.05, k = 2)
## lower.Statistika  mean.Statistika upper.Statistika 
##       -15.558161        -4.200000         7.158161

BAGIAN C - Sampel Saling Bebas Ragam Beda

⚙️ Dibangkitkan dengan set.seed(84)

📊 Peubah: Produktivitas (jam kerja efektif per minggu) dan Kepuasan Kerja (skala 1–100)

👥 Dua kelompok: Karyawan Tetap vs Karyawan Kontrak

1. Pembangkitan Data

library(MASS)
set.seed(84)

# ukuran sampel
n_tetap   <- 22
n_kontrak <- 28

# Populasi 1: Karyawan Tetap
mu_tetap <- c(40, 75)   # rata-rata jam kerja efektif, kepuasan kerja
Sigma1   <- matrix(c(30, 12,
                     12, 50), 2)  # ragam dan kovarians
data_tetap <- mvrnorm(n_tetap, mu = mu_tetap, Sigma = Sigma1)

# Populasi 2: Karyawan Kontrak
mu_kontrak <- c(38, 70)
Sigma2     <- matrix(c(55, 20,
                       20, 60), 2) # ragam beda dari kelompok 1
data_kontrak <- mvrnorm(n_kontrak, mu = mu_kontrak, Sigma = Sigma2)

# satukan dataset
dataset <- data.frame(
  Status = c(rep("Tetap", n_tetap), rep("Kontrak", n_kontrak)),
  Produktivitas = c(data_tetap[,1], data_kontrak[,1]),
  Kepuasan      = c(data_tetap[,2], data_kontrak[,2])
)

dataset
##     Status Produktivitas Kepuasan
## 1    Tetap      37.89063 81.91420
## 2    Tetap      47.23555 81.00409
## 3    Tetap      33.88615 71.72212
## 4    Tetap      39.00927 66.02279
## 5    Tetap      34.88215 79.50260
## 6    Tetap      41.48913 79.47379
## 7    Tetap      40.37668 84.13073
## 8    Tetap      32.28295 63.03346
## 9    Tetap      32.63173 67.51573
## 10   Tetap      34.92367 65.99525
## 11   Tetap      45.21832 60.57799
## 12   Tetap      38.09390 72.80438
## 13   Tetap      34.95312 81.55744
## 14   Tetap      44.12934 78.14431
## 15   Tetap      40.39197 80.08529
## 16   Tetap      35.51351 66.85782
## 17   Tetap      45.24835 78.25994
## 18   Tetap      44.21917 77.42775
## 19   Tetap      41.24146 72.21804
## 20   Tetap      32.34324 77.36221
## 21   Tetap      34.83962 71.09721
## 22   Tetap      37.17472 74.73478
## 23 Kontrak      39.21517 74.42643
## 24 Kontrak      40.51576 78.02271
## 25 Kontrak      37.49248 75.07235
## 26 Kontrak      42.44559 87.05263
## 27 Kontrak      34.75809 70.61743
## 28 Kontrak      38.90047 66.84761
## 29 Kontrak      36.64388 74.25127
## 30 Kontrak      32.79040 62.48740
## 31 Kontrak      41.59531 77.76176
## 32 Kontrak      37.68916 83.90343
## 33 Kontrak      39.24752 78.12855
## 34 Kontrak      40.30122 77.11150
## 35 Kontrak      41.36428 63.37444
## 36 Kontrak      39.07312 78.84347
## 37 Kontrak      48.23508 80.25021
## 38 Kontrak      39.06680 76.78949
## 39 Kontrak      26.23850 69.85246
## 40 Kontrak      43.76267 72.90826
## 41 Kontrak      30.77987 62.59005
## 42 Kontrak      36.54014 62.99529
## 43 Kontrak      32.80901 77.45746
## 44 Kontrak      39.07360 65.72756
## 45 Kontrak      33.87720 76.04894
## 46 Kontrak      42.72165 78.75156
## 47 Kontrak      38.43039 77.76899
## 48 Kontrak      38.20967 71.61813
## 49 Kontrak      25.51290 71.63719
## 50 Kontrak      36.85256 64.74270

2. Vektor Rataan & Matriks Kovarians

# pisahkan kelompok
group_tetap   <- subset(dataset, Status == "Tetap", select = c(Produktivitas, Kepuasan))
group_kontrak <- subset(dataset, Status == "Kontrak", select = c(Produktivitas, Kepuasan))

# vektor rataan
xbar1 <- colMeans(group_tetap)
xbar2 <- colMeans(group_kontrak)

# matriks kovarians
cov1 <- cov(group_tetap)
cov2 <- cov(group_kontrak)

xbar1; xbar2; cov1; cov2
## Produktivitas      Kepuasan 
##      38.54430      74.15645
## Produktivitas      Kepuasan 
##      37.64795      73.46569
##               Produktivitas  Kepuasan
## Produktivitas     21.452554  8.813284
## Kepuasan           8.813284 46.023417
##               Produktivitas Kepuasan
## Produktivitas      24.20601 13.16718
## Kepuasan           13.16718 43.89861

3. Uji T2 Hotelling Dua Populasi Sampel Saling Bebas Ragam Beda

library(Hotelling)

t2_unequal <- hotelling.test(group_tetap, group_kontrak, var.equal = FALSE)
t2_unequal
## Test stat:  0.45861 
## Numerator df:  2 
## Denominator df:  45.3151329567455 
## P-value:  0.7998

4. Selang Kepercayaan Simultan

T.ci = function(mu1, mu2, S1, S2, n1, n2, avec = rep(1, length(mu1)), level = 0.95){
  p <- length(mu1)
  mu_diff <- mu1 - mu2
  
  # kritis chi-square
  cval <- qchisq(level, p)
  
  # proyeksi mean difference
  zhat <- crossprod(avec, mu_diff)
  
  # varian proyeksi
  zvar <- crossprod(avec, S1 %*% avec)/n1 + crossprod(avec, S2 %*% avec)/n2
  
  const <- sqrt(cval * zvar)
  c(lower = zhat - const, upper = zhat + const)
}
# Produktivitas
T.ci(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, avec = c(1,0), level = 0.95)
##     lower     upper 
## -2.423585  4.216293
# Kepuasan
T.ci(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, avec = c(0,1), level = 0.95)
##     lower     upper 
## -3.991912  5.373438

5. Selang Kepercayaan Bonferroni

bon = function(mu1, mu2, S1, S2, n1, n2, alpha, k){
  p <- length(mu1)
  mu_diff <- mu1 - mu2
  
  # t kritis Bonferroni
  t_bon <- qt(1 - alpha/(2*p), df = min(n1, n2) - 1) # konservatif
  
  # standard error utk variabel ke-k
  se <- sqrt(S1[k,k]/n1 + S2[k,k]/n2)
  
  lower <- mu_diff[k] - t_bon * se
  upper <- mu_diff[k] + t_bon * se
  
  c(lower = lower, mean = mu_diff[k], upper = upper)
}
# Produktivitas
bon(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, alpha = 0.05, k = 1)
## lower.Produktivitas  mean.Produktivitas upper.Produktivitas 
##          -2.3776031           0.8963542           4.1703116
# Kepuasan
bon(xbar1, xbar2, cov1, cov2, n_tetap, n_kontrak, alpha = 0.05, k = 2)
## lower.Kepuasan  mean.Kepuasan upper.Kepuasan 
##     -3.9270563      0.6907631      5.3085824