Soal 1

#' Winsorized mean computed with a manual summation loop
#'
#' Sorts `x`, replaces the `k` smallest values with the (k + 1)-th smallest
#' value and the `k` largest values with the (n - k)-th smallest value, where
#' `k = floor(n * alpha)`, and then averages the result. With `alpha = 0`
#' nothing is replaced and the ordinary arithmetic mean is returned.
#'
#' @param x Numeric (or logical) vector of observations.
#' @param alpha Proportion of observations winsorized in EACH tail; a single
#'   number in `[0, 0.5]`.
#' @param na.rm If `TRUE`, missing values are dropped before winsorizing. If
#'   `FALSE` (default) and `x` contains a missing value, `NA_real_` is
#'   returned.
#' @return A single numeric value; `NaN` when there are no observations.
winsorized_mean <- function(x, alpha, na.rm = FALSE) {
  stopifnot(
    is.numeric(x) || is.logical(x),
    is.numeric(alpha),
    length(alpha) == 1L,
    !is.na(alpha),
    alpha >= 0,
    alpha <= 0.5
  )

  # sort() silently drops NA, so missing values must be handled before the
  # sample size is taken; otherwise n and the sorted vector disagree.
  if (anyNA(x)) {
    if (!na.rm) {
      return(NA_real_)
    }
    x <- x[!is.na(x)]
  }

  x_sorted <- sort(x)
  n <- length(x_sorted)

  # Cap k so the two tails never overlap (only matters for alpha = 0.5 with
  # an even n, where the result is the median either way).
  k <- min(floor(n * alpha), (n - 1) %/% 2)

  y <- x_sorted
  if (k > 0) {
    y[seq_len(k)] <- x_sorted[k + 1]
    y[(n - k + 1):n] <- x_sorted[n - k]
  }

  # Manual accumulation (kept as an explicit loop on purpose); seq_len() makes
  # the loop a no-op for empty input instead of iterating over c(1, 0).
  total <- 0
  for (i in seq_len(n)) {
    total <- total + y[[i]]
  }

  total / n
}

# Sample of 15 observations (the plot title below labels it as production
# data); 310 is far larger than every other value.
x <- c(12, 45, 52, 58, 61, 63, 67, 70, 72, 75, 78, 82, 88, 95, 310)

# alpha = 0 gives k = 0, so nothing is replaced: this is the ordinary mean.
mean_biasa <- winsorized_mean(x, 0)
# alpha = 0.2 gives k = floor(15 * 0.2) = 3 values winsorized in each tail.
mean_winsor <- winsorized_mean(x, 0.2)

# Print both results; the `##` lines are the recorded console output.
mean_biasa
## [1] 81.86667
mean_winsor
## [1] 69.73333
# Boxplot of the raw data to show the extreme value visually.
boxplot(x, main="Boxplot Data Produksi")

Interpretasi Soal 1

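Nilai 310 merupakan outlier yang menarik rata-rata biasa ke atas menjadi 81,87. Dengan alpha = 0,2 (k = 3), tiga nilai terkecil (12, 45, 52) diganti dengan 58 dan tiga nilai terbesar (88, 95, 310) diganti dengan 82, sehingga winsorized mean menjadi 69,73. Winsorized mean lebih stabil (robust) karena nilai ekstrem tidak dibuang, melainkan diganti dengan nilai terdekat yang tidak ekstrem.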


Soal 2

# Load the quiz data set from CSV.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists; consider a path relative to the project.
df <- read.csv('C:/Users/dinda/Downloads/Quiz 1 Komstat/data_quiz1.csv')

# Columns x1, x2, x3 as a matrix (one row per observation) and column w as the
# per-observation weights; both are passed to weighted_corr() below.
X <- as.matrix(df[, c("x1","x2","x3")])
w <- df$w

#' Weighted mean vector, covariance matrix and correlation matrix
#'
#' Uses the matrix formulas, with `W = diag(w)`, `nw` the total weight and
#' `1` a column of ones:
#'   mean = (1 / nw) * t(X) W 1
#'   D    = X - 1 t(mean)
#'   cov  = (1 / nw) * t(D) W D
#'   cor  = V^-1 cov V^-1, where V = diag(sqrt(diag(cov)))
#'
#' @param X Numeric matrix (or object coercible by `as.matrix()`), one row per
#'   observation and one column per variable; no missing values.
#' @param w Numeric vector of non-negative weights, one per row of `X`, with a
#'   positive total.
#' @return A list with elements `W` (diagonal weight matrix), `mean` (column
#'   matrix of weighted means), `cov` (weighted covariance matrix), `sd`
#'   (weighted standard deviations) and `cor` (weighted correlation matrix).
weighted_corr <- function(X, w) {
  X <- as.matrix(X)
  n <- nrow(X)

  if (!is.numeric(X) || anyNA(X)) {
    stop("`X` must be numeric and must not contain missing values.",
         call. = FALSE)
  }
  if (!is.numeric(w) || length(w) != n) {
    stop("`w` must be a numeric vector with one weight per row of `X`.",
         call. = FALSE)
  }
  if (anyNA(w) || any(w < 0)) {
    stop("`w` must contain only non-negative, non-missing weights.",
         call. = FALSE)
  }

  # diag() called with a single number builds an identity matrix of that size,
  # so the dimension is always passed explicitly.
  W <- diag(w, nrow = n)

  # Total weight accumulated manually (deliberately without sum()).
  nw <- 0
  for (i in seq_along(w)) {
    nw <- nw + w[[i]]
  }
  if (nw <= 0) {
    stop("The total weight must be positive.", call. = FALSE)
  }

  one <- matrix(1, n, 1)

  xw <- (1 / nw) * t(X) %*% W %*% one

  # Deviations of every observation from the weighted mean vector.
  D <- X - one %*% t(xw)

  Sw <- (1 / nw) * t(D) %*% W %*% D

  sw <- sqrt(diag(Sw))
  if (any(sw == 0)) {
    stop("Every variable needs a non-zero weighted variance.", call. = FALSE)
  }

  # V is diagonal, so its inverse is just the reciprocal of each entry; this
  # avoids calling solve() twice and stays correct for a single variable.
  V_inv <- diag(1 / sw, nrow = length(sw))

  Rw <- V_inv %*% Sw %*% V_inv

  list(
    W = W,
    mean = xw,
    cov = Sw,
    sd = sw,
    cor = Rw
  )
}

# Compute the weighted statistics for the three variables.
hasil <- weighted_corr(X, w)

# The `##` lines below are the recorded console output of each expression.
# Weighted mean vector (3 x 1 column matrix).
hasil$mean
##        [,1]
## x1 73.88530
## x2 65.39059
## x3 17.00938
# Weighted covariance matrix.
hasil$cov
##           x1        x2        x3
## x1  38.16362 -37.75105 -27.15386
## x2 -37.75105  41.10767  29.16587
## x3 -27.15386  29.16587  21.14757
# Weighted standard deviations (square roots of the covariance diagonal).
hasil$sd
##       x1       x2       x3 
## 6.177671 6.411527 4.598649
# Weighted correlation matrix; rows/columns are in the order x1, x2, x3.
hasil$cor
##            [,1]       [,2]       [,3]
## [1,]  1.0000000 -0.9531095 -0.9558207
## [2,] -0.9531095  1.0000000  0.9891979
## [3,] -0.9558207  0.9891979  1.0000000
# Unweighted pairwise scatter plots of the raw variables.
pairs(X, main="Scatter Plot Matrix")

# NOTE(review): heatmap() by default reorders rows/columns via clustering and
# rescales each row (scale = "row"), so the colours do not map directly to the
# correlation values; confirm whether symm = TRUE / scale = "none" is intended.
heatmap(hasil$cor, main="Heatmap Korelasi")

Interpretasi Soal 2

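Matriks korelasi berbobot menunjukkan hubungan linear yang sangat kuat antar ketiga indeks lingkungan. Variabel x1 berkorelasi negatif kuat dengan x2 (-0,953) dan dengan x3 (-0,956), artinya ketika x1 naik, x2 dan x3 cenderung turun. Sebaliknya, x2 dan x3 berkorelasi positif sangat kuat (0,989), sehingga keduanya cenderung naik dan turun bersama. Pembobotan membuat wilayah dengan bobot (luas wilayah) lebih besar berkontribusi lebih besar terhadap rata-rata, kovarians, dan korelasi, sehingga hasil lebih representatif sesuai luas wilayah.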