Tiara Gusti Elita Dewi || 5003251030 || Statistika D

==============================================

Soal 1 - Winsorized Mean

==============================================

— Data —

# Sample for Soal 1: 15 observations, the last of which (310) is far
# larger than the rest.
x <- c(
  12, 45, 52, 58, 61, 63, 67, 70, 72, 75, 78, 82, 88, 95,
  310
)

— [a] Buatlah fungsi winsorized_mean(x, alpha) —

#' Arithmetic mean of a vector.
#'
#' @param x A numeric (or integer/logical) vector.
#' @return A length-one double: the sum of the elements divided by their
#'   count. An empty vector gives `NaN` (0 / 0); an `NA` in `x` propagates
#'   to the result.
average <- function(x) {
  # Vectorised sum instead of a `for (i in 1:n)` loop: `1:n` counts down
  # (1, 0) when `x` is empty, which made the loop return a zero-length
  # result. as.numeric() keeps the accumulation in double precision.
  sum(as.numeric(x)) / length(x)
}

#' Winsorized mean.
#'
#' Sorts `x`, replaces the `k = floor(n * alpha)` smallest values with the
#' (k + 1)-th smallest value and the `k` largest values with the (n - k)-th
#' smallest value, then averages the result with `average()`.
#' `alpha = 0` gives `k = 0`, i.e. the ordinary mean.
#'
#' @param x Numeric vector. `sort()` drops `NA`s, so `n` counts only the
#'   non-missing values.
#' @param alpha Fraction winsorized in each tail; a single number in
#'   [0, 0.5].
#' @return The mean of the winsorized values.
winsorized_mean <- function(x, alpha) {
  # Beyond 0.5 the lower and upper replacement ranges cross (and at 1 the
  # index k + 1 runs past the data), so reject such values up front.
  if (!is.numeric(alpha) || length(alpha) != 1L || is.na(alpha) ||
      alpha < 0 || alpha > 0.5) {
    stop("alpha must be a single number between 0 and 0.5", call. = FALSE)
  }

  xi <- sort(x)
  n <- length(xi)
  k <- floor(n * alpha)

  y <- xi
  if (k > 0) {
    # Both replacement values are read from the untouched sorted copy `xi`.
    y[seq_len(k)] <- xi[k + 1]
    y[(n - k + 1):n] <- xi[n - k]
  }
  average(y)
}

— [b] Hitung ordinary mean (alpha=0) dan Winsorized mean 20% (alpha=0.2) —

# Ordinary mean: with alpha = 0 no observation is replaced
ordinary <- winsorized_mean(x, alpha = 0)
print(ordinary)
## [1] 81.86667
# 20% winsorized mean (alpha = 0.2)
winsorized <- winsorized_mean(x, alpha = 0.2)
print(winsorized)
## [1] 69.73333

— [c] Visualisasi data —

# Horizontal boxplot of x, with the ordinary and winsorized means drawn as
# vertical reference lines.
boxplot(
  x,
  horizontal = TRUE,
  main = "Boxplot Data X",
  xlab = "Nilai X",
  col = "#f2e8d6",
  border = "#be93d4",
  pch = 19
)
# One vectorised call draws both lines, each in its own colour
# (ordinary mean first, winsorized mean second).
abline(
  v = c(ordinary, winsorized),
  col = c("#a91b0d", "#6f4685"),
  lwd = 2
)
legend(
  "topright",
  legend = c("Ordinary Mean", "Winsorized Mean"),
  col = c("#a91b0d", "#6f4685"),
  lwd = 2
)

— Analisis —

#Data memiliki outlier yang sangat ekstrem, yaitu 310. Akibatnya, nilai Ordinary Mean tertarik jauh ke atas menjadi 81,87. Dengan Winsorized Mean 20% (69,73), nilai rata-rata lebih mencerminkan pusat data yang sebenarnya (mendekati median, yaitu 70).

==============================================

Soal 2 - Weighted Multivariate Descriptive Statistics

==============================================

— Baca data CSV —

#install.packages("readxl")
library(readxl)
# Read the quiz data set; the absolute path is specific to the author's
# machine.
df <- read.csv(file = "C:/Users/Lenovo/Documents/RStudio/data_quiz1.csv")

# Columns x1, x2 and x3 as a matrix, one row per observation.
X <- as.matrix(df[c("x1", "x2", "x3")])
# Observation weights taken from column w.
w_kecil <- df$w

— [a] Buatlah fungsi weighted_corr(X, w) —

#' Sum of the elements of a vector.
#'
#' @param x A numeric (or integer/logical) vector.
#' @return A length-one double holding the sum; `0` for an empty vector.
#'   An `NA` in `x` propagates to the result.
total <- function(x) {
  # Vectorised sum instead of a `for (i in 1:n)` loop: `1:n` counts down
  # (1, 0) when `x` is empty, which made the loop return a zero-length
  # result. as.numeric() keeps the accumulation in double precision.
  sum(as.numeric(x))
}

#' Weighted mean vector, covariance matrix, standard deviations and
#' correlation matrix of a data matrix.
#'
#' With W = diag(w), 1 a column of ones and n_w = sum(w):
#'   x_bar_w = (1 / n_w) X' W 1
#'   D       = X - 1 x_bar_w'
#'   S_w     = (1 / n_w) D' W D
#'   R_w     = V^-1 S_w V^-1, where V = diag(sqrt(diag(S_w)))
#'
#' @param X Matrix with one row per observation and one column per variable.
#' @param w_kecil Vector of observation weights, one per row of `X`.
#' @return A list with elements `w_besar` (diagonal weight matrix),
#'   `x_bar_w` (weighted mean as a one-column matrix), `S_w` (weighted
#'   covariance matrix), `s_w` (weighted standard deviations) and `R_w`
#'   (weighted correlation matrix).
weighted_corr <- function(X, w_kecil) {
  n <- nrow(X)
  stopifnot(length(w_kecil) == n)
  nw <- total(w_kecil)

  # `nrow = n` keeps this an n x n diagonal matrix even when n == 1
  # (diag() of a single number would build an identity matrix instead).
  w_besar <- diag(w_kecil, nrow = n)
  # Vector of ones. `rep(1:n)` would give 1, 2, ..., n and turn every
  # product below into a row-index-weighted quantity.
  satu <- rep(1, n)

  x_bar_w <- (1 / nw) * t(X) %*% w_besar %*% satu
  # Centre each column on its weighted mean.
  D <- X - satu %*% t(x_bar_w)

  S_w <- (1 / nw) * t(D) %*% w_besar %*% D
  s_w <- sqrt(diag(S_w))

  # Inverse of V = diag(s_w), built directly from the reciprocals; `nrow`
  # again guards the single-variable case.
  V_inv <- diag(1 / s_w, nrow = length(s_w))
  R_w <- V_inv %*% S_w %*% V_inv

  list(
    w_besar = w_besar,
    x_bar_w = x_bar_w,
    S_w = S_w,
    s_w = s_w,
    R_w = R_w
  )
}

— [b] Aplikasikan fungsi pada data —

# Run the weighted statistics on the loaded data
hasil <- weighted_corr(X, w_kecil)

# NOTE(review): the `##` lines below are the output recorded when this
# report was generated. Pairwise correlations of ~1 for every pair and
# standard deviations many times larger than the means look implausible;
# re-check weighted_corr() and regenerate the output.

# Weighted mean vector
print(hasil$x_bar_w)
##         [,1]
## x1 1067.7107
## x2  951.4680
## x3  246.9083
# Weighted variance-covariance matrix
print(hasil$S_w)
##           x1        x2       x3
## x1 323869278 288609228 74894805
## x2 288609228 257188122 66740985
## x3  74894805  66740985 17319467
# Weighted standard deviations
print(hasil$s_w)
##        x1        x2        x3 
## 17996.368 16037.086  4161.666
# Weighted correlation matrix
print(hasil$R_w)
##           [,1]      [,2]      [,3]
## [1,] 1.0000000 0.9999997 0.9999990
## [2,] 0.9999997 1.0000000 0.9999997
## [3,] 0.9999990 0.9999997 1.0000000

— [c] Visualisasi data —

#install.packages("corrplot")
library(corrplot)
## corrplot 0.95 loaded
# Upper-triangle circle plot of the weighted correlation matrix, with the
# coefficients printed on top of the circles.
corrplot(
  hasil$R_w,
  method = "circle",
  type = "upper",
  addCoef.col = "#89cff0",
  title = "Weighted Correlation Matrix",
  # corrplot() defaults to zero margins, which clips the title; reserve
  # two lines at the top for it.
  mar = c(0, 0, 2, 0)
)

— Analisis —

#Weighted Mean memberikan pusat data yang lebih akurat karena mempertimbangkan bobot tiap observasi. Melalui Matriks Korelasi Tertimbang (R_w), kita dapat melihat hubungan antar variabel x1, x2, dan x3. Jika pasangan variabel bernilai korelasi positif, berarti kedua variabel tersebut cenderung meningkat secara bersamaan.