##INTERPRETASI: Berdasarkan visualisasi, terlihat bahwa terdapat satu nilai ekstrem yaitu 310 yang jauh lebih besar dibandingkan data lainnya. Keberadaan outlier ini menyebabkan nilai mean biasa menjadi lebih tinggi dan kurang merepresentasikan pusat data yang sebenarnya. Hal ini terlihat dari posisi titik mean yang berada lebih jauh di atas distribusi utama data. Sementara itu, winsorized mean menunjukkan nilai yang lebih rendah dan lebih mendekati kumpulan data utama. Jadi penggunaan winsorized mean lebih disarankan jika ada nilai outlier.

library(ggplot2)

# Sample data: 15 numeric observations listed in ascending order; the last
# value (310) is far larger than all the others.
x <- c(12, 45, 52, 58, 61, 63, 67, 70, 72, 75, 78, 82, 88, 95, 310)

# --- [a] Write the function winsorized_mean(x, alpha) ---

#' Winsorized mean of a numeric vector
#'
#' Sorts `x`, replaces the `k = floor(n * alpha)` smallest values with the
#' (k + 1)-th smallest value and the `k` largest values with the (n - k)-th
#' smallest value, then returns the arithmetic mean of the result.
#'
#' @param x A numeric vector. It does not need to be sorted.
#' @param alpha Proportion to winsorize in each tail; a single number in
#'   `[0, 0.5)`. `alpha = 0` gives the ordinary mean.
#' @return A single number: the winsorized mean. `NaN` for empty input and
#'   `NA` when `x` contains missing values.
winsorized_mean <- function(x, alpha) {
  stopifnot(
    is.numeric(x),
    is.numeric(alpha),
    length(alpha) == 1L,
    !is.na(alpha),
    alpha >= 0,
    alpha < 0.5
  )

  n <- length(x)
  if (n == 0L) {
    return(NaN)
  }
  # sort() silently drops NA, which would desynchronise n and the indices.
  if (anyNA(x)) {
    return(NA_real_)
  }

  # Winsorizing is defined on the order statistics, so sort first.
  sorted <- sort(x)
  k <- floor(n * alpha)

  if (k > 0) {
    # Positions 1..k (inclusive) take the value at position k + 1.
    sorted[seq_len(k)] <- sorted[k + 1]
    # Positions (n - k + 1)..n take the value at position n - k.
    sorted[(n - k + 1):n] <- sorted[n - k]
  }

  mean(sorted)
}

# --- [b] Compute the ordinary mean and the 20% Winsorized mean ---

# Ordinary mean: alpha = 0 means no observations are replaced.
meanbiasa <- winsorized_mean(x, alpha = 0)

# Winsorized mean with alpha = 0.2 (20%).
wmean <- winsorized_mean(x, alpha = 0.2)


# --- [c] Visualize the data ---
df <- data.frame(nilai = x)

# The boxplot and both mean markers use the same (empty) x category so the
# points are drawn on top of the box rather than in a separate column.
# annotate() draws each marker exactly once; putting a constant inside aes()
# would recycle it over every row of df and make ggplot2 emit a warning.
ggplot(df, aes(x = "", y = nilai)) +
  geom_boxplot(fill = "yellow") +
  # Ordinary mean (blue point)
  annotate("point", x = "", y = meanbiasa, color = "blue", size = 3) +
  # Winsorized mean (red point)
  annotate("point", x = "", y = wmean, color = "red", size = 3) +
  labs(
    title = "Mean biasa vs Winsorized Mean",
    y = "Nilai",
    x = ""
  )