Tiara Gusti Elita Dewi || 5003251030 || Statistika D
— Data —
# Sample of 15 observations; the last value (310) lies far above the rest.
x <- c(
  12, 45, 52, 58, 61,
  63, 67, 70, 72, 75,
  78, 82, 88, 95, 310
)
— [a] Buatlah fungsi winsorized_mean(x, alpha) —
# Arithmetic mean computed with an explicit accumulation loop.
#
# Args:
#   x: numeric vector.
# Returns:
#   The sum of the elements divided by length(x). For empty input the
#   result is NaN (0 / 0).
average <- function(x) {
  n <- length(x)
  hasil <- 0
  # Iterate over the values themselves: `1:n` would run over c(1, 0) when
  # n == 0 and collapse the accumulator to numeric(0).
  for (nilai in x) {
    hasil <- hasil + nilai
  }
  hasil / n
}

# Winsorized mean: the k smallest values are replaced by the (k + 1)-th
# smallest and the k largest by the (k + 1)-th largest, with
# k = floor(n * alpha), before averaging.
#
# Args:
#   x:     numeric vector. NA values are dropped, because sort() removes them.
#   alpha: single number in [0, 0.5], the fraction replaced in EACH tail.
#          alpha = 0 gives the ordinary mean.
# Returns:
#   A single numeric value (NaN when x has no non-missing values).
# Errors:
#   Stops when x is not numeric or alpha is not a single number in [0, 0.5];
#   outside that range the two tails would overlap or nothing would be
#   winsorized without any warning.
winsorized_mean <- function(x, alpha) {
  stopifnot(
    is.numeric(x),
    is.numeric(alpha), length(alpha) == 1L, !is.na(alpha),
    alpha >= 0, alpha <= 0.5
  )
  xi <- sort(x)
  n <- length(xi)
  # Small tolerance so products such as 100 * 0.29 (stored as 28.999...)
  # are not floored one step too low.
  k <- floor(n * alpha + sqrt(.Machine$double.eps))
  y <- xi
  if (k > 0) {
    y[seq_len(k)] <- xi[k + 1]        # lower tail
    y[(n - k + 1):n] <- xi[n - k]     # upper tail
  }
  average(y)
}
— [b] Hitung ordinary mean (alpha=0) dan Winsorized mean 20% (alpha=0.2) —
# Ordinary mean: alpha = 0, so no observation is replaced
ordinary <- winsorized_mean(x, alpha = 0)
print(ordinary)
## [1] 81.86667
# Winsorized mean with alpha = 0.2 (20% replaced in each tail)
winsorized <- winsorized_mean(x, alpha = 0.2)
print(winsorized)
## [1] 69.73333
— [c] Visualisasi data —
# Horizontal boxplot of x
boxplot(
  x,
  horizontal = TRUE,
  main = "Boxplot Data X", xlab = "Nilai X",
  col = "#f2e8d6", border = "#be93d4", pch = 19
)
# Vertical reference lines at the ordinary and the winsorized mean;
# colours are matched position-by-position with the legend entries below.
abline(
  v = c(ordinary, winsorized),
  col = c("#a91b0d", "#6f4685"),
  lwd = 2
)
legend(
  "topright",
  legend = c("Ordinary Mean", "Winsorized Mean"),
  col = c("#a91b0d", "#6f4685"),
  lwd = 2
)
— Analisis —
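# Data memiliki outlier yang sangat ekstrem, yaitu 310. Akibatnya, Ordinary Mean tertarik ke atas menjadi 81,87, lebih besar daripada 11 dari 15 observasi. Dengan Winsorized Mean 20% (69,73), tiga nilai terkecil dan tiga nilai terbesar diganti dengan nilai terdekat yang tersisa (58 dan 82), sehingga nilai rata-rata lebih menunjukkan pusat data yang sebenarnya dan mendekati median (70).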
— Baca data CSV —
#install.packages("readxl")
library(readxl)
# Read the quiz data set from CSV.
# NOTE(review): the path is absolute and machine-specific, so the script
# only runs where this exact file exists.
# NOTE(review): read.csv() is base R (utils); no readxl function is called
# in the visible code — confirm whether library(readxl) is still needed.
df <- read.csv(file = "C:/Users/Lenovo/Documents/RStudio/data_quiz1.csv")
# Matrix built from the columns x1, x2 and x3
X <- as.matrix(df[c("x1", "x2", "x3")])
# Weight vector taken from column w
w_kecil <- df$w
— [a] Buatlah fungsi weighted_corr(X, w) —
# Sum of a vector computed with an explicit accumulation loop.
#
# Args:
#   x: numeric vector.
# Returns:
#   The sum of the elements; 0 for empty input.
total <- function(x) {
  hasil <- 0
  # Iterate over the values themselves: `1:n` would run over c(1, 0) for
  # empty input and collapse the accumulator to numeric(0).
  for (nilai in x) {
    hasil <- hasil + nilai
  }
  hasil
}

# Weighted mean vector, covariance matrix, standard deviations and
# correlation matrix, computed with matrix algebra.
#
# Args:
#   X:       n x p numeric matrix (rows = observations, columns = variables).
#   w_kecil: numeric weight vector of length n.
# Returns:
#   A list with
#     w_besar: n x n diagonal matrix W of the weights,
#     x_bar_w: p x 1 matrix of weighted means, (1 / nw) * X' W 1,
#     S_w:     p x p weighted covariance, (1 / nw) * D' W D, where
#              D = X - 1 x_bar_w' and nw is the sum of the weights,
#     s_w:     weighted standard deviations, sqrt(diag(S_w)),
#     R_w:     p x p weighted correlation, S_w[i, j] / (s_w[i] * s_w[j]);
#              it carries the dimnames of S_w. A zero-variance column
#              yields NaN entries.
# Errors:
#   Stops when X or w_kecil is not numeric or their sizes do not match.
weighted_corr <- function(X, w_kecil) {
  X <- as.matrix(X)
  n <- nrow(X)
  stopifnot(is.numeric(X), is.numeric(w_kecil), length(w_kecil) == n)

  nw <- total(w_kecil)
  # `nrow = n` keeps a single weight from being read as a matrix size.
  w_besar <- diag(w_kecil, nrow = n)
  # Vector of ones. rep(1:n) would produce 1, 2, ..., n and corrupt both
  # the mean and the centring step.
  satu <- rep(1, n)

  x_bar_w <- (1 / nw) * t(X) %*% w_besar %*% satu
  D <- X - satu %*% t(x_bar_w)
  S_w <- (1 / nw) * t(D) %*% w_besar %*% D
  s_w <- sqrt(diag(S_w))
  # Element-wise scaling is equivalent to V^-1 S_w V^-1 with V = diag(s_w),
  # but needs no matrix inverse and still works when p == 1.
  R_w <- S_w / outer(s_w, s_w)

  list(
    w_besar = w_besar,
    x_bar_w = x_bar_w,
    S_w = S_w,
    s_w = s_w,
    R_w = R_w
  )
}
— [b] Aplikasikan fungsi pada data —
# Call the function on the data matrix and its weights
hasil <- weighted_corr(X, w_kecil)
# NOTE(review): the outputs recorded below were pasted from an earlier run.
# Every pairwise correlation is >= 0.999999, which is worth checking
# against the raw data; re-run to refresh them after any change.
# Weighted mean vector
hasil[["x_bar_w"]]
## [,1]
## x1 1067.7107
## x2 951.4680
## x3 246.9083
# Weighted variance-covariance matrix
hasil[["S_w"]]
## x1 x2 x3
## x1 323869278 288609228 74894805
## x2 288609228 257188122 66740985
## x3 74894805 66740985 17319467
# Weighted standard deviation vector
hasil[["s_w"]]
## x1 x2 x3
## 17996.368 16037.086 4161.666
# Weighted correlation matrix
hasil[["R_w"]]
## [,1] [,2] [,3]
## [1,] 1.0000000 0.9999997 0.9999990
## [2,] 0.9999997 1.0000000 0.9999997
## [3,] 0.9999990 0.9999997 1.0000000
— [c] Visualisasi data —
#install.packages("corrplot")
library(corrplot)
## corrplot 0.95 loaded
# Upper-triangle circle plot of the weighted correlation matrix, with the
# coefficient values printed on top of the circles.
# `mar` reserves space above the plot for the title.
# NOTE(review): with corrplot's default margins the title is usually
# clipped — confirm the rendering on your graphics device.
corrplot(
  hasil$R_w,
  method = "circle",
  type = "upper",
  addCoef.col = "#89cff0",
  title = "Weighted Correlation Matrix",
  mar = c(0, 0, 2, 0)
)
— Analisis —
# Weighted Mean memberikan ukuran pusat data yang lebih akurat karena mempertimbangkan bobot tiap observasi. Melalui Matriks Korelasi Tertimbang (R_w), kita dapat melihat hubungan antarvariabel x1, x2, dan x3. Jika suatu pasangan variabel memiliki korelasi positif, kedua variabel tersebut cenderung meningkat secara bersamaan.