# =============================================================================
# QUIZ - Computational Statistics IUP
# Bachelor Program of Statistics, F-Scientics ITS
# Even Semester 2025/2026
# =============================================================================
# Name : Naila Apta Fausta
# NRP : 5003251003
# Date : Tuesday, 10th of March 2025
# =============================================================================
# =============================================================================
# QUESTION 1: TRIMMED MEAN (50 points)
# =============================================================================
#
# Formula:
# X_bar_alpha = (1 / (n - 2k)) * sum(X_(i), i = k+1 to n-k)
# where k = floor(n * alpha)
#
# =============================================================================
# Part (a): Write the trimmed_mean function
trimmed_mean <- function(x, alpha = 0.1) {
n <- length(x)
k <- floor(n*alpha)
sortx <- sort(x)
sumx <- 0
for (i in (k+1):(n-k)){
sumx <- sumx + sortx[i]
}
X_bar_alpha <- (1/(n-2*k)) * sumx
return(X_bar_alpha)
}
# Part (b): Compute ordinary mean and 10% trimmed mean
x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)
# Ordinary mean (alpha = 0)
Mean_manual <- function(x){
n <- length(x)
result <- 0
for(i in 1:n){
result <- result + x[i]
}
return(result/n)
}
ordinary_mean <- Mean_manual(x)
ordinary_mean
## [1] 1985
# 10% Trimmed mean (alpha = 0.10)
trimmed_10 <- trimmed_mean(x, 0.10)
trimmed_10
## [1] 1447
# Visualization Boxplot
boxplot(x,
main = "Boxplot of Expenditure Data",
ylab = "Expenditure (thousand IDR)",
col = "lightblue")

# analysis
#Ordinary mean = 1985
#10% trimmed mean = 1447
# Interpretation
#The ordinary mean is substantially higher than the trimmed mean due to the presence of an extreme value (8500).
#This shows that the mean is sensitive to outliers, while the trimmed mean provides a more reliable measure of central tendency.
# =============================================================================
# QUESTION 2: WEIGHTED MULTIVARIATE DESCRIPTIVE STATISTICS (50 points)
# =============================================================================
#
# Formulas:
# x_bar_w = (1/n_w) * X^T W 1 (weighted mean vector)
# S_w = 1/(n_w - 1) * D^T W D (weighted covariance matrix)
# where D = X - 1 * x_bar_w^T, W = diag(w), n_w = sum(w)
#
# =============================================================================
# Part (a): Write the weighted_multi_desc function
weighted_multi_desc <- function(X, w) {
W <- diag(w)
n_w <- sum(w)
one <- rep(1, length(w))
x_bar_w <- (1/n_w) * t(X) %*% W %*% one
D <- X - one %*% t(x_bar_w)
S_w <- (1/(n_w - 1)) * t(D) %*% W %*% D
s_w <- sqrt(diag(S_w))
return(list(
w_mean = x_bar_w,
w_cov = S_w,
w_std = s_w
))
}
# Part (b): Apply to the 31 kecamatan data
# Load data
data <- read.csv("data_quiz.csv")
X <- as.matrix(data[, c("Income", "Expenditure", "HH_Size")])
w <- data$Weight
result <- weighted_multi_desc(X, w)
result$w_mean
## [,1]
## Income 4.186869
## Expenditure 3.350505
## HH_Size 3.832997
result$w_cov
## Income Expenditure HH_Size
## Income 0.8703337 0.6840776 -0.4882477
## Expenditure 0.6840776 0.5384541 -0.3843073
## HH_Size -0.4882477 -0.3843073 0.2926238
result$w_std
## Income Expenditure HH_Size
## 0.9329168 0.7337943 0.5409471
# Visualization Scatter Plot
plot(data$Income, data$Expenditure,
main = "Income and Expenditure",
xlab = "Income",
ylab = "Expenditure",
pch = 19,
col = "steelblue",
cex = 1.5)
abline(lm(Expenditure ~ Income, data = data),
col = "red", lwd = 2)

# analysis
#Weighted Mean
#Income = 4.186869
#Expenditure = 3.350505
#HH_Size = 3.832997
#Weighted Covariance
#Income & Expenditure = 0.6840776 (positive)
#Income & HH_Size = -0.4882477 (negative)
#Expenditure & HH_Size = -0.3843073 (negative)
#Weighted Standard Deviation
#Income = 0.9329168
#Expenditure = 0.7337943
#HH_Size = 0.5409471
# interpretation
#The weighted mean indicates that the average income and expenditure across districts are relatively moderate.
#The positive covariance between income and expenditure suggests that districts with higher income tend to have higher expenditure. This relationship is also clearly supported by the scatter plot and the upward-sloping regression line.
#The negative covariance between household size and both income and expenditure indicates that districts with larger household sizes tend to have lower income and spending levels.
#The standard deviation shows that income varies the most across districts, while household size is relatively more consistent.