5003251003_Naila-Apta-Fausta

# =============================================================================
# QUIZ - Computational Statistics IUP
# Bachelor Program of Statistics, F-Scientics ITS
# Even Semester 2025/2026
# =============================================================================
# Name : Naila Apta Fausta
# NRP  : 5003251003
# Date : Tuesday, 10th of March 2025
# =============================================================================


# =============================================================================
# QUESTION 1: TRIMMED MEAN (50 points)
# =============================================================================
#
# Formula:
#   X_bar_alpha = (1 / (n - 2k)) * sum(X_(i), i = k+1 to n-k)
#   where k = floor(n * alpha)
#
# =============================================================================

# Part (a): Write the trimmed_mean function

trimmed_mean <- function(x, alpha = 0.1) {
  n <- length(x)
  k <- floor(n*alpha)
  
  sortx <- sort(x)
  
  sumx <- 0
  for (i in (k+1):(n-k)){
    sumx <- sumx + sortx[i]
  }
  
  X_bar_alpha <- (1/(n-2*k)) * sumx
  return(X_bar_alpha)
}

# Part (b): Compute ordinary mean and 10% trimmed mean

x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)

# Ordinary mean (alpha = 0)
Mean_manual <- function(x){
  n <- length(x)
  result <- 0
  
  for(i in 1:n){
    result <- result + x[i]
  }
  return(result/n)
}
ordinary_mean <- Mean_manual(x)
ordinary_mean

## [1] 1985

# 10% Trimmed mean (alpha = 0.10)
trimmed_10 <- trimmed_mean(x, 0.10)
trimmed_10

## [1] 1447

# Visualization Boxplot
boxplot(x,
        main = "Boxplot of Expenditure Data",
        ylab = "Expenditure (thousand IDR)",
        col = "lightblue")

# analysis
#Ordinary mean = 1985
#10% trimmed mean = 1447

# Interpretation
#The ordinary mean is substantially higher than the trimmed mean due to the presence of an extreme value (8500).
#This shows that the mean is sensitive to outliers, while the trimmed mean provides a more reliable measure of central tendency.

# =============================================================================
# QUESTION 2: WEIGHTED MULTIVARIATE DESCRIPTIVE STATISTICS (50 points)
# =============================================================================
#
# Formulas:
#   x_bar_w = (1/n_w) * X^T W 1        (weighted mean vector)
#   S_w     = 1/(n_w - 1) * D^T W D    (weighted covariance matrix)
#   where D = X - 1 * x_bar_w^T, W = diag(w), n_w = sum(w)
#
# =============================================================================

# Part (a): Write the weighted_multi_desc function

weighted_multi_desc <- function(X, w) {
  W <- diag(w)
  
  n_w <- sum(w)
  
  one <- rep(1, length(w))
  
  x_bar_w <- (1/n_w) * t(X) %*% W %*% one
  
  D <- X - one %*% t(x_bar_w)

  S_w <- (1/(n_w - 1)) * t(D) %*% W %*% D
  
  s_w <- sqrt(diag(S_w))
  
  return(list(
    w_mean = x_bar_w,
    w_cov = S_w,
    w_std = s_w
  ))
}


# Part (b): Apply to the 31 kecamatan data

# Load data
data <- read.csv("data_quiz.csv")
X <- as.matrix(data[, c("Income", "Expenditure", "HH_Size")])
w <- data$Weight

result <- weighted_multi_desc(X, w)

result$w_mean

##                 [,1]
## Income      4.186869
## Expenditure 3.350505
## HH_Size     3.832997

result$w_cov

##                 Income Expenditure    HH_Size
## Income       0.8703337   0.6840776 -0.4882477
## Expenditure  0.6840776   0.5384541 -0.3843073
## HH_Size     -0.4882477  -0.3843073  0.2926238

result$w_std

##      Income Expenditure     HH_Size 
##   0.9329168   0.7337943   0.5409471

# Visualization Scatter Plot
plot(data$Income, data$Expenditure, 
     main = "Income and Expenditure",
     xlab = "Income",
     ylab = "Expenditure",
     pch  = 19,
     col  = "steelblue",
     cex  = 1.5)

abline(lm(Expenditure ~ Income, data = data),
       col = "red", lwd = 2)

# analysis
#Weighted Mean
#Income = 4.186869
#Expenditure = 3.350505
#HH_Size = 3.832997

#Weighted Covariance
#Income & Expenditure = 0.6840776 (positive)
#Income & HH_Size = -0.4882477 (negative)
#Expenditure & HH_Size = -0.3843073 (negative)

#Weighted Standard Deviation
#Income = 0.9329168
#Expenditure = 0.7337943
#HH_Size = 0.5409471

# interpretation
#The weighted mean indicates that the average income and expenditure across districts are relatively moderate.
#The positive covariance between income and expenditure suggests that districts with higher income tend to have higher expenditure. This relationship is also clearly supported by the scatter plot and the upward-sloping regression line.
#The negative covariance between household size and both income and expenditure indicates that districts with larger household sizes tend to have lower income and spending levels.
#The standard deviation shows that income varies the most across districts, while household size is relatively more consistent.

5003251003_Naila-Apta-Fausta–quiz-.R

IVONY NILASARI

2026-03-31