Data Visualization Kevin Utomo 5003251151

Answer to Quiz Question 1 x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)

Write an R function called trimmed_mean(x,alpha) that computes the trimmed mean based on the formula above. The function should sort the data, remove 𝑘 observations from each end, and return the mean of the remaining values.

x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)
trimmed_mean <- function(x, alpha = 0.1) {
  if (alpha < 0 || alpha > 0.5 ) {
    stop("alpha must be between 0 and 0.5")
  }
  
  n <- length(x)
  k <- floor(n*alpha)
  sorted_data <- sort(x)
  
  sum_x <- 0
  for (i in (k+1):(n-k)) {
    sum_x <- sum_x + sorted_data[i]
  }
  return(sum_x/(n-2*k))
}

Using your function, compute the ordinary mean (i.e., alpha = 0) and the 10% trimmed mean (i.e., alpha = 0.10) of the data.

mean_alpha_0 <- trimmed_mean(x, 0)
mean_alpha_0

## [1] 1985

mean_alpha_0.1 <- trimmed_mean(x, 0.1)
mean_alpha_0.1

## [1] 1447

Question 2 The Surabaya City Government conducts a household welfare survey across all 31 kecamatan (sub-districts). Three variables are recorded as district-level averages: monthly income (𝑥*, million IDR), monthly expenditure (𝑥+, million IDR), and household size (𝑥., persons). Each kecamatan is weighted by its population (in tens of thousands), reflecting how many households that kecamatan represents.

Write an R function weighted_multi_desc(X,w)

weighted_multi_desc <- function(X, w) {
  
  n <- nrow(X)
  p <- ncol(X)
  vector1 <- matrix(1, nrow = n, ncol = 1)
  
  W <- diag(w)
  
  n_w <- 0
  for (i in 1:n) {
    n_w <- n_w + w[i]
  }
  
  #Weighted mean vector
  x_bar_w <- (1/n_w)* t(X) %*% W %*% vector1
  
  #Deviation matrix
  D <- X - vector1 %*% t(x_bar_w)
  
  #Weighted covariance matrix
  S_w <- (1 / (n_w - 1)) * t(D) %*% W %*% D
  
  #Weighted standard deviation vector
  Std_w <- sqrt(diag(S_w))
  
  return(list(
    W = W,
    x_bar_w = x_bar_w,
    S_w = S_w,
    Std_w = Std_w
  ))
  
}

Apply your function to the data attached.

data <- read.csv("https://raw.githubusercontent.com/Kevin-Lrx/Quiz-1-Data-MM-2-Year-2026/5c62aff9a8e9de76b2885ef50215c9ec31bbdd50/data_quiz.csv", header = TRUE, sep = ",")
X <- as.matrix(data[, c("Income", "Expenditure", "HH_Size")])
w <- data$Weight

print(data)

##           Kecamatan Income Expenditure HH_Size Weight
## 1          Asemrowo    3.1         2.5     4.2      5
## 2            Benowo    3.8         3.0     4.0      7
## 3           Bubutan    4.2         3.4     3.5     10
## 4             Bulak    3.5         2.8     4.1      5
## 5       Dukuh Pakis    6.5         5.2     3.0      6
## 6          Gayungan    5.8         4.6     3.1      4
## 7           Genteng    5.5         4.4     2.8      6
## 8            Gubeng    5.9         4.7     2.9     13
## 9      Gunung Anyar    4.8         3.8     3.6      6
## 10        Jambangan    4.3         3.4     3.8      5
## 11     Karangpilang    4.5         3.6     3.7      7
## 12         Kenjeran    3.0         2.4     4.5     18
## 13       Krembangan    3.4         2.7     4.3     11
## 14      Lakarsantri    4.0         3.2     3.9      6
## 15        Mulyorejo    5.2         4.1     3.2      9
## 16  Pabean Cantikan    3.3         2.6     4.4      7
## 17            Pakal    3.6         2.9     4.1      6
## 18          Rungkut    5.0         4.0     3.3     12
## 19       Sambikerep    4.1         3.3     3.8      7
## 20          Sawahan    3.7         3.0     4.2     20
## 21         Semampir    2.8         2.3     4.7     18
## 22        Simokerto    3.2         2.6     4.3      9
## 23         Sukolilo    5.6         4.5     3.0     11
## 24    Sukomanunggal    4.6         3.7     3.5     10
## 25       Tambaksari    3.5         2.8     4.4     23
## 26           Tandes    4.0         3.2     3.9      9
## 27        Tegalsari    4.1         3.3     3.7     10
## 28 Tenggilis Mejoyo    5.3         4.2     3.1      6
## 29           Wiyung    4.7         3.7     3.6      7
## 30         Wonocolo    4.9         3.9     3.4      8
## 31        Wonokromo    4.4         3.5     3.8     16

weighted_multi_desc(X, w)

## $W
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    5    0    0    0    0    0    0    0    0     0     0     0     0
##  [2,]    0    7    0    0    0    0    0    0    0     0     0     0     0
##  [3,]    0    0   10    0    0    0    0    0    0     0     0     0     0
##  [4,]    0    0    0    5    0    0    0    0    0     0     0     0     0
##  [5,]    0    0    0    0    6    0    0    0    0     0     0     0     0
##  [6,]    0    0    0    0    0    4    0    0    0     0     0     0     0
##  [7,]    0    0    0    0    0    0    6    0    0     0     0     0     0
##  [8,]    0    0    0    0    0    0    0   13    0     0     0     0     0
##  [9,]    0    0    0    0    0    0    0    0    6     0     0     0     0
## [10,]    0    0    0    0    0    0    0    0    0     5     0     0     0
## [11,]    0    0    0    0    0    0    0    0    0     0     7     0     0
## [12,]    0    0    0    0    0    0    0    0    0     0     0    18     0
## [13,]    0    0    0    0    0    0    0    0    0     0     0     0    11
## [14,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [15,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [16,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [17,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [18,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [19,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [20,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [21,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [22,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [23,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [24,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [25,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [26,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [27,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [28,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [29,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [30,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [31,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
##  [1,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [2,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [3,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [4,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [5,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [6,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [7,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [8,]     0     0     0     0     0     0     0     0     0     0     0     0
##  [9,]     0     0     0     0     0     0     0     0     0     0     0     0
## [10,]     0     0     0     0     0     0     0     0     0     0     0     0
## [11,]     0     0     0     0     0     0     0     0     0     0     0     0
## [12,]     0     0     0     0     0     0     0     0     0     0     0     0
## [13,]     0     0     0     0     0     0     0     0     0     0     0     0
## [14,]     6     0     0     0     0     0     0     0     0     0     0     0
## [15,]     0     9     0     0     0     0     0     0     0     0     0     0
## [16,]     0     0     7     0     0     0     0     0     0     0     0     0
## [17,]     0     0     0     6     0     0     0     0     0     0     0     0
## [18,]     0     0     0     0    12     0     0     0     0     0     0     0
## [19,]     0     0     0     0     0     7     0     0     0     0     0     0
## [20,]     0     0     0     0     0     0    20     0     0     0     0     0
## [21,]     0     0     0     0     0     0     0    18     0     0     0     0
## [22,]     0     0     0     0     0     0     0     0     9     0     0     0
## [23,]     0     0     0     0     0     0     0     0     0    11     0     0
## [24,]     0     0     0     0     0     0     0     0     0     0    10     0
## [25,]     0     0     0     0     0     0     0     0     0     0     0    23
## [26,]     0     0     0     0     0     0     0     0     0     0     0     0
## [27,]     0     0     0     0     0     0     0     0     0     0     0     0
## [28,]     0     0     0     0     0     0     0     0     0     0     0     0
## [29,]     0     0     0     0     0     0     0     0     0     0     0     0
## [30,]     0     0     0     0     0     0     0     0     0     0     0     0
## [31,]     0     0     0     0     0     0     0     0     0     0     0     0
##       [,26] [,27] [,28] [,29] [,30] [,31]
##  [1,]     0     0     0     0     0     0
##  [2,]     0     0     0     0     0     0
##  [3,]     0     0     0     0     0     0
##  [4,]     0     0     0     0     0     0
##  [5,]     0     0     0     0     0     0
##  [6,]     0     0     0     0     0     0
##  [7,]     0     0     0     0     0     0
##  [8,]     0     0     0     0     0     0
##  [9,]     0     0     0     0     0     0
## [10,]     0     0     0     0     0     0
## [11,]     0     0     0     0     0     0
## [12,]     0     0     0     0     0     0
## [13,]     0     0     0     0     0     0
## [14,]     0     0     0     0     0     0
## [15,]     0     0     0     0     0     0
## [16,]     0     0     0     0     0     0
## [17,]     0     0     0     0     0     0
## [18,]     0     0     0     0     0     0
## [19,]     0     0     0     0     0     0
## [20,]     0     0     0     0     0     0
## [21,]     0     0     0     0     0     0
## [22,]     0     0     0     0     0     0
## [23,]     0     0     0     0     0     0
## [24,]     0     0     0     0     0     0
## [25,]     0     0     0     0     0     0
## [26,]     9     0     0     0     0     0
## [27,]     0    10     0     0     0     0
## [28,]     0     0     6     0     0     0
## [29,]     0     0     0     7     0     0
## [30,]     0     0     0     0     8     0
## [31,]     0     0     0     0     0    16
## 
## $x_bar_w
##                 [,1]
## Income      4.186869
## Expenditure 3.350505
## HH_Size     3.832997
## 
## $S_w
##                 Income Expenditure    HH_Size
## Income       0.8703337   0.6840776 -0.4882477
## Expenditure  0.6840776   0.5384541 -0.3843073
## HH_Size     -0.4882477  -0.3843073  0.2926238
## 
## $Std_w
##      Income Expenditure     HH_Size 
##   0.9329168   0.7337943   0.5409471

DATA VISUALIZATION

# Data kecamatan
kecamatan <- read.csv("https://raw.githubusercontent.com/Kevin-Lrx/Quiz-1-Data-MM-2-Year-2026/5c62aff9a8e9de76b2885ef50215c9ec31bbdd50/data_quiz.csv", header = TRUE)

x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)

head(kecamatan)

##  [1]  850  920 1100 1250 1300 1400 1500 1550 1600 1750 2100 8500

DATA KECAMATAN DATA VISUALIZATION

#1. Summary
summary(kecamatan)

##   Kecamatan             Income       Expenditure       HH_Size     
##  Length:31          Min.   :2.800   Min.   :2.300   Min.   :2.800  
##  Class :character   1st Qu.:3.550   1st Qu.:2.850   1st Qu.:3.350  
##  Mode  :character   Median :4.200   Median :3.400   Median :3.800  
##                     Mean   :4.332   Mean   :3.461   Mean   :3.735  
##                     3rd Qu.:4.950   3rd Qu.:3.950   3rd Qu.:4.150  
##                     Max.   :6.500   Max.   :5.200   Max.   :4.700  
##      Weight      
##  Min.   : 4.000  
##  1st Qu.: 6.000  
##  Median : 8.000  
##  Mean   : 9.581  
##  3rd Qu.:11.000  
##  Max.   :23.000

#2. Scatter Plot
plot(kecamatan$Income, kecamatan$Expenditure,
     main = "Income vs Expenditure",
     xlab = "Income",
     ylab = "Expenditure",
     col  = "blue",
     pch  = 19)

abline(lm(Expenditure ~ Income, data = kecamatan),
       col = "red", lwd = 2)

#2.1 Scatter Plot (ggplot)
library(ggplot2)
library(reshape2)
ggplot(kecamatan, aes(x = Income, y = Expenditure)) +
  geom_point(color = "blue", size = 3) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  ggtitle("Income vs Expenditure Kecamatan") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

#3. Pair Plot
pairs(kecamatan[, c("Income","Expenditure","HH_Size")],
      col = "blue",
      pch = 19,
      main = "Pair Plot Kecamatan")

#3.1 Pair Plot (ggplot)
library(GGally)
ggpairs(kecamatan[, c("Income","Expenditure","HH_Size")],
        title = "Pair Plot Kecamatan",
        upper = list(continuous = wrap("points", color = "blue")),
        lower = list(continuous = wrap("points", color = "blue")),
        diag = list(continuous = wrap("barDiag", fill = "green", binwidth = 500)))

#4. Histogram
layout(matrix(1:3, nrow = 1))

hist(kecamatan$Income, col = "blue", main = "Income")
hist(kecamatan$Expenditure, col = "red", main = "Expenditure")
hist(kecamatan$HH_Size, col = "green", main = "HH Size")

#4.1 Histogram (ggplot)
library(reshape2)
kecamatan_long <- melt(kecamatan[, c("Income", "Expenditure", "HH_Size")])

## No id variables; using all as measure variables

ggplot(kecamatan_long, aes(x = value, fill = variable)) +
  geom_histogram(position = "dodge", bins = 10, alpha = 0.7) +
  facet_wrap(~variable, scales = "free") +
  ggtitle("Histograms Kecamatan") +
  theme_minimal()

#5. Correlation Heatmap 
library(corrplot)

## corrplot 0.95 loaded

cor_matrix <- cor(kecamatan[, c("Income","Expenditure","HH_Size")])

corrplot(cor_matrix,
         method = "color",
         addCoef.col = "blue")

#5.1 Correlation Heatmap (ggplot)
library(reshape2)

cor_matrix <- cor(kecamatan[, c("Income","Expenditure","HH_Size")])
cor_df <- melt(cor_matrix)

ggplot(cor_df, aes(x=Var1, y=Var2, fill=value)) +
  geom_tile() +
  geom_text(aes(label=round(value,2)), color="blue", size=5) +
  scale_fill_gradient2(low="white", high="red", mid="yellow", midpoint=0.5) +
  ggtitle("Correlation Heatmap Kecamatan") +
  theme_minimal()

#6. Boxplot
boxplot(kecamatan[, c("Income","Expenditure","HH_Size")],
        main = "Boxplot Kecamatan",
        col = c("blue","red","green"))

#6.1 Box plot (ggplot)
library(ggplot2)

# Base ggplot, mapping x to a factor for each variable
ggplot() +
  geom_boxplot(aes(y = kecamatan$Income, x = "Income"), fill = "blue") +
  geom_boxplot(aes(y = kecamatan$Expenditure, x = "Expenditure"), fill = "red") +
  geom_boxplot(aes(y = kecamatan$HH_Size, x = "HH_Size"), fill = "green") +
  labs(title = "Boxplot Kecamatan",
       x = "Variable",
       y = "Value") +
  theme_minimal()

#7. Trend Lines Plot (ggplot)
ggplot(kecamatan, aes(x = Income, y = Expenditure)) +
  geom_point(color = "blue", size = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "red", linewidth = 0.5) +
  labs(title = "Income vs Expenditure with Regression line") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

## `geom_smooth()` using formula = 'y ~ x'

#8. Bubble Plot (ggplot)
ggplot(kecamatan, aes(x = Income, y = Expenditure, size = Weight)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title="Income vs Expenditure (Weighted by Population)")

#9. Pair Plot (ggplot)
ggpairs(kecamatan[, c("Income","Expenditure","HH_Size")], title = "Pair Plot Household Welfare survey",)

DATA X DATA VISUALIZATION

#1. Statistik Deskriptif
summary(x)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     850    1212    1450    1985    1638    8500

mean(x)

## [1] 1985

sd(x)

## [1] 2081.164

#2. Histogram
hist(x,
     breaks = 10,
     col    = "red",
     main   = "Histogram Data x")

#2.1 Histogram (ggplot)
ggplot(data.frame(x=x), aes(x=x)) +
  geom_histogram(binwidth = 500, fill = "red", color = "black", alpha = 0.7) +
  ggtitle("Histogram Data x") +
  theme_minimal()

#3. Scatter Index Plot
plot(x,
     main = "Scatter Plot Data x",
     xlab = "Index",
     ylab = "Value",
     col  = "blue",
     pch  = 19)

#3.1 Scatter Index Plot (ggplot)
ggplot(data = data.frame(Index = 1:length(x), Value = x), 
       aes(x = Index, y = Value)) +
  geom_point(color = "blue", size = 2) +  # pch 19 equivalent
  labs(title = "Scatter Plot Data x",
       x = "Index",
       y = "Value") +
  theme_minimal()

#4. Boxplot
boxplot(x,
        main = "Boxplot Data x",
        col  = "red")

#4.1 Boxplot (ggplot)
ggplot(data.frame(x=x), aes(y=x)) +
  geom_boxplot(fill="red") +
  ggtitle("Boxplot Data x") +
  theme_minimal()

#5. QQ Plot
qqnorm(x)
qqline(x, col = "red")

#5.1 QQ Plot (ggplot)
ggplot(data.frame(x=x), aes(sample = x)) +
  stat_qq(color = "blue") +
  stat_qq_line(color = "red") +
  ggtitle("QQ Plot Data x") +
  theme_minimal()

#6. Density plot (ggplot)
data_frame_x <- data.frame(x = x)
ggplot(data_frame_x, aes(x = x)) +
  geom_density(fill = "pink", alpha = 0.5) +
  geom_vline(aes(xintercept = mean_alpha_0), color = "blue", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = mean_alpha_0.1), color = "red", linetype = "dashed", linewidth = 1 ) +
  labs(title = "Density Plot of Expenditure Data X", x = "Expenditure", y = "Density")