x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)
trimmed_mean <- function(x, alpha = 0.1) {
if (alpha < 0 || alpha > 0.5 ) {
stop("alpha must be between 0 and 0.5")
}
n <- length(x)
k <- floor(n*alpha)
sorted_data <- sort(x)
sum_x <- 0
for (i in (k+1):(n-k)) {
sum_x <- sum_x + sorted_data[i]
}
return(sum_x/(n-2*k))
}
mean_alpha_0 <- trimmed_mean(x, 0)
mean_alpha_0
## [1] 1985
mean_alpha_0.1 <- trimmed_mean(x, 0.1)
mean_alpha_0.1
## [1] 1447
Question 2 The Surabaya City Government conducts a household welfare survey across all 31 kecamatan (sub-districts). Three variables are recorded as district-level averages: monthly income (𝑥*, million IDR), monthly expenditure (𝑥+, million IDR), and household size (𝑥., persons). Each kecamatan is weighted by its population (in tens of thousands), reflecting how many households that kecamatan represents.
weighted_multi_desc <- function(X, w) {
n <- nrow(X)
p <- ncol(X)
vector1 <- matrix(1, nrow = n, ncol = 1)
W <- diag(w)
n_w <- 0
for (i in 1:n) {
n_w <- n_w + w[i]
}
#Weighted mean vector
x_bar_w <- (1/n_w)* t(X) %*% W %*% vector1
#Deviation matrix
D <- X - vector1 %*% t(x_bar_w)
#Weighted covariance matrix
S_w <- (1 / (n_w - 1)) * t(D) %*% W %*% D
#Weighted standard deviation vector
Std_w <- sqrt(diag(S_w))
return(list(
W = W,
x_bar_w = x_bar_w,
S_w = S_w,
Std_w = Std_w
))
}
data <- read.csv("https://raw.githubusercontent.com/Kevin-Lrx/Quiz-1-Data-MM-2-Year-2026/5c62aff9a8e9de76b2885ef50215c9ec31bbdd50/data_quiz.csv", header = TRUE, sep = ",")
X <- as.matrix(data[, c("Income", "Expenditure", "HH_Size")])
w <- data$Weight
print(data)
## Kecamatan Income Expenditure HH_Size Weight
## 1 Asemrowo 3.1 2.5 4.2 5
## 2 Benowo 3.8 3.0 4.0 7
## 3 Bubutan 4.2 3.4 3.5 10
## 4 Bulak 3.5 2.8 4.1 5
## 5 Dukuh Pakis 6.5 5.2 3.0 6
## 6 Gayungan 5.8 4.6 3.1 4
## 7 Genteng 5.5 4.4 2.8 6
## 8 Gubeng 5.9 4.7 2.9 13
## 9 Gunung Anyar 4.8 3.8 3.6 6
## 10 Jambangan 4.3 3.4 3.8 5
## 11 Karangpilang 4.5 3.6 3.7 7
## 12 Kenjeran 3.0 2.4 4.5 18
## 13 Krembangan 3.4 2.7 4.3 11
## 14 Lakarsantri 4.0 3.2 3.9 6
## 15 Mulyorejo 5.2 4.1 3.2 9
## 16 Pabean Cantikan 3.3 2.6 4.4 7
## 17 Pakal 3.6 2.9 4.1 6
## 18 Rungkut 5.0 4.0 3.3 12
## 19 Sambikerep 4.1 3.3 3.8 7
## 20 Sawahan 3.7 3.0 4.2 20
## 21 Semampir 2.8 2.3 4.7 18
## 22 Simokerto 3.2 2.6 4.3 9
## 23 Sukolilo 5.6 4.5 3.0 11
## 24 Sukomanunggal 4.6 3.7 3.5 10
## 25 Tambaksari 3.5 2.8 4.4 23
## 26 Tandes 4.0 3.2 3.9 9
## 27 Tegalsari 4.1 3.3 3.7 10
## 28 Tenggilis Mejoyo 5.3 4.2 3.1 6
## 29 Wiyung 4.7 3.7 3.6 7
## 30 Wonocolo 4.9 3.9 3.4 8
## 31 Wonokromo 4.4 3.5 3.8 16
weighted_multi_desc(X, w)
## $W
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 5 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 7 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 10 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 5 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 6 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 4 0 0 0 0 0 0 0
## [7,] 0 0 0 0 0 0 6 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0 0 13 0 0 0 0 0
## [9,] 0 0 0 0 0 0 0 0 6 0 0 0 0
## [10,] 0 0 0 0 0 0 0 0 0 5 0 0 0
## [11,] 0 0 0 0 0 0 0 0 0 0 7 0 0
## [12,] 0 0 0 0 0 0 0 0 0 0 0 18 0
## [13,] 0 0 0 0 0 0 0 0 0 0 0 0 11
## [14,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [17,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [18,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [19,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [20,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [21,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [22,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [23,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [24,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [25,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [26,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [29,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [30,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [31,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0 0
## [7,] 0 0 0 0 0 0 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0 0 0 0 0 0 0
## [9,] 0 0 0 0 0 0 0 0 0 0 0 0
## [10,] 0 0 0 0 0 0 0 0 0 0 0 0
## [11,] 0 0 0 0 0 0 0 0 0 0 0 0
## [12,] 0 0 0 0 0 0 0 0 0 0 0 0
## [13,] 0 0 0 0 0 0 0 0 0 0 0 0
## [14,] 6 0 0 0 0 0 0 0 0 0 0 0
## [15,] 0 9 0 0 0 0 0 0 0 0 0 0
## [16,] 0 0 7 0 0 0 0 0 0 0 0 0
## [17,] 0 0 0 6 0 0 0 0 0 0 0 0
## [18,] 0 0 0 0 12 0 0 0 0 0 0 0
## [19,] 0 0 0 0 0 7 0 0 0 0 0 0
## [20,] 0 0 0 0 0 0 20 0 0 0 0 0
## [21,] 0 0 0 0 0 0 0 18 0 0 0 0
## [22,] 0 0 0 0 0 0 0 0 9 0 0 0
## [23,] 0 0 0 0 0 0 0 0 0 11 0 0
## [24,] 0 0 0 0 0 0 0 0 0 0 10 0
## [25,] 0 0 0 0 0 0 0 0 0 0 0 23
## [26,] 0 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0 0
## [29,] 0 0 0 0 0 0 0 0 0 0 0 0
## [30,] 0 0 0 0 0 0 0 0 0 0 0 0
## [31,] 0 0 0 0 0 0 0 0 0 0 0 0
## [,26] [,27] [,28] [,29] [,30] [,31]
## [1,] 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0
## [7,] 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0
## [9,] 0 0 0 0 0 0
## [10,] 0 0 0 0 0 0
## [11,] 0 0 0 0 0 0
## [12,] 0 0 0 0 0 0
## [13,] 0 0 0 0 0 0
## [14,] 0 0 0 0 0 0
## [15,] 0 0 0 0 0 0
## [16,] 0 0 0 0 0 0
## [17,] 0 0 0 0 0 0
## [18,] 0 0 0 0 0 0
## [19,] 0 0 0 0 0 0
## [20,] 0 0 0 0 0 0
## [21,] 0 0 0 0 0 0
## [22,] 0 0 0 0 0 0
## [23,] 0 0 0 0 0 0
## [24,] 0 0 0 0 0 0
## [25,] 0 0 0 0 0 0
## [26,] 9 0 0 0 0 0
## [27,] 0 10 0 0 0 0
## [28,] 0 0 6 0 0 0
## [29,] 0 0 0 7 0 0
## [30,] 0 0 0 0 8 0
## [31,] 0 0 0 0 0 16
##
## $x_bar_w
## [,1]
## Income 4.186869
## Expenditure 3.350505
## HH_Size 3.832997
##
## $S_w
## Income Expenditure HH_Size
## Income 0.8703337 0.6840776 -0.4882477
## Expenditure 0.6840776 0.5384541 -0.3843073
## HH_Size -0.4882477 -0.3843073 0.2926238
##
## $Std_w
## Income Expenditure HH_Size
## 0.9329168 0.7337943 0.5409471
# Data kecamatan
kecamatan <- read.csv("https://raw.githubusercontent.com/Kevin-Lrx/Quiz-1-Data-MM-2-Year-2026/5c62aff9a8e9de76b2885ef50215c9ec31bbdd50/data_quiz.csv", header = TRUE)
x <- c(850, 920, 1100, 1250, 1300, 1400, 1500, 1550, 1600, 1750, 2100, 8500)
head(kecamatan)
x
## [1] 850 920 1100 1250 1300 1400 1500 1550 1600 1750 2100 8500
DATA KECAMATAN DATA VISUALIZATION
#1. Summary
summary(kecamatan)
## Kecamatan Income Expenditure HH_Size
## Length:31 Min. :2.800 Min. :2.300 Min. :2.800
## Class :character 1st Qu.:3.550 1st Qu.:2.850 1st Qu.:3.350
## Mode :character Median :4.200 Median :3.400 Median :3.800
## Mean :4.332 Mean :3.461 Mean :3.735
## 3rd Qu.:4.950 3rd Qu.:3.950 3rd Qu.:4.150
## Max. :6.500 Max. :5.200 Max. :4.700
## Weight
## Min. : 4.000
## 1st Qu.: 6.000
## Median : 8.000
## Mean : 9.581
## 3rd Qu.:11.000
## Max. :23.000
#2. Scatter Plot
plot(kecamatan$Income, kecamatan$Expenditure,
main = "Income vs Expenditure",
xlab = "Income",
ylab = "Expenditure",
col = "blue",
pch = 19)
abline(lm(Expenditure ~ Income, data = kecamatan),
col = "red", lwd = 2)
#2.1 Scatter Plot (ggplot)
library(ggplot2)
library(reshape2)
ggplot(kecamatan, aes(x = Income, y = Expenditure)) +
geom_point(color = "blue", size = 3) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
ggtitle("Income vs Expenditure Kecamatan") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#3. Pair Plot
pairs(kecamatan[, c("Income","Expenditure","HH_Size")],
col = "blue",
pch = 19,
main = "Pair Plot Kecamatan")
#3.1 Pair Plot (ggplot)
library(GGally)
ggpairs(kecamatan[, c("Income","Expenditure","HH_Size")],
title = "Pair Plot Kecamatan",
upper = list(continuous = wrap("points", color = "blue")),
lower = list(continuous = wrap("points", color = "blue")),
diag = list(continuous = wrap("barDiag", fill = "green", binwidth = 500)))
#4. Histogram
layout(matrix(1:3, nrow = 1))
hist(kecamatan$Income, col = "blue", main = "Income")
hist(kecamatan$Expenditure, col = "red", main = "Expenditure")
hist(kecamatan$HH_Size, col = "green", main = "HH Size")
#4.1 Histogram (ggplot)
library(reshape2)
kecamatan_long <- melt(kecamatan[, c("Income", "Expenditure", "HH_Size")])
## No id variables; using all as measure variables
ggplot(kecamatan_long, aes(x = value, fill = variable)) +
geom_histogram(position = "dodge", bins = 10, alpha = 0.7) +
facet_wrap(~variable, scales = "free") +
ggtitle("Histograms Kecamatan") +
theme_minimal()
#5. Correlation Heatmap
library(corrplot)
## corrplot 0.95 loaded
cor_matrix <- cor(kecamatan[, c("Income","Expenditure","HH_Size")])
corrplot(cor_matrix,
method = "color",
addCoef.col = "blue")
#5.1 Correlation Heatmap (ggplot)
library(reshape2)
cor_matrix <- cor(kecamatan[, c("Income","Expenditure","HH_Size")])
cor_df <- melt(cor_matrix)
ggplot(cor_df, aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
geom_text(aes(label=round(value,2)), color="blue", size=5) +
scale_fill_gradient2(low="white", high="red", mid="yellow", midpoint=0.5) +
ggtitle("Correlation Heatmap Kecamatan") +
theme_minimal()
#6. Boxplot
boxplot(kecamatan[, c("Income","Expenditure","HH_Size")],
main = "Boxplot Kecamatan",
col = c("blue","red","green"))
#6.1 Box plot (ggplot)
library(ggplot2)
# Base ggplot, mapping x to a factor for each variable
ggplot() +
geom_boxplot(aes(y = kecamatan$Income, x = "Income"), fill = "blue") +
geom_boxplot(aes(y = kecamatan$Expenditure, x = "Expenditure"), fill = "red") +
geom_boxplot(aes(y = kecamatan$HH_Size, x = "HH_Size"), fill = "green") +
labs(title = "Boxplot Kecamatan",
x = "Variable",
y = "Value") +
theme_minimal()
#7. Trend Lines Plot (ggplot)
ggplot(kecamatan, aes(x = Income, y = Expenditure)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", se = FALSE, color = "red", linewidth = 0.5) +
labs(title = "Income vs Expenditure with Regression line") +
theme_minimal() +
theme(plot.title = element_text(face = "bold"))
## `geom_smooth()` using formula = 'y ~ x'
#8. Bubble Plot (ggplot)
ggplot(kecamatan, aes(x = Income, y = Expenditure, size = Weight)) +
geom_point(alpha = 0.5, color = "blue") +
labs(title="Income vs Expenditure (Weighted by Population)")
#9. Pair Plot (ggplot)
ggpairs(kecamatan[, c("Income","Expenditure","HH_Size")], title = "Pair Plot Household Welfare survey",)
DATA X DATA VISUALIZATION
#1. Statistik Deskriptif
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 850 1212 1450 1985 1638 8500
mean(x)
## [1] 1985
sd(x)
## [1] 2081.164
#2. Histogram
hist(x,
breaks = 10,
col = "red",
main = "Histogram Data x")
#2.1 Histogram (ggplot)
ggplot(data.frame(x=x), aes(x=x)) +
geom_histogram(binwidth = 500, fill = "red", color = "black", alpha = 0.7) +
ggtitle("Histogram Data x") +
theme_minimal()
#3. Scatter Index Plot
plot(x,
main = "Scatter Plot Data x",
xlab = "Index",
ylab = "Value",
col = "blue",
pch = 19)
#3.1 Scatter Index Plot (ggplot)
ggplot(data = data.frame(Index = 1:length(x), Value = x),
aes(x = Index, y = Value)) +
geom_point(color = "blue", size = 2) + # pch 19 equivalent
labs(title = "Scatter Plot Data x",
x = "Index",
y = "Value") +
theme_minimal()
#4. Boxplot
boxplot(x,
main = "Boxplot Data x",
col = "red")
#4.1 Boxplot (ggplot)
ggplot(data.frame(x=x), aes(y=x)) +
geom_boxplot(fill="red") +
ggtitle("Boxplot Data x") +
theme_minimal()
#5. QQ Plot
qqnorm(x)
qqline(x, col = "red")
#5.1 QQ Plot (ggplot)
ggplot(data.frame(x=x), aes(sample = x)) +
stat_qq(color = "blue") +
stat_qq_line(color = "red") +
ggtitle("QQ Plot Data x") +
theme_minimal()
#6. Density plot (ggplot)
data_frame_x <- data.frame(x = x)
ggplot(data_frame_x, aes(x = x)) +
geom_density(fill = "pink", alpha = 0.5) +
geom_vline(aes(xintercept = mean_alpha_0), color = "blue", linetype = "dashed", linewidth = 1) +
geom_vline(aes(xintercept = mean_alpha_0.1), color = "red", linetype = "dashed", linewidth = 1 ) +
labs(title = "Density Plot of Expenditure Data X", x = "Expenditure", y = "Density")