UAS SNP Kelompok 1

Kelompok I SD-A1

Anggota Kelompok

Nama NIM
Bonita Salwa Kalisha 164231070
Glennesha Putri Anandaprasa 164231086
Khairunnisa 164231097
Sarah Alya Azizah 164231105

Data

Sumber data yang digunakan dalam penelitian ini adalah data sekunder dari Badan Pusat Statistik (BPS) dengan judul dataset “Pengeluaran per Kapita yang Disesuaikan menurut Jenis Kelamin (Ribu Rupiah/Orang/Tahun)”. Data ini diperoleh melalui laman resmi BPS.go.id

data <- data.frame(
  Region = c("Kulon Progo", "Bantul", "Gunung Kidul", "Sleman", "Kota Yogyakarta"),
  Male_2022 = c(13.621, 18.555, 16.349, 17.736, 19.545),
  Male_2023 = c(13.836, 19.227, 16.645, 18.353, 20.163),
  Female_2022 = c(9.947, 15.289, 6.807, 15.366, 19.136),
  Female_2023 = c(9.997, 15.671, 6.889, 15.762, 19.651)
)

print(data)
##            Region Male_2022 Male_2023 Female_2022 Female_2023
## 1     Kulon Progo    13.621    13.836       9.947       9.997
## 2          Bantul    18.555    19.227      15.289      15.671
## 3    Gunung Kidul    16.349    16.645       6.807       6.889
## 4          Sleman    17.736    18.353      15.366      15.762
## 5 Kota Yogyakarta    19.545    20.163      19.136      19.651

EDA

Statistika Deskriptif

Ukuran Pemusatan Data

# Statistik deskriptif
summary(data[, -1]) # Mengabaikan kolom Region
##    Male_2022       Male_2023      Female_2022      Female_2023    
##  Min.   :13.62   Min.   :13.84   Min.   : 6.807   Min.   : 6.889  
##  1st Qu.:16.35   1st Qu.:16.64   1st Qu.: 9.947   1st Qu.: 9.997  
##  Median :17.74   Median :18.35   Median :15.289   Median :15.671  
##  Mean   :17.16   Mean   :17.64   Mean   :13.309   Mean   :13.594  
##  3rd Qu.:18.55   3rd Qu.:19.23   3rd Qu.:15.366   3rd Qu.:15.762  
##  Max.   :19.55   Max.   :20.16   Max.   :19.136   Max.   :19.651

Ukuran Penyebaran Data

# Fungsi untuk menghitung ukuran pemusatan dan penyebaran
summary_stats <- function(x) {
  c(
    Range = diff(range(x)),
    Variance = var(x),
    SD = sd(x),
    IQR = IQR(x)
  )
}
# Terapkan fungsi untuk setiap kolom numerik
results <- sapply(data[, -1], summary_stats) # Hilangkan kolom Region

# Tampilkan hasil
print(results)
##          Male_2022 Male_2023 Female_2022 Female_2023
## Range     5.924000  6.327000   12.329000   12.762000
## Variance  5.287065  6.213198   23.921156   25.899209
## SD        2.299362  2.492629    4.890926    5.089127
## IQR       2.206000  2.582000    5.419000    5.765000

Uji Distribusi Normal

# Gabungkan semua kolom numerik menjadi satu vektor
all_values <- unlist(data[, -1]) # Menghilangkan kolom Region dan meratakan data

# Uji normalitas Shapiro-Wilk
shapiro_test <- shapiro.test(all_values)

# Menampilkan hasil
print(shapiro_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  all_values
## W = 0.88056, p-value = 0.01811

Visualisasi

Boxplot

# Mengubah data menjadi format long
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
data_long <- melt(data, id.vars = "Region", 
                  variable.name = "Kategori", 
                  value.name = "Pengeluaran")

# Menambahkan kolom tahun dan jenis kelamin
data_long$Tahun <- ifelse(grepl("2022", data_long$Kategori), "2022", "2023")
data_long$Jenis_Kelamin <- ifelse(grepl("Male", data_long$Kategori), "Laki-Laki", "Perempuan")

# Membuat boxplot untuk membandingkan Laki-Laki vs Perempuan
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
ggplot(data_long, aes(x = Jenis_Kelamin, y = Pengeluaran, fill = Jenis_Kelamin)) +
  geom_boxplot() +
  facet_wrap(~ Tahun) +
  labs(title = "Perbandingan Pengeluaran Laki-Laki vs Perempuan di Yogyakarta (2022 vs 2023)",
       x = "Jenis Kelamin",
       y = "Pengeluaran per Kapita (Ribu Rupiah)",
       fill = "Jenis Kelamin") +
  theme_minimal()

Line Chart

# Ubah data ke format panjang (long format)
data_long <- tidyr::pivot_longer(
  data, 
  cols = -Region, 
  names_to = c("Gender", "Year"),
  names_sep = "_"
)

# Line plot
ggplot(data_long, aes(x = Year, y = value, color = Gender, group = interaction(Region, Gender))) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  facet_wrap(~ Region) +
  labs(
    title = "Pengeluaran Per Kapita Berdasarkan Gender dan Tahun",
    x = "Tahun",
    y = "Pengeluaran Per Kapita",
    color = "Gender"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Uji Independen

Uji Kruskal-Wallis

kruskal_test_2022 <- kruskal.test(value ~ Gender,
                                  data = subset(data_long, Year == "2022"))
kruskal_test_2023 <- kruskal.test(value ~ Gender,
                                  data = subset(data_long, Year == "2023"))

# Hasil uji 
kruskal_test_2022
## 
##  Kruskal-Wallis rank sum test
## 
## data:  value by Gender
## Kruskal-Wallis chi-squared = 1.8436, df = 1, p-value = 0.1745
kruskal_test_2023
## 
##  Kruskal-Wallis rank sum test
## 
## data:  value by Gender
## Kruskal-Wallis chi-squared = 1.8436, df = 1, p-value = 0.1745

Uji Johnkheere-Terpstra

# Install dan muat library clinfun
if (!require("clinfun")) install.packages("clinfun")
## Loading required package: clinfun
## Warning: package 'clinfun' was built under R version 4.3.3
library(clinfun)

# Data dalam format long untuk uji
data_long <- data.frame(
  Value = c(data$Male_2022, data$Male_2023, data$Female_2022, data$Female_2023),
  Year = rep(c(2022, 2023), each = 5, times = 2),
  Gender = rep(c("Male", "Female"), each = 10),
  Region = rep(data$Region, 4)
)


# Gabungkan semua data ke dalam satu vektor
values <- c(data$Male_2022, data$Male_2023, data$Female_2022, data$Female_2023)

# Buat vektor grup (misalnya: 1 untuk 2022 dan 2 untuk 2023)
group <- rep(c(1, 2, 1, 2), each = 5)

# Jalankan uji Jonckheere-Terpstra
jt_result <- jonckheere.test(values, group, alternative = "increasing")
print(jt_result)
## 
##  Jonckheere-Terpstra test
## 
## data:  
## JT = 58, p-value = 0.2894
## alternative hypothesis: increasing

Uji Dependen

Uji Friedman

# Uji Friedman untuk perubahan antar tahun
friedman_data <- data.frame(
  Group = c("Kulon Progo", "Bantul", "Gunung Kidul", "Sleman", "Kota Yogyakarta"),
  Male_2022 = data$Male_2022,
  Male_2023 = data$Male_2023,
  Female_2022 = data$Female_2022,
  Female_2023 = data$Female_2023
)

friedman_test_male <- friedman.test(as.matrix(friedman_data[, 2:3]))
friedman_test_female <- friedman.test(as.matrix(friedman_data[, 4:5]))

# Hasil uji Friedman
friedman_test_male
## 
##  Friedman rank sum test
## 
## data:  as.matrix(friedman_data[, 2:3])
## Friedman chi-squared = 5, df = 1, p-value = 0.02535
friedman_test_female
## 
##  Friedman rank sum test
## 
## data:  as.matrix(friedman_data[, 4:5])
## Friedman chi-squared = 5, df = 1, p-value = 0.02535

Uji Page

# Install dan muat library DescTools
if (!require("DescTools")) install.packages("DescTools")
## Loading required package: DescTools
## Warning: package 'DescTools' was built under R version 4.3.3
library(DescTools)

# Data untuk Page Test
page_data <- matrix(
  c(
    data$Male_2022,  # 2022 Laki-Laki
    data$Male_2023,  # 2023 Laki-Laki
    data$Female_2022, # 2022 Perempuan
    data$Female_2023 # 2023 Perempuan
  ),
  nrow = 5, # 5 Wilayah
  byrow = FALSE,
  dimnames = list(
    c("Kulon Progo", "Bantul", "Gunung Kidul", "Sleman", "Kota Yogyakarta"), # Nama Wilayah
    c("Male_2022", "Male_2023", "Female_2022", "Female_2023") # Tahun dan Gender
  )
)

# Jalankan Page Test
page_test_result <- PageTest(page_data)
print("Hasil Page Test:")
## [1] "Hasil Page Test:"
print(page_test_result)
## 
##  Page test for ordered alternatives
## 
## data:  page_data
## L = 113, p-value = 0.9747

Uji Asosiasi

Uji Korelasi Male 2022 dan 2023

# Male_2022 vs Male_2023
male_2022_2023_corr <- cor.test(data$Male_2022, data$Male_2023, method = "kendall")
cat("\nKendall's Tau untuk Male 2022 vs Male 2023:\n")
## 
## Kendall's Tau untuk Male 2022 vs Male 2023:
print(male_2022_2023_corr)
## 
##  Kendall's rank correlation tau
## 
## data:  data$Male_2022 and data$Male_2023
## T = 10, p-value = 0.01667
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau 
##   1

Uji Korelasi Female 2022 dan 2023

# Female_2022 vs Female_2023
female_2022_2023_corr <- cor.test(data$Female_2022, data$Female_2023, method = "kendall")
cat("\nKendall's Tau untuk Female 2022 vs Female 2023:\n")
## 
## Kendall's Tau untuk Female 2022 vs Female 2023:
print(female_2022_2023_corr)
## 
##  Kendall's rank correlation tau
## 
## data:  data$Female_2022 and data$Female_2023
## T = 10, p-value = 0.01667
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau 
##   1

Uji Korelasi Male dan Female Tahun 2022

# Male_2022 vs Female_2022
male_female_corr <- cor.test(data$Male_2022, data$Female_2022, method = "kendall")
cat("\nKendall's Tau untuk Male 2023 vs Female 2023:\n")
## 
## Kendall's Tau untuk Male 2023 vs Female 2023:
print(male_female_corr)
## 
##  Kendall's rank correlation tau
## 
## data:  data$Male_2022 and data$Female_2022
## T = 8, p-value = 0.2333
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau 
## 0.6

Uji Korelasi Male dan Female Tahun 2023

# Male_2023 vs Female_2023
male_female_corr <- cor.test(data$Male_2023, data$Female_2023, method = "kendall")
cat("\nKendall's Tau untuk Male 2023 vs Female 2023:\n")
## 
## Kendall's Tau untuk Male 2023 vs Female 2023:
print(male_female_corr)
## 
##  Kendall's rank correlation tau
## 
## data:  data$Male_2023 and data$Female_2023
## T = 8, p-value = 0.2333
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau 
## 0.6
# Male_2023 vs Female_2023
male_female_corr <- cor.test(data$Male_2023, data$Female_2023, method = "kendall")
cat("\nKendall's Tau untuk Male 2023 vs Female 2023:\n")
## 
## Kendall's Tau untuk Male 2023 vs Female 2023:
print(male_female_corr)
## 
##  Kendall's rank correlation tau
## 
## data:  data$Male_2023 and data$Female_2023
## T = 8, p-value = 0.2333
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau 
## 0.6