Tugas Statistika Deskriptif dan Visualisasi Data Lingkungan

Deskripsi Studi Kasus

Indeks Kualitas Lingkungan Hidup (IKLH) merupakan ukuran komposit yang mencerminkan kondisi lingkungan di Indonesia melalui enam indikator utama, yaitu Indeks Kualitas Udara (IKU), Indeks Kualitas Air (IKA), Indeks Kualitas Tutupan Lahan (IKTL), Indeks Kualitas Lahan (IKL), Indeks Kualitas Ekosistem Gambut (IKEG), dan Indeks Kualitas Air Laut (IKAL). Data IKLH yang dianalisis mencakup periode 2019–2023 untuk seluruh provinsi, sehingga dapat digunakan untuk mengeksplorasi dinamika lingkungan baik secara spasial maupun temporal. Analisis ini bertujuan memberikan gambaran mengenai variasi dan tren kualitas lingkungan di Indonesia, sekaligus menyoroti potensi perbedaan antar wilayah dan perkembangan dari waktu ke waktu sebagai dasar evaluasi dan pengambilan kebijakan berkelanjutan. Source: https://www.bps.go.id/id/statistics-table/2/MjUzMiMy/komponen-penyusun-indeks-kualitas-lingkungan-hidup-menurut-provinsi.html

library(readr)
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
# gabung data menjadi format data panel
# baca raw tanpa header
raw <- read_excel("D:/[UNNES]/SEMESTER 5 [22 SKS]/Statistika Lingkungan [2 SKS]/P4/IKLH Indonesia.xlsx",
                  col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
# ambil header
indikator <- raw[1, -1] %>% unlist() %>% as.character()
tahun <- raw[2, -1] %>% unlist() %>% as.character()
new_names <- c("Provinsi", paste(indikator, tahun, sep = "_"))

# ambil data mulai baris 3
iklh <- raw[-c(1,2), ]
colnames(iklh) <- new_names

# pivot ke long
iklh_long <- iklh %>%
  pivot_longer(
    cols = -Provinsi,
    names_to = c("Indikator", "Tahun"),
    names_sep = "_",
    values_to = "Nilai"
  ) %>%
  mutate(
    Tahun = as.integer(Tahun),
    Nilai = as.numeric(Nilai)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Nilai = as.numeric(Nilai)`.
## Caused by warning:
## ! NAs introduced by coercion
head(iklh_long, 15)
## # A tibble: 15 × 4
##    Provinsi Indikator                               Tahun Nilai
##    <chr>    <chr>                                   <int> <dbl>
##  1 ACEH     Indeks Kualitas Udara                    2019  90.7
##  2 ACEH     Indeks Kualitas Air                      2019  60.6
##  3 ACEH     Indeks Kualitas Tutupan Lahan (IKTL)     2019  76.6
##  4 ACEH     Indeks Kualitas Lahan (IKL)              2019  NA  
##  5 ACEH     Indeks Kualitas Ekosistem Gambut (IKEG)  2019  58.5
##  6 ACEH     Indeks Kualitas Air Laut (IKAL)          2019  78.7
##  7 ACEH     Indeks Kualitas Udara                    2020  89.5
##  8 ACEH     Indeks Kualitas Air                      2020  61.4
##  9 ACEH     Indeks Kualitas Tutupan Lahan (IKTL)     2020  76.8
## 10 ACEH     Indeks Kualitas Lahan (IKL)              2020  76.1
## 11 ACEH     Indeks Kualitas Ekosistem Gambut (IKEG)  2020  57.8
## 12 ACEH     Indeks Kualitas Air Laut (IKAL)          2020  63.5
## 13 ACEH     Indeks Kualitas Udara                    2021  89.6
## 14 ACEH     Indeks Kualitas Air                      2021  57.1
## 15 ACEH     Indeks Kualitas Tutupan Lahan (IKTL)     2021  76.6

1. Polar Area Chart

library(ggplot2)
library(dplyr)
library(viridis)
## Loading required package: viridisLite
# rename versi singkat indikator
sumut2023 <- iklh_long %>% 
  filter(Provinsi == "SUMATERA UTARA", Tahun == 2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(sumut2023, aes(x = Indikator_short, y = Nilai, fill = Indikator_short)) +
  geom_col(width = 1, color = "black") +
  coord_polar() +
  scale_fill_viridis(discrete = TRUE, option = "D") +
  theme_minimal(base_size = 14) +
  labs(title = "Komposisi Indeks Kualitas Lingkungan Hidup
       Sumatera Utara (2023)",
       x = "", y = "", fill = "Indikator") +
  theme(
    legend.position = "right",
    legend.key.size = unit(0.6, "cm"),
    legend.text = element_text(size = 9),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

2. Treemap

library(treemapify)
library(dplyr)
library(ggplot2)
library(viridis)

# filter data sumatera utara 2023
sumut2023 <- iklh_long %>% 
  filter(Provinsi == "SUMATERA UTARA", Tahun == 2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

# plot treemap
ggplot(sumut2023, aes(area = Nilai, fill = Indikator_short, label = Indikator_short)) +
  geom_treemap(color = "white") +
  geom_treemap_text(colour = "white", place = "centre", grow = TRUE, reflow = TRUE) +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  theme_minimal() +
  labs(title = "Treemap Indeks Kualitas Lingkungan Hidup
       Sumatera Utara (2023)",
       fill = "Indikator") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# treemap sumut 5 tahun
sumut_all <- iklh_long %>% 
  filter(Provinsi == "SUMATERA UTARA", Tahun %in% 2019:2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(sumut_all, aes(area = Nilai, fill = Indikator_short, label = Indikator_short)) +
  geom_treemap(color = "white") +
  geom_treemap_text(colour = "white", place = "centre", reflow = TRUE, grow = T) +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  facet_wrap(~Tahun) +
  theme_minimal() +
  labs(title = "Treemap Indeks Kualitas Lingkungan
       Sumatera Utara (2019–2023)",
       fill = "Indikator") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_treemap()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_treemap_text()`).

# treemap nasional 2023
indo2023 <- iklh_long %>% 
  filter(Tahun == 2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(indo2023, aes(area = Nilai, fill = Indikator_short, 
                     label = Indikator_short, subgroup = Provinsi)) +
  geom_treemap(color = "white") +
  geom_treemap_subgroup_border(color = "black", size = 0.5) +
  geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.3, colour = "black") +
  geom_treemap_text(colour = "white", place = "centre", reflow = TRUE, grow = T) +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  theme_minimal() +
  labs(title = "Treemap Indeks Kualitas Lingkungan Hidup Indonesia (2023)",
       fill = "Indikator") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_treemap()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_subgroup_border()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_subgroup_text()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_treemap_text()`).

# faceted treemap 2019-2023
indo_all <- iklh_long %>% 
  filter(Tahun %in% 2019:2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(indo_all, aes(area = Nilai, fill = Indikator_short, 
                     label = Indikator_short, subgroup = Provinsi)) +
  geom_treemap(color = "white") +
  geom_treemap_subgroup_border(color = "black", size = 0.4) +
  geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.25, colour = "black") +
  geom_treemap_text(colour = "white", place = "centre", reflow = TRUE, grow = T) +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  facet_wrap(~Tahun) +
  theme_minimal() +
  labs(title = "Treemap Indeks Kualitas Lingkungan Hidup Indonesia (2019–2023)",
       fill = "Indikator") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Warning: Removed 209 rows containing missing values or values outside the scale range
## (`geom_treemap()`).
## Warning: Removed 209 rows containing missing values or values outside the scale range
## (`geom_subgroup_border()`).
## Warning: Removed 209 rows containing missing values or values outside the scale range
## (`geom_subgroup_text()`).
## Warning: Removed 209 rows containing missing values or values outside the scale range
## (`geom_treemap_text()`).

3. Beeswarm Plot

library(ggplot2)
library(ggbeeswarm)

indo2023 <- iklh_long %>% filter(Tahun == 2023) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(indo2023, aes(x = Indikator_short, y = Nilai, color = Indikator_short)) +
  geom_beeswarm(size = 2) +
  theme_minimal() +
  scale_color_viridis_d(option = "D") +
  labs(title = "Distribusi Nilai IKLH antar Provinsi (2023)",
       x = "Indikator", y = "Nilai Indeks") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

4. Slope Chart

library(dplyr)
library(ggplot2)

sumut_2tahun <- iklh_long %>% 
  filter(Provinsi == "SUMATERA UTARA", Tahun %in% c(2019, 2023)) %>%
  mutate(Indikator_short = recode(Indikator,
    "Indeks Kualitas Udara" = "IK Udara",
    "Indeks Kualitas Air" = "IK Air",
    "Indeks Kualitas Tutupan Lahan (IKTL)" = "IK Tutupan",
    "Indeks Kualitas Lahan (IKL)" = "IK Lahan",
    "Indeks Kualitas Ekosistem Gambut (IKEG)" = "IK Gambut",
    "Indeks Kualitas Air Laut (IKAL)" = "IK Laut"
  ))

ggplot(sumut_2tahun, aes(x = Tahun, y = Nilai, group = Indikator_short, color = Indikator_short)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = c(2019, 2023)) +
  theme_minimal(base_size = 14) +
  labs(title = "Perubahan Indeks IKLH Sumatera Utara (2019 vs 2023)",
       x = "Tahun", y = "Nilai Indeks", color = "Indikator") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

5. Parallel Coordinates

library(GGally)
library(viridis)

# data wide untuk 2023
indo2023_wide <- iklh_long %>%
  filter(Tahun == 2023) %>%
  select(-Tahun) %>%
  pivot_wider(names_from = Indikator, values_from = Nilai) %>%
  mutate(Provinsi = as.factor(Provinsi))

# parallel coordinates plot
ggparcoord(indo2023_wide,
           columns = 2:ncol(indo2023_wide),
           groupColumn = "Provinsi",
           scale = "uniminmax",
           alphaLines = 0.6) +
  scale_color_viridis_d(option = "C", guide = "none") +
  theme_minimal(base_size = 13) +
  labs(title = "Profil IKLH Antar Provinsi (2023)",
       x = "Indikator", y = "Skala 0-1 (Normalisasi)") +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

6. Ridgeline Plot

library(ggridges)

ggplot(iklh_long %>% filter(Indikator == "Indeks Kualitas Udara"), 
       aes(x = Nilai, y = factor(Tahun), fill = factor(Tahun))) +
  geom_density_ridges(alpha = 0.7, scale = 2) +
  scale_fill_viridis_d(option = "D") +
  theme_minimal(base_size = 13) +
  labs(title = "Distribusi Indeks Kualitas Udara Antar Tahun",
       x = "Nilai Indeks", y = "Tahun", fill = "Tahun") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Picking joint bandwidth of 1.07
## Warning: Removed 16 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).