# Data analysis for the forkas 19 assignment
# Dataset: "Inflasi Bulanan Menurut Kabupaten/Kota (M-to-M) (Persen)"
# (monthly, month-to-month inflation by regency/city, in percent)

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(readxl)

# Load the raw workbook; `inflasi1` keeps the data exactly as it was read.
inflasi1 <- read_excel("D:/STIS/SEMESTER 2/forkas.xlsx")

# Build the working copy `inflasi`:
#   1. discard the first data row (the original note ties this step to the
#      month header -- presumably a sub-header row; TODO confirm in the file)
#   2. name the first column "kabupaten"
#   3. remove rows that have no value in `kabupaten`
inflasi <- inflasi1[-1, ]
names(inflasi)[1] <- "kabupaten"
inflasi <- filter(inflasi, !is.na(kabupaten))

# Reshape to long format: every column other than `kabupaten` is stacked into
# a (`bulan`, `inflasi`) pair, then the values are converted to numeric.
inflasi2 <- pivot_longer(
  inflasi,
  cols = !kabupaten,
  names_to = "bulan",
  values_to = "inflasi"
)
inflasi2 <- mutate(inflasi2, inflasi = as.numeric(inflasi))
inflasi2
## # A tibble: 1,800 × 3
##    kabupaten     bulan     inflasi
##    <chr>         <chr>       <dbl>
##  1 KOTA MEULABOH Januari     -0.44
##  2 KOTA MEULABOH Februari    -0.72
##  3 KOTA MEULABOH Maret        1.88
##  4 KOTA MEULABOH April        1.75
##  5 KOTA MEULABOH Mei         -0.18
##  6 KOTA MEULABOH Juni         0.07
##  7 KOTA MEULABOH Juli         0.39
##  8 KOTA MEULABOH Agustus      0.55
##  9 KOTA MEULABOH September    0.19
## 10 KOTA MEULABOH Oktober      0.14
## # ℹ 1,790 more rows
# Descriptive statistics of `inflasi` over all rows (missing values ignored):
# mean, median, standard deviation, minimum and maximum in a one-row tibble.
inflasi2 %>%
  summarise(
    across(
      inflasi,
      list(
        average = \(x) mean(x, na.rm = TRUE),
        median  = \(x) median(x, na.rm = TRUE),
        sd      = \(x) sd(x, na.rm = TRUE),
        min     = \(x) min(x, na.rm = TRUE),
        max     = \(x) max(x, na.rm = TRUE)
      ),
      # Name each output column after its function only (no "inflasi_" prefix).
      .names = "{.fn}"
    )
  )
## # A tibble: 1 × 5
##   average median    sd   min   max
##     <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1   0.261    0.2 0.860 -2.47  6.94
# Monthly trend: average inflation per month (missing values ignored).
inflasi_bulan <- summarise(
  group_by(inflasi2, bulan),
  rata_inflasi = mean(inflasi, na.rm = TRUE)
)

# Convert the month names to a factor whose levels follow the calendar, so
# that sorting (and later plotting) is chronological instead of alphabetical.
inflasi_bulan$bulan <- factor(
  inflasi_bulan$bulan,
  levels = c(
    "Januari", "Februari", "Maret", "April", "Mei", "Juni",
    "Juli", "Agustus", "September", "Oktober", "November", "Desember"
  )
)
inflasi_bulan <- arrange(inflasi_bulan, bulan)
inflasi_bulan
## # A tibble: 12 × 2
##    bulan     rata_inflasi
##    <fct>            <dbl>
##  1 Januari        -0.654 
##  2 Februari       -0.403 
##  3 Maret           1.73  
##  4 April           1.05  
##  5 Mei            -0.400 
##  6 Juni            0.167 
##  7 Juli            0.504 
##  8 Agustus        -0.124 
##  9 September       0.191 
## 10 Oktober         0.150 
## 11 November        0.0759
## 12 Desember        0.839
# Per-region analysis: average inflation for each kabupaten (missing values
# ignored), sorted from the highest average to the lowest.
inflasi_kab <- arrange(
  summarise(
    group_by(inflasi2, kabupaten),
    rata_inflasi = mean(inflasi, na.rm = TRUE)
  ),
  desc(rata_inflasi)
)
inflasi_kab
## # A tibble: 150 × 2
##    kabupaten         rata_inflasi
##    <chr>                    <dbl>
##  1 KOTA GUNUNGSITOLI        0.879
##  2 KAB ACEH TAMIANG         0.586
##  3 KAB PASAMAN BARAT        0.58 
##  4 KAB TOLI TOLI            0.508
##  5 TEMBILAHAN               0.505
##  6 KOTA BANDA ACEH          0.501
##  7 KAB NABIRE               0.481
##  8 KOTA SIBOLGA             0.462
##  9 KOTA MEULABOH            0.458
## 10 KOTA LHOKSEUMAWE         0.456
## # ℹ 140 more rows
# Extreme values: rows whose inflation equals the overall minimum or maximum.
# range() returns c(min, max); rows with a missing `inflasi` never match.
inflasi2 %>%
  filter(inflasi %in% range(inflasi, na.rm = TRUE))
## # A tibble: 2 × 3
##   kabupaten         bulan    inflasi
##   <chr>             <chr>      <dbl>
## 1 KOTA GUNUNGSITOLI Desember    6.94
## 2 KAB GORONTALO     Mei        -2.47
# Line chart of the monthly average inflation, with each point labelled by
# its value rounded to two decimals.
ggplot(
  data = inflasi_bulan,
  # group = 1 puts all months in one group so geom_line() connects them even
  # though the x axis is discrete.
  mapping = aes(bulan, rata_inflasi, group = 1)
) +
  theme_light() +
  geom_line() +
  geom_point() +
  geom_text(
    mapping = aes(label = round(rata_inflasi, 2)),
    size = 2.5,
    vjust = -1
  ) +
  labs(
    y = "Rata-rata Inflasi (%)",
    x = "Bulan",
    title = "Trend Inflasi Bulanan (M-to-M)"
  )

# Ten regencies/cities with the highest average inflation.
top10 <- slice_max(inflasi_kab, order_by = rata_inflasi, n = 10)

# Horizontal bar chart; bars are ordered by their average and each bar is
# labelled with the value rounded to two decimals.
ggplot(
  data = top10,
  mapping = aes(reorder(kabupaten, rata_inflasi), rata_inflasi)
) +
  theme_light() +
  geom_col(fill = "darkorange") +
  geom_text(
    mapping = aes(label = round(rata_inflasi, 2)),
    size = 3,
    hjust = -0.1
  ) +
  coord_flip() +
  labs(
    y = "Rata-rata Inflasi (%)",
    x = "Kabupaten/Kota",
    title = "Top 10 Kabupaten/Kota Inflasi Tertinggi (%)"
  )

# Ten regencies/cities with the lowest average inflation.
bottom10 <- inflasi_kab %>%
  slice_min(rata_inflasi, n = 10)

# Horizontal bar chart of those averages, ordered by value.
ggplot(bottom10, aes(x = reorder(kabupaten, rata_inflasi), y = rata_inflasi)) +
  geom_col(fill = "gold") +
  # Place each label just beyond the end of its bar. A bar with a negative
  # average extends to the left of zero, so its label must go to the left
  # (hjust > 1); a fixed hjust of -0.1 would draw it on top of the bar.
  geom_text(aes(label = round(rata_inflasi, 2),
                hjust = ifelse(rata_inflasi < 0, 1.1, -0.1)),
            size = 3) +
  coord_flip() +
  labs(
    title = "Top 10 Inflasi Terendah",
    x = "Kabupaten/Kota",
    y = "Rata-rata Inflasi (%)"
  ) +
  theme_light()

# Histogram of all inflation values, using 30 bins.
ggplot(data = inflasi2, mapping = aes(inflasi)) +
  theme_light() +
  geom_histogram(
    color = "white",
    fill = "darkorange3",
    bins = 30
  ) +
  labs(
    y = "Frekuensi",
    x = "Inflasi (%)",
    title = "Distribusi Inflasi"
  )

# Single boxplot of all inflation values.
ggplot(data = inflasi2, mapping = aes(y = inflasi)) +
  theme_light() +
  geom_boxplot(fill = "cornflowerblue") +
  labs(title = "Boxplot Inflasi")

# Heatmap of monthly inflation for the regencies/cities with the highest
# average inflation.

# Give `bulan` calendar-ordered factor levels so the x axis is chronological.
# Note: this overwrites the `bulan` column of `inflasi2` itself.
inflasi2 <- mutate(
  inflasi2,
  bulan = factor(
    bulan,
    levels = c(
      "Januari", "Februari", "Maret", "April", "Mei", "Juni",
      "Juli", "Agustus", "September", "Oktober", "November", "Desember"
    )
  )
)

# Average inflation per kabupaten, keeping the ten highest.
top10_kab <- slice_max(
  summarise(
    group_by(inflasi2, kabupaten),
    rata_inflasi = mean(inflasi, na.rm = TRUE)
  ),
  order_by = rata_inflasi,
  n = 10
)

# Monthly rows of `inflasi2` that belong to those regencies/cities.
data_top10 <- semi_join(inflasi2, top10_kab, by = "kabupaten")

ggplot(data = data_top10, mapping = aes(bulan, kabupaten, fill = inflasi)) +
  geom_tile(color = "white") +
  # Diverging palette centred on zero: one colour direction for negative
  # values, the other for positive values.
  scale_fill_gradient2(
    midpoint = 0,
    low = "dodgerblue",
    mid = "navajowhite",
    high = "orangered"
  ) +
  labs(
    fill = "Inflasi (%)",
    y = "Kabupaten/Kota",
    x = "Bulan",
    title = "Heatmap Inflasi (Top 10 Kabupaten/Kota)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.y = element_text(size = 6)
  )