library(readxl)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(lubridate)
data_fob <- read_excel("~/PKL/Data PKL FOB ekspor.xlsx", sheet = 1)

# lihat struktur
str(data_fob)
## tibble [49,199 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Tgl. DAB      : num [1:49199] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ Kabupaten/Kota: chr [1:49199] "BANTUL" "BANTUL" "BANTUL" "BANTUL" ...
##  $ No. HS        : chr [1:49199] "`621600" "`42029290" "`65069990" "`48236900" ...
##  $ Berat Kotor   : num [1:49199] 125 60.5 1724.4 1833.8 8650 ...
##  $ Berat Bersih  : num [1:49199] 0 53.5 1634.4 1762.8 8400 ...
##  $ Satuan        : chr [1:49199] "KGM" "KGM" "KGM" "KGM" ...
##  $ FOB (USD)     : num [1:49199] 7468 1781 3654 4769 12639 ...
##  $ Negara Tujuan : chr [1:49199] "AUSTRIA" "BELGIUM" "BELGIUM" "BELGIUM" ...
colnames(data_fob) <- c("Tahun", "Kabupaten", "HS",
                        "Berat_Kotor", "Berat_Bersih", "Satuan",
                        "FOB", "Negara")

data_fob <- data_fob %>%
  mutate(
    Tahun = as.character(Tahun),
    FOB   = as.numeric(FOB),
    Kabupaten = toupper(Kabupaten)
  )
data_fob <- data_fob %>%
  mutate(Negara = str_to_upper(Negara),
         Negara = str_trim(Negara))

jumlah_negara <- data_fob %>%
  summarise(Jumlah_Negara_Unik = n_distinct(Negara))

jumlah_negara
## # A tibble: 1 × 1
##   Jumlah_Negara_Unik
##                <int>
## 1                132
daftar_negara <- data_fob %>%
  distinct(Negara) %>%
  arrange(Negara)

daftar_negara
## # A tibble: 132 × 1
##    Negara    
##    <chr>     
##  1 ALBANIA   
##  2 ALGERIA   
##  3 ANGUILLA  
##  4 ARGENTINA 
##  5 ARUBA     
##  6 AUSTRALIA 
##  7 AUSTRIA   
##  8 AZERBAIJAN
##  9 BAHRAIN   
## 10 BANGLADESH
## # ℹ 122 more rows
data_fob <- data_fob %>%
  mutate(
    benua = case_when(

      #ASIA
      Negara %in% c("AZERBAIJAN", "BAHRAIN","BANGLADESH","BRUNEI DARUSSALAM","CAMBODIA",
                    "CHINA","GEORGIA","HONG KONG","INDIA","IRAQ","ISRAEL",
                    "JAPAN","JORDAN","KUWAIT","LAO PEOPLE'S DEMOCRATIC REPUBLIC",
                    "LEBANON","MACAU","MALAYSIA","MALDIVES","MYANMAR","OMAN",
                    "PAKISTAN","PHILIPPINES","QATAR","REPUBLIC OF KOREA",
                    "SAUDI ARABIA","SINGAPORE","SRI LANKA","SYRIAN ARAB REPUBLIC",
                    "TAIWAN","THAILAND","TURKEY","UNITED ARAB EMIRATES",
                    "UZBEKISTAN","VIET NAM","YEMEN") ~ "ASIA",

      #EROPA
      Negara %in% c("ALBANIA","AUSTRIA","BELARUS","BELGIUM","BULGARIA",
                    "CROATIA","CYPRUS","CZECH REPUBLIC","DENMARK","ESTONIA",
                    "FINLAND","FRANCE","GERMANY","GREECE","IRELAND","ITALY",
                    "LATVIA","LITHUANIA","MALTA","MONACO","NETHERLANDS",
                    "NORWAY","POLAND","PORTUGAL","ROMANIA","RUSSIAN FEDERATION",
                    "SERBIA","SLOVENIA","SPAIN","SWEDEN","SWITZERLAND",
                    "UKRAINE","UNITED KINGDOM") ~ "EUROPE",

      #AFRIKA
      Negara %in% c("ALGERIA","CAMEROON","CAPE VERDE",
                    "CONGO, THE DEMOCRATIC REPUBLIC OF THE","COTE D'IVOIRE",
                    "DJIBOUTI","EGYPT","GHANA","KENYA",
                    "LIBYAN ARAB JAMAHIRIYA","MADAGASCAR","MAURITIUS",
                    "MOROCCO","MOZAMBIQUE","NAMIBIA","NIGERIA","REUNION",
                    "SENEGAL","SEYCHELLES","SOMALIA","SOUTH AFRICA",
                    "SUDAN","TANZANIA, UNITED REPUBLIC OF","TOGO","TUNISIA") ~ "AFRICA",

      #AMERIKA UTARA
      Negara %in% c("ANGUILLA","ARUBA","BRITISH VIRGIN ISLANDS","CANADA",
                    "COSTA RICA","DOMINICA","DOMINICAN REPUBLIC","EL SALVADOR",
                    "GUADELOUPE","GUATEMALA","JAMAICA","MARTINIQUE","MEXICO", "NETHERLANDS ANTILLES",
                    "PANAMA","PUERTO RICO","SAINT KITTS AND NEVIS",
                    "SAINT VINCENT AND THE GRENADINES","SINT MAARTEN (DUTCH PART)",
                    "TURKS AND CAICOS ISLANDS","UNITED STATES OF AMERICA",
                    "VIRGIN ISLANDS, U.S.") ~ "AMERICA_UTARA",

      #AMERIKA SELATAN
      Negara %in% c("ARGENTINA","BRAZIL","CHILE","COLOMBIA",
                    "ECUADOR","PARAGUAY","PERU","URUGUAY") ~ "AMERICA_SELATAN",

      #AUSTRALIA / OSEANIA
      Negara %in% c("AUSTRALIA","FIJI","FRENCH POLYNESIA",
                    "NEW CALEDONIA","NEW ZEALAND","VANUATU",
                    "WALLIS AND FUTUNA") ~ "AUSTRALIA_OSEANIA",

      #ANTARTIKA
      Negara %in% c("FRENCH SOUTHERN TERRITORIES") ~ "ANTARTIKA"
    )
  )

data_fob %>%
  filter(is.na(benua)) %>%
  distinct(Negara)
## # A tibble: 0 × 1
## # ℹ 1 variable: Negara <chr>
data_fob
## # A tibble: 49,199 × 9
##    Tahun Kabupaten HS        Berat_Kotor Berat_Bersih Satuan    FOB Negara benua
##    <chr> <chr>     <chr>           <dbl>        <dbl> <chr>   <dbl> <chr>  <chr>
##  1 2020  BANTUL    `621600         125            0   KGM     7468  AUSTR… EURO…
##  2 2020  BANTUL    `42029290        60.5         53.5 KGM     1781. BELGI… EURO…
##  3 2020  BANTUL    `65069990      1724.        1634.  KGM     3654  BELGI… EURO…
##  4 2020  BANTUL    `48236900      1834.        1763.  KGM     4769. BELGI… EURO…
##  5 2020  BANTUL    `940360        8650         8400   KGM    12639  BELGI… EURO…
##  6 2020  BANTUL    `94035000      8332.        7599.  KGS    55354  BELGI… EURO…
##  7 2020  BANTUL    `940360        3501         3345   KGM    15273. BELGI… EURO…
##  8 2020  BANTUL    `691090        3201         2988   KGM     3854. BELGI… EURO…
##  9 2020  BANTUL    `700992         784          709   KGM     4311. BELGI… EURO…
## 10 2020  BANTUL    `94036010      4438         4128   KGM    23100  BELGI… EURO…
## # ℹ 49,189 more rows
# Agregasi data tahunan per kabupaten
data_fob <- data_fob %>%
  mutate(Tahun_num = suppressWarnings(as.numeric(Tahun)))

data_fob <- data_fob %>%
  mutate(
    Tahun_bersih = case_when(
      Tahun_num > 30000 ~ year(as.Date(Tahun_num, origin = "1899-12-30")), 
      Tahun_num <= 30000 ~ Tahun_num,                                      
      TRUE ~ NA_real_
    )
  )
data_fob <- data_fob %>%
  mutate(Tahun = Tahun_bersih) %>%
  select(-Tahun_num, -Tahun_bersih)
#Agregasi Provinsi
fob_provinsi <- data_fob %>%
  group_by(benua, Tahun) %>%
  summarise(
    total_FOB = sum(FOB, na.rm = TRUE)
  ) %>%
  ungroup()

fob_provinsi_valid <- fob_provinsi %>%
  group_by(benua) %>%
  filter(n() >= 2) %>%
  ungroup()
fob_provinsi$Tahun <- factor(fob_provinsi$Tahun,
                             levels = sort(unique(fob_provinsi$Tahun)))
ggplot(fob_provinsi,
       aes(x = Tahun, y = total_FOB, color = benua, group = benua)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(title = "Perkembangan Nilai FOB Ekspor DIY Berdasarkan Benua",
       x = "Tahun",
       y = "Total FOB") +
  theme_minimal()

# Koefisien Variasi provinsi
risiko_provinsi <- fob_provinsi_valid %>%
  group_by(benua) %>%
  summarise(
    mean_FOB = mean(total_FOB, na.rm = TRUE),
    sd_FOB   = sd(total_FOB, na.rm = TRUE),
    CV       = sd_FOB / mean_FOB
  ) %>%
  ungroup()

#Klasifikasi Risiko
risiko_provinsi <- risiko_provinsi %>%
  mutate(
    kategori = ifelse(CV <= 0.5,
                      "Risiko Rendah",
                      "Risiko Tinggi")
  )

risiko_provinsi
## # A tibble: 6 × 5
##   benua              mean_FOB    sd_FOB    CV kategori     
##   <chr>                 <dbl>     <dbl> <dbl> <chr>        
## 1 AFRICA             1637643.  1308582. 0.799 Risiko Tinggi
## 2 AMERICA_SELATAN     725697.   449118. 0.619 Risiko Tinggi
## 3 AMERICA_UTARA     52831012. 10633030. 0.201 Risiko Rendah
## 4 ASIA              76425387. 37993796. 0.497 Risiko Rendah
## 5 AUSTRALIA_OSEANIA 11230194.  5794049. 0.516 Risiko Tinggi
## 6 EUROPE            78736653. 20876316. 0.265 Risiko Rendah
# Agregasi per kabupaten
fob_tahunan <- data_fob %>%
  group_by(Kabupaten, benua, Tahun) %>%
  summarise(
    total_FOB = sum(FOB, na.rm = TRUE)
  ) %>%
  ungroup()

fob_tahunan_valid <- fob_tahunan %>%
  group_by(Kabupaten, benua) %>%
  filter(n() >= 2) %>%
  ungroup()
#Koefisien Variasi
risiko_fob <- fob_tahunan_valid %>%
  group_by(Kabupaten, benua) %>%
  summarise(
    mean_FOB = mean(total_FOB, na.rm = TRUE),
    sd_FOB   = sd(total_FOB, na.rm = TRUE),
    CV       = sd_FOB / mean_FOB
  ) %>%
  ungroup()

#Klasifikasi risiko
risiko_fob <- risiko_fob %>%
  mutate(
    kategori = ifelse(CV <= 0.5,
                      "Risiko Rendah",
                      "Risiko Tinggi")
  )

risiko_fob %>%
  arrange(Kabupaten, desc(CV))
## # A tibble: 30 × 6
##    Kabupaten   benua              mean_FOB    sd_FOB     CV kategori     
##    <chr>       <chr>                 <dbl>     <dbl>  <dbl> <chr>        
##  1 BANTUL      ASIA              36365763. 43204162. 1.19   Risiko Tinggi
##  2 BANTUL      AUSTRALIA_OSEANIA  7473977.  5022679. 0.672  Risiko Tinggi
##  3 BANTUL      EUROPE            56211552. 20494171. 0.365  Risiko Rendah
##  4 BANTUL      AFRICA             1850523.   436315. 0.236  Risiko Rendah
##  5 BANTUL      AMERICA_UTARA     21734770.  1563447. 0.0719 Risiko Rendah
##  6 BANTUL      AMERICA_SELATAN     501630.    24115. 0.0481 Risiko Rendah
##  7 GUNUNGKIDUL AMERICA_SELATAN      14825.    12513. 0.844  Risiko Tinggi
##  8 GUNUNGKIDUL AMERICA_UTARA     12434224. 10268878. 0.826  Risiko Tinggi
##  9 GUNUNGKIDUL ASIA                387301.   272962. 0.705  Risiko Tinggi
## 10 GUNUNGKIDUL AFRICA               11075.     4774. 0.431  Risiko Rendah
## # ℹ 20 more rows
# Tingkat Risiko Setiap Kabupaten
ggplot(risiko_fob,
       aes(x = reorder(benua, CV), y = CV, fill = kategori)) +
  geom_col(width = 0.5) +  # 👈 bar lebih tebal
  facet_wrap(~Kabupaten, scales = "free_y", ncol = 1) +
  coord_flip() +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
  labs(title = "Risiko Ekspor FOB Menurut Benua",
       x = "Benua",
       y = "Koefisien Variasi (CV)") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  )