# Package setup ----
# One library() call per line: when they share a line, the first `#` comment
# swallows every call that follows it and only tidyverse gets attached.
library(tidyverse)     # Data manipulation (dplyr) and visualisation (ggplot2)
library(cluster)       # Clustering algorithms
library(randomForest)  # Random Forest classification
library(caret)         # Classification model training and evaluation
library(ggplot2)       # Visualisation
library(leaflet)       # Interactive map visualisation
library(scales)        # Currency formatting in visualisations
# Load data ----
# Read the raw disaster-event CSV and print a first overview of it.
file_path <- "synthetic_disaster_events_2025 (1).csv"
data_raw <- read.csv(file_path)

# writeLines() ends the header with a newline (cat() does not), so the header
# no longer runs into the first line of the output that follows it.
writeLines("— Tampilan Data Awal —")
# str() prints on its own and returns NULL; wrapping it in print() would add a
# stray "NULL" line to the output.
str(data_raw)
print(head(data_raw))

# Count of missing values per column. The rest of the script assumes the data
# is clean; if any NA shows up here it needs imputation or removal first.
writeLines("— Jumlah Nilai Hilang per Kolom —")
print(colSums(is.na(data_raw)))
# Clean data ----
# Convert column types, derive the event year and drop the identifier column.
data_clean <- data_raw %>%
  mutate(
    # Convert `date` to Date. as.Date() on character input only tries the
    # "%Y-%m-%d" and "%Y/%m/%d" formats.
    # NOTE(review): assumes the CSV stores dates in one of those — confirm.
    date = as.Date(date),
    # Convert the categorical columns to factors.
    disaster_type = as.factor(disaster_type),
    location = as.factor(location),
    aid_provided = as.factor(aid_provided),
    is_major_disaster = as.factor(is_major_disaster)
  ) %>%
  # Feature engineering: extract the calendar year from the date.
  mutate(year = as.integer(format(date, "%Y"))) %>%
  # Drop `event_id`; it is not relevant for modelling.
  select(-event_id)

# Working copy that later steps extend (for example with a cluster column).
data_modeling <- data_clean

# writeLines() ends the header with a newline; str() prints by itself, so no
# print() wrapper (which would emit an extra "NULL").
writeLines("— Struktur Data Setelah Cleaning —")
str(data_modeling)
# K-means: choose K with the elbow method ----
writeLines("— Proses K-Means Clustering —")

# Numeric features used for clustering.
clustering_vars <- data_modeling %>%
  select(
    severity_level,
    affected_population,
    estimated_economic_loss_usd,
    response_time_hours
  )

# Standardise each column (mean 0, sd 1) so that no feature dominates the
# Euclidean distances purely because of its scale.
data_scaled <- scale(clustering_vars)

# Total within-cluster sum of squares for K = 1..15. `tot.withinss` is the sum
# of the per-cluster `withinss` values. vapply() visits K in increasing order,
# so the random numbers are drawn in the same sequence as an explicit loop.
# NOTE(review): each fit uses the default single random start (nstart = 1), so
# the curve can be noisy; consider a larger nstart.
set.seed(42)
k_values <- 1:15
wss <- vapply(
  k_values,
  function(k) kmeans(data_scaled, centers = k)$tot.withinss,
  numeric(1)
)

# Elbow plot: look for the K after which the WSS stops dropping sharply.
plot(
  k_values, wss,
  type = "b",
  xlab = "Jumlah Cluster (K)",
  ylab = "Within-Cluster Sum of Squares (WSS)",
  main = "Metode Elbow untuk K-Means"
)
# K-means: fit the final model ----
# Number of clusters for the final model.
# NOTE(review): presumably read off the elbow plot — confirm.
K_optimal <- 4

# nstart = 25 runs 25 random initialisations and keeps the best solution.
kmeans_model <- kmeans(data_scaled, centers = K_optimal, nstart = 25)

# Attach each row's cluster assignment as a factor column.
data_modeling$cluster <- as.factor(kmeans_model$cluster)

# Cluster sizes. writeLines() ends each header with a newline.
writeLines("Cluster:")
print(table(data_modeling$cluster))

# Cluster centres, expressed in the standardised feature space.
writeLines("Cluster (Scaled):")
print(kmeans_model$centers)
# Random Forest classification of is_major_disaster ----
writeLines("— Proses Random Forest Classification —")

# Drop columns that are not suitable as Random Forest predictors.
data_clf <- data_modeling %>%
  select(-date, -location, -year)

# 70/30 train/test split, stratified on the target by createDataPartition().
set.seed(42)
train_index <- createDataPartition(
  data_clf$is_major_disaster,
  p = 0.7,
  list = FALSE
)
train_data <- data_clf[train_index, ]
test_data <- data_clf[-train_index, ]

# Fit on every remaining column; importance = TRUE stores the per-variable
# importance measures that are reported at the end of this section.
rf_model <- randomForest(
  is_major_disaster ~ .,
  data = train_data,
  ntree = 500,
  importance = TRUE
)

writeLines("Model Random Forest:")
print(rf_model)

# Evaluate on the held-out rows.
predictions <- predict(rf_model, test_data)

confusion_matrix <- confusionMatrix(predictions, test_data$is_major_disaster)
writeLines("Konfusi:")
print(confusion_matrix)

# Variable importance: table and plot.
writeLines("Variabel (Feature Importance):")
print(importance(rf_model))
varImpPlot(rf_model, main = "Pentingnya Variabel untuk Klasifikasi")
# Static visualisations ----
writeLines("— Visualisasi Data —")

# 1. Number of events per disaster type, drawn as horizontal bars.
p1 <- ggplot(data_clean, aes(x = disaster_type, fill = disaster_type)) +
  geom_bar() +
  labs(
    title = "1. Distribusi Tipe Bencana",
    x = "Tipe Bencana",
    y = "Jumlah Kejadian"
  ) +
  theme_minimal() +
  coord_flip()

# 2. Severity level against estimated economic loss, coloured by the
#    major-disaster flag, with a linear trend line.
p2 <- ggplot(
  data_clean,
  aes(x = severity_level, y = estimated_economic_loss_usd)
) +
  geom_point(aes(color = is_major_disaster), alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  scale_y_continuous(labels = dollar_format(prefix = "USD")) +
  labs(
    title = "2. Severity Level vs. Estimated Economic Loss",
    x = "Tingkat Keparahan",
    y = "Kerugian Ekonomi (USD)"
  ) +
  theme_minimal()

# 3. K-means result: affected population against economic loss, by cluster.
p3 <- ggplot(
  data_modeling,
  aes(
    x = affected_population,
    y = estimated_economic_loss_usd,
    color = cluster
  )
) +
  geom_point(alpha = 0.6) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma) +
  labs(
    title = "3. Hasil K-Means Clustering",
    x = "Populasi Terdampak",
    y = "Kerugian Ekonomi (USD)"
  ) +
  theme_minimal()

# Render all three plots. Each print() sits on its own line so that a trailing
# `#` after one of them cannot disable the calls that follow.
print(p1)
print(p2)
print(p3)
# Interactive map of major disasters, coloured by cluster ----
writeLines("— Peta Interaktif (Major Disaster berdasarkan Cluster) —")

# Fixed cluster -> colour lookup, keyed by cluster label. Looking colours up by
# label stays correct when the filtered data lacks one of the clusters;
# factor(cluster, labels = <4 colours>) derives its levels from the values
# present and errors as soon as fewer than four clusters remain.
cluster_palette <- c(
  "1" = "red",
  "2" = "blue",
  "3" = "green",
  "4" = "orange"
)

map_data <- data_modeling %>%
  # Focus on major disasters only.
  # NOTE(review): `== 1` only matches if the flag is coded 0/1 — confirm
  # against the CSV.
  filter(is_major_disaster == 1) %>%
  # Popup text per event. Leaflet popups are rendered as HTML, where a bare
  # newline collapses to a space, so the fields are separated with <br>.
  mutate(
    popup_info = paste(
      "Tipe:", disaster_type, "<br>",
      "Kerugian:", dollar(estimated_economic_loss_usd), "<br>",
      "Cluster:", cluster
    )
  )

leaflet_map <- leaflet(map_data) %>%
  addTiles() %>%
  addCircles(
    lng = ~longitude,
    lat = ~latitude,
    weight = 1,
    radius = 8000,  # Radius chosen so the circles are clearly visible
    color = ~unname(cluster_palette[as.character(cluster)]),
    fillOpacity = 0.8,
    popup = ~popup_info
  ) %>%
  addLegend(
    "bottomright",
    colors = unname(cluster_palette),
    labels = paste("Cluster", names(cluster_palette)),
    title = "Cluster Bencana (Mayor)"
  )

print(leaflet_map)