Klasifikasi Kategori Cuaca LDA dan MLR

# Baca data
data <- read.csv("bmkg_cuaca.csv")
head(data)

##      Tanggal   Tn   Tx Tavg RH_avg   RR  ss ff_x ddd_x ff_avg ddd_car
## 1 01-01-2024 26.0 35.0 29.9     83  3.8 6.5    3    60      1      C 
## 2 02-01-2024 26.2 35.2 30.6     78 21.2 6.7    4   160      2      C 
## 3 03-01-2024 27.0 35.8 27.8     90  0.0 8.9    4    60      1      C 
## 4 04-01-2024 25.8 34.4 29.0     82  1.5 1.9    4    70      2      W 
## 5 05-01-2024 25.5 33.4 28.8     83  2.2 6.0    5   110      2      W 
## 6 06-01-2024 25.4 31.3 28.0     87  0.0 3.8    3    90      1      C 
##   Kategori_Cuaca
## 1   Hujan Sedang
## 2    Hujan Lebat
## 3  Cerah Berawan
## 4        Gerimis
## 5        Gerimis
## 6        Berawan

#-------------PREPROCESSING---------

# Cek jumlah NA per kolom
colSums(is.na(data))

##        Tanggal             Tn             Tx           Tavg         RH_avg 
##              0             12              3              1              1 
##             RR             ss           ff_x          ddd_x         ff_avg 
##             48              4              0              0              0 
##        ddd_car Kategori_Cuaca 
##              0              0

# Gantilah nilai 8888 menjadi NA, kemudian hapus baris yang mengandung NA
data[data == 8888] <- NA
data <- na.omit(data)

# di kolom ddd_car ternyata ada yang Ganda karena Spasi
data$ddd_car <- trimws(data$ddd_car)

# Identifikasi kolom numerik (selain Tanggal dan Kategori_Cuaca)
numeric_cols <- names(data)[sapply(data, is.numeric) & !(names(data) %in% c("Tanggal"))]

# Hapus baris jika semua kolom numeriknya NA
data <- data[!apply(data[numeric_cols], 1, function(x) all(is.na(x))), ]

# Fungsi untuk mengisi NA dengan interpolasi linear antar baris
interpolate_na <- function(x) {
  approx(1:length(x), x, 1:length(x), rule = 2)$y
}

# Terapkan interpolasi untuk setiap kolom numerik
data[numeric_cols] <- lapply(data[numeric_cols], interpolate_na)
# Tampilkan data hasil imputasi
head(data)

##      Tanggal   Tn   Tx Tavg RH_avg   RR  ss ff_x ddd_x ff_avg ddd_car
## 1 01-01-2024 26.0 35.0 29.9     83  3.8 6.5    3    60      1       C
## 2 02-01-2024 26.2 35.2 30.6     78 21.2 6.7    4   160      2       C
## 3 03-01-2024 27.0 35.8 27.8     90  0.0 8.9    4    60      1       C
## 4 04-01-2024 25.8 34.4 29.0     82  1.5 1.9    4    70      2       W
## 5 05-01-2024 25.5 33.4 28.8     83  2.2 6.0    5   110      2       W
## 6 06-01-2024 25.4 31.3 28.0     87  0.0 3.8    3    90      1       C
##   Kategori_Cuaca
## 1   Hujan Sedang
## 2    Hujan Lebat
## 3  Cerah Berawan
## 4        Gerimis
## 5        Gerimis
## 6        Berawan

# Cek jumlah NA per kolom
colSums(is.na(data))

##        Tanggal             Tn             Tx           Tavg         RH_avg 
##              0              0              0              0              0 
##             RR             ss           ff_x          ddd_x         ff_avg 
##              0              0              0              0              0 
##        ddd_car Kategori_Cuaca 
##              0              0

# Hitung matriks korelasi untuk kolom numerik saja
cor_matrix <- cor(data[numeric_cols], use = "complete.obs")
# Tampilkan matriks korelasi (bisa di-plot juga nanti)
print(round(cor_matrix, 2))

##           Tn    Tx  Tavg RH_avg    RR    ss  ff_x ddd_x ff_avg
## Tn      1.00  0.32  0.54   0.16 -0.14 -0.01 -0.15  0.10  -0.13
## Tx      0.32  1.00  0.74  -0.45 -0.05  0.25  0.11 -0.07   0.06
## Tavg    0.54  0.74  1.00  -0.51 -0.17  0.28  0.01 -0.23   0.12
## RH_avg  0.16 -0.45 -0.51   1.00  0.37 -0.50 -0.28  0.39  -0.46
## RR     -0.14 -0.05 -0.17   0.37  1.00 -0.33  0.02  0.18  -0.24
## ss     -0.01  0.25  0.28  -0.50 -0.33  1.00  0.13 -0.27   0.22
## ff_x   -0.15  0.11  0.01  -0.28  0.02  0.13  1.00  0.19   0.62
## ddd_x   0.10 -0.07 -0.23   0.39  0.18 -0.27  0.19  1.00  -0.03
## ff_avg -0.13  0.06  0.12  -0.46 -0.24  0.22  0.62 -0.03   1.00

# Visualisasi korelasi menggunakan heatmap (opsional, butuh package corrplot)
if(!require(corrplot)) install.packages("corrplot")

## Loading required package: corrplot

## corrplot 0.95 loaded

library(corrplot)
corrplot(cor_matrix, method = "color", type = "upper", tl.cex=0.7, tl.col="black", diag=FALSE)

#hapus Tn, Tx karena sudah diwakili Tavg. Hapus ff_x karena sudah diwakili ff_avg (redudant)
columns_to_drop <- c("Tn", "Tx", "ff_x", "Tanggal")
data <- data[, !(names(data) %in% columns_to_drop)]
head(data)

##   Tavg RH_avg   RR  ss ddd_x ff_avg ddd_car Kategori_Cuaca
## 1 29.9     83  3.8 6.5    60      1       C   Hujan Sedang
## 2 30.6     78 21.2 6.7   160      2       C    Hujan Lebat
## 3 27.8     90  0.0 8.9    60      1       C  Cerah Berawan
## 4 29.0     82  1.5 1.9    70      2       W        Gerimis
## 5 28.8     83  2.2 6.0   110      2       W        Gerimis
## 6 28.0     87  0.0 3.8    90      1       C        Berawan

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Ubah nilai ddd_car ke factor dengan label deskriptif
data <- data %>%
  mutate(
    ddd_car = factor(ddd_car, levels = c("C", "E", "NE", "NW", "S", "SW", "W"),
                     labels = c("Tenang (Calm)", "Timur", "Timur Laut", "Barat Laut", 
                                "Selatan", "Barat Daya", "Barat"))
  )
write.csv(data, "data_bmkg_preprocessed.csv", row.names = FALSE)
data <- read.csv("data_bmkg_preprocessed.csv")


#EDA

# Pastikan packages terinstal dan diload
packages <- c("tidyverse", "GGally", "corrplot", "gridExtra")
new.packages <- packages[!(packages %in% installed.packages())]
if(length(new.packages)) install.packages(new.packages)
lapply(packages, library, character.only = TRUE)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot"  "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot" 
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[3]]
##  [1] "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot" 
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "gridExtra" "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "corrplot"  "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"

# Gunakan data hasil preprocessing
data_clean <- data  # data hasil preprocessing dan imputasi NA

# Cek struktur data
str(data_clean)

## 'data.frame':    217 obs. of  8 variables:
##  $ Tavg          : num  29.9 30.6 27.8 29 28.8 28 27.7 27.7 28.7 28.5 ...
##  $ RH_avg        : int  83 78 90 82 83 87 88 89 86 83 ...
##  $ RR            : num  3.8 21.2 0 1.5 2.2 0 2.1 78.1 26.5 3.3 ...
##  $ ss            : num  6.5 6.7 8.9 1.9 6 3.8 2 0.6 4.5 5 ...
##  $ ddd_x         : int  60 160 60 70 110 90 340 200 230 190 ...
##  $ ff_avg        : int  1 2 1 2 2 1 1 2 2 1 ...
##  $ ddd_car       : chr  "Tenang (Calm)" "Tenang (Calm)" "Tenang (Calm)" "Barat" ...
##  $ Kategori_Cuaca: chr  "Hujan Sedang" "Hujan Lebat" "Cerah Berawan" "Gerimis" ...

# Ringkasan statistik deskriptif
summary(data_clean)

##       Tavg           RH_avg            RR               ss        
##  Min.   :25.90   Min.   :53.00   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:28.00   1st Qu.:72.00   1st Qu.: 0.000   1st Qu.: 5.200  
##  Median :28.90   Median :78.00   Median : 0.000   Median : 7.900  
##  Mean   :28.84   Mean   :77.02   Mean   : 6.572   Mean   : 6.469  
##  3rd Qu.:29.70   3rd Qu.:82.00   3rd Qu.: 4.500   3rd Qu.: 8.000  
##  Max.   :31.00   Max.   :96.00   Max.   :84.500   Max.   :10.000  
##      ddd_x           ff_avg        ddd_car          Kategori_Cuaca    
##  Min.   : 30.0   Min.   :1.000   Length:217         Length:217        
##  1st Qu.: 80.0   1st Qu.:2.000   Class :character   Class :character  
##  Median :100.0   Median :2.000   Mode  :character   Mode  :character  
##  Mean   :128.5   Mean   :2.442                                        
##  3rd Qu.:150.0   3rd Qu.:3.000                                        
##  Max.   :340.0   Max.   :4.000

# Jumlah observasi per kategori cuaca
table(data_clean$Kategori_Cuaca)

## 
##       Berawan         Cerah Cerah Berawan       Gerimis   Hujan Lebat 
##             6            60            60            28            24 
##  Hujan Sedang 
##            39

# Visualisasi distribusi kategori cuaca
ggplot(data_clean, aes(x = Kategori_Cuaca, fill = Kategori_Cuaca)) +
  geom_bar() +
  labs(title = "Distribusi Kategori Cuaca", x = "Kategori", y = "Jumlah") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplot suhu rata-rata per kategori
ggplot(data_clean, aes(x = Kategori_Cuaca, y = Tavg, fill = Kategori_Cuaca)) +
  geom_boxplot() +
  labs(title = "Boxplot Suhu Rata-rata per Kategori Cuaca") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Korelasi antar variabel numerik
numeric_only <- data_clean[, sapply(data_clean, is.numeric)]

cor_matrix <- cor(numeric_only, use = "complete.obs")
corrplot(cor_matrix, method = "color", type = "upper", 
         tl.cex = 0.7, tl.col = "black", diag = FALSE)

#-------------UJI ASUMSI---------

#Uji linearitas dengan scatterplott matrix
# Filter hanya kolom numerik
numeric_data <- data[sapply(data, is.numeric)]
# Plot scatterplot matrix
pairs(numeric_data, main = "Scatterplot Matrix untuk Uji Linearitas", pch = 21, bg = "lightblue")

#Berdasarkan hasil scatterplot matrix, diketahui bahwa beberapa pasangan variabel seperti Tavg dengan RH_avg atau ss memperlihatkan pola sebaran titik yang cukup linear. Namun, sebagian besar pasangan variabel lainnya menunjukkan hubungan non-linear atau tidak membentuk pola linear yang jelas. Hal ini menunjukkan bahwa asumsi linearitas hanya terpenuhi sebagian. Oleh karena itu, jika model linier akan digunakan, perlu dilakukan transformasi data atau pemilihan fitur yang tepat.


#Uji Outlier (dengan Mahalanobis Distance)
# Pilih hanya kolom numerik untuk analisis Mahalanobis
numeric_data <- data[sapply(data, is.numeric)]
# Hitung Mahalanobis distance
mahal_dist <- mahalanobis(numeric_data, colMeans(numeric_data), cov(numeric_data))
# Hitung batas cutoff outlier (dengan derajat bebas = jumlah kolom)
cutoff <- qchisq(0.975, df = ncol(numeric_data))
# Identifikasi baris yang merupakan outlier
outliers <- which(mahal_dist > cutoff)
# Tampilkan jumlah dan indeks outlier
length(outliers)

## [1] 10

print(outliers)

##  [1]   8  24  31  37  47  54  58  76  94 162

# Visualisasi
plot(mahal_dist, type = "h", main = "Mahalanobis Distance", ylab = "Distance")
abline(h = cutoff, col = "red", lty = 2)

#output menunjukkan terdapat 12 observasi (baris data) yang terindikasi sebagai outlier multivariat yang nantinya akan dihapus karena pada multinomial regression outliers sangat mempengaruhi model 
data_clean <- data[-outliers, ]
# Cek hasilnya
nrow(data)       # jumlah baris sebelum

## [1] 217

nrow(data_clean) # jumlah baris sesudah

## [1] 207

head(data_clean)

##   Tavg RH_avg   RR  ss ddd_x ff_avg       ddd_car Kategori_Cuaca
## 1 29.9     83  3.8 6.5    60      1 Tenang (Calm)   Hujan Sedang
## 2 30.6     78 21.2 6.7   160      2 Tenang (Calm)    Hujan Lebat
## 3 27.8     90  0.0 8.9    60      1 Tenang (Calm)  Cerah Berawan
## 4 29.0     82  1.5 1.9    70      2         Barat        Gerimis
## 5 28.8     83  2.2 6.0   110      2         Barat        Gerimis
## 6 28.0     87  0.0 3.8    90      1 Tenang (Calm)        Berawan

# Uji independensi (autokorelasi) 
# Pastikan package ini terinstal
if(!require(lmtest)) install.packages("lmtest")

## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(lmtest)
# Buat salinan data agar aman
data_indep <- data_clean
# Ubah Kategori_Cuaca jadi numerik (sementara)
data_indep$Kategori_Cuaca_Num <- as.numeric(as.factor(data_indep$Kategori_Cuaca))
# Buat model regresi linier (dummy)
model_dw <- lm(Kategori_Cuaca_Num ~ ., data = data_indep[, sapply(data_indep, is.numeric)])


# Uji Durbin-Watson
dw_result <- dwtest(model_dw)
print(dw_result)

## 
##  Durbin-Watson test
## 
## data:  model_dw
## DW = 1.946, p-value = 0.281
## alternative hypothesis: true autocorrelation is greater than 0

#Nilai DW mendekati 2 → Tidak ada autokorelasi (✅ memenuhi asumsi independensi).

# Uji Multikolinearitas – dengan VIF (Variance Inflation Factor)
# Pastikan package car sudah terinstal
if (!require(car)) install.packages("car")

## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(car)
# Buat model regresi linier dengan target dummy (misalnya Tavg atau variabel target sesungguhnya)
# Bisa ganti "Tavg" dengan variabel target kamu yang sebenarnya
model_vif <- lm(Kategori_Cuaca_Num ~ ., data = data_indep[, sapply(data_indep, is.numeric)])
# Hitung nilai VIF
vif_values <- vif(model_vif)
print(vif_values)

##     Tavg   RH_avg       RR       ss    ddd_x   ff_avg 
## 1.341802 2.298771 1.223165 1.312329 1.221417 1.398698

# VIF > 5 menunjukkan kemungkinan multikolinearitas. VIF > 10 menunjukkan multikolinearitas serius
# Asumsi tidak ada multikolinearitas terpenuhi.

#Uji Normalitas Multivariat – dengan Mardia Test
# Pastikan package MVN sudah terinstal
if (!require(MVN)) install.packages("MVN")

## Loading required package: MVN

library(MVN)
# Ambil hanya kolom numerik
numeric_data <- data[sapply(data_clean, is.numeric)]
# Lakukan Mardia Test
mardia_result <- mvn(data = numeric_data, mvnTest = "mardia")
print(mardia_result$multivariateNormality)

##              Test       Statistic               p value Result
## 1 Mardia Skewness 639.62833983284 5.49409057142149e-100     NO
## 2 Mardia Kurtosis 7.3856030405196  1.51656465163796e-13     NO
## 3             MVN            <NA>                  <NA>     NO

#diperoleh p-value yang jauh lebih kecil dari 0.05. Hal ini menunjukkan bahwa: Data tidak memenuhi asumsi normalitas multivariat.

#Uji normalitas dengan kolmogorov spirnov
numeric_data <- data[sapply(data, is.numeric)]
# Lakukan uji Kolmogorov–Smirnov pada setiap kolom
ks_results <- lapply(numeric_data, function(col) {
  # Normalisasi kolom jika perlu
  col <- scale(col)
  ks.test(col, "pnorm")
})

## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test

# Tampilkan hasil p-value tiap kolom
for (i in seq_along(ks_results)) {
  cat("Kolom:", names(ks_results)[i], "\n")
  print(ks_results[[i]]$p.value)
  cat("\n")
}

## Kolom: Tavg 
## [1] 0.3006132
## 
## Kolom: RH_avg 
## [1] 0.1609301
## 
## Kolom: RR 
## [1] 7.764448e-21
## 
## Kolom: ss 
## [1] 2.969106e-10
## 
## Kolom: ddd_x 
## [1] 3.667445e-16
## 
## Kolom: ff_avg 
## [1] 7.298252e-16

#uji homogenitas varians dengan barlet test
# Misalnya kita ingin menguji homogenitas varians dari setiap variabel numerik terhadap kategori cuaca
for (col in names(data_clean)[sapply(data_clean, is.numeric)]) {
  cat("Bartlett Test untuk variabel:", col, "\n")
  print(bartlett.test(data_clean[[col]] ~ data_clean$Kategori_Cuaca))
  cat("\n")
}

## Bartlett Test untuk variabel: Tavg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 7.1928, df = 5, p-value = 0.2067
## 
## 
## Bartlett Test untuk variabel: RH_avg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 4.9165, df = 5, p-value = 0.4262
## 
## 
## Bartlett Test untuk variabel: RR 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = Inf, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ss 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 227.32, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ddd_x 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 107.24, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ff_avg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 6.2578, df = 5, p-value = 0.2819

#----------------MLR---------------
library(nnet)
library(dplyr)      # untuk %>% dan data manipulation
library(broom)      # untuk tidy()
library(kableExtra) # untuk kable_styling()

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(knitr)      # untuk kable()
fit_full <- multinom(Kategori_Cuaca ~ RR+Tavg+RH_avg+ss+ddd_x+ff_avg+ddd_car, data_clean)

## # weights:  84 (65 variable)
## initial  value 370.894210 
## iter  10 value 234.290778
## iter  20 value 194.790984
## iter  30 value 128.942975
## iter  40 value 80.217288
## iter  50 value 66.761095
## iter  60 value 56.287683
## iter  70 value 48.129583
## iter  80 value 45.881164
## iter  90 value 44.716892
## iter 100 value 42.517399
## final  value 42.517399 
## stopped after 100 iterations

#Interpret for the relative log odds
summary(fit_full)

## Warning in sqrt(diag(vc)): NaNs produced

## Call:
## multinom(formula = Kategori_Cuaca ~ RR + Tavg + RH_avg + ss + 
##     ddd_x + ff_avg + ddd_car, data = data_clean)
## 
## Coefficients:
##               (Intercept)        RR       Tavg     RH_avg        ss      ddd_x
## Cerah          114.124932 16.813176 -6.2656644 -1.5409423 10.667590 0.07896792
## Cerah Berawan  153.639540  8.282869 -5.4308207 -0.7390146  7.718777 0.06919883
## Gerimis       -315.377722 32.277232  3.8578633  1.6782064  5.361079 0.09871998
## Hujan Lebat      4.506905 48.048530  2.8020820 -3.7625872 -1.638738 0.18739507
## Hujan Sedang  -118.267389 39.014629 -0.1559959  0.7540256  5.034844 0.09464518
##                    ff_avg ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Cerah           7.1307930         125.99075         32.739433     -149.19582
## Cerah Berawan   7.1810570          15.66337         45.892377      104.53938
## Gerimis        10.1337177          30.16470          1.856387      -44.28981
## Hujan Lebat   -11.8834790          53.45077         28.432923       -4.54002
## Hujan Sedang    0.4463558          34.98387       -117.649474       90.04106
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Cerah                    96.026124    105.45896         150.61396
## Cerah Berawan            -2.600540      7.99057          52.65552
## Gerimis                   5.495606     14.12862          60.14670
## Hujan Lebat               1.041417    -25.85826         118.42329
## Hujan Sedang             -4.208387     15.22438          63.03140
## 
## Std. Errors:
##               (Intercept)       RR     Tavg    RH_avg       ss      ddd_x
## Cerah          6.94147906 3.724792 1.211786 0.6885924 2.324110 0.10420114
## Cerah Berawan  6.82656632 3.897549 1.214907 0.6710433 2.197607 0.10240229
## Gerimis        0.49424248 2.289549 1.249003 0.6015901 2.044282 0.09274115
## Hujan Lebat    0.02965934 2.306960 2.568362 1.0364578 1.371542 0.19998336
## Hujan Sedang   0.28399505 2.543429 1.615863 0.6986036 2.144737 0.09567693
##                  ff_avg ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Cerah         3.3253663      4.834473e+00      1.926981e-11   8.843567e-12
## Cerah Berawan 3.2921813      2.570777e-06               NaN   8.125216e-13
## Gerimis       3.2338605      4.088302e+00               NaN   1.540735e-15
## Hujan Lebat   0.1527412      4.679369e-03      1.905820e-26            NaN
## Hujan Sedang  4.8699014      6.676690e+00      4.928367e-72   1.284103e-25
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Cerah                    4.7653406    4.2289338      2.104957e+00
## Cerah Berawan            4.1520844    4.9296845      2.560951e+00
## Gerimis                  5.6522086    5.0664194      2.970325e+00
## Hujan Lebat              0.7200695    0.1516557      6.515452e-51
## Hujan Sedang             8.6659637    6.1156506      5.530772e-01
## 
## Residual Deviance: 85.0348 
## AIC: 215.0348

#disini ada warning karena harusnya antara RR, ss, dan ddd_car salah satu didrop karena multikoliniaritas tingggi
#or
tidy(fit_full, conf.int = TRUE) %>%
  kable() %>%
  kable_styling("basic", full_width = FALSE)

## Warning in sqrt(diag(vc)): NaNs produced

## Warning in sqrt(diag(vcov(object))): NaNs produced

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
Cerah	(Intercept)	114.1249322	6.9414791	1.644101e+01	0.0000000	100.5198833	127.7299812
Cerah	RR	16.8131757	3.7247920	4.513856e+00	0.0000064	9.5127176	24.1136338
Cerah	Tavg	-6.2656644	1.2117857	-5.170604e+00	0.0000002	-8.6407208	-3.8906080
Cerah	RH_avg	-1.5409423	0.6885924	-2.237815e+00	0.0252331	-2.8905587	-0.1913260
Cerah	ss	10.6675904	2.3241100	4.589968e+00	0.0000044	6.1124185	15.2227622
Cerah	ddd_x	0.0789679	0.1042011	7.578412e-01	0.4485460	-0.1252626	0.2831984
Cerah	ff_avg	7.1307930	3.3253663	2.144363e+00	0.0320038	0.6131949	13.6483912
Cerah	ddd_carBarat Daya	125.9907463	4.8344728	2.606091e+01	0.0000000	116.5153537	135.4661389
Cerah	ddd_carBarat Laut	32.7394329	0.0000000	1.699001e+12	0.0000000	32.7394329	32.7394329
Cerah	ddd_carSelatan	-149.1958217	0.0000000	-1.687055e+13	0.0000000	-149.1958217	-149.1958217
Cerah	ddd_carTenang (Calm)	96.0261239	4.7653406	2.015095e+01	0.0000000	86.6862279	105.3660199
Cerah	ddd_carTimur	105.4589572	4.2289338	2.493748e+01	0.0000000	97.1703992	113.7475152
Cerah	ddd_carTimur Laut	150.6139627	2.1049568	7.155204e+01	0.0000000	146.4883232	154.7396022
Cerah Berawan	(Intercept)	153.6395403	6.8265663	2.250612e+01	0.0000000	140.2597161	167.0193644
Cerah Berawan	RR	8.2828691	3.8975489	2.125148e+00	0.0335742	0.6438138	15.9219245
Cerah Berawan	Tavg	-5.4308207	1.2149067	-4.470154e+00	0.0000078	-7.8119941	-3.0496473
Cerah Berawan	RH_avg	-0.7390146	0.6710433	-1.101292e+00	0.2707696	-2.0542353	0.5762061
Cerah Berawan	ss	7.7187768	2.1976067	3.512356e+00	0.0004442	3.4115468	12.0260069
Cerah Berawan	ddd_x	0.0691988	0.1024023	6.757547e-01	0.4991964	-0.1315060	0.2699036
Cerah Berawan	ff_avg	7.1810570	3.2921813	2.181246e+00	0.0291652	0.7285002	13.6336138
Cerah Berawan	ddd_carBarat Daya	15.6633684	0.0000026	6.092853e+06	0.0000000	15.6633634	15.6633734
Cerah Berawan	ddd_carBarat Laut	45.8923770	NaN	NaN	NaN	NaN	NaN
Cerah Berawan	ddd_carSelatan	104.5393839	0.0000000	1.286604e+14	0.0000000	104.5393839	104.5393839
Cerah Berawan	ddd_carTenang (Calm)	-2.6005395	4.1520844	-6.263215e-01	0.5311041	-10.7384754	5.5373963
Cerah Berawan	ddd_carTimur	7.9905700	4.9296845	1.620909e+00	0.1050372	-1.6714341	17.6525741
Cerah Berawan	ddd_carTimur Laut	52.6555244	2.5609508	2.056093e+01	0.0000000	47.6361531	57.6748957
Gerimis	(Intercept)	-315.3777219	0.4942425	-6.381032e+02	0.0000000	-316.3464194	-314.4090244
Gerimis	RR	32.2772319	2.2895492	1.409764e+01	0.0000000	27.7897978	36.7646659
Gerimis	Tavg	3.8578633	1.2490025	3.088755e+00	0.0020100	1.4098633	6.3058633
Gerimis	RH_avg	1.6782064	0.6015901	2.789618e+00	0.0052770	0.4991115	2.8573013
Gerimis	ss	5.3610786	2.0442823	2.622475e+00	0.0087294	1.3543590	9.3677983
Gerimis	ddd_x	0.0987200	0.0927411	1.064468e+00	0.2871168	-0.0830493	0.2804893
Gerimis	ff_avg	10.1337177	3.2338605	3.133629e+00	0.0017266	3.7954675	16.4719679
Gerimis	ddd_carBarat Daya	30.1647036	4.0883021	7.378296e+00	0.0000000	22.1517786	38.1776285
Gerimis	ddd_carBarat Laut	1.8563866	NaN	NaN	NaN	NaN	NaN
Gerimis	ddd_carSelatan	-44.2898146	0.0000000	-2.874590e+16	0.0000000	-44.2898146	-44.2898146
Gerimis	ddd_carTenang (Calm)	5.4956064	5.6522086	9.722936e-01	0.3309045	-5.5825188	16.5737316
Gerimis	ddd_carTimur	14.1286223	5.0664194	2.788680e+00	0.0052923	4.1986227	24.0586219
Gerimis	ddd_carTimur Laut	60.1467000	2.9703254	2.024920e+01	0.0000000	54.3249692	65.9684308
Hujan Lebat	(Intercept)	4.5069046	0.0296593	1.519556e+02	0.0000000	4.4487734	4.5650359
Hujan Lebat	RR	48.0485304	2.3069598	2.082764e+01	0.0000000	43.5269722	52.5700886
Hujan Lebat	Tavg	2.8020820	2.5683620	1.091000e+00	0.2752730	-2.2318149	7.8359790
Hujan Lebat	RH_avg	-3.7625872	1.0364578	-3.630237e+00	0.0002832	-5.7940072	-1.7311672
Hujan Lebat	ss	-1.6387379	1.3715424	-1.194814e+00	0.2321597	-4.3269115	1.0494357
Hujan Lebat	ddd_x	0.1873951	0.1999834	9.370533e-01	0.3487311	-0.2045651	0.5793552
Hujan Lebat	ff_avg	-11.8834790	0.1527412	-7.780139e+01	0.0000000	-12.1828463	-11.5841117
Hujan Lebat	ddd_carBarat Daya	53.4507688	0.0046794	1.142264e+04	0.0000000	53.4415974	53.4599402
Hujan Lebat	ddd_carBarat Laut	28.4329233	0.0000000	1.491899e+27	0.0000000	28.4329233	28.4329233
Hujan Lebat	ddd_carSelatan	-4.5400196	NaN	NaN	NaN	NaN	NaN
Hujan Lebat	ddd_carTenang (Calm)	1.0414167	0.7200695	1.446272e+00	0.1481008	-0.3698937	2.4527270
Hujan Lebat	ddd_carTimur	-25.8582632	0.1516557	-1.705064e+02	0.0000000	-26.1555029	-25.5610235
Hujan Lebat	ddd_carTimur Laut	118.4232904	0.0000000	1.817576e+52	0.0000000	118.4232904	118.4232904
Hujan Sedang	(Intercept)	-118.2673890	0.2839951	-4.164417e+02	0.0000000	-118.8240091	-117.7107690
Hujan Sedang	RR	39.0146291	2.5434295	1.533938e+01	0.0000000	34.0295989	43.9996592
Hujan Sedang	Tavg	-0.1559959	1.6158634	-9.654030e-02	0.9230915	-3.3230298	3.0110381
Hujan Sedang	RH_avg	0.7540256	0.6986036	1.079333e+00	0.2804395	-0.6152123	2.1232636
Hujan Sedang	ss	5.0348442	2.1447370	2.347535e+00	0.0188981	0.8312369	9.2384515
Hujan Sedang	ddd_x	0.0946452	0.0956769	9.892163e-01	0.3225573	-0.0928782	0.2821685
Hujan Sedang	ff_avg	0.4463558	4.8699014	9.165600e-02	0.9269713	-9.0984754	9.9911871
Hujan Sedang	ddd_carBarat Daya	34.9838733	6.6766896	5.239703e+00	0.0000002	21.8978022	48.0699444
Hujan Sedang	ddd_carBarat Laut	-117.6494737	0.0000000	-2.387190e+73	0.0000000	-117.6494737	-117.6494737
Hujan Sedang	ddd_carSelatan	90.0410587	0.0000000	7.011979e+26	0.0000000	90.0410587	90.0410587
Hujan Sedang	ddd_carTenang (Calm)	-4.2083873	8.6659637	-4.856225e-01	0.6272348	-21.1933641	12.7765894
Hujan Sedang	ddd_carTimur	15.2243801	6.1156506	2.489413e+00	0.0127954	3.2379251	27.2108351
Hujan Sedang	ddd_carTimur Laut	63.0314024	0.5530772	1.139649e+02	0.0000000	61.9473910	64.1154137

# Simpan hasil tidy model ke dalam CSV
output_tidy <- tidy(fit_full, conf.int = TRUE)

## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced

write.csv(output_tidy, "Interpret for the relative log odds.csv", row.names = FALSE)
#Interpret for the relative risk ratios----
exp(coef(fit_full))

##                 (Intercept)           RR        Tavg     RH_avg           ss
## Cerah          3.662927e+49 2.003870e+07  0.00190045 0.21417918 42941.343935
## Cerah Berawan  5.306456e+66 3.955527e+03  0.00437950 0.47758430  2250.205525
## Gerimis       1.079433e-137 1.041894e+14 47.36404000 5.35594073   212.954524
## Hujan Lebat    9.064081e+01 7.365659e+20 16.47892087 0.02322358     0.194225
## Hujan Sedang   4.336362e-52 8.786949e+16  0.85556272 2.12553947   153.675646
##                  ddd_x       ff_avg ddd_carBarat Daya ddd_carBarat Laut
## Cerah         1.082170 1.249868e+03      5.212978e+54      1.654074e+14
## Cerah Berawan 1.071649 1.314297e+03      6.346210e+06      8.527193e+19
## Gerimis       1.103757 2.517780e+04      1.259982e+13      6.400567e+00
## Hujan Lebat   1.206104 6.903521e-06      1.634459e+23      2.229778e+12
## Hujan Sedang  1.099269 1.562607e+00      1.560641e+15      8.044199e-52
##               ddd_carSelatan ddd_carTenang (Calm) ddd_carTimur
## Cerah           1.603533e-65         5.053773e+41 6.313107e+45
## Cerah Berawan   2.516967e+45         7.423352e-02 2.952980e+03
## Gerimis         5.823417e-20         2.436192e+02 1.367674e+06
## Hujan Lebat     1.067320e-02         2.833228e+00 5.887067e-12
## Hujan Sedang    1.271554e+39         1.487033e-02 4.091328e+06
##               ddd_carTimur Laut
## Cerah              2.575211e+65
## Cerah Berawan      7.379105e+22
## Gerimis            1.322452e+26
## Hujan Lebat        2.695141e+51
## Hujan Sedang       2.366956e+27

#or
tidy(fit_full, conf.int = TRUE, exponentiate = TRUE) %>%
  kable() %>%
  kable_styling("basic", full_width = FALSE)

## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
Cerah	(Intercept)	3.662927e+49	6.9414791	1.644101e+01	0.0000000	4.520960e+43	2.967741e+55
Cerah	RR	2.003870e+07	3.7247920	4.513856e+00	0.0000064	1.353072e+04	2.967687e+10
Cerah	Tavg	1.900500e-03	1.2117857	-5.170604e+00	0.0000002	1.768000e-04	2.043290e-02
Cerah	RH_avg	2.141792e-01	0.6885924	-2.237815e+00	0.0252331	5.554520e-02	8.258633e-01
Cerah	ss	4.294134e+04	2.3241100	4.589968e+00	0.0000044	4.514292e+02	4.084714e+06
Cerah	ddd_x	1.082170e+00	0.1042011	7.578412e-01	0.4485460	8.822652e-01	1.327368e+00
Cerah	ff_avg	1.249868e+03	3.3253663	2.144363e+00	0.0320038	1.846321e+00	8.460986e+05
Cerah	ddd_carBarat Daya	5.212978e+54	4.8344728	2.606091e+01	0.0000000	3.999219e+50	6.795112e+58
Cerah	ddd_carBarat Laut	1.654074e+14	0.0000000	1.699001e+12	0.0000000	1.654074e+14	1.654074e+14
Cerah	ddd_carSelatan	0.000000e+00	0.0000000	-1.687055e+13	0.0000000	0.000000e+00	0.000000e+00
Cerah	ddd_carTenang (Calm)	5.053773e+41	4.7653406	2.015095e+01	0.0000000	4.439667e+37	5.752823e+45
Cerah	ddd_carTimur	6.313107e+45	4.2289338	2.493748e+01	0.0000000	1.586968e+42	2.511413e+49
Cerah	ddd_carTimur Laut	2.575211e+65	2.1049568	7.155204e+01	0.0000000	4.159781e+63	1.594246e+67
Cerah Berawan	(Intercept)	5.306456e+66	6.8265663	2.250612e+01	0.0000000	8.203908e+60	3.432324e+72
Cerah Berawan	RR	3.955527e+03	3.8975489	2.125148e+00	0.0335742	1.903727e+00	8.218716e+06
Cerah Berawan	Tavg	4.379500e-03	1.2149067	-4.470154e+00	0.0000078	4.048000e-04	4.737560e-02
Cerah Berawan	RH_avg	4.775843e-01	0.6710433	-1.101292e+00	0.2707696	1.281908e-01	1.779275e+00
Cerah Berawan	ss	2.250206e+03	2.1976067	3.512356e+00	0.0004442	3.031209e+01	1.670431e+05
Cerah Berawan	ddd_x	1.071649e+00	0.1024023	6.757547e-01	0.4991964	8.767740e-01	1.309838e+00
Cerah Berawan	ff_avg	1.314297e+03	3.2921813	2.181246e+00	0.0291652	2.071971e+00	8.336874e+05
Cerah Berawan	ddd_carBarat Daya	6.346210e+06	0.0000026	6.092853e+06	0.0000000	6.346178e+06	6.346242e+06
Cerah Berawan	ddd_carBarat Laut	8.527193e+19	NaN	NaN	NaN	NaN	NaN
Cerah Berawan	ddd_carSelatan	2.516967e+45	0.0000000	1.286604e+14	0.0000000	2.516967e+45	2.516967e+45
Cerah Berawan	ddd_carTenang (Calm)	7.423350e-02	4.1520844	-6.263215e-01	0.5311041	2.170000e-05	2.540158e+02
Cerah Berawan	ddd_carTimur	2.952980e+03	4.9296845	1.620909e+00	0.1050372	1.879773e-01	4.638905e+07
Cerah Berawan	ddd_carTimur Laut	7.379105e+22	2.5609508	2.056093e+01	0.0000000	4.876615e+20	1.116578e+25
Gerimis	(Intercept)	0.000000e+00	0.4942425	-6.381032e+02	0.0000000	0.000000e+00	0.000000e+00
Gerimis	RR	1.041894e+14	2.2895492	1.409764e+01	0.0000000	1.172076e+12	9.261717e+15
Gerimis	Tavg	4.736404e+01	1.2490025	3.088755e+00	0.0020100	4.095396e+00	5.477743e+02
Gerimis	RH_avg	5.355941e+00	0.6015901	2.789618e+00	0.0052770	1.647257e+00	1.741447e+01
Gerimis	ss	2.129545e+02	2.0442823	2.622475e+00	0.0087294	3.874277e+00	1.170532e+04
Gerimis	ddd_x	1.103757e+00	0.0927411	1.064468e+00	0.2871168	9.203057e-01	1.323777e+00
Gerimis	ff_avg	2.517780e+04	3.2338605	3.133629e+00	0.0017266	4.449904e+01	1.424573e+07
Gerimis	ddd_carBarat Daya	1.259982e+13	4.0883021	7.378296e+00	0.0000000	4.172489e+09	3.804814e+16
Gerimis	ddd_carBarat Laut	6.400567e+00	NaN	NaN	NaN	NaN	NaN
Gerimis	ddd_carSelatan	0.000000e+00	0.0000000	-2.874590e+16	0.0000000	0.000000e+00	0.000000e+00
Gerimis	ddd_carTenang (Calm)	2.436192e+02	5.6522086	9.722936e-01	0.3309045	3.763100e-03	1.577176e+07
Gerimis	ddd_carTimur	1.367674e+06	5.0664194	2.788680e+00	0.0052923	6.659455e+01	2.808838e+10
Gerimis	ddd_carTimur Laut	1.322452e+26	2.9703254	2.024920e+01	0.0000000	3.917729e+23	4.464013e+28
Hujan Lebat	(Intercept)	9.064082e+01	0.0296593	1.519556e+02	0.0000000	8.552198e+01	9.606604e+01
Hujan Lebat	RR	7.365659e+20	2.3069598	2.082764e+01	0.0000000	8.007996e+18	6.774846e+22
Hujan Lebat	Tavg	1.647892e+01	2.5683620	1.091000e+00	0.2752730	1.073335e-01	2.530011e+03
Hujan Lebat	RH_avg	2.322360e-02	1.0364578	-3.630237e+00	0.0002832	3.045800e-03	1.770776e-01
Hujan Lebat	ss	1.942250e-01	1.3715424	-1.194814e+00	0.2321597	1.320830e-02	2.856039e+00
Hujan Lebat	ddd_x	1.206104e+00	0.1999834	9.370533e-01	0.3487311	8.150017e-01	1.784887e+00
Hujan Lebat	ff_avg	6.900000e-06	0.1527412	-7.780139e+01	0.0000000	5.100000e-06	9.300000e-06
Hujan Lebat	ddd_carBarat Daya	1.634459e+23	0.0046794	1.142264e+04	0.0000000	1.619537e+23	1.649518e+23
Hujan Lebat	ddd_carBarat Laut	2.229778e+12	0.0000000	1.491899e+27	0.0000000	2.229778e+12	2.229778e+12
Hujan Lebat	ddd_carSelatan	1.067320e-02	NaN	NaN	NaN	NaN	NaN
Hujan Lebat	ddd_carTenang (Calm)	2.833228e+00	0.7200695	1.446272e+00	0.1481008	6.908078e-01	1.161999e+01
Hujan Lebat	ddd_carTimur	0.000000e+00	0.1516557	-1.705064e+02	0.0000000	0.000000e+00	0.000000e+00
Hujan Lebat	ddd_carTimur Laut	2.695141e+51	0.0000000	1.817576e+52	0.0000000	2.695141e+51	2.695141e+51
Hujan Sedang	(Intercept)	0.000000e+00	0.2839951	-4.164417e+02	0.0000000	0.000000e+00	0.000000e+00
Hujan Sedang	RR	8.786949e+16	2.5434295	1.533938e+01	0.0000000	6.009897e+14	1.284722e+19
Hujan Sedang	Tavg	8.555627e-01	1.6158634	-9.654030e-02	0.9230915	3.604350e-02	2.030847e+01
Hujan Sedang	RH_avg	2.125539e+00	0.6986036	1.079333e+00	0.2804395	5.405261e-01	8.358371e+00
Hujan Sedang	ss	1.536756e+02	2.1447370	2.347535e+00	0.0188981	2.296157e+00	1.028510e+04
Hujan Sedang	ddd_x	1.099269e+00	0.0956769	9.892163e-01	0.3225573	9.113045e-01	1.326002e+00
Hujan Sedang	ff_avg	1.562607e+00	4.8699014	9.165600e-02	0.9269713	1.118000e-04	2.183320e+04
Hujan Sedang	ddd_carBarat Daya	1.560641e+15	6.6766896	5.239703e+00	0.0000002	3.236642e+09	7.525088e+20
Hujan Sedang	ddd_carBarat Laut	0.000000e+00	0.0000000	-2.387190e+73	0.0000000	0.000000e+00	0.000000e+00
Hujan Sedang	ddd_carSelatan	1.271554e+39	0.0000000	7.011979e+26	0.0000000	1.271554e+39	1.271554e+39
Hujan Sedang	ddd_carTenang (Calm)	1.487030e-02	8.6659637	-4.856225e-01	0.6272348	0.000000e+00	3.538362e+05
Hujan Sedang	ddd_carTimur	4.091328e+06	6.1156506	2.489413e+00	0.0127954	2.548080e+01	6.569246e+11
Hujan Sedang	ddd_carTimur Laut	2.366956e+27	0.5530772	1.139649e+02	0.0000000	8.005899e+26	6.997943e+27

# Simpan hasil tidy model ke dalam CSV
output_tidy <- tidy(fit_full, conf.int = TRUE, exponentiate = TRUE)

## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced

write.csv(output_tidy, "Interpret for the relative risk ratios.csv", row.names = FALSE)
levels(factor(data$ddd_car))

## [1] "Barat"         "Barat Daya"    "Barat Laut"    "Selatan"      
## [5] "Tenang (Calm)" "Timur"         "Timur Laut"

# ---------------- EVALUASI MLR ----------------
#library yang diperlukan
library(nnet)       # Untuk multinom()
#melakukan prediksi terhadap data
mlr_pred <- predict(fit_full, newdata = data_clean)
data_clean$MLR_Predicted <- mlr_pred
#membuat confusion matrix antara prediksi dan label asli
conf_matrix <- table(Predicted = mlr_pred,
                     Actual = data_clean$Kategori_Cuaca)
print(conf_matrix)

##                Actual
## Predicted       Berawan Cerah Cerah Berawan Gerimis Hujan Lebat Hujan Sedang
##   Berawan             6     0             0       0           0            0
##   Cerah               0    55             7       2           0            0
##   Cerah Berawan       0     4            53       0           0            0
##   Gerimis             0     0             0      26           0            0
##   Hujan Lebat         0     0             0       0          17            0
##   Hujan Sedang        0     0             0       0           0           37

#menghitung akurasi klasifikasi dari model
accuracy <- mean(mlr_pred == data_clean$Kategori_Cuaca)
cat("\nAkurasi klasifikasi MLR:", round(accuracy * 100, 2), "%\n")

## 
## Akurasi klasifikasi MLR: 93.72 %

#Membuat dataframe berisi skor diskriminan dan label asli
mlr_scores <- data.frame(mlr_pred)
mlr_scores$Kategori_Cuaca <- data_clean$Kategori_Cuaca



#----------------LDA---------------
# =======================
# LDA Multikelas Cuaca
# =======================
# Pastikan library MASS tersedia
if (!require(MASS)) install.packages("MASS")

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

library(MASS)
# Pastikan juga variabel kategorikal (ddd_car dan Kategori_Cuaca) bertipe faktor
data_clean$ddd_car <- as.factor(data_clean$ddd_car)
data_clean$Kategori_Cuaca <- as.factor(data_clean$Kategori_Cuaca)
# LDA menggunakan semua variabel prediktor yang diminta
lda_model <- lda(Kategori_Cuaca ~ Tavg + RH_avg + RR + ss + ddd_x + ff_avg + ddd_car, data = data_clean)
# Ringkasan model
print(lda_model)

## Call:
## lda(Kategori_Cuaca ~ Tavg + RH_avg + RR + ss + ddd_x + ff_avg + 
##     ddd_car, data = data_clean)
## 
## Prior probabilities of groups:
##       Berawan         Cerah Cerah Berawan       Gerimis   Hujan Lebat 
##    0.02898551    0.28502415    0.28985507    0.13526570    0.08212560 
##  Hujan Sedang 
##    0.17874396 
## 
## Group means:
##                   Tavg   RH_avg         RR       ss     ddd_x   ff_avg
## Berawan       28.25000 80.66667  0.0000000 2.633333 108.33333 2.166667
## Cerah         29.19661 68.69492  0.0000000 8.071186  90.89831 2.847458
## Cerah Berawan 28.93167 76.50000  0.0000000 7.650000 108.50000 2.583333
## Gerimis       28.79643 82.07143  0.9678571 5.075000 162.50000 2.107143
## Hujan Lebat   28.50000 82.94118 31.5000000 4.805882 162.35294 1.941176
## Hujan Sedang  28.73514 82.21622 10.1621622 5.172973 172.70270 2.108108
##               ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Berawan              0.16666667        0.00000000     0.00000000
## Cerah                0.01694915        0.00000000     0.00000000
## Cerah Berawan        0.00000000        0.00000000     0.01666667
## Gerimis              0.14285714        0.00000000     0.00000000
## Hujan Lebat          0.17647059        0.05882353     0.00000000
## Hujan Sedang         0.13513514        0.00000000     0.02702703
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Berawan                 0.33333333    0.3333333        0.00000000
## Cerah                   0.03389831    0.8305085        0.11864407
## Cerah Berawan           0.05000000    0.6833333        0.11666667
## Gerimis                 0.28571429    0.3214286        0.07142857
## Hujan Lebat             0.17647059    0.2352941        0.00000000
## Hujan Sedang            0.21621622    0.2972973        0.02702703
## 
## Coefficients of linear discriminants:
##                               LD1          LD2          LD3          LD4
## Tavg                  0.229325211  0.432202376 -0.230089387 -0.350835576
## RH_avg                0.087271730  0.200434166 -0.149675103  0.041040283
## RR                    0.284937442 -0.107048505 -0.014890439 -0.008025864
## ss                   -0.146713392 -0.162785938 -0.377516773 -0.073061535
## ddd_x                 0.002611645  0.001011126 -0.001934972 -0.015235921
## ff_avg               -0.170157592 -0.054360293 -0.217155274  0.372316139
## ddd_carBarat Daya     0.790433879 -0.010280672  1.060621351 -1.414675873
## ddd_carBarat Laut     1.987601945 -2.592134308  0.631015334  2.905221064
## ddd_carSelatan        1.581409782  1.746998802 -2.413638672  0.097271736
## ddd_carTenang (Calm) -0.434772762  0.124862949  1.095778244 -1.254907690
## ddd_carTimur          0.068728399 -0.268408114 -0.573257075 -1.484483318
## ddd_carTimur Laut     0.225484945 -0.075320546 -1.046307996 -1.947796412
##                               LD5
## Tavg                 -0.227119208
## RH_avg               -0.041359258
## RR                    0.008470898
## ss                   -0.062862919
## ddd_x                 0.000195861
## ff_avg                0.029015935
## ddd_carBarat Daya    -1.406425801
## ddd_carBarat Laut    -9.029519385
## ddd_carSelatan        5.200024780
## ddd_carTenang (Calm) -1.309853488
## ddd_carTimur         -0.911858689
## ddd_carTimur Laut    -1.928780916
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4    LD5 
## 0.8476 0.1254 0.0202 0.0052 0.0015

# Lakukan prediksi menggunakan model LDA
lda_pred <- predict(lda_model)
# Plot hasil proyeksi ke LD1 dan LD2
plot(lda_pred$x[, 1:2],
     col = as.numeric(data_clean$Kategori_Cuaca),
     pch = 19,
     xlab = "LD1", ylab = "LD2",
     main = "LDA Scatter Plot")
legend("topright", legend = levels(data_clean$Kategori_Cuaca),
       col = 1:length(levels(data_clean$Kategori_Cuaca)), pch = 19)

#----------------Evaluasi----------------
#melakukan prediksi kelas berdasarkan model LDA
lda_pred <- predict(lda_model, newdata = data_clean)
data_clean$LDA_Predicted <- lda_pred$class
#membuat confusion matrix antara prediksi dan label asli
conf_matrix <- table(Predicted = lda_pred$class,
                     Actual = data_clean$Kategori_Cuaca)
print(conf_matrix)

##                Actual
## Predicted       Berawan Cerah Cerah Berawan Gerimis Hujan Lebat Hujan Sedang
##   Berawan             2     0             0       2           0            1
##   Cerah               0    53             8       0           0            0
##   Cerah Berawan       1     6            49       8           0            2
##   Gerimis             3     0             3      18           0            5
##   Hujan Lebat         0     0             0       0          16            0
##   Hujan Sedang        0     0             0       0           1           29

#menghitung akurasi klasifikasi dari model
accuracy <- mean(lda_pred$class == data_clean$Kategori_Cuaca)
cat("\nAkurasi klasifikasi LDA:", round(accuracy * 100, 2), "%\n")

## 
## Akurasi klasifikasi LDA: 80.68 %

manova_model <- manova(cbind(Tavg, RH_avg, RR, ss, ddd_x, ff_avg, as.numeric(ddd_car)) ~ Kategori_Cuaca, data = data_clean)

# Summary dengan Wilks' Lambda
summary(manova_model, test = "Wilks")

##                 Df    Wilks approx F num Df den Df    Pr(>F)    
## Kategori_Cuaca   5 0.035282   28.548     35 822.72 < 2.2e-16 ***
## Residuals      201                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#----------------PERCEPTUAL MAPPINNG---------------
# Prediksi dan Konfusi untuk LDA
# Prediksi menggunakan LDA
lda_pred <- predict(lda_model, data_clean)
lda_class <- lda_pred$class
# Ubah ke angka untuk MDS
lda_dist <- dist(scale(as.matrix(lda_pred$x[, 1:2])))
lda_mds <- cmdscale(lda_dist)
# Dataframe untuk plot LDA
lda_df <- data.frame(
  Dim1 = lda_mds[, 1],
  Dim2 = lda_mds[, 2],
  Predicted = lda_class
)

# Prediksi dan Konfusi untuk MLR
# Prediksi menggunakan Multinomial Logistic Regression
mlr_pred <- predict(fit_full, newdata = data_clean)
mlr_prob <- predict(fit_full, newdata = data_clean, type = "probs")
# Gunakan probabilitas (jarak antar prediksi) untuk MDS
mlr_dist <- dist(scale(as.matrix(mlr_prob)))
mlr_mds <- cmdscale(mlr_dist)
# Dataframe untuk plot MLR
mlr_df <- data.frame(
  Dim1 = mlr_mds[, 1],
  Dim2 = mlr_mds[, 2],
  Predicted = mlr_pred
)


#Plot Perceptual Mapping
library(ggplot2)
# Plot MDS LDA
plot_lda <- ggplot(lda_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(size = 2) +
  labs(title = "MDS Mapping Berdasarkan Hasil Klasifikasi LDA") +
  theme_minimal()
# Plot MDS MLR
plot_mlr <- ggplot(mlr_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(size = 2) +
  labs(title = "MDS Mapping Berdasarkan Hasil Multinomial Logistic Regression") +
  theme_minimal()
# Tampilkan dua plot secara bersamaan
library(gridExtra)
grid.arrange(plot_lda, plot_mlr, ncol = 2)

Klasifikasi Kategori Cuaca LDA dan MLR

Yasyifa Sastiya N

2025-06-09