# Baca data
data <- read.csv("bmkg_cuaca.csv")
head(data)
##      Tanggal   Tn   Tx Tavg RH_avg   RR  ss ff_x ddd_x ff_avg ddd_car
## 1 01-01-2024 26.0 35.0 29.9     83  3.8 6.5    3    60      1      C 
## 2 02-01-2024 26.2 35.2 30.6     78 21.2 6.7    4   160      2      C 
## 3 03-01-2024 27.0 35.8 27.8     90  0.0 8.9    4    60      1      C 
## 4 04-01-2024 25.8 34.4 29.0     82  1.5 1.9    4    70      2      W 
## 5 05-01-2024 25.5 33.4 28.8     83  2.2 6.0    5   110      2      W 
## 6 06-01-2024 25.4 31.3 28.0     87  0.0 3.8    3    90      1      C 
##   Kategori_Cuaca
## 1   Hujan Sedang
## 2    Hujan Lebat
## 3  Cerah Berawan
## 4        Gerimis
## 5        Gerimis
## 6        Berawan
#-------------PREPROCESSING---------

# Cek jumlah NA per kolom
colSums(is.na(data))
##        Tanggal             Tn             Tx           Tavg         RH_avg 
##              0             12              3              1              1 
##             RR             ss           ff_x          ddd_x         ff_avg 
##             48              4              0              0              0 
##        ddd_car Kategori_Cuaca 
##              0              0
# Gantilah nilai 8888 menjadi NA, kemudian hapus baris yang mengandung NA
data[data == 8888] <- NA
data <- na.omit(data)

# di kolom ddd_car ternyata ada yang Ganda karena Spasi
data$ddd_car <- trimws(data$ddd_car)

# Identifikasi kolom numerik (selain Tanggal dan Kategori_Cuaca)
numeric_cols <- names(data)[sapply(data, is.numeric) & !(names(data) %in% c("Tanggal"))]

# Hapus baris jika semua kolom numeriknya NA
data <- data[!apply(data[numeric_cols], 1, function(x) all(is.na(x))), ]

# Fungsi untuk mengisi NA dengan interpolasi linear antar baris
interpolate_na <- function(x) {
  approx(1:length(x), x, 1:length(x), rule = 2)$y
}

# Terapkan interpolasi untuk setiap kolom numerik
data[numeric_cols] <- lapply(data[numeric_cols], interpolate_na)
# Tampilkan data hasil imputasi
head(data)
##      Tanggal   Tn   Tx Tavg RH_avg   RR  ss ff_x ddd_x ff_avg ddd_car
## 1 01-01-2024 26.0 35.0 29.9     83  3.8 6.5    3    60      1       C
## 2 02-01-2024 26.2 35.2 30.6     78 21.2 6.7    4   160      2       C
## 3 03-01-2024 27.0 35.8 27.8     90  0.0 8.9    4    60      1       C
## 4 04-01-2024 25.8 34.4 29.0     82  1.5 1.9    4    70      2       W
## 5 05-01-2024 25.5 33.4 28.8     83  2.2 6.0    5   110      2       W
## 6 06-01-2024 25.4 31.3 28.0     87  0.0 3.8    3    90      1       C
##   Kategori_Cuaca
## 1   Hujan Sedang
## 2    Hujan Lebat
## 3  Cerah Berawan
## 4        Gerimis
## 5        Gerimis
## 6        Berawan
# Cek jumlah NA per kolom
colSums(is.na(data))
##        Tanggal             Tn             Tx           Tavg         RH_avg 
##              0              0              0              0              0 
##             RR             ss           ff_x          ddd_x         ff_avg 
##              0              0              0              0              0 
##        ddd_car Kategori_Cuaca 
##              0              0
# Hitung matriks korelasi untuk kolom numerik saja
cor_matrix <- cor(data[numeric_cols], use = "complete.obs")
# Tampilkan matriks korelasi (bisa di-plot juga nanti)
print(round(cor_matrix, 2))
##           Tn    Tx  Tavg RH_avg    RR    ss  ff_x ddd_x ff_avg
## Tn      1.00  0.32  0.54   0.16 -0.14 -0.01 -0.15  0.10  -0.13
## Tx      0.32  1.00  0.74  -0.45 -0.05  0.25  0.11 -0.07   0.06
## Tavg    0.54  0.74  1.00  -0.51 -0.17  0.28  0.01 -0.23   0.12
## RH_avg  0.16 -0.45 -0.51   1.00  0.37 -0.50 -0.28  0.39  -0.46
## RR     -0.14 -0.05 -0.17   0.37  1.00 -0.33  0.02  0.18  -0.24
## ss     -0.01  0.25  0.28  -0.50 -0.33  1.00  0.13 -0.27   0.22
## ff_x   -0.15  0.11  0.01  -0.28  0.02  0.13  1.00  0.19   0.62
## ddd_x   0.10 -0.07 -0.23   0.39  0.18 -0.27  0.19  1.00  -0.03
## ff_avg -0.13  0.06  0.12  -0.46 -0.24  0.22  0.62 -0.03   1.00
# Visualisasi korelasi menggunakan heatmap (opsional, butuh package corrplot)
if(!require(corrplot)) install.packages("corrplot")
## Loading required package: corrplot
## corrplot 0.95 loaded
library(corrplot)
corrplot(cor_matrix, method = "color", type = "upper", tl.cex=0.7, tl.col="black", diag=FALSE)

#hapus Tn, Tx karena sudah diwakili Tavg. Hapus ff_x karena sudah diwakili ff_avg (redudant)
columns_to_drop <- c("Tn", "Tx", "ff_x", "Tanggal")
data <- data[, !(names(data) %in% columns_to_drop)]
head(data)
##   Tavg RH_avg   RR  ss ddd_x ff_avg ddd_car Kategori_Cuaca
## 1 29.9     83  3.8 6.5    60      1       C   Hujan Sedang
## 2 30.6     78 21.2 6.7   160      2       C    Hujan Lebat
## 3 27.8     90  0.0 8.9    60      1       C  Cerah Berawan
## 4 29.0     82  1.5 1.9    70      2       W        Gerimis
## 5 28.8     83  2.2 6.0   110      2       W        Gerimis
## 6 28.0     87  0.0 3.8    90      1       C        Berawan
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Ubah nilai ddd_car ke factor dengan label deskriptif
data <- data %>%
  mutate(
    ddd_car = factor(ddd_car, levels = c("C", "E", "NE", "NW", "S", "SW", "W"),
                     labels = c("Tenang (Calm)", "Timur", "Timur Laut", "Barat Laut", 
                                "Selatan", "Barat Daya", "Barat"))
  )
write.csv(data, "data_bmkg_preprocessed.csv", row.names = FALSE)
data <- read.csv("data_bmkg_preprocessed.csv")


#EDA

# Pastikan packages terinstal dan diload
packages <- c("tidyverse", "GGally", "corrplot", "gridExtra")
new.packages <- packages[!(packages %in% installed.packages())]
if(length(new.packages)) install.packages(new.packages)
lapply(packages, library, character.only = TRUE)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot"  "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot" 
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[3]]
##  [1] "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "corrplot" 
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "gridExtra" "GGally"    "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "corrplot"  "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"
# Gunakan data hasil preprocessing
data_clean <- data  # data hasil preprocessing dan imputasi NA

# Cek struktur data
str(data_clean)
## 'data.frame':    217 obs. of  8 variables:
##  $ Tavg          : num  29.9 30.6 27.8 29 28.8 28 27.7 27.7 28.7 28.5 ...
##  $ RH_avg        : int  83 78 90 82 83 87 88 89 86 83 ...
##  $ RR            : num  3.8 21.2 0 1.5 2.2 0 2.1 78.1 26.5 3.3 ...
##  $ ss            : num  6.5 6.7 8.9 1.9 6 3.8 2 0.6 4.5 5 ...
##  $ ddd_x         : int  60 160 60 70 110 90 340 200 230 190 ...
##  $ ff_avg        : int  1 2 1 2 2 1 1 2 2 1 ...
##  $ ddd_car       : chr  "Tenang (Calm)" "Tenang (Calm)" "Tenang (Calm)" "Barat" ...
##  $ Kategori_Cuaca: chr  "Hujan Sedang" "Hujan Lebat" "Cerah Berawan" "Gerimis" ...
# Ringkasan statistik deskriptif
summary(data_clean)
##       Tavg           RH_avg            RR               ss        
##  Min.   :25.90   Min.   :53.00   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:28.00   1st Qu.:72.00   1st Qu.: 0.000   1st Qu.: 5.200  
##  Median :28.90   Median :78.00   Median : 0.000   Median : 7.900  
##  Mean   :28.84   Mean   :77.02   Mean   : 6.572   Mean   : 6.469  
##  3rd Qu.:29.70   3rd Qu.:82.00   3rd Qu.: 4.500   3rd Qu.: 8.000  
##  Max.   :31.00   Max.   :96.00   Max.   :84.500   Max.   :10.000  
##      ddd_x           ff_avg        ddd_car          Kategori_Cuaca    
##  Min.   : 30.0   Min.   :1.000   Length:217         Length:217        
##  1st Qu.: 80.0   1st Qu.:2.000   Class :character   Class :character  
##  Median :100.0   Median :2.000   Mode  :character   Mode  :character  
##  Mean   :128.5   Mean   :2.442                                        
##  3rd Qu.:150.0   3rd Qu.:3.000                                        
##  Max.   :340.0   Max.   :4.000
# Jumlah observasi per kategori cuaca
table(data_clean$Kategori_Cuaca)
## 
##       Berawan         Cerah Cerah Berawan       Gerimis   Hujan Lebat 
##             6            60            60            28            24 
##  Hujan Sedang 
##            39
# Visualisasi distribusi kategori cuaca
ggplot(data_clean, aes(x = Kategori_Cuaca, fill = Kategori_Cuaca)) +
  geom_bar() +
  labs(title = "Distribusi Kategori Cuaca", x = "Kategori", y = "Jumlah") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Boxplot suhu rata-rata per kategori
ggplot(data_clean, aes(x = Kategori_Cuaca, y = Tavg, fill = Kategori_Cuaca)) +
  geom_boxplot() +
  labs(title = "Boxplot Suhu Rata-rata per Kategori Cuaca") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Korelasi antar variabel numerik
numeric_only <- data_clean[, sapply(data_clean, is.numeric)]

cor_matrix <- cor(numeric_only, use = "complete.obs")
corrplot(cor_matrix, method = "color", type = "upper", 
         tl.cex = 0.7, tl.col = "black", diag = FALSE)

#-------------UJI ASUMSI---------

#Uji linearitas dengan scatterplott matrix
# Filter hanya kolom numerik
numeric_data <- data[sapply(data, is.numeric)]
# Plot scatterplot matrix
pairs(numeric_data, main = "Scatterplot Matrix untuk Uji Linearitas", pch = 21, bg = "lightblue")

#Berdasarkan hasil scatterplot matrix, diketahui bahwa beberapa pasangan variabel seperti Tavg dengan RH_avg atau ss memperlihatkan pola sebaran titik yang cukup linear. Namun, sebagian besar pasangan variabel lainnya menunjukkan hubungan non-linear atau tidak membentuk pola linear yang jelas. Hal ini menunjukkan bahwa asumsi linearitas hanya terpenuhi sebagian. Oleh karena itu, jika model linier akan digunakan, perlu dilakukan transformasi data atau pemilihan fitur yang tepat.


#Uji Outlier (dengan Mahalanobis Distance)
# Pilih hanya kolom numerik untuk analisis Mahalanobis
numeric_data <- data[sapply(data, is.numeric)]
# Hitung Mahalanobis distance
mahal_dist <- mahalanobis(numeric_data, colMeans(numeric_data), cov(numeric_data))
# Hitung batas cutoff outlier (dengan derajat bebas = jumlah kolom)
cutoff <- qchisq(0.975, df = ncol(numeric_data))
# Identifikasi baris yang merupakan outlier
outliers <- which(mahal_dist > cutoff)
# Tampilkan jumlah dan indeks outlier
length(outliers)
## [1] 10
print(outliers)
##  [1]   8  24  31  37  47  54  58  76  94 162
# Visualisasi
plot(mahal_dist, type = "h", main = "Mahalanobis Distance", ylab = "Distance")
abline(h = cutoff, col = "red", lty = 2)

#output menunjukkan terdapat 12 observasi (baris data) yang terindikasi sebagai outlier multivariat yang nantinya akan dihapus karena pada multinomial regression outliers sangat mempengaruhi model 
data_clean <- data[-outliers, ]
# Cek hasilnya
nrow(data)       # jumlah baris sebelum
## [1] 217
nrow(data_clean) # jumlah baris sesudah
## [1] 207
head(data_clean)
##   Tavg RH_avg   RR  ss ddd_x ff_avg       ddd_car Kategori_Cuaca
## 1 29.9     83  3.8 6.5    60      1 Tenang (Calm)   Hujan Sedang
## 2 30.6     78 21.2 6.7   160      2 Tenang (Calm)    Hujan Lebat
## 3 27.8     90  0.0 8.9    60      1 Tenang (Calm)  Cerah Berawan
## 4 29.0     82  1.5 1.9    70      2         Barat        Gerimis
## 5 28.8     83  2.2 6.0   110      2         Barat        Gerimis
## 6 28.0     87  0.0 3.8    90      1 Tenang (Calm)        Berawan
# Uji independensi (autokorelasi) 
# Pastikan package ini terinstal
if(!require(lmtest)) install.packages("lmtest")
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lmtest)
# Buat salinan data agar aman
data_indep <- data_clean
# Ubah Kategori_Cuaca jadi numerik (sementara)
data_indep$Kategori_Cuaca_Num <- as.numeric(as.factor(data_indep$Kategori_Cuaca))
# Buat model regresi linier (dummy)
model_dw <- lm(Kategori_Cuaca_Num ~ ., data = data_indep[, sapply(data_indep, is.numeric)])


# Uji Durbin-Watson
dw_result <- dwtest(model_dw)
print(dw_result)
## 
##  Durbin-Watson test
## 
## data:  model_dw
## DW = 1.946, p-value = 0.281
## alternative hypothesis: true autocorrelation is greater than 0
#Nilai DW mendekati 2 → Tidak ada autokorelasi (✅ memenuhi asumsi independensi).

# Uji Multikolinearitas – dengan VIF (Variance Inflation Factor)
# Pastikan package car sudah terinstal
if (!require(car)) install.packages("car")
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(car)
# Buat model regresi linier dengan target dummy (misalnya Tavg atau variabel target sesungguhnya)
# Bisa ganti "Tavg" dengan variabel target kamu yang sebenarnya
model_vif <- lm(Kategori_Cuaca_Num ~ ., data = data_indep[, sapply(data_indep, is.numeric)])
# Hitung nilai VIF
vif_values <- vif(model_vif)
print(vif_values)
##     Tavg   RH_avg       RR       ss    ddd_x   ff_avg 
## 1.341802 2.298771 1.223165 1.312329 1.221417 1.398698
# VIF > 5 menunjukkan kemungkinan multikolinearitas. VIF > 10 menunjukkan multikolinearitas serius
# Asumsi tidak ada multikolinearitas terpenuhi.

#Uji Normalitas Multivariat – dengan Mardia Test
# Pastikan package MVN sudah terinstal
if (!require(MVN)) install.packages("MVN")
## Loading required package: MVN
library(MVN)
# Ambil hanya kolom numerik
numeric_data <- data[sapply(data_clean, is.numeric)]
# Lakukan Mardia Test
mardia_result <- mvn(data = numeric_data, mvnTest = "mardia")
print(mardia_result$multivariateNormality)
##              Test       Statistic               p value Result
## 1 Mardia Skewness 639.62833983284 5.49409057142149e-100     NO
## 2 Mardia Kurtosis 7.3856030405196  1.51656465163796e-13     NO
## 3             MVN            <NA>                  <NA>     NO
#diperoleh p-value yang jauh lebih kecil dari 0.05. Hal ini menunjukkan bahwa: Data tidak memenuhi asumsi normalitas multivariat.

#Uji normalitas dengan kolmogorov spirnov
numeric_data <- data[sapply(data, is.numeric)]
# Lakukan uji Kolmogorov–Smirnov pada setiap kolom
ks_results <- lapply(numeric_data, function(col) {
  # Normalisasi kolom jika perlu
  col <- scale(col)
  ks.test(col, "pnorm")
})
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(col, "pnorm"): ties should not be present for the
## one-sample Kolmogorov-Smirnov test
# Tampilkan hasil p-value tiap kolom
for (i in seq_along(ks_results)) {
  cat("Kolom:", names(ks_results)[i], "\n")
  print(ks_results[[i]]$p.value)
  cat("\n")
}
## Kolom: Tavg 
## [1] 0.3006132
## 
## Kolom: RH_avg 
## [1] 0.1609301
## 
## Kolom: RR 
## [1] 7.764448e-21
## 
## Kolom: ss 
## [1] 2.969106e-10
## 
## Kolom: ddd_x 
## [1] 3.667445e-16
## 
## Kolom: ff_avg 
## [1] 7.298252e-16
#uji homogenitas varians dengan barlet test
# Misalnya kita ingin menguji homogenitas varians dari setiap variabel numerik terhadap kategori cuaca
for (col in names(data_clean)[sapply(data_clean, is.numeric)]) {
  cat("Bartlett Test untuk variabel:", col, "\n")
  print(bartlett.test(data_clean[[col]] ~ data_clean$Kategori_Cuaca))
  cat("\n")
}
## Bartlett Test untuk variabel: Tavg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 7.1928, df = 5, p-value = 0.2067
## 
## 
## Bartlett Test untuk variabel: RH_avg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 4.9165, df = 5, p-value = 0.4262
## 
## 
## Bartlett Test untuk variabel: RR 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = Inf, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ss 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 227.32, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ddd_x 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 107.24, df = 5, p-value < 2.2e-16
## 
## 
## Bartlett Test untuk variabel: ff_avg 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean[[col]] by data_clean$Kategori_Cuaca
## Bartlett's K-squared = 6.2578, df = 5, p-value = 0.2819
#----------------MLR---------------
library(nnet)
library(dplyr)      # untuk %>% dan data manipulation
library(broom)      # untuk tidy()
library(kableExtra) # untuk kable_styling()
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(knitr)      # untuk kable()
fit_full <- multinom(Kategori_Cuaca ~ RR+Tavg+RH_avg+ss+ddd_x+ff_avg+ddd_car, data_clean)
## # weights:  84 (65 variable)
## initial  value 370.894210 
## iter  10 value 234.290778
## iter  20 value 194.790984
## iter  30 value 128.942975
## iter  40 value 80.217288
## iter  50 value 66.761095
## iter  60 value 56.287683
## iter  70 value 48.129583
## iter  80 value 45.881164
## iter  90 value 44.716892
## iter 100 value 42.517399
## final  value 42.517399 
## stopped after 100 iterations
#Interpret for the relative log odds
summary(fit_full)
## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## multinom(formula = Kategori_Cuaca ~ RR + Tavg + RH_avg + ss + 
##     ddd_x + ff_avg + ddd_car, data = data_clean)
## 
## Coefficients:
##               (Intercept)        RR       Tavg     RH_avg        ss      ddd_x
## Cerah          114.124932 16.813176 -6.2656644 -1.5409423 10.667590 0.07896792
## Cerah Berawan  153.639540  8.282869 -5.4308207 -0.7390146  7.718777 0.06919883
## Gerimis       -315.377722 32.277232  3.8578633  1.6782064  5.361079 0.09871998
## Hujan Lebat      4.506905 48.048530  2.8020820 -3.7625872 -1.638738 0.18739507
## Hujan Sedang  -118.267389 39.014629 -0.1559959  0.7540256  5.034844 0.09464518
##                    ff_avg ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Cerah           7.1307930         125.99075         32.739433     -149.19582
## Cerah Berawan   7.1810570          15.66337         45.892377      104.53938
## Gerimis        10.1337177          30.16470          1.856387      -44.28981
## Hujan Lebat   -11.8834790          53.45077         28.432923       -4.54002
## Hujan Sedang    0.4463558          34.98387       -117.649474       90.04106
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Cerah                    96.026124    105.45896         150.61396
## Cerah Berawan            -2.600540      7.99057          52.65552
## Gerimis                   5.495606     14.12862          60.14670
## Hujan Lebat               1.041417    -25.85826         118.42329
## Hujan Sedang             -4.208387     15.22438          63.03140
## 
## Std. Errors:
##               (Intercept)       RR     Tavg    RH_avg       ss      ddd_x
## Cerah          6.94147906 3.724792 1.211786 0.6885924 2.324110 0.10420114
## Cerah Berawan  6.82656632 3.897549 1.214907 0.6710433 2.197607 0.10240229
## Gerimis        0.49424248 2.289549 1.249003 0.6015901 2.044282 0.09274115
## Hujan Lebat    0.02965934 2.306960 2.568362 1.0364578 1.371542 0.19998336
## Hujan Sedang   0.28399505 2.543429 1.615863 0.6986036 2.144737 0.09567693
##                  ff_avg ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Cerah         3.3253663      4.834473e+00      1.926981e-11   8.843567e-12
## Cerah Berawan 3.2921813      2.570777e-06               NaN   8.125216e-13
## Gerimis       3.2338605      4.088302e+00               NaN   1.540735e-15
## Hujan Lebat   0.1527412      4.679369e-03      1.905820e-26            NaN
## Hujan Sedang  4.8699014      6.676690e+00      4.928367e-72   1.284103e-25
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Cerah                    4.7653406    4.2289338      2.104957e+00
## Cerah Berawan            4.1520844    4.9296845      2.560951e+00
## Gerimis                  5.6522086    5.0664194      2.970325e+00
## Hujan Lebat              0.7200695    0.1516557      6.515452e-51
## Hujan Sedang             8.6659637    6.1156506      5.530772e-01
## 
## Residual Deviance: 85.0348 
## AIC: 215.0348
#disini ada warning karena harusnya antara RR, ss, dan ddd_car salah satu didrop karena multikoliniaritas tingggi
#or
tidy(fit_full, conf.int = TRUE) %>%
  kable() %>%
  kable_styling("basic", full_width = FALSE)
## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vcov(object))): NaNs produced
y.level term estimate std.error statistic p.value conf.low conf.high
Cerah (Intercept) 114.1249322 6.9414791 1.644101e+01 0.0000000 100.5198833 127.7299812
Cerah RR 16.8131757 3.7247920 4.513856e+00 0.0000064 9.5127176 24.1136338
Cerah Tavg -6.2656644 1.2117857 -5.170604e+00 0.0000002 -8.6407208 -3.8906080
Cerah RH_avg -1.5409423 0.6885924 -2.237815e+00 0.0252331 -2.8905587 -0.1913260
Cerah ss 10.6675904 2.3241100 4.589968e+00 0.0000044 6.1124185 15.2227622
Cerah ddd_x 0.0789679 0.1042011 7.578412e-01 0.4485460 -0.1252626 0.2831984
Cerah ff_avg 7.1307930 3.3253663 2.144363e+00 0.0320038 0.6131949 13.6483912
Cerah ddd_carBarat Daya 125.9907463 4.8344728 2.606091e+01 0.0000000 116.5153537 135.4661389
Cerah ddd_carBarat Laut 32.7394329 0.0000000 1.699001e+12 0.0000000 32.7394329 32.7394329
Cerah ddd_carSelatan -149.1958217 0.0000000 -1.687055e+13 0.0000000 -149.1958217 -149.1958217
Cerah ddd_carTenang (Calm) 96.0261239 4.7653406 2.015095e+01 0.0000000 86.6862279 105.3660199
Cerah ddd_carTimur 105.4589572 4.2289338 2.493748e+01 0.0000000 97.1703992 113.7475152
Cerah ddd_carTimur Laut 150.6139627 2.1049568 7.155204e+01 0.0000000 146.4883232 154.7396022
Cerah Berawan (Intercept) 153.6395403 6.8265663 2.250612e+01 0.0000000 140.2597161 167.0193644
Cerah Berawan RR 8.2828691 3.8975489 2.125148e+00 0.0335742 0.6438138 15.9219245
Cerah Berawan Tavg -5.4308207 1.2149067 -4.470154e+00 0.0000078 -7.8119941 -3.0496473
Cerah Berawan RH_avg -0.7390146 0.6710433 -1.101292e+00 0.2707696 -2.0542353 0.5762061
Cerah Berawan ss 7.7187768 2.1976067 3.512356e+00 0.0004442 3.4115468 12.0260069
Cerah Berawan ddd_x 0.0691988 0.1024023 6.757547e-01 0.4991964 -0.1315060 0.2699036
Cerah Berawan ff_avg 7.1810570 3.2921813 2.181246e+00 0.0291652 0.7285002 13.6336138
Cerah Berawan ddd_carBarat Daya 15.6633684 0.0000026 6.092853e+06 0.0000000 15.6633634 15.6633734
Cerah Berawan ddd_carBarat Laut 45.8923770 NaN NaN NaN NaN NaN
Cerah Berawan ddd_carSelatan 104.5393839 0.0000000 1.286604e+14 0.0000000 104.5393839 104.5393839
Cerah Berawan ddd_carTenang (Calm) -2.6005395 4.1520844 -6.263215e-01 0.5311041 -10.7384754 5.5373963
Cerah Berawan ddd_carTimur 7.9905700 4.9296845 1.620909e+00 0.1050372 -1.6714341 17.6525741
Cerah Berawan ddd_carTimur Laut 52.6555244 2.5609508 2.056093e+01 0.0000000 47.6361531 57.6748957
Gerimis (Intercept) -315.3777219 0.4942425 -6.381032e+02 0.0000000 -316.3464194 -314.4090244
Gerimis RR 32.2772319 2.2895492 1.409764e+01 0.0000000 27.7897978 36.7646659
Gerimis Tavg 3.8578633 1.2490025 3.088755e+00 0.0020100 1.4098633 6.3058633
Gerimis RH_avg 1.6782064 0.6015901 2.789618e+00 0.0052770 0.4991115 2.8573013
Gerimis ss 5.3610786 2.0442823 2.622475e+00 0.0087294 1.3543590 9.3677983
Gerimis ddd_x 0.0987200 0.0927411 1.064468e+00 0.2871168 -0.0830493 0.2804893
Gerimis ff_avg 10.1337177 3.2338605 3.133629e+00 0.0017266 3.7954675 16.4719679
Gerimis ddd_carBarat Daya 30.1647036 4.0883021 7.378296e+00 0.0000000 22.1517786 38.1776285
Gerimis ddd_carBarat Laut 1.8563866 NaN NaN NaN NaN NaN
Gerimis ddd_carSelatan -44.2898146 0.0000000 -2.874590e+16 0.0000000 -44.2898146 -44.2898146
Gerimis ddd_carTenang (Calm) 5.4956064 5.6522086 9.722936e-01 0.3309045 -5.5825188 16.5737316
Gerimis ddd_carTimur 14.1286223 5.0664194 2.788680e+00 0.0052923 4.1986227 24.0586219
Gerimis ddd_carTimur Laut 60.1467000 2.9703254 2.024920e+01 0.0000000 54.3249692 65.9684308
Hujan Lebat (Intercept) 4.5069046 0.0296593 1.519556e+02 0.0000000 4.4487734 4.5650359
Hujan Lebat RR 48.0485304 2.3069598 2.082764e+01 0.0000000 43.5269722 52.5700886
Hujan Lebat Tavg 2.8020820 2.5683620 1.091000e+00 0.2752730 -2.2318149 7.8359790
Hujan Lebat RH_avg -3.7625872 1.0364578 -3.630237e+00 0.0002832 -5.7940072 -1.7311672
Hujan Lebat ss -1.6387379 1.3715424 -1.194814e+00 0.2321597 -4.3269115 1.0494357
Hujan Lebat ddd_x 0.1873951 0.1999834 9.370533e-01 0.3487311 -0.2045651 0.5793552
Hujan Lebat ff_avg -11.8834790 0.1527412 -7.780139e+01 0.0000000 -12.1828463 -11.5841117
Hujan Lebat ddd_carBarat Daya 53.4507688 0.0046794 1.142264e+04 0.0000000 53.4415974 53.4599402
Hujan Lebat ddd_carBarat Laut 28.4329233 0.0000000 1.491899e+27 0.0000000 28.4329233 28.4329233
Hujan Lebat ddd_carSelatan -4.5400196 NaN NaN NaN NaN NaN
Hujan Lebat ddd_carTenang (Calm) 1.0414167 0.7200695 1.446272e+00 0.1481008 -0.3698937 2.4527270
Hujan Lebat ddd_carTimur -25.8582632 0.1516557 -1.705064e+02 0.0000000 -26.1555029 -25.5610235
Hujan Lebat ddd_carTimur Laut 118.4232904 0.0000000 1.817576e+52 0.0000000 118.4232904 118.4232904
Hujan Sedang (Intercept) -118.2673890 0.2839951 -4.164417e+02 0.0000000 -118.8240091 -117.7107690
Hujan Sedang RR 39.0146291 2.5434295 1.533938e+01 0.0000000 34.0295989 43.9996592
Hujan Sedang Tavg -0.1559959 1.6158634 -9.654030e-02 0.9230915 -3.3230298 3.0110381
Hujan Sedang RH_avg 0.7540256 0.6986036 1.079333e+00 0.2804395 -0.6152123 2.1232636
Hujan Sedang ss 5.0348442 2.1447370 2.347535e+00 0.0188981 0.8312369 9.2384515
Hujan Sedang ddd_x 0.0946452 0.0956769 9.892163e-01 0.3225573 -0.0928782 0.2821685
Hujan Sedang ff_avg 0.4463558 4.8699014 9.165600e-02 0.9269713 -9.0984754 9.9911871
Hujan Sedang ddd_carBarat Daya 34.9838733 6.6766896 5.239703e+00 0.0000002 21.8978022 48.0699444
Hujan Sedang ddd_carBarat Laut -117.6494737 0.0000000 -2.387190e+73 0.0000000 -117.6494737 -117.6494737
Hujan Sedang ddd_carSelatan 90.0410587 0.0000000 7.011979e+26 0.0000000 90.0410587 90.0410587
Hujan Sedang ddd_carTenang (Calm) -4.2083873 8.6659637 -4.856225e-01 0.6272348 -21.1933641 12.7765894
Hujan Sedang ddd_carTimur 15.2243801 6.1156506 2.489413e+00 0.0127954 3.2379251 27.2108351
Hujan Sedang ddd_carTimur Laut 63.0314024 0.5530772 1.139649e+02 0.0000000 61.9473910 64.1154137
# Simpan hasil tidy model ke dalam CSV
output_tidy <- tidy(fit_full, conf.int = TRUE)
## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced
write.csv(output_tidy, "Interpret for the relative log odds.csv", row.names = FALSE)
#Interpret for the relative risk ratios----
exp(coef(fit_full))
##                 (Intercept)           RR        Tavg     RH_avg           ss
## Cerah          3.662927e+49 2.003870e+07  0.00190045 0.21417918 42941.343935
## Cerah Berawan  5.306456e+66 3.955527e+03  0.00437950 0.47758430  2250.205525
## Gerimis       1.079433e-137 1.041894e+14 47.36404000 5.35594073   212.954524
## Hujan Lebat    9.064081e+01 7.365659e+20 16.47892087 0.02322358     0.194225
## Hujan Sedang   4.336362e-52 8.786949e+16  0.85556272 2.12553947   153.675646
##                  ddd_x       ff_avg ddd_carBarat Daya ddd_carBarat Laut
## Cerah         1.082170 1.249868e+03      5.212978e+54      1.654074e+14
## Cerah Berawan 1.071649 1.314297e+03      6.346210e+06      8.527193e+19
## Gerimis       1.103757 2.517780e+04      1.259982e+13      6.400567e+00
## Hujan Lebat   1.206104 6.903521e-06      1.634459e+23      2.229778e+12
## Hujan Sedang  1.099269 1.562607e+00      1.560641e+15      8.044199e-52
##               ddd_carSelatan ddd_carTenang (Calm) ddd_carTimur
## Cerah           1.603533e-65         5.053773e+41 6.313107e+45
## Cerah Berawan   2.516967e+45         7.423352e-02 2.952980e+03
## Gerimis         5.823417e-20         2.436192e+02 1.367674e+06
## Hujan Lebat     1.067320e-02         2.833228e+00 5.887067e-12
## Hujan Sedang    1.271554e+39         1.487033e-02 4.091328e+06
##               ddd_carTimur Laut
## Cerah              2.575211e+65
## Cerah Berawan      7.379105e+22
## Gerimis            1.322452e+26
## Hujan Lebat        2.695141e+51
## Hujan Sedang       2.366956e+27
#or
tidy(fit_full, conf.int = TRUE, exponentiate = TRUE) %>%
  kable() %>%
  kable_styling("basic", full_width = FALSE)
## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced
y.level term estimate std.error statistic p.value conf.low conf.high
Cerah (Intercept) 3.662927e+49 6.9414791 1.644101e+01 0.0000000 4.520960e+43 2.967741e+55
Cerah RR 2.003870e+07 3.7247920 4.513856e+00 0.0000064 1.353072e+04 2.967687e+10
Cerah Tavg 1.900500e-03 1.2117857 -5.170604e+00 0.0000002 1.768000e-04 2.043290e-02
Cerah RH_avg 2.141792e-01 0.6885924 -2.237815e+00 0.0252331 5.554520e-02 8.258633e-01
Cerah ss 4.294134e+04 2.3241100 4.589968e+00 0.0000044 4.514292e+02 4.084714e+06
Cerah ddd_x 1.082170e+00 0.1042011 7.578412e-01 0.4485460 8.822652e-01 1.327368e+00
Cerah ff_avg 1.249868e+03 3.3253663 2.144363e+00 0.0320038 1.846321e+00 8.460986e+05
Cerah ddd_carBarat Daya 5.212978e+54 4.8344728 2.606091e+01 0.0000000 3.999219e+50 6.795112e+58
Cerah ddd_carBarat Laut 1.654074e+14 0.0000000 1.699001e+12 0.0000000 1.654074e+14 1.654074e+14
Cerah ddd_carSelatan 0.000000e+00 0.0000000 -1.687055e+13 0.0000000 0.000000e+00 0.000000e+00
Cerah ddd_carTenang (Calm) 5.053773e+41 4.7653406 2.015095e+01 0.0000000 4.439667e+37 5.752823e+45
Cerah ddd_carTimur 6.313107e+45 4.2289338 2.493748e+01 0.0000000 1.586968e+42 2.511413e+49
Cerah ddd_carTimur Laut 2.575211e+65 2.1049568 7.155204e+01 0.0000000 4.159781e+63 1.594246e+67
Cerah Berawan (Intercept) 5.306456e+66 6.8265663 2.250612e+01 0.0000000 8.203908e+60 3.432324e+72
Cerah Berawan RR 3.955527e+03 3.8975489 2.125148e+00 0.0335742 1.903727e+00 8.218716e+06
Cerah Berawan Tavg 4.379500e-03 1.2149067 -4.470154e+00 0.0000078 4.048000e-04 4.737560e-02
Cerah Berawan RH_avg 4.775843e-01 0.6710433 -1.101292e+00 0.2707696 1.281908e-01 1.779275e+00
Cerah Berawan ss 2.250206e+03 2.1976067 3.512356e+00 0.0004442 3.031209e+01 1.670431e+05
Cerah Berawan ddd_x 1.071649e+00 0.1024023 6.757547e-01 0.4991964 8.767740e-01 1.309838e+00
Cerah Berawan ff_avg 1.314297e+03 3.2921813 2.181246e+00 0.0291652 2.071971e+00 8.336874e+05
Cerah Berawan ddd_carBarat Daya 6.346210e+06 0.0000026 6.092853e+06 0.0000000 6.346178e+06 6.346242e+06
Cerah Berawan ddd_carBarat Laut 8.527193e+19 NaN NaN NaN NaN NaN
Cerah Berawan ddd_carSelatan 2.516967e+45 0.0000000 1.286604e+14 0.0000000 2.516967e+45 2.516967e+45
Cerah Berawan ddd_carTenang (Calm) 7.423350e-02 4.1520844 -6.263215e-01 0.5311041 2.170000e-05 2.540158e+02
Cerah Berawan ddd_carTimur 2.952980e+03 4.9296845 1.620909e+00 0.1050372 1.879773e-01 4.638905e+07
Cerah Berawan ddd_carTimur Laut 7.379105e+22 2.5609508 2.056093e+01 0.0000000 4.876615e+20 1.116578e+25
Gerimis (Intercept) 0.000000e+00 0.4942425 -6.381032e+02 0.0000000 0.000000e+00 0.000000e+00
Gerimis RR 1.041894e+14 2.2895492 1.409764e+01 0.0000000 1.172076e+12 9.261717e+15
Gerimis Tavg 4.736404e+01 1.2490025 3.088755e+00 0.0020100 4.095396e+00 5.477743e+02
Gerimis RH_avg 5.355941e+00 0.6015901 2.789618e+00 0.0052770 1.647257e+00 1.741447e+01
Gerimis ss 2.129545e+02 2.0442823 2.622475e+00 0.0087294 3.874277e+00 1.170532e+04
Gerimis ddd_x 1.103757e+00 0.0927411 1.064468e+00 0.2871168 9.203057e-01 1.323777e+00
Gerimis ff_avg 2.517780e+04 3.2338605 3.133629e+00 0.0017266 4.449904e+01 1.424573e+07
Gerimis ddd_carBarat Daya 1.259982e+13 4.0883021 7.378296e+00 0.0000000 4.172489e+09 3.804814e+16
Gerimis ddd_carBarat Laut 6.400567e+00 NaN NaN NaN NaN NaN
Gerimis ddd_carSelatan 0.000000e+00 0.0000000 -2.874590e+16 0.0000000 0.000000e+00 0.000000e+00
Gerimis ddd_carTenang (Calm) 2.436192e+02 5.6522086 9.722936e-01 0.3309045 3.763100e-03 1.577176e+07
Gerimis ddd_carTimur 1.367674e+06 5.0664194 2.788680e+00 0.0052923 6.659455e+01 2.808838e+10
Gerimis ddd_carTimur Laut 1.322452e+26 2.9703254 2.024920e+01 0.0000000 3.917729e+23 4.464013e+28
Hujan Lebat (Intercept) 9.064082e+01 0.0296593 1.519556e+02 0.0000000 8.552198e+01 9.606604e+01
Hujan Lebat RR 7.365659e+20 2.3069598 2.082764e+01 0.0000000 8.007996e+18 6.774846e+22
Hujan Lebat Tavg 1.647892e+01 2.5683620 1.091000e+00 0.2752730 1.073335e-01 2.530011e+03
Hujan Lebat RH_avg 2.322360e-02 1.0364578 -3.630237e+00 0.0002832 3.045800e-03 1.770776e-01
Hujan Lebat ss 1.942250e-01 1.3715424 -1.194814e+00 0.2321597 1.320830e-02 2.856039e+00
Hujan Lebat ddd_x 1.206104e+00 0.1999834 9.370533e-01 0.3487311 8.150017e-01 1.784887e+00
Hujan Lebat ff_avg 6.900000e-06 0.1527412 -7.780139e+01 0.0000000 5.100000e-06 9.300000e-06
Hujan Lebat ddd_carBarat Daya 1.634459e+23 0.0046794 1.142264e+04 0.0000000 1.619537e+23 1.649518e+23
Hujan Lebat ddd_carBarat Laut 2.229778e+12 0.0000000 1.491899e+27 0.0000000 2.229778e+12 2.229778e+12
Hujan Lebat ddd_carSelatan 1.067320e-02 NaN NaN NaN NaN NaN
Hujan Lebat ddd_carTenang (Calm) 2.833228e+00 0.7200695 1.446272e+00 0.1481008 6.908078e-01 1.161999e+01
Hujan Lebat ddd_carTimur 0.000000e+00 0.1516557 -1.705064e+02 0.0000000 0.000000e+00 0.000000e+00
Hujan Lebat ddd_carTimur Laut 2.695141e+51 0.0000000 1.817576e+52 0.0000000 2.695141e+51 2.695141e+51
Hujan Sedang (Intercept) 0.000000e+00 0.2839951 -4.164417e+02 0.0000000 0.000000e+00 0.000000e+00
Hujan Sedang RR 8.786949e+16 2.5434295 1.533938e+01 0.0000000 6.009897e+14 1.284722e+19
Hujan Sedang Tavg 8.555627e-01 1.6158634 -9.654030e-02 0.9230915 3.604350e-02 2.030847e+01
Hujan Sedang RH_avg 2.125539e+00 0.6986036 1.079333e+00 0.2804395 5.405261e-01 8.358371e+00
Hujan Sedang ss 1.536756e+02 2.1447370 2.347535e+00 0.0188981 2.296157e+00 1.028510e+04
Hujan Sedang ddd_x 1.099269e+00 0.0956769 9.892163e-01 0.3225573 9.113045e-01 1.326002e+00
Hujan Sedang ff_avg 1.562607e+00 4.8699014 9.165600e-02 0.9269713 1.118000e-04 2.183320e+04
Hujan Sedang ddd_carBarat Daya 1.560641e+15 6.6766896 5.239703e+00 0.0000002 3.236642e+09 7.525088e+20
Hujan Sedang ddd_carBarat Laut 0.000000e+00 0.0000000 -2.387190e+73 0.0000000 0.000000e+00 0.000000e+00
Hujan Sedang ddd_carSelatan 1.271554e+39 0.0000000 7.011979e+26 0.0000000 1.271554e+39 1.271554e+39
Hujan Sedang ddd_carTenang (Calm) 1.487030e-02 8.6659637 -4.856225e-01 0.6272348 0.000000e+00 3.538362e+05
Hujan Sedang ddd_carTimur 4.091328e+06 6.1156506 2.489413e+00 0.0127954 2.548080e+01 6.569246e+11
Hujan Sedang ddd_carTimur Laut 2.366956e+27 0.5530772 1.139649e+02 0.0000000 8.005899e+26 6.997943e+27
# Simpan hasil tidy model ke dalam CSV
output_tidy <- tidy(fit_full, conf.int = TRUE, exponentiate = TRUE) 
## Warning in sqrt(diag(vc)): NaNs produced
## Warning in sqrt(diag(vc)): NaNs produced
write.csv(output_tidy, "Interpret for the relative risk ratios.csv", row.names = FALSE)
levels(factor(data$ddd_car))
## [1] "Barat"         "Barat Daya"    "Barat Laut"    "Selatan"      
## [5] "Tenang (Calm)" "Timur"         "Timur Laut"
# ---------------- EVALUASI MLR ----------------
#library yang diperlukan
library(nnet)       # Untuk multinom()
#melakukan prediksi terhadap data
mlr_pred <- predict(fit_full, newdata = data_clean)
data_clean$MLR_Predicted <- mlr_pred
#membuat confusion matrix antara prediksi dan label asli
conf_matrix <- table(Predicted = mlr_pred,
                     Actual = data_clean$Kategori_Cuaca)
print(conf_matrix)
##                Actual
## Predicted       Berawan Cerah Cerah Berawan Gerimis Hujan Lebat Hujan Sedang
##   Berawan             6     0             0       0           0            0
##   Cerah               0    55             7       2           0            0
##   Cerah Berawan       0     4            53       0           0            0
##   Gerimis             0     0             0      26           0            0
##   Hujan Lebat         0     0             0       0          17            0
##   Hujan Sedang        0     0             0       0           0           37
#menghitung akurasi klasifikasi dari model
accuracy <- mean(mlr_pred == data_clean$Kategori_Cuaca)
cat("\nAkurasi klasifikasi MLR:", round(accuracy * 100, 2), "%\n")
## 
## Akurasi klasifikasi MLR: 93.72 %
#Membuat dataframe berisi skor diskriminan dan label asli
mlr_scores <- data.frame(mlr_pred)
mlr_scores$Kategori_Cuaca <- data_clean$Kategori_Cuaca



#----------------LDA---------------
# =======================
# LDA Multikelas Cuaca
# =======================
# Pastikan library MASS tersedia
if (!require(MASS)) install.packages("MASS")
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(MASS)
# Pastikan juga variabel kategorikal (ddd_car dan Kategori_Cuaca) bertipe faktor
data_clean$ddd_car <- as.factor(data_clean$ddd_car)
data_clean$Kategori_Cuaca <- as.factor(data_clean$Kategori_Cuaca)
# LDA menggunakan semua variabel prediktor yang diminta
lda_model <- lda(Kategori_Cuaca ~ Tavg + RH_avg + RR + ss + ddd_x + ff_avg + ddd_car, data = data_clean)
# Ringkasan model
print(lda_model)
## Call:
## lda(Kategori_Cuaca ~ Tavg + RH_avg + RR + ss + ddd_x + ff_avg + 
##     ddd_car, data = data_clean)
## 
## Prior probabilities of groups:
##       Berawan         Cerah Cerah Berawan       Gerimis   Hujan Lebat 
##    0.02898551    0.28502415    0.28985507    0.13526570    0.08212560 
##  Hujan Sedang 
##    0.17874396 
## 
## Group means:
##                   Tavg   RH_avg         RR       ss     ddd_x   ff_avg
## Berawan       28.25000 80.66667  0.0000000 2.633333 108.33333 2.166667
## Cerah         29.19661 68.69492  0.0000000 8.071186  90.89831 2.847458
## Cerah Berawan 28.93167 76.50000  0.0000000 7.650000 108.50000 2.583333
## Gerimis       28.79643 82.07143  0.9678571 5.075000 162.50000 2.107143
## Hujan Lebat   28.50000 82.94118 31.5000000 4.805882 162.35294 1.941176
## Hujan Sedang  28.73514 82.21622 10.1621622 5.172973 172.70270 2.108108
##               ddd_carBarat Daya ddd_carBarat Laut ddd_carSelatan
## Berawan              0.16666667        0.00000000     0.00000000
## Cerah                0.01694915        0.00000000     0.00000000
## Cerah Berawan        0.00000000        0.00000000     0.01666667
## Gerimis              0.14285714        0.00000000     0.00000000
## Hujan Lebat          0.17647059        0.05882353     0.00000000
## Hujan Sedang         0.13513514        0.00000000     0.02702703
##               ddd_carTenang (Calm) ddd_carTimur ddd_carTimur Laut
## Berawan                 0.33333333    0.3333333        0.00000000
## Cerah                   0.03389831    0.8305085        0.11864407
## Cerah Berawan           0.05000000    0.6833333        0.11666667
## Gerimis                 0.28571429    0.3214286        0.07142857
## Hujan Lebat             0.17647059    0.2352941        0.00000000
## Hujan Sedang            0.21621622    0.2972973        0.02702703
## 
## Coefficients of linear discriminants:
##                               LD1          LD2          LD3          LD4
## Tavg                  0.229325211  0.432202376 -0.230089387 -0.350835576
## RH_avg                0.087271730  0.200434166 -0.149675103  0.041040283
## RR                    0.284937442 -0.107048505 -0.014890439 -0.008025864
## ss                   -0.146713392 -0.162785938 -0.377516773 -0.073061535
## ddd_x                 0.002611645  0.001011126 -0.001934972 -0.015235921
## ff_avg               -0.170157592 -0.054360293 -0.217155274  0.372316139
## ddd_carBarat Daya     0.790433879 -0.010280672  1.060621351 -1.414675873
## ddd_carBarat Laut     1.987601945 -2.592134308  0.631015334  2.905221064
## ddd_carSelatan        1.581409782  1.746998802 -2.413638672  0.097271736
## ddd_carTenang (Calm) -0.434772762  0.124862949  1.095778244 -1.254907690
## ddd_carTimur          0.068728399 -0.268408114 -0.573257075 -1.484483318
## ddd_carTimur Laut     0.225484945 -0.075320546 -1.046307996 -1.947796412
##                               LD5
## Tavg                 -0.227119208
## RH_avg               -0.041359258
## RR                    0.008470898
## ss                   -0.062862919
## ddd_x                 0.000195861
## ff_avg                0.029015935
## ddd_carBarat Daya    -1.406425801
## ddd_carBarat Laut    -9.029519385
## ddd_carSelatan        5.200024780
## ddd_carTenang (Calm) -1.309853488
## ddd_carTimur         -0.911858689
## ddd_carTimur Laut    -1.928780916
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4    LD5 
## 0.8476 0.1254 0.0202 0.0052 0.0015
# Lakukan prediksi menggunakan model LDA
lda_pred <- predict(lda_model)
# Plot hasil proyeksi ke LD1 dan LD2
plot(lda_pred$x[, 1:2],
     col = as.numeric(data_clean$Kategori_Cuaca),
     pch = 19,
     xlab = "LD1", ylab = "LD2",
     main = "LDA Scatter Plot")
legend("topright", legend = levels(data_clean$Kategori_Cuaca),
       col = 1:length(levels(data_clean$Kategori_Cuaca)), pch = 19)

#----------------Evaluasi----------------
#melakukan prediksi kelas berdasarkan model LDA
lda_pred <- predict(lda_model, newdata = data_clean)
data_clean$LDA_Predicted <- lda_pred$class
#membuat confusion matrix antara prediksi dan label asli
conf_matrix <- table(Predicted = lda_pred$class,
                     Actual = data_clean$Kategori_Cuaca)
print(conf_matrix)
##                Actual
## Predicted       Berawan Cerah Cerah Berawan Gerimis Hujan Lebat Hujan Sedang
##   Berawan             2     0             0       2           0            1
##   Cerah               0    53             8       0           0            0
##   Cerah Berawan       1     6            49       8           0            2
##   Gerimis             3     0             3      18           0            5
##   Hujan Lebat         0     0             0       0          16            0
##   Hujan Sedang        0     0             0       0           1           29
#menghitung akurasi klasifikasi dari model
accuracy <- mean(lda_pred$class == data_clean$Kategori_Cuaca)
cat("\nAkurasi klasifikasi LDA:", round(accuracy * 100, 2), "%\n")
## 
## Akurasi klasifikasi LDA: 80.68 %
manova_model <- manova(cbind(Tavg, RH_avg, RR, ss, ddd_x, ff_avg, as.numeric(ddd_car)) ~ Kategori_Cuaca, data = data_clean)

# Summary dengan Wilks' Lambda
summary(manova_model, test = "Wilks")
##                 Df    Wilks approx F num Df den Df    Pr(>F)    
## Kategori_Cuaca   5 0.035282   28.548     35 822.72 < 2.2e-16 ***
## Residuals      201                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#----------------PERCEPTUAL MAPPINNG---------------
# Prediksi dan Konfusi untuk LDA
# Prediksi menggunakan LDA
lda_pred <- predict(lda_model, data_clean)
lda_class <- lda_pred$class
# Ubah ke angka untuk MDS
lda_dist <- dist(scale(as.matrix(lda_pred$x[, 1:2])))
lda_mds <- cmdscale(lda_dist)
# Dataframe untuk plot LDA
lda_df <- data.frame(
  Dim1 = lda_mds[, 1],
  Dim2 = lda_mds[, 2],
  Predicted = lda_class
)

# Prediksi dan Konfusi untuk MLR
# Prediksi menggunakan Multinomial Logistic Regression
mlr_pred <- predict(fit_full, newdata = data_clean)
mlr_prob <- predict(fit_full, newdata = data_clean, type = "probs")
# Gunakan probabilitas (jarak antar prediksi) untuk MDS
mlr_dist <- dist(scale(as.matrix(mlr_prob)))
mlr_mds <- cmdscale(mlr_dist)
# Dataframe untuk plot MLR
mlr_df <- data.frame(
  Dim1 = mlr_mds[, 1],
  Dim2 = mlr_mds[, 2],
  Predicted = mlr_pred
)


#Plot Perceptual Mapping
library(ggplot2)
# Plot MDS LDA
plot_lda <- ggplot(lda_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(size = 2) +
  labs(title = "MDS Mapping Berdasarkan Hasil Klasifikasi LDA") +
  theme_minimal()
# Plot MDS MLR
plot_mlr <- ggplot(mlr_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(size = 2) +
  labs(title = "MDS Mapping Berdasarkan Hasil Multinomial Logistic Regression") +
  theme_minimal()
# Tampilkan dua plot secara bersamaan
library(gridExtra)
grid.arrange(plot_lda, plot_mlr, ncol = 2)