1 LOAD DATA

Dataset yang digunakan dalam proyek ini berasal dari Kaggle dengan judul “Denpasar Bali Historical Weather Data”, yang berisi data cuaca historis Kota Denpasar dari tahun 1990 hingga 2020. Namun, untuk kebutuhan proyek ini, fokus klasifikasi diarahkan pada empat kategori utama yang relevan dengan kondisi cuaca Denpasar tahun 2019, yaitu Clouds, Rain, Thunderstorm, dan Clear.

library(readr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
data_path <- "D:/TUGAS KAMPUS SEMESTER 4/ANALISIS MULTIVARIAT/PROYEK AKHIR ANMUL/openweatherdata-denpasar-1990-2020.csv"
data <- read_csv(data_path)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 264924 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): dt_iso, city_name, weather_main, weather_description, weather_icon
## dbl (17): dt, timezone, lat, lon, temp, temp_min, temp_max, pressure, humidi...
## lgl (10): sea_level, grnd_level, rain_12h, rain_today, snow_1h, snow_3h, sno...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$dt_iso <- gsub(" UTC", "", data$dt_iso)
data$dt_iso <- as_datetime(data$dt_iso)
data_filtered <- data %>% filter(year(dt_iso) == 2019)

if (nrow(data_filtered) == 0) stop("Tidak ada data untuk tahun 2019!")

head(data_filtered)
## # A tibble: 6 × 32
##           dt dt_iso              timezone city_name   lat   lon  temp temp_min
##        <dbl> <dttm>                 <dbl> <chr>     <dbl> <dbl> <dbl>    <dbl>
## 1 1546300800 2019-01-01 00:00:00    28800 Denpasar  -8.65  115.  27.1     27  
## 2 1546304400 2019-01-01 01:00:00    28800 Denpasar  -8.65  115.  27.8     27.4
## 3 1546308000 2019-01-01 02:00:00    28800 Denpasar  -8.65  115.  29.5     29  
## 4 1546311600 2019-01-01 03:00:00    28800 Denpasar  -8.65  115.  30.1     30  
## 5 1546315200 2019-01-01 04:00:00    28800 Denpasar  -8.65  115.  30.2     30  
## 6 1546318800 2019-01-01 05:00:00    28800 Denpasar  -8.65  115.  30       30  
## # ℹ 24 more variables: temp_max <dbl>, pressure <dbl>, sea_level <lgl>,
## #   grnd_level <lgl>, humidity <dbl>, wind_speed <dbl>, wind_deg <dbl>,
## #   rain_1h <dbl>, rain_3h <dbl>, rain_6h <dbl>, rain_12h <lgl>,
## #   rain_24h <dbl>, rain_today <lgl>, snow_1h <lgl>, snow_3h <lgl>,
## #   snow_6h <lgl>, snow_12h <lgl>, snow_24h <lgl>, snow_today <lgl>,
## #   clouds_all <dbl>, weather_id <dbl>, weather_main <chr>,
## #   weather_description <chr>, weather_icon <chr>

2 EXPLORATORY DATA ANALYSIS

2.1 Hitung Missing Values

cat("Jumlah baris:", nrow(data_filtered), "\n")
## Jumlah baris: 8822
cat("Jumlah kolom:", ncol(data_filtered), "\n")
## Jumlah kolom: 32
na_summary <- data.frame(Kolom = names(data_filtered),
                         Jumlah_NA = colSums(is.na(data_filtered))) %>%
  arrange(desc(Jumlah_NA))
na_summary
##                                   Kolom Jumlah_NA
## sea_level                     sea_level      8822
## grnd_level                   grnd_level      8822
## rain_12h                       rain_12h      8822
## rain_today                   rain_today      8822
## snow_1h                         snow_1h      8822
## snow_3h                         snow_3h      8822
## snow_6h                         snow_6h      8822
## snow_12h                       snow_12h      8822
## snow_24h                       snow_24h      8822
## snow_today                   snow_today      8822
## rain_1h                         rain_1h      8813
## rain_24h                       rain_24h      8543
## rain_6h                         rain_6h      8541
## rain_3h                         rain_3h      8275
## dt                                   dt         0
## dt_iso                           dt_iso         0
## timezone                       timezone         0
## city_name                     city_name         0
## lat                                 lat         0
## lon                                 lon         0
## temp                               temp         0
## temp_min                       temp_min         0
## temp_max                       temp_max         0
## pressure                       pressure         0
## humidity                       humidity         0
## wind_speed                   wind_speed         0
## wind_deg                       wind_deg         0
## clouds_all                   clouds_all         0
## weather_id                   weather_id         0
## weather_main               weather_main         0
## weather_description weather_description         0
## weather_icon               weather_icon         0
freq_weather <- table(data_filtered$weather_main, useNA = "ifany")
weather_summary <- as.data.frame(freq_weather)
colnames(weather_summary) <- c("Weather Main", "Jumlah")
weather_summary
##   Weather Main Jumlah
## 1        Clear     22
## 2       Clouds   8075
## 3         Haze      5
## 4         Mist      3
## 5         Rain    571
## 6        Smoke      3
## 7 Thunderstorm    143

3 PREPROCESSING

3.1 Filter Kolom Target

data_filtered <- data_filtered %>%
  select(temp, temp_min, temp_max, pressure, humidity,
         wind_speed, wind_deg, clouds_all, weather_main) %>%
  filter(weather_main %in% c("Clouds", "Rain", "Thunderstorm", "Clear"))

if (length(unique(data_filtered$weather_main)) < 2) {
  stop("Kolom weather_main hanya memiliki satu kategori setelah filter. Tidak bisa dilakukan split!")
}

data_filtered$weather_main <- as.factor(data_filtered$weather_main)
data_filtered$weather_main <- as.integer(data_filtered$weather_main) - 1

is.ordered(data_filtered$weather_main)
## [1] FALSE
str(data_filtered)
## tibble [8,811 × 9] (S3: tbl_df/tbl/data.frame)
##  $ temp        : num [1:8811] 27.1 27.8 29.5 30.1 30.1 ...
##  $ temp_min    : num [1:8811] 27 27.4 29 30 30 30 30 29 29 29 ...
##  $ temp_max    : num [1:8811] 27.4 28 30.4 30.4 30.4 30 30 30 29.4 29.4 ...
##  $ pressure    : num [1:8811] 1010 1010 1010 1010 1009 ...
##  $ humidity    : num [1:8811] 94 88 83 79 79 79 74 79 83 79 ...
##  $ wind_speed  : num [1:8811] 0.5 2.1 3.1 4.1 4.1 5.1 4.6 5.7 5.7 5.7 ...
##  $ wind_deg    : num [1:8811] 0 240 240 260 260 250 260 260 260 270 ...
##  $ clouds_all  : num [1:8811] 40 20 20 20 20 20 20 20 20 20 ...
##  $ weather_main: num [1:8811] 2 2 1 1 1 1 1 1 1 1 ...
unique(data_filtered$weather_main)
## [1] 2 1 0 3

3.2 Penanganan Missing Values

data_filtered <- data_filtered %>%
  mutate(across(where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

cat("Jumlah nilai NA setelah imputasi (dengan mean):\n")
## Jumlah nilai NA setelah imputasi (dengan mean):
print(colSums(is.na(data_filtered)))
##         temp     temp_min     temp_max     pressure     humidity   wind_speed 
##            0            0            0            0            0            0 
##     wind_deg   clouds_all weather_main 
##            0            0            0

3.3 Split Data

library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
data_filtered$weather_main <- as.factor(data_filtered$weather_main)
split_index <- createDataPartition(data_filtered$weather_main, p = 0.7, list = FALSE)
train_data <- data_filtered[split_index, ]
test_data <- data_filtered[-split_index, ]
cat("Jumlah data Train :", nrow(train_data), "\n")
## Jumlah data Train : 6170
cat("Jumlah data Test  :", nrow(test_data), "\n")
## Jumlah data Test  : 2641

3.4 Penyeimbangan Data

bisa dilihat berdasarkan distribusi pada kolom target, terdapat ketidakseimbangan data maka dilakukan penangan imbalaced data. penganan ini dilakukan pada data train.

if (!require(smotefamily)) install.packages("smotefamily")
## Loading required package: smotefamily
## Warning: package 'smotefamily' was built under R version 4.4.3
library(smotefamily)

set.seed(123)
n_total_train <- nrow(train_data)
n_target_final <- n_total_train * 2
n_classes <- length(levels(train_data$weather_main))
n_per_class <- floor(n_target_final / n_classes)

list_per_class <- split(train_data, train_data$weather_main)
balanced_list <- list()

for (cls in names(list_per_class)) {
  data_cls <- list_per_class[[cls]]
  n_cls <- nrow(data_cls)
  balanced_list[[cls]] <- if (n_cls > n_per_class) {
    slice_sample(data_cls, n = n_per_class)
  } else {
    slice_sample(data_cls, n = n_per_class, replace = TRUE)
  }
}
train_balanced <- bind_rows(balanced_list)

cat("Distribusi kelas setelah balancing di train_data:\n")
## Distribusi kelas setelah balancing di train_data:
table(train_balanced$weather_main)
## 
##    0    1    2    3 
## 3085 3085 3085 3085
cat("Total data train setelah balancing:", nrow(train_balanced), "\n")
## Total data train setelah balancing: 12340
cat("Distribusi kelas di test_data (tidak diubah):\n")
## Distribusi kelas di test_data (tidak diubah):
table(test_data$weather_main)
## 
##    0    1    2    3 
##    6 2422  171   42

4 Uji Asumsi (VIF dan Chi-square)

4.1 Uji VIF

library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
temp_vars <- dplyr::select(train_balanced, temp, temp_min, temp_max)
pca_temp <- prcomp(temp_vars, scale. = TRUE)
pc_df <- as.data.frame(pca_temp$x[,1, drop=FALSE])
colnames(pc_df) <- "pca_temp"

train_balanced_pca <- train_balanced %>%
  dplyr::select(-temp, -temp_min, -temp_max) %>%
  bind_cols(pc_df)

lm_model <- lm(as.numeric(weather_main) ~ ., data = train_balanced_pca)
vif_values <- vif(lm_model)
vif_values
##   pressure   humidity wind_speed   wind_deg clouds_all   pca_temp 
##   1.319953   2.649630   1.193450   1.106895   1.411245   2.327425

4.2 Uji Chi-Square

train_balanced_pca$weather_main <- as.factor(train_balanced_pca$weather_main)
num_vars <- names(train_balanced_pca)[sapply(train_balanced_pca, is.numeric)]
chi_sq_test <- function(varname, data, target) {
  binned_var <- cut(data[[varname]], breaks = 5, include.lowest = TRUE)
  contingency_table <- table(binned_var, data[[target]])
  if (all(dim(contingency_table) > 1)) {
    chisq.test(contingency_table)$p.value
  } else NA
}

chi_results <- sapply(num_vars, chi_sq_test, data = train_balanced_pca, target = "weather_main")
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
chi_df <- data.frame(Variable = names(chi_results), P_Value = chi_results)
chi_df
##              Variable       P_Value
## pressure     pressure 2.742739e-305
## humidity     humidity  0.000000e+00
## wind_speed wind_speed  0.000000e+00
## wind_deg     wind_deg  0.000000e+00
## clouds_all clouds_all  0.000000e+00
## pca_temp     pca_temp 1.094242e-219

5 NORMALISASI

feature_cols <- c("temp", "temp_min", "temp_max", "pressure", "humidity", "wind_speed", "wind_deg", "clouds_all")
min_max_norm <- function(x) (x - min(x)) / (max(x) - min(x))

train_balanced_norm <- train_balanced
train_balanced_norm[feature_cols] <- lapply(train_balanced[feature_cols], min_max_norm)

test_data_norm <- test_data
for (col in feature_cols) {
  min_val <- min(train_balanced[[col]])
  max_val <- max(train_balanced[[col]])
  test_data_norm[[col]] <- (test_data[[col]] - min_val) / (max_val - min_val)
}

summary(train_balanced_norm)
##       temp           temp_min         temp_max         pressure     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3494   1st Qu.:0.3750   1st Qu.:0.3226   1st Qu.:0.2727  
##  Median :0.4469   Median :0.4792   Median :0.4355   Median :0.4545  
##  Mean   :0.4610   Mean   :0.4820   Mean   :0.4532   Mean   :0.4463  
##  3rd Qu.:0.5697   3rd Qu.:0.5536   3rd Qu.:0.5645   3rd Qu.:0.6364  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     humidity        wind_speed        wind_deg        clouds_all    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5417   1st Qu.:0.1301   1st Qu.:0.3056   1st Qu.:0.1000  
##  Median :0.6458   Median :0.2841   Median :0.3889   Median :0.4000  
##  Mean   :0.6566   Mean   :0.2719   Mean   :0.4819   Mean   :0.3479  
##  3rd Qu.:0.8750   3rd Qu.:0.3610   3rd Qu.:0.7500   3rd Qu.:0.4000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  weather_main
##  0:3085      
##  1:3085      
##  2:3085      
##  3:3085      
##              
## 
summary(test_data_norm)
##       temp             temp_min           temp_max           pressure     
##  Min.   :-0.04834   Min.   :-0.04167   Min.   :-0.08065   Min.   :0.0000  
##  1st Qu.: 0.35578   1st Qu.: 0.39732   1st Qu.: 0.33065   1st Qu.:0.3636  
##  Median : 0.45325   Median : 0.47917   Median : 0.46774   Median :0.5455  
##  Mean   : 0.47085   Mean   : 0.49226   Mean   : 0.46185   Mean   :0.5318  
##  3rd Qu.: 0.58162   3rd Qu.: 0.59077   3rd Qu.: 0.56452   3rd Qu.:0.6636  
##  Max.   : 0.97464   Max.   : 0.96280   Max.   : 1.00000   Max.   :1.0909  
##     humidity        wind_speed          wind_deg        clouds_all    
##  Min.   :0.0625   Min.   :0.006159   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5417   1st Qu.:0.130100   1st Qu.:0.2778   1st Qu.:0.2000  
##  Median :0.6458   Median :0.284065   Median :0.3333   Median :0.2000  
##  Mean   :0.6314   Mean   :0.275867   Mean   :0.4256   Mean   :0.3152  
##  3rd Qu.:0.7500   3rd Qu.:0.407236   3rd Qu.:0.6944   3rd Qu.:0.4000  
##  Max.   :1.0000   Max.   :1.038491   Max.   :1.0000   Max.   :1.0000  
##  weather_main
##  0:   6      
##  1:2422      
##  2: 171      
##  3:  42      
##              
## 

6 VISUALISASI AWAL

6.1 Visualiasi Proporsi Kategori Cuaca Sebelum Penangan Imbalanced

library(ggplot2)
library(dplyr)
train_weather_summary_1 <- train_data %>%
  group_by(weather_main) %>%
  summarise(Jumlah = n()) %>%
  arrange(desc(Jumlah))

train_weather_summary_1$Proporsi <- train_weather_summary_1$Jumlah / sum(train_weather_summary_1$Jumlah)

ggplot(train_weather_summary_1, aes(x = reorder(weather_main, -Jumlah), y = Jumlah, fill = weather_main)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  coord_flip() +
  labs(title = "Distribusi Kategori Cuaca pada Data Train (Sebelum Balanced)",
       x = "Kategori Cuaca", y = "Jumlah")

6.2 Visualiasi Proporsi Kategori Cuaca Setelah Penangan Imbalanced

library(ggplot2)
library(dplyr)
train_weather_summary <- train_balanced %>%
  group_by(weather_main) %>%
  summarise(Jumlah = n()) %>%
  arrange(desc(Jumlah))

train_weather_summary$Proporsi <- train_weather_summary$Jumlah / sum(train_weather_summary$Jumlah)

ggplot(train_weather_summary, aes(x = reorder(weather_main, -Jumlah), y = Jumlah, fill = weather_main)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  coord_flip() +
  labs(title = "Distribusi Kategori Cuaca pada Data Train (Balanced)",
       x = "Kategori Cuaca", y = "Jumlah")

6.3 Visualisasi Korelasi(Sebelum PCA)

library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
correlation_matrix <- cor(train_balanced_norm[, feature_cols])
corrplot(correlation_matrix, method = "color", type = "upper", tl.col = "black", 
         addCoef.col = "black", number.cex = 0.7, diag = FALSE)

7 PEMODELAN - Multinomial Logistic Regression + PCA

karena fitur temp, temp_min, & temp_max mempunyai korelasi yang sangat tinggi, maka saya lakukan PCA kepada ketiga fitur menjadi 1 fitur (pca_temp).

library(nnet)

temp_vars_train <- train_balanced_norm %>% dplyr::select(temp, temp_min, temp_max)
pca_temp <- prcomp(temp_vars_train, scale. = TRUE)
pc1_train <- as.data.frame(pca_temp$x[,1, drop=FALSE])
colnames(pc1_train) <- "pca_temp"

train_mlr <- train_balanced_norm %>%
  dplyr::select(-temp, -temp_min, -temp_max) %>%
  bind_cols(pc1_train)

model_mlr <- multinom(weather_main ~ ., data = train_mlr, trace = FALSE)

temp_vars_test <- test_data_norm %>% dplyr::select(temp, temp_min, temp_max)
pc1_test_mat <- predict(pca_temp, newdata = temp_vars_test)[, 1, drop = FALSE]
pc1_test_df <- as.data.frame(pc1_test_mat)
colnames(pc1_test_df) <- "pca_temp"

test_mlr <- test_data_norm %>%
  dplyr::select(-temp, -temp_min, -temp_max) %>%
  bind_cols(pc1_test_df)
test_mlr$weather_main <- as.factor(test_mlr$weather_main)

pred_mlr <- predict(model_mlr, test_mlr)
conf_matrix_mlr <- confusionMatrix(pred_mlr, test_mlr$weather_main)

conf_matrix_mlr
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1    2    3
##          0    4    2    0    0
##          1    2 1873   34    9
##          2    0  257   82   14
##          3    0  290   55   19
## 
## Overall Statistics
##                                          
##                Accuracy : 0.749          
##                  95% CI : (0.732, 0.7654)
##     No Information Rate : 0.9171         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.2231         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 0 Class: 1 Class: 2 Class: 3
## Sensitivity          0.666667   0.7733  0.47953 0.452381
## Specificity          0.999241   0.7945  0.89028 0.867257
## Pos Pred Value       0.666667   0.9765  0.23229 0.052198
## Neg Pred Value       0.999241   0.2407  0.96110 0.989899
## Prevalence           0.002272   0.9171  0.06475 0.015903
## Detection Rate       0.001515   0.7092  0.03105 0.007194
## Detection Prevalence 0.002272   0.7262  0.13366 0.137827
## Balanced Accuracy    0.832954   0.7839  0.68491 0.659819

8 VISUALIASI AKHIR

8.1 Visualiasi Korelasi Setelah PCA

library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
library(corrplot)

cor_matrix <- cor(train_mlr %>% dplyr::select(where(is.numeric)))

cor_df <- melt(cor_matrix)
ggplot(cor_df, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limit = c(-1,1), space = "Lab",
                       name = "Korelasi") +
  theme_minimal() +
  labs(title = "Heatmap Korelasi Setelah PCA Suhu") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

corrplot(cor_matrix, method = "color", type = "upper", 
         tl.col = "black", tl.srt = 45, addCoef.col = "black",
         title = "Korelasi Fitur Setelah PCA Suhu", mar=c(0,0,1,0))

8.2 Visualiasi Akurasi Perkelas

class_accuracy <- diag(conf_matrix_mlr$table) / rowSums(conf_matrix_mlr$table)
class_accuracy_df <- data.frame(Kelas = names(class_accuracy),
                                Akurasi = round(class_accuracy, 3))

ggplot(class_accuracy_df, aes(x = reorder(Kelas, -Akurasi), y = Akurasi, fill = Kelas)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Akurasi), vjust = -0.3, size = 3.5) +
  theme_minimal() +
  labs(title = "Akurasi per Kelas Cuaca",
       x = "Kategori Cuaca", y = "Akurasi") +
  ylim(0, 1)

Grafik menunjukkan bahwa model memiliki akurasi tinggi pada kelas cuaca 1 (97,7%) dan 0 (85,7%), namun sangat rendah pada kelas 2 (21,4%) dan 3 (7,8%). Hal ini mengindikasikan bahwa model cenderung hanya mampu mengenali cuaca yang umum atau dominan, sementara gagal membedakan kelas cuaca yang lebih jarang.

8.3 Distribusi PCA Suhu berdasarkan Target

ggplot(train_mlr, aes(x = weather_main, y = pca_temp, fill = weather_main)) +
  geom_boxplot() +
  labs(title = "Distribusi PCA Suhu berdasarkan Weather",
       x = "Weather Main", y = "PCA Temp (Komponen 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")

Boxplot di atas menunjukkan distribusi nilai PCA dari suhu (gabungan temp, temp_min, dan temp_max) berdasarkan kategori cuaca utama (weather_main). Terlihat bahwa nilai tengah (median) untuk masing-masing kategori cuaca berada di sekitar nol, menandakan tidak ada perbedaan ekstrem antar kelompok.

8.4 Confusion Matrix Heatmap

library(ggplot2)
library(caret)
library(reshape2)

cm_df <- as.data.frame(conf_matrix_mlr$table)

ggplot(cm_df, aes(x = Reference, y = Prediction, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = Freq), size = 4) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(title = "Confusion Matrix Multinomial Logistic Regression",
       x = "Actual (Reference)",
       y = "Predicted") +
  theme_minimal()

8.5 Visualisasi MDS

test_features_pca <- test_mlr %>% dplyr::select(-weather_main)

dist_matrix_pca <- dist(test_features_pca)
mds_coords_pca <- cmdscale(dist_matrix_pca, k = 2) %>% as.data.frame()
colnames(mds_coords_pca) <- c("Dim1", "Dim2")

mds_coords_pca$Predicted <- pred_mlr
mds_coords_pca$Actual <- test_mlr$weather_main

ggplot(mds_coords_pca, aes(x = Dim1, y = Dim2, color = Predicted, shape = Actual)) +
  geom_point(alpha = 0.7, size = 3) +
  labs(title = "MDS Plot - MLR dengan PCA pada Suhu",
       x = "Dimensi 1", y = "Dimensi 2") +
  theme_minimal()

Plot ini menunjukkan visualisasi data cuaca hasil reduksi dimensi menggunakan MDS berdasarkan hasil PCA suhu dan prediksi model MLR. erlihat bahwa sebagian besar data terkonsentrasi di tengah, terutama label dengan kode 1 (warna hijau), yang menunjukkan bahwa model lebih sering memprediksi kelas tersebut dibandingkan yang lain.

9 REFERENSI

  • [1] I. Qutab, K. I. Malik, and H. Arooj, “Sentiment classification using multinomial logistic regression on Roman Urdu text,” Int. J. Innov. Sci. Technol., vol. 4, no. 2, pp. 323–335, 2022, doi: 10.33411/IJIST/2022040204.
  • [2] N. Anusha, M. Sai Chaithanya, and G. Jithendranath Reddy, “Weather prediction using multi linear regression algorithm,” in IOP Conference Series: Materials Science and Engineering, vol. 590, 2019, Art. no. 012034, doi: 10.1088/1757-899X/590/1/012034.
  • [3] E. Novitasari and A. Sofro, “Analisis regresi multinomial untuk pemodelan faktor penyebab kekerasan dalam rumah tangga,” MATHunesa, vol. 11, no. 1, 2023. [Online]. Available: https://journal.unesa.ac.id/index.php/mathunesa