Dosen Pengampu: Ike Fitriyaningsih, M.Si
Mata Kuliah: Analisis Multivariat
Kelas: Sains Data 2023 E

1 Load Library dan Data

library(readr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
data_path <- "openweatherdata-denpasar-1990-2020.csv"
data <- read_csv(data_path)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 264924 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): dt_iso, city_name, weather_main, weather_description, weather_icon
## dbl (17): dt, timezone, lat, lon, temp, temp_min, temp_max, pressure, humidi...
## lgl (10): sea_level, grnd_level, rain_12h, rain_today, snow_1h, snow_3h, sno...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$dt_iso <- gsub(" UTC", "", data$dt_iso)
data$dt_iso <- as_datetime(data$dt_iso)
data_filtered <- data %>% filter(year(dt_iso) == 2019)

if (nrow(data_filtered) == 0) stop("Tidak ada data untuk tahun 2019!")

head(data_filtered)
## # A tibble: 6 × 32
##           dt dt_iso              timezone city_name   lat   lon  temp temp_min
##        <dbl> <dttm>                 <dbl> <chr>     <dbl> <dbl> <dbl>    <dbl>
## 1 1546300800 2019-01-01 00:00:00    28800 Denpasar  -8.65  115.  27.1     27  
## 2 1546304400 2019-01-01 01:00:00    28800 Denpasar  -8.65  115.  27.8     27.4
## 3 1546308000 2019-01-01 02:00:00    28800 Denpasar  -8.65  115.  29.5     29  
## 4 1546311600 2019-01-01 03:00:00    28800 Denpasar  -8.65  115.  30.1     30  
## 5 1546315200 2019-01-01 04:00:00    28800 Denpasar  -8.65  115.  30.2     30  
## 6 1546318800 2019-01-01 05:00:00    28800 Denpasar  -8.65  115.  30       30  
## # ℹ 24 more variables: temp_max <dbl>, pressure <dbl>, sea_level <lgl>,
## #   grnd_level <lgl>, humidity <dbl>, wind_speed <dbl>, wind_deg <dbl>,
## #   rain_1h <dbl>, rain_3h <dbl>, rain_6h <dbl>, rain_12h <lgl>,
## #   rain_24h <dbl>, rain_today <lgl>, snow_1h <lgl>, snow_3h <lgl>,
## #   snow_6h <lgl>, snow_12h <lgl>, snow_24h <lgl>, snow_today <lgl>,
## #   clouds_all <dbl>, weather_id <dbl>, weather_main <chr>,
## #   weather_description <chr>, weather_icon <chr>

2 Eksplorasi Data

cat("Jumlah baris:", nrow(data_filtered), "\n")
## Jumlah baris: 8822
cat("Jumlah kolom:", ncol(data_filtered), "\n")
## Jumlah kolom: 32
na_summary <- data.frame(Kolom = names(data_filtered),
                         Jumlah_NA = colSums(is.na(data_filtered))) %>%
  arrange(desc(Jumlah_NA))
na_summary
##                                   Kolom Jumlah_NA
## sea_level                     sea_level      8822
## grnd_level                   grnd_level      8822
## rain_12h                       rain_12h      8822
## rain_today                   rain_today      8822
## snow_1h                         snow_1h      8822
## snow_3h                         snow_3h      8822
## snow_6h                         snow_6h      8822
## snow_12h                       snow_12h      8822
## snow_24h                       snow_24h      8822
## snow_today                   snow_today      8822
## rain_1h                         rain_1h      8813
## rain_24h                       rain_24h      8543
## rain_6h                         rain_6h      8541
## rain_3h                         rain_3h      8275
## dt                                   dt         0
## dt_iso                           dt_iso         0
## timezone                       timezone         0
## city_name                     city_name         0
## lat                                 lat         0
## lon                                 lon         0
## temp                               temp         0
## temp_min                       temp_min         0
## temp_max                       temp_max         0
## pressure                       pressure         0
## humidity                       humidity         0
## wind_speed                   wind_speed         0
## wind_deg                       wind_deg         0
## clouds_all                   clouds_all         0
## weather_id                   weather_id         0
## weather_main               weather_main         0
## weather_description weather_description         0
## weather_icon               weather_icon         0
freq_weather <- table(data_filtered$weather_main, useNA = "ifany")
weather_summary <- as.data.frame(freq_weather)
colnames(weather_summary) <- c("Weather Main", "Jumlah")
weather_summary
##   Weather Main Jumlah
## 1        Clear     22
## 2       Clouds   8075
## 3         Haze      5
## 4         Mist      3
## 5         Rain    571
## 6        Smoke      3
## 7 Thunderstorm    143

2.1 Distribusi Frekuensi Kategori Cuaca

library(ggplot2)
ggplot(weather_summary, aes(x = `Weather Main`, y = Jumlah, fill = `Weather Main`)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Distribusi Frekuensi Kategori Cuaca (2019)",
       x = "Jenis Cuaca",
       y = "Jumlah Observasi") +
  geom_text(aes(label = Jumlah), vjust = -0.5)

2.2 Distribusi Variabel Numerik

numeric_cols_to_plot <- c("temp", "temp_min", "temp_max", "pressure", "humidity", "wind_speed", "wind_deg", "clouds_all")
data_for_hist <- data_filtered %>% select(all_of(numeric_cols_to_plot))

library(ggplot2)
library(tidyr)

data_for_hist %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_histogram(aes(y = ..density..), bins = 30, fill = "steelblue", color = "black", alpha = 0.7) +
  geom_density(color = "red") +
  facet_wrap(~variable, scales = "free") +
  theme_minimal() +
  labs(title = "Distribusi Variabel Numerik",
       x = "Nilai",
       y = "Densitas")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

3 Preprocessing

data_filtered <- data_filtered %>%
  select(temp, temp_min, temp_max, pressure, humidity,
         wind_speed, wind_deg, clouds_all, weather_main) %>%
  filter(weather_main %in% c("Clouds", "Rain", "Thunderstorm", "Clear"))

if (length(unique(data_filtered$weather_main)) < 2) {
  stop("Kolom weather_main hanya memiliki satu kategori setelah filter. Tidak bisa dilakukan split!")
}

data_filtered$weather_main <- as.factor(data_filtered$weather_main)
data_filtered$weather_main <- as.integer(data_filtered$weather_main) - 1

is.ordered(data_filtered$weather_main)
## [1] FALSE
str(data_filtered)
## tibble [8,811 × 9] (S3: tbl_df/tbl/data.frame)
##  $ temp        : num [1:8811] 27.1 27.8 29.5 30.1 30.1 ...
##  $ temp_min    : num [1:8811] 27 27.4 29 30 30 30 30 29 29 29 ...
##  $ temp_max    : num [1:8811] 27.4 28 30.4 30.4 30.4 30 30 30 29.4 29.4 ...
##  $ pressure    : num [1:8811] 1010 1010 1010 1010 1009 ...
##  $ humidity    : num [1:8811] 94 88 83 79 79 79 74 79 83 79 ...
##  $ wind_speed  : num [1:8811] 0.5 2.1 3.1 4.1 4.1 5.1 4.6 5.7 5.7 5.7 ...
##  $ wind_deg    : num [1:8811] 0 240 240 260 260 250 260 260 260 270 ...
##  $ clouds_all  : num [1:8811] 40 20 20 20 20 20 20 20 20 20 ...
##  $ weather_main: num [1:8811] 2 2 1 1 1 1 1 1 1 1 ...
unique(data_filtered$weather_main)
## [1] 2 1 0 3

3.1 Penanganan Missing Value

data_filtered <- data_filtered %>%
  mutate(across(where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

cat("Jumlah nilai NA setelah imputasi (dengan mean):\n")
## Jumlah nilai NA setelah imputasi (dengan mean):
print(colSums(is.na(data_filtered)))
##         temp     temp_min     temp_max     pressure     humidity   wind_speed 
##            0            0            0            0            0            0 
##     wind_deg   clouds_all weather_main 
##            0            0            0

3.2 Split Data

library(caret)
## Loading required package: lattice
data_filtered$weather_main <- as.factor(data_filtered$weather_main)
split_index <- createDataPartition(data_filtered$weather_main, p = 0.7, list = FALSE)
train_data <- data_filtered[split_index, ]
test_data <- data_filtered[-split_index, ]

3.3 Penyeimbangan Data

if (!require(smotefamily)) install.packages("smotefamily")
## Loading required package: smotefamily
library(smotefamily)

set.seed(123)
n_total_train <- nrow(train_data)
n_target_final <- n_total_train * 2
n_classes <- length(levels(train_data$weather_main))
n_per_class <- floor(n_target_final / n_classes)

list_per_class <- split(train_data, train_data$weather_main)
balanced_list <- list()

for (cls in names(list_per_class)) {
  data_cls <- list_per_class[[cls]]
  n_cls <- nrow(data_cls)
  balanced_list[[cls]] <- if (n_cls > n_per_class) {
    slice_sample(data_cls, n = n_per_class)
  } else {
    slice_sample(data_cls, n = n_per_class, replace = TRUE)
  }
}
train_balanced <- bind_rows(balanced_list)

cat("Total data train setelah balancing:", nrow(train_balanced), "\n")
## Total data train setelah balancing: 12340

3.4 Perbandingan Distribusi Kelas Cuaca

# 1. Data distribusi kelas SEBELUM balancing (dari train_data)
dist_train_asli <- as.data.frame(table(train_data$weather_main))
colnames(dist_train_asli) <- c("Kelas_Cuaca", "Jumlah")
dist_train_asli$Keterangan <- "Data Latih (Sebelum Balancing)"

# 2. Data distribusi kelas SETELAH balancing (dari train_balanced)
dist_train_seimbang <- as.data.frame(table(train_balanced$weather_main))
colnames(dist_train_seimbang) <- c("Kelas_Cuaca", "Jumlah")
dist_train_seimbang$Keterangan <- "Data Latih (Setelah Balancing)"

# 3. Data distribusi kelas pada data TEST (dari test_data)
dist_test <- as.data.frame(table(test_data$weather_main))
colnames(dist_test) <- c("Kelas_Cuaca", "Jumlah")
dist_test$Keterangan <- "Data Uji (Tidak Diubah)"

data_plot_distribusi <- rbind(dist_train_asli, dist_train_seimbang, dist_test)

ggplot(data_plot_distribusi, aes(x = Kelas_Cuaca, y = Jumlah, fill = Keterangan)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_text(aes(label = Jumlah), 
            position = position_dodge(width = 0.9), 
            vjust = -0.3, 
            size = 3.5) +
  theme_minimal(base_size = 12) + 
  labs(title = "Perbandingan Distribusi Kelas Cuaca",
       x = "Jenis Cuaca",
       y = "Jumlah Sampel",
       fill = "Keterangan Data") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        plot.title = element_text(hjust = 0.5, face = "bold"), 
        legend.position = "top") + 
  scale_fill_brewer(palette = "Set2")

3.5 Normalisasi Data

feature_cols <- c("temp", "temp_min", "temp_max", "pressure", "humidity", "wind_speed", "wind_deg", "clouds_all")
min_max_norm <- function(x) (x - min(x)) / (max(x) - min(x))

train_balanced_norm <- train_balanced
train_balanced_norm[feature_cols] <- lapply(train_balanced[feature_cols], min_max_norm)

test_data_norm <- test_data
for (col in feature_cols) {
  min_val <- min(train_balanced[[col]])
  max_val <- max(train_balanced[[col]])
  test_data_norm[[col]] <- (test_data[[col]] - min_val) / (max_val - min_val)
}

summary(train_balanced_norm)
##       temp           temp_min         temp_max         pressure     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3761   1st Qu.:0.3676   1st Qu.:0.3731   1st Qu.:0.3636  
##  Median :0.4615   Median :0.4643   Median :0.4627   Median :0.4545  
##  Mean   :0.4750   Mean   :0.4702   Mean   :0.4833   Mean   :0.4705  
##  3rd Qu.:0.5544   3rd Qu.:0.5536   3rd Qu.:0.5746   3rd Qu.:0.6364  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     humidity        wind_speed        wind_deg        clouds_all    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5333   1st Qu.:0.1201   1st Qu.:0.2778   1st Qu.:0.0800  
##  Median :0.6222   Median :0.2319   Median :0.3889   Median :0.4000  
##  Mean   :0.6425   Mean   :0.2535   Mean   :0.4832   Mean   :0.3529  
##  3rd Qu.:0.8667   3rd Qu.:0.3438   3rd Qu.:0.7500   3rd Qu.:0.7500  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  weather_main
##  0:3085      
##  1:3085      
##  2:3085      
##  3:3085      
##              
## 
summary(test_data_norm)
##       temp              temp_min           temp_max         pressure     
##  Min.   :0.0007553   Min.   :-0.04167   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3836858   1st Qu.: 0.39732   1st Qu.:0.3731   1st Qu.:0.3636  
##  Median :0.4773414   Median : 0.47917   Median :0.5000   Median :0.5455  
##  Mean   :0.4938370   Mean   : 0.49112   Mean   :0.4995   Mean   :0.5293  
##  3rd Qu.:0.6027190   3rd Qu.: 0.59821   3rd Qu.:0.5970   3rd Qu.:0.6364  
##  Max.   :0.9773414   Max.   : 0.96280   Max.   :0.9701   Max.   :1.0909  
##     humidity        wind_speed           wind_deg        clouds_all    
##  Min.   :0.0000   Min.   :0.0007457   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.4889   1st Qu.:0.1200597   1st Qu.:0.2778   1st Qu.:0.2000  
##  Median :0.6222   Median :0.2319165   Median :0.3333   Median :0.2000  
##  Mean   :0.6030   Mean   :0.2553001   Mean   :0.4263   Mean   :0.3102  
##  3rd Qu.:0.7333   3rd Qu.:0.3437733   3rd Qu.:0.6944   3rd Qu.:0.4000  
##  Max.   :1.0000   Max.   :0.9254288   Max.   :1.0000   Max.   :1.0000  
##  weather_main
##  0:   6      
##  1:2422      
##  2: 171      
##  3:  42      
##              
## 

4 Visualisasi Korelasi

library(corrplot)
## corrplot 0.95 loaded
correlation_matrix <- cor(train_balanced_norm[, feature_cols])
corrplot(correlation_matrix, method = "color", type = "upper", tl.col = "black", 
         addCoef.col = "black", number.cex = 0.7, diag = FALSE)

5 UJI ASUMSI LDA (PAKAI DATA BELUM NORMALISASI)

library(biotools)   # untuk Box's M test
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked _by_ '.GlobalEnv':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## ---
## biotools version 4.3
library(car)        # untuk VIF
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(nnet)       # untuk multinomial logistic regression 

# Daftar fitur numerik
feature_cols <- c("temp", "temp_min", "temp_max", "pressure", "humidity", 
                  "wind_speed", "wind_deg", "clouds_all")

cat("== Uji Normalitas Tiap Kelas (Shapiro-Wilk) ==\n")
## == Uji Normalitas Tiap Kelas (Shapiro-Wilk) ==
# Uji normalitas univariat untuk tiap fitur per kelas
for (cls in levels(train_balanced$weather_main)) {
  cat("Kelas:", cls, "\n")
  subset_data <- train_balanced[train_balanced$weather_main == cls, feature_cols]
  
  for (feature in feature_cols) {
    shapiro_test <- shapiro.test(subset_data[[feature]])
    cat(sprintf("  %s: W=%.4f, p=%.4f\n", feature, shapiro_test$statistic, shapiro_test$p.value))
  }
  cat("\n")
}
## Kelas: 0 
##   temp: W=0.9280, p=0.0000
##   temp_min: W=0.8951, p=0.0000
##   temp_max: W=0.9370, p=0.0000
##   pressure: W=0.8809, p=0.0000
##   humidity: W=0.8845, p=0.0000
##   wind_speed: W=0.9531, p=0.0000
##   wind_deg: W=0.6410, p=0.0000
##   clouds_all: W=0.6887, p=0.0000
## 
## Kelas: 1 
##   temp: W=0.9910, p=0.0000
##   temp_min: W=0.9906, p=0.0000
##   temp_max: W=0.9870, p=0.0000
##   pressure: W=0.9784, p=0.0000
##   humidity: W=0.9660, p=0.0000
##   wind_speed: W=0.9696, p=0.0000
##   wind_deg: W=0.8954, p=0.0000
##   clouds_all: W=0.6177, p=0.0000
## 
## Kelas: 2 
##   temp: W=0.9891, p=0.0000
##   temp_min: W=0.9796, p=0.0000
##   temp_max: W=0.9795, p=0.0000
##   pressure: W=0.9684, p=0.0000
##   humidity: W=0.9273, p=0.0000
##   wind_speed: W=0.8844, p=0.0000
##   wind_deg: W=0.8555, p=0.0000
##   clouds_all: W=0.8263, p=0.0000
## 
## Kelas: 3 
##   temp: W=0.9662, p=0.0000
##   temp_min: W=0.9629, p=0.0000
##   temp_max: W=0.9563, p=0.0000
##   pressure: W=0.9717, p=0.0000
##   humidity: W=0.9206, p=0.0000
##   wind_speed: W=0.8877, p=0.0000
##   wind_deg: W=0.8762, p=0.0000
##   clouds_all: W=0.7882, p=0.0000
cat("== Uji Homogenitas Matriks Kovarian (Box's M) ==\n")
## == Uji Homogenitas Matriks Kovarian (Box's M) ==
boxM_result <- boxM(train_balanced[, feature_cols], train_balanced$weather_main)
boxM_result
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  train_balanced[, feature_cols]
## Chi-Sq (approx.) = 25173, df = 108, p-value < 2.2e-16
cat("== Uji Normalitas Multivariat ==\n")
## == Uji Normalitas Multivariat ==
library(MVN)

# Uji normalitas multivariat dengan metode Mardia
result <- mvn(data = train_balanced %>% dplyr::select(-weather_main), mvnTest = "mardia")
result$multivariateNormality
##              Test        Statistic p value Result
## 1 Mardia Skewness 43175.1969984824       0     NO
## 2 Mardia Kurtosis 79.7638388317432       0     NO
## 3             MVN             <NA>    <NA>     NO

6 Pemodelan - Analisis Diskriminan

library(MASS)

model_lda <- lda(weather_main ~ ., data = train_balanced_norm)
pred_lda <- predict(model_lda, test_data_norm)$class
conf_matrix_lda <- confusionMatrix(pred_lda, test_data_norm$weather_main)

cat("=== Evaluasi LDA ===\n")
## === Evaluasi LDA ===
conf_matrix_lda
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1    2    3
##          0    5  300    1    0
##          1    1 1678   39   12
##          2    0  251   81   11
##          3    0  193   50   19
## 
## Overall Statistics
##                                          
##                Accuracy : 0.6751         
##                  95% CI : (0.6569, 0.693)
##     No Information Rate : 0.9171         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.1649         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 0 Class: 1 Class: 2 Class: 3
## Sensitivity          0.833333   0.6928  0.47368 0.452381
## Specificity          0.885769   0.7626  0.89393 0.906503
## Pos Pred Value       0.016340   0.9699  0.23615 0.072519
## Neg Pred Value       0.999572   0.1833  0.96084 0.990332
## Prevalence           0.002272   0.9171  0.06475 0.015903
## Detection Rate       0.001893   0.6354  0.03067 0.007194
## Detection Prevalence 0.115865   0.6551  0.12988 0.099205
## Balanced Accuracy    0.859551   0.7277  0.68381 0.679442
accuracy_lda <- conf_matrix_lda$overall['Accuracy']
cat(sprintf("Accuracy LDA: %.4f\n", accuracy_lda))
## Accuracy LDA: 0.6751

7 Visualisasi Akhir

7.1 Visualisasi Matriks Konfusi (Heatmap)

library(ggplot2)

conf_table <- as.data.frame(conf_matrix_lda$table)
colnames(conf_table) <- c("Prediction", "Reference", "Freq")

ggplot(data = conf_table, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), vjust = 0.5, color = "white", size = 5) + 
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_minimal() +
  labs(title = "Matriks Konfusi LDA",
       x = "Kelas Prediksi",
       y = "Kelas Aktual")

7.2 Plot Hasil LDA (Scatter Plot LD1 vs LD2)

lda_predictions_test <- predict(model_lda, test_data_norm)
lda_plot_data_test <- data.frame(LD1 = lda_predictions_test$x[,1],
                          
                                 LD2 = if(ncol(lda_predictions_test$x) >= 2) lda_predictions_test$x[,2] else NA,
                                 weather_main = test_data_norm$weather_main) 

if (ncol(lda_predictions_test$x) >= 2) {
  library(ggplot2)
  ggplot(lda_plot_data_test, aes(x = LD1, y = LD2, color = weather_main)) +
    geom_point(alpha = 0.7) +
    stat_ellipse(aes(group = weather_main), type = "norm", level = 0.68, linetype = 2) +
    theme_minimal() +
    labs(title = "Proyeksi Data Test pada Fungsi Diskriminan Linier",
         x = "LD1", y = "LD2", color = "Jenis Cuaca Aktual")
} else if (ncol(lda_predictions_test$x) == 1) {
  library(ggplot2) 
  ggplot(lda_plot_data_test, aes(x = LD1, fill = weather_main)) +
    geom_density(alpha = 0.7) +
    theme_minimal() +
    labs(title = "Densitas LD1 per Jenis Cuaca Aktual (Data Test)",
         x = "LD1", fill = "Jenis Cuaca Aktual")
}

7.3 Plot Koefisien/Bobot Fungsi Diskriminan Linier (LDA Loadings)

lda_coefficients <- as.data.frame(model_lda$scaling)
lda_coefficients$variable <- rownames(lda_coefficients)

library(ggplot2)
library(tidyr) 

lda_coefficients_long <- lda_coefficients %>%
  pivot_longer(cols = -variable, names_to = "LD", values_to = "Coefficient")

# Plot koefisien
ggplot(lda_coefficients_long, aes(x = reorder(variable, Coefficient), y = Coefficient, fill = LD)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  coord_flip() + 
  facet_wrap(~LD) + 
  theme_minimal() +
  labs(title = "Koefisien Fungsi Diskriminan Linier (LDA Loadings)",
       x = "Variabel",
       y = "Koefisien (Bobot)") +
  scale_fill_brewer(palette = "Set1") 

8 Referensi

[1] M. S. Tjahaya, Raupong, and G. M. Tinungki, “Analisis diskriminan linear robust dengan metode Winsorized modified one-step M-estimator,” Estimasi, vol. 3, no. 1, pp. 1–13, 2022, doi: 10.20956/ejsa.vi.11302.

[2] M. S. Irwanto, F. A. Bachtiar, and N. Yudistira, “Klasifikasi aktivitas manusia menggunakan algoritme computed-input-weight extreme learning machine dengan reduksi dimensi principal component analysis,” J. Teknol. Inf. Ilmu Komput., vol. 9, no. 6, pp. 1195–1202, 2022, doi: 10.25126/jtiik.202295504.

[3] C. Herdian, A. Kamila, and I. G. A. M. Budidarma, “Studi kasus feature engineering untuk data teks: perbandingan label encoding dan one-hot encoding pada metode linear regresi,” Technologia: J. Ilmiah, vol. 15, no. 1, Art. no. e13457, 2024, doi: 10.31602/tji.v15i1.13457.

[4] P. J. Muhammad Ali, “Investigating the impact of min-max data normalization on the regression performance of k-nearest neighbor with different similarity measurements,” ARO Sci. J. Koya Univ., vol. 10, pp. 85–91, 2022, doi: 10.14500/aro.10955.