1. Muat Library

library(ggplot2)
library(dplyr)
library(tidyr)
library(reshape2)
library(gridExtra)
library(scales)
library(GGally)      # Pairplot
library(ggcorrplot)  # Heatmap korelasi yang lebih rapi

# install.packages(c("ggplot2","dplyr","tidyr","reshape2",
#                    "gridExtra","scales","GGally","ggcorrplot"))

2. Load Dataset

# Read the occupancy sensor log; character columns are kept as plain strings.
df <- read.csv(file = "Occupancy_Estimation.csv", stringsAsFactors = FALSE)

# Report the table shape (rows x columns).
cat("Dimensi  :", dim(df)[1], "baris x", dim(df)[2], "kolom\n")
## Dimensi  : 10129 baris x 19 kolom
# Report all column names on one comma-separated line.
cat("Kolom    :", toString(names(df)), "\n")
## Kolom    : Date, Time, S1_Temp, S2_Temp, S3_Temp, S4_Temp, S1_Light, S2_Light, S3_Light, S4_Light, S1_Sound, S2_Sound, S3_Sound, S4_Sound, S5_CO2, S5_CO2_Slope, S6_PIR, S7_PIR, Room_Occupancy_Count

Tampilan Awal Data

head(df, 10)
##          Date     Time S1_Temp S2_Temp S3_Temp S4_Temp S1_Light S2_Light
## 1  2017/12/22 10:49:41   24.94   24.75   24.56   25.38      121       34
## 2  2017/12/22 10:50:12   24.94   24.75   24.56   25.44      121       33
## 3  2017/12/22 10:50:42   25.00   24.75   24.50   25.44      121       34
## 4  2017/12/22 10:51:13   25.00   24.75   24.56   25.44      121       34
## 5  2017/12/22 10:51:44   25.00   24.75   24.56   25.44      121       34
## 6  2017/12/22 10:52:14   25.00   24.81   24.56   25.44      121       34
## 7  2017/12/22 10:52:45   25.00   24.75   24.56   25.44      120       34
## 8  2017/12/22 10:53:15   25.00   24.81   24.56   25.44      121       34
## 9  2017/12/22 10:53:46   25.00   24.81   24.56   25.50      122       35
## 10 2017/12/22 10:54:17   25.00   24.81   24.56   25.50      101       34
##    S3_Light S4_Light S1_Sound S2_Sound S3_Sound S4_Sound S5_CO2 S5_CO2_Slope
## 1        53       40     0.08     0.19     0.06     0.06    390   0.76923077
## 2        53       40     0.93     0.05     0.06     0.06    390   0.64615385
## 3        53       40     0.43     0.11     0.08     0.06    390   0.51923077
## 4        53       40     0.41     0.10     0.10     0.09    390   0.38846154
## 5        54       40     0.18     0.06     0.06     0.06    390   0.25384615
## 6        54       40     0.13     0.06     0.06     0.07    390   0.16538462
## 7        54       40     1.39     0.32     0.43     0.06    390   0.07692308
## 8        54       41     0.09     0.06     0.09     0.05    390  -0.01153846
## 9        56       43     0.09     0.05     0.06     0.13    390  -0.10000000
## 10       57       43     3.84     0.64     0.48     0.39    390  -0.18846154
##    S6_PIR S7_PIR Room_Occupancy_Count
## 1       0      0                    1
## 2       0      0                    1
## 3       0      0                    1
## 4       0      0                    1
## 5       0      0                    1
## 6       0      0                    1
## 7       1      0                    1
## 8       0      0                    1
## 9       0      0                    1
## 10      1      1                    1
str(df)
## 'data.frame':    10129 obs. of  19 variables:
##  $ Date                : chr  "2017/12/22" "2017/12/22" "2017/12/22" "2017/12/22" ...
##  $ Time                : chr  "10:49:41" "10:50:12" "10:50:42" "10:51:13" ...
##  $ S1_Temp             : num  24.9 24.9 25 25 25 ...
##  $ S2_Temp             : num  24.8 24.8 24.8 24.8 24.8 ...
##  $ S3_Temp             : num  24.6 24.6 24.5 24.6 24.6 ...
##  $ S4_Temp             : num  25.4 25.4 25.4 25.4 25.4 ...
##  $ S1_Light            : int  121 121 121 121 121 121 120 121 122 101 ...
##  $ S2_Light            : int  34 33 34 34 34 34 34 34 35 34 ...
##  $ S3_Light            : int  53 53 53 53 54 54 54 54 56 57 ...
##  $ S4_Light            : int  40 40 40 40 40 40 40 41 43 43 ...
##  $ S1_Sound            : num  0.08 0.93 0.43 0.41 0.18 0.13 1.39 0.09 0.09 3.84 ...
##  $ S2_Sound            : num  0.19 0.05 0.11 0.1 0.06 0.06 0.32 0.06 0.05 0.64 ...
##  $ S3_Sound            : num  0.06 0.06 0.08 0.1 0.06 0.06 0.43 0.09 0.06 0.48 ...
##  $ S4_Sound            : num  0.06 0.06 0.06 0.09 0.06 0.07 0.06 0.05 0.13 0.39 ...
##  $ S5_CO2              : int  390 390 390 390 390 390 390 390 390 390 ...
##  $ S5_CO2_Slope        : num  0.769 0.646 0.519 0.388 0.254 ...
##  $ S6_PIR              : int  0 0 0 0 0 0 1 0 0 1 ...
##  $ S7_PIR              : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Room_Occupancy_Count: int  1 1 1 1 1 1 1 1 1 1 ...

3. Statistik Deskriptif

Ringkasan Umum

summary(df)
##      Date               Time              S1_Temp         S2_Temp     
##  Length:10129       Length:10129       Min.   :24.94   Min.   :24.75  
##  Class :character   Class :character   1st Qu.:25.19   1st Qu.:25.19  
##  Mode  :character   Mode  :character   Median :25.38   Median :25.38  
##                                        Mean   :25.45   Mean   :25.55  
##                                        3rd Qu.:25.63   3rd Qu.:25.63  
##                                        Max.   :26.38   Max.   :29.00  
##     S3_Temp         S4_Temp         S1_Light         S2_Light     
##  Min.   :24.44   Min.   :24.94   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:24.69   1st Qu.:25.44   1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :24.94   Median :25.75   Median :  0.00   Median :  0.00  
##  Mean   :25.06   Mean   :25.75   Mean   : 25.45   Mean   : 26.02  
##  3rd Qu.:25.38   3rd Qu.:26.00   3rd Qu.: 12.00   3rd Qu.: 14.00  
##  Max.   :26.19   Max.   :26.56   Max.   :165.00   Max.   :258.00  
##     S3_Light         S4_Light        S1_Sound         S2_Sound     
##  Min.   :  0.00   Min.   : 0.00   Min.   :0.0600   Min.   :0.0400  
##  1st Qu.:  0.00   1st Qu.: 0.00   1st Qu.:0.0700   1st Qu.:0.0500  
##  Median :  0.00   Median : 0.00   Median :0.0800   Median :0.0500  
##  Mean   : 34.25   Mean   :13.22   Mean   :0.1682   Mean   :0.1201  
##  3rd Qu.: 50.00   3rd Qu.:22.00   3rd Qu.:0.0800   3rd Qu.:0.0600  
##  Max.   :280.00   Max.   :74.00   Max.   :3.8800   Max.   :3.4400  
##     S3_Sound         S4_Sound          S5_CO2        S5_CO2_Slope     
##  Min.   :0.0400   Min.   :0.0500   Min.   : 345.0   Min.   :-6.29615  
##  1st Qu.:0.0600   1st Qu.:0.0600   1st Qu.: 355.0   1st Qu.:-0.04615  
##  Median :0.0600   Median :0.0800   Median : 360.0   Median : 0.00000  
##  Mean   :0.1581   Mean   :0.1038   Mean   : 460.9   Mean   :-0.00483  
##  3rd Qu.:0.0700   3rd Qu.:0.1000   3rd Qu.: 465.0   3rd Qu.: 0.00000  
##  Max.   :3.6700   Max.   :3.4000   Max.   :1270.0   Max.   : 8.98077  
##      S6_PIR            S7_PIR        Room_Occupancy_Count
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000      
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000      
##  Median :0.00000   Median :0.00000   Median :0.0000      
##  Mean   :0.09014   Mean   :0.07957   Mean   :0.3986      
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000      
##  Max.   :1.00000   Max.   :1.00000   Max.   :3.0000

Ringkasan Per-Variabel (Tabel Rapi)

# Per-variable descriptive statistics (min, quartiles, median, mean, max, SD),
# rounded to 3 decimals, one row per numeric column.
# Columns are chosen by type, so the character Date/Time columns drop out
# without having to be named.
num_cols <- df %>% select(where(is.numeric))

# vapply() pins every statistic to exactly one number per column; a result of
# any other length raises an error instead of silently reshaping the table,
# which sapply() would allow.
tabel_desk <- data.frame(
  Variabel = names(num_cols),
  Min      = round(vapply(num_cols, min, numeric(1), na.rm = TRUE), 3),
  Q1       = round(vapply(num_cols, quantile, numeric(1), 0.25,
                          na.rm = TRUE), 3),
  Median   = round(vapply(num_cols, median, numeric(1), na.rm = TRUE), 3),
  Mean     = round(vapply(num_cols, mean, numeric(1), na.rm = TRUE), 3),
  Q3       = round(vapply(num_cols, quantile, numeric(1), 0.75,
                          na.rm = TRUE), 3),
  Max      = round(vapply(num_cols, max, numeric(1), na.rm = TRUE), 3),
  SD       = round(vapply(num_cols, sd, numeric(1), na.rm = TRUE), 3)
)
# Drop the row names inherited from the named statistic vectors so rows are
# numbered 1..n when printed.
rownames(tabel_desk) <- NULL
print(tabel_desk)
##                Variabel     Min      Q1 Median    Mean     Q3      Max      SD
## 1               S1_Temp  24.940  25.190  25.38  25.454  25.63   26.380   0.351
## 2               S2_Temp  24.750  25.190  25.38  25.546  25.63   29.000   0.586
## 3               S3_Temp  24.440  24.690  24.94  25.057  25.38   26.190   0.427
## 4               S4_Temp  24.940  25.440  25.75  25.754  26.00   26.560   0.356
## 5              S1_Light   0.000   0.000   0.00  25.445  12.00  165.000  51.011
## 6              S2_Light   0.000   0.000   0.00  26.016  14.00  258.000  67.304
## 7              S3_Light   0.000   0.000   0.00  34.248  50.00  280.000  58.401
## 8              S4_Light   0.000   0.000   0.00  13.220  22.00   74.000  19.602
## 9              S1_Sound   0.060   0.070   0.08   0.168   0.08    3.880   0.317
## 10             S2_Sound   0.040   0.050   0.05   0.120   0.06    3.440   0.267
## 11             S3_Sound   0.040   0.060   0.06   0.158   0.07    3.670   0.414
## 12             S4_Sound   0.050   0.060   0.08   0.104   0.10    3.400   0.121
## 13               S5_CO2 345.000 355.000 360.00 460.860 465.00 1270.000 199.965
## 14         S5_CO2_Slope  -6.296  -0.046   0.00  -0.005   0.00    8.981   1.165
## 15               S6_PIR   0.000   0.000   0.00   0.090   0.00    1.000   0.286
## 16               S7_PIR   0.000   0.000   0.00   0.080   0.00    1.000   0.271
## 17 Room_Occupancy_Count   0.000   0.000   0.00   0.399   0.00    3.000   0.894

4. Pengecekan Kualitas Data

Missing Values

# Count NA cells per column, then express each count as a percentage of rows.
mv <- colSums(is.na(df))
df_mv <- data.frame(Kolom = names(mv), Missing = as.integer(mv))
df_mv$Persen <- paste0(round(df_mv$Missing / nrow(df) * 100, 2), "%")

print(df_mv)
##                   Kolom Missing Persen
## 1                  Date       0     0%
## 2                  Time       0     0%
## 3               S1_Temp       0     0%
## 4               S2_Temp       0     0%
## 5               S3_Temp       0     0%
## 6               S4_Temp       0     0%
## 7              S1_Light       0     0%
## 8              S2_Light       0     0%
## 9              S3_Light       0     0%
## 10             S4_Light       0     0%
## 11             S1_Sound       0     0%
## 12             S2_Sound       0     0%
## 13             S3_Sound       0     0%
## 14             S4_Sound       0     0%
## 15               S5_CO2       0     0%
## 16         S5_CO2_Slope       0     0%
## 17               S6_PIR       0     0%
## 18               S7_PIR       0     0%
## 19 Room_Occupancy_Count       0     0%
if (sum(mv) == 0) {
  cat("\n✔ Tidak ada nilai hilang dalam dataset.\n")
}
## 
## ✔ Tidak ada nilai hilang dalam dataset.

Duplikat

# Count rows that exactly repeat an earlier row of the table.
n_dup <- length(which(duplicated(df)))
cat("Jumlah baris duplikat:", n_dup, "\n")
## Jumlah baris duplikat: 0
# Print a confirmation line only when no duplicate was found.
if (n_dup == 0) {
  cat("✔ Tidak ada duplikat.\n")
}
## ✔ Tidak ada duplikat.

Tipe Data

# One row per column: its name, its (primary) class and its first value as
# text.
# class() may return several classes for a single column; taking the first
# one inside vapply() guarantees exactly one string per column, where
# sapply() would return a list and break the data.frame() call.
df_tipe <- data.frame(
  Kolom       = names(df),
  TipeData    = vapply(df, function(x) class(x)[1], character(1)),
  ContohNilai = vapply(df, function(x) as.character(x[1]), character(1))
)
# Remove the row names inherited from the named vectors above.
rownames(df_tipe) <- NULL
print(df_tipe)
##                   Kolom  TipeData    ContohNilai
## 1                  Date character     2017/12/22
## 2                  Time character       10:49:41
## 3               S1_Temp   numeric          24.94
## 4               S2_Temp   numeric          24.75
## 5               S3_Temp   numeric          24.56
## 6               S4_Temp   numeric          25.38
## 7              S1_Light   integer            121
## 8              S2_Light   integer             34
## 9              S3_Light   integer             53
## 10             S4_Light   integer             40
## 11             S1_Sound   numeric           0.08
## 12             S2_Sound   numeric           0.19
## 13             S3_Sound   numeric           0.06
## 14             S4_Sound   numeric           0.06
## 15               S5_CO2   integer            390
## 16         S5_CO2_Slope   numeric 0.769230769231
## 17               S6_PIR   integer              0
## 18               S7_PIR   integer              0
## 19 Room_Occupancy_Count   integer              1

5. Distribusi Variabel Target

# Frequency of each occupancy class, plus its share of all rows in percent.
tbl <- table(df[["Room_Occupancy_Count"]])
df_dist <- data.frame(
  Kelas      = names(tbl),
  Frekuensi  = as.integer(tbl),
  Persentase = paste0(round(tbl / sum(tbl) * 100, 2), "%")
)
print(df_dist)
##   Kelas Frekuensi Persentase
## 1     0      8228     81.23%
## 2     1       459      4.53%
## 3     2       748      7.38%
## 4     3       694      6.85%
# Bar chart of the target classes with the count printed above each bar.
# The dominant-class share in the subtitle is computed from the data rather
# than typed in, so it cannot drift from the bars it describes.
ggplot(df, aes(x = factor(Room_Occupancy_Count),
               fill = factor(Room_Occupancy_Count))) +
  geom_bar(color = "black", width = 0.6) +
  geom_text(stat = "count", aes(label = after_stat(count)),
            vjust = -0.5, size = 4.5, fontface = "bold") +
  # Labels are keyed by class, like the colours, so each legend entry stays
  # attached to its own class instead of depending on position.
  scale_fill_manual(
    values = c("0" = "#4472C4", "1" = "#ED7D31",
               "2" = "#70AD47", "3" = "#E74C3C"),
    labels = c("0" = "0 Penghuni", "1" = "1 Penghuni",
               "2" = "2 Penghuni", "3" = "3 Penghuni")
  ) +
  labs(title    = "Distribusi Kelas Penghuni Ruangan",
       subtitle = sprintf(
         paste0("Kelas 0 mendominasi sebesar %.2f%% — ",
                "terdapat ketidakseimbangan kelas"),
         mean(df$Room_Occupancy_Count == 0, na.rm = TRUE) * 100
       ),
       x = "Jumlah Penghuni", y = "Frekuensi", fill = "Kelas") +
  theme_minimal(base_size = 13) +
  theme(plot.title    = element_text(face = "bold"),
        plot.subtitle = element_text(color = "gray40"))


6. Distribusi Fitur Sensor

Histogram Semua Fitur

# Long format: one row per (observation, variable) pair, with the target kept
# alongside as a factor column.
df_long <- df %>%
  select(!c(Date, Time)) %>%
  mutate(across(Room_Occupancy_Count, factor)) %>%
  pivot_longer(cols = !Room_Occupancy_Count,
               names_to = "Variabel", values_to = "Nilai")

# One histogram panel per variable; axes are free because the variables are
# on very different scales.
ggplot(data = df_long, mapping = aes(x = Nilai, fill = Variabel)) +
  geom_histogram(bins = 40, color = "white", alpha = 0.85) +
  facet_wrap(vars(Variabel), scales = "free", ncol = 4) +
  scale_fill_viridis_d(guide = "none") +
  labs(x = "Nilai", y = "Frekuensi",
       title = "Distribusi Setiap Fitur Sensor") +
  theme_minimal(base_size = 10) +
  theme(strip.text = element_text(face = "bold", size = 9),
        plot.title = element_text(face = "bold"))

Boxplot Fitur per Kelas Penghuni

# Reuse the long-format table built for the histograms: `df_long` comes from
# the identical select/mutate/pivot_longer pipeline, so recomputing it here
# only duplicated work. The name `df_long2` is kept for any later code that
# refers to it.
df_long2 <- df_long

# Boxplots of every variable split by occupancy class; the y axis is free per
# panel because the variables are on different scales.
ggplot(df_long2, aes(x = Room_Occupancy_Count, y = Nilai,
                      fill = Room_Occupancy_Count)) +
  geom_boxplot(alpha = 0.75, outlier.size = 0.5, outlier.alpha = 0.3) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 4) +
  scale_fill_manual(
    values = c("0" = "#4472C4", "1" = "#ED7D31",
               "2" = "#70AD47", "3" = "#E74C3C")
  ) +
  labs(title = "Distribusi Fitur Sensor per Kelas Penghuni",
       x = "Jumlah Penghuni", y = "Nilai", fill = "Kelas") +
  theme_minimal(base_size = 10) +
  theme(plot.title = element_text(face = "bold"),
        strip.text = element_text(face = "bold", size = 9),
        legend.position = "bottom")


7. Analisis Korelasi

Matriks Korelasi (Heatmap)

# Temperature, light, sound and CO2 columns only; the PIR columns and the
# target are left out of the feature-to-feature correlation matrix.
fitur_num <- df %>%
  select(all_of(c(
    "S1_Temp", "S2_Temp", "S3_Temp", "S4_Temp",
    "S1_Light", "S2_Light", "S3_Light", "S4_Light",
    "S1_Sound", "S2_Sound", "S3_Sound", "S4_Sound",
    "S5_CO2", "S5_CO2_Slope"
  )))

# Correlation matrix with cor()'s default method.
mat_kor <- cor(fitur_num)

# Lower-triangle heatmap with the coefficient printed in each cell.
ggcorrplot(
  mat_kor,
  type     = "lower",
  method   = "square",
  colors   = c("#D7191C", "white", "#2C7BB6"),
  lab      = TRUE,
  lab_size = 3,
  ggtheme  = theme_minimal(base_size = 11),
  title    = "Matriks Korelasi Fitur Sensor"
)

Korelasi Fitur dengan Target

# Correlation of every predictor with the occupancy count, reshaped to one
# row per feature and sorted by absolute strength (strongest first).
df_kor_target <- df %>%
  select(!c(Date, Time)) %>%
  summarise(across(!Room_Occupancy_Count,
                   ~ cor(.x, Room_Occupancy_Count))) %>%
  pivot_longer(cols = everything(),
               names_to = "Fitur", values_to = "Korelasi") %>%
  arrange(desc(abs(Korelasi)))

# Horizontal bars ordered by absolute correlation, coloured by sign.
ggplot(data = df_kor_target,
       mapping = aes(x = reorder(Fitur, abs(Korelasi)),
                     y = Korelasi,
                     fill = Korelasi > 0)) +
  geom_col(width = 0.7, color = "black") +
  coord_flip() +
  scale_fill_manual(labels = c("TRUE" = "Positif", "FALSE" = "Negatif"),
                    values = c("TRUE" = "#2C7BB6", "FALSE" = "#D7191C")) +
  labs(x = "Fitur", y = "Koefisien Korelasi Pearson", fill = "Arah",
       title = "Korelasi Setiap Fitur dengan Room_Occupancy_Count") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))


8. Analisis Temporal

Tren Penghuni Berdasarkan Waktu

# Add time-derived columns: a full timestamp, the hour of day and the
# calendar date.
# The timestamps are parsed with an explicit time zone. Without `tz`,
# as.POSIXct() uses the session's zone, and a wall-clock time that does not
# exist there (a DST gap) becomes NA. UTC has no DST, so the logged hour and
# date are preserved as written.
# NOTE(review): the CSV carries no zone information; UTC is used here only as
# a DST-free container, not as a claim about where the data was recorded.
df_waktu <- df %>%
  mutate(
    DateTime = as.POSIXct(paste(Date, Time),
                          format = "%Y/%m/%d %H:%M:%S", tz = "UTC"),
    Jam      = as.integer(format(DateTime, "%H")),
    # Same explicit "%Y/%m/%d" layout as the timestamp above, instead of
    # relying on as.Date()'s format guessing.
    Tanggal  = as.Date(Date, format = "%Y/%m/%d")
  )
# Mean occupancy for each hour of the day.
df_jam <- summarise(
  group_by(df_waktu, Jam),
  RataRata = mean(Room_Occupancy_Count),
  .groups = "drop"
)

# Line-and-point chart of the hourly means, with a tick for every hour 0-23.
ggplot(data = df_jam, mapping = aes(x = Jam, y = RataRata)) +
  geom_line(linewidth = 1.2, color = "#2C7BB6") +
  geom_point(size = 2.5, color = "#1F4E79") +
  labs(x = "Jam", y = "Rata-rata Jumlah Penghuni",
       title    = "Rata-rata Penghuni per Jam dalam Sehari",
       subtitle = "Puncak aktivitas terlihat pada jam kerja") +
  scale_x_continuous(breaks = seq.int(0L, 23L)) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

# Mean occupancy for each calendar date.
df_hari <- summarise(
  group_by(df_waktu, Tanggal),
  RataRata = mean(Room_Occupancy_Count),
  .groups = "drop"
)

# Line-and-point chart of the daily means.
ggplot(data = df_hari, mapping = aes(x = Tanggal, y = RataRata)) +
  geom_line(linewidth = 1, color = "#ED7D31") +
  geom_point(size = 2, color = "#C0392B") +
  labs(x = "Tanggal", y = "Rata-rata Jumlah Penghuni",
       title    = "Rata-rata Penghuni per Hari",
       subtitle = "Selama periode pengamatan (22 Des 2017 – 11 Jan 2018)") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))


9. Deteksi Outlier

Boxplot untuk Setiap Sensor

# Sensor columns shown in the outlier boxplots.
fitur_plot <- c(
  "S1_Temp", "S2_Temp", "S3_Temp", "S4_Temp",
  "S5_CO2", "S5_CO2_Slope",
  "S1_Light", "S2_Light", "S3_Light", "S4_Light",
  "S1_Sound", "S2_Sound", "S3_Sound", "S4_Sound"
)

# Long format: one (feature, value) row per cell of the selected columns.
df_out <- pivot_longer(
  df[fitur_plot],
  cols = everything(),
  names_to = "Fitur",
  values_to = "Nilai"
)

# One boxplot panel per feature with free scales; outliers drawn in red. The
# x tick labels are hidden because each panel strip already names the feature.
ggplot(data = df_out, mapping = aes(x = Fitur, y = Nilai, fill = Fitur)) +
  geom_boxplot(alpha = 0.7, outlier.size = 0.8, outlier.color = "red") +
  facet_wrap(vars(Fitur), scales = "free", ncol = 7) +
  scale_fill_viridis_d(guide = "none") +
  labs(x = NULL, y = "Nilai",
       title = "Deteksi Outlier — Boxplot per Sensor") +
  theme_minimal(base_size = 9) +
  theme(plot.title  = element_text(face = "bold"),
        strip.text  = element_text(face = "bold"),
        axis.text.x = element_blank())

Jumlah Outlier per Fitur (Metode IQR)

# Count the values of `x` that fall outside the IQR fences.
#
# Arguments:
#   x  numeric vector; NA values are ignored.
#   k  fence multiplier applied to the interquartile range. Defaults to 1.5,
#      the value that was previously hard-coded.
#
# Returns:
#   A single integer: how many values lie below Q1 - k * IQR or above
#   Q3 + k * IQR.
hitung_outlier <- function(x, k = 1.5) {
  # Both quartiles in one call; names are dropped because only the values
  # are needed.
  kuartil <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  rentang <- kuartil[2] - kuartil[1]
  batas_bawah <- kuartil[1] - k * rentang
  batas_atas <- kuartil[2] + k * rentang
  # Comparisons against NA yield NA; na.rm keeps them out of the count.
  sum(x < batas_bawah | x > batas_atas, na.rm = TRUE)
}

# Outlier count per predictor (IQR rule via hitung_outlier), with the count as
# a percentage of all rows, sorted from most to fewest outliers.
# Note: for a 0/1 column whose quartiles are both 0 the fences collapse to 0,
# so every 1 is counted as an outlier; this is how the PIR columns get their
# counts.
df_outlier <- df %>%
  select(!c(Date, Time, Room_Occupancy_Count)) %>%
  summarise(across(.cols = everything(), .fns = hitung_outlier)) %>%
  pivot_longer(cols = everything(),
               names_to = "Fitur", values_to = "JumlahOutlier") %>%
  mutate(Persen = paste0(round(JumlahOutlier / nrow(df) * 100, 2), "%")) %>%
  arrange(desc(JumlahOutlier))

print(df_outlier)
## # A tibble: 16 × 3
##    Fitur        JumlahOutlier Persen
##    <chr>                <int> <chr> 
##  1 S5_CO2_Slope          4033 39.82%
##  2 S1_Sound              1772 17.49%
##  3 S1_Light              1716 16.94%
##  4 S2_Sound              1703 16.81%
##  5 S3_Sound              1671 16.5% 
##  6 S5_CO2                1657 16.36%
##  7 S2_Light              1017 10.04%
##  8 S3_Light               950 9.38% 
##  9 S2_Temp                948 9.36% 
## 10 S6_PIR                 913 9.01% 
## 11 S7_PIR                 806 7.96% 
## 12 S4_Sound               721 7.12% 
## 13 S4_Light               543 5.36% 
## 14 S1_Temp                162 1.6%  
## 15 S3_Temp                  0 0%    
## 16 S4_Temp                  0 0%

10. Ringkasan EDA

cat("============================================================\n")
## ============================================================
cat(" RINGKASAN EKSPLORASI DATA\n")
##  RINGKASAN EKSPLORASI DATA
cat("============================================================\n")
## ============================================================
# Headline counts for the summary; sep = "" keeps the labels' own padding.
cat("Total observasi          : ", nrow(df), "\n", sep = "")
## Total observasi          : 10129
# Predictors are all columns except Date, Time and the target.
cat("Total fitur prediktor    : ", ncol(df) - 3L, "\n", sep = "")
## Total fitur prediktor    : 16
cat("Missing values           : ", sum(is.na(df)), "\n", sep = "")
## Missing values           : 0
cat("Baris duplikat           : ", sum(duplicated(df)), "\n", sep = "")
## Baris duplikat           : 0
cat("\nDistribusi kelas:\n")
## 
## Distribusi kelas:
# Class distribution for the summary, rebuilt from the `tbl` frequency table.
print(
  data.frame(
    Kelas     = names(tbl),
    Frekuensi = as.integer(tbl),
    Persen    = paste0(round(tbl / sum(tbl) * 100, 2), "%")
  )
)
##   Kelas Frekuensi Persen
## 1     0      8228 81.23%
## 2     1       459  4.53%
## 3     2       748  7.38%
## 4     3       694  6.85%
cat("\nFitur dengan korelasi tertinggi ke target:\n")
## 
## Fitur dengan korelasi tertinggi ke target:
print(head(df_kor_target, 5))
## # A tibble: 5 × 2
##   Fitur    Korelasi
##   <chr>       <dbl>
## 1 S1_Light    0.849
## 2 S3_Light    0.793
## 3 S2_Light    0.789
## 4 S1_Temp     0.701
## 5 S7_PIR      0.695
cat("\nFitur dengan outlier terbanyak:\n")
## 
## Fitur dengan outlier terbanyak:
print(head(df_outlier, 5))
## # A tibble: 5 × 3
##   Fitur        JumlahOutlier Persen
##   <chr>                <int> <chr> 
## 1 S5_CO2_Slope          4033 39.82%
## 2 S1_Sound              1772 17.49%
## 3 S1_Light              1716 16.94%
## 4 S2_Sound              1703 16.81%
## 5 S3_Sound              1671 16.5%
cat("============================================================\n")
## ============================================================