Pendahuluan

Dokumen ini berisi analisis data WiFi (occupancy) dan konsumsi energi di perpustakaan.
Langkah-langkah meliputi: preprocessing, sinkronisasi data, pembuatan profil harian, visualisasi, serta deteksi anomali.


0. Packages

packages <- c("readxl", "dplyr", "lubridate", "zoo", "ggplot2", "tidyr")
installed_packages <- packages %in% rownames(installed.packages())
if(any(!installed_packages)) {
  install.packages(packages[!installed_packages])
}
lapply(packages, library, character.only = TRUE)
## [[1]]
## [1] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "dplyr"     "readxl"    "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "dplyr"     "readxl"    "stats"     "graphics"  "grDevices"
##  [7] "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "zoo"       "lubridate" "dplyr"     "readxl"    "stats"     "graphics" 
##  [7] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"    "stats"    
##  [7] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "tidyr"     "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"   
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"

1. Import & Preprocessing

wifi <- readxl::read_excel("C:/Users/LENOVO/Downloads/wifi.xlsx")
lib1 <- readxl::read_excel("C:/Users/LENOVO/Downloads/library1.xlsx")
lib2 <- readxl::read_excel("C:/Users/LENOVO/Downloads/library2.xlsx")
lib3 <- readxl::read_excel("C:/Users/LENOVO/Downloads/library3.xlsx")

# --- Filter WiFi hanya Library ---
wifi_lib <- wifi %>%
  dplyr::filter(Building == "Library") %>%
  dplyr::mutate(Timestamp = lubridate::ymd_hms(time))

# --- Resample WiFi ke interval 10 menit ---
wifi_lib_10min <- wifi_lib %>%
  dplyr::mutate(Timestamp = floor_date(Timestamp, "10 minutes")) %>%
  dplyr::group_by(Timestamp) %>%
  dplyr::summarise(Occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")

# --- Preprocessing Energi ---
fill_na <- function(x){
  if(any(is.na(x))){
    x[is.na(x)] <- mean(x, na.rm = TRUE)
  }
  return(x)
}

lib1 <- lib1 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))
lib2 <- lib2 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))
lib3 <- lib3 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))

# --- Gabung Energi ---
energy <- lib1 %>%
  rename(E1 = Value) %>%
  inner_join(lib2 %>% rename(E2 = Value), by = "Timestamp") %>%
  inner_join(lib3 %>% rename(E3 = Value), by = "Timestamp") %>%
  mutate(Energy = E1 + E2 + E3) %>%
  select(Timestamp, Energy)

2. Sinkronisasi Data

data_all <- wifi_lib_10min %>%
  inner_join(energy, by = "Timestamp") %>%
  mutate(
    Date = as.Date(Timestamp),
    Time = format(Timestamp, "%H:%M"),
    Hour = hour(Timestamp),
    Day = weekdays(Timestamp),
    Category = ifelse(Day %in% c("Saturday", "Sunday"), "Weekend", "Weekday"),
    IsWeekend = factor(Category, levels = c("Weekday", "Weekend"))
  )

3. Daily Profiles

daily_profiles <- data_all %>%
  group_by(Date, Time, IsWeekend) %>%
  summarise(
    Occupancy = mean(Occupancy, na.rm = TRUE),
    Energy = mean(Energy, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    Hour = hour(as.POSIXct(Time, format="%H:%M")),
    TimePOSIX = as.POSIXct(Time, format="%H:%M")
  )

4. Visualisasi

Daily Profile Occupancy

ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy, group = Date)) +
  geom_line(alpha = 0.2, color = "red") +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "yellow", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Daily Profile Occupancy (merah=harian, kuning=rata-rata)",
       x = "Jam", y = "Occupancy") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Daily Profile Energy

ggplot(daily_profiles, aes(x = TimePOSIX, y = Energy, group = Date)) +
  geom_line(alpha = 0.2, color = "blue") +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "green", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Daily Profile Energy (biru=harian, hijau=rata-rata)",
       x = "Jam", y = "Energy Consumption") +
  theme_minimal()

Weekday vs Weekend Comparison

# Occupancy
p_occ <- ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy,
                                    group = IsWeekend, color = IsWeekend)) +
  stat_summary(fun = mean, geom = "line", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Weekday vs Weekend - Occupancy",
       x = "Hour of Day", y = "Occupancy", color = "") +
  theme_minimal()

# Energy
p_energy <- ggplot(daily_profiles, aes(x = TimePOSIX, y = Energy,
                                       group = IsWeekend, color = IsWeekend)) +
  stat_summary(fun = mean, geom = "line", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Weekday vs Weekend - Rate",
       x = "Hour of Day", y = "Rate", color = "") +
  theme_minimal()

# pakai patchwork
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
p_occ / p_energy


5. Barplot Occupancy per Hour

hourly_occ <- data_all %>%
  group_by(Hour) %>%
  summarise(Avg_Occupancy = mean(Occupancy, na.rm = TRUE), .groups = "drop")

ggplot(hourly_occ, aes(x = Hour, y = Avg_Occupancy)) +
  geom_col(fill = "orange", alpha = 0.8) +
  labs(title = "Rata-rata Occupancy per Jam",
       x = "Jam", y = "Rata-rata Occupancy") +
  theme_minimal()


6. Anomaly Detection

Metode sederhana menggunakan Z-score.

data_all <- data_all %>%
  mutate(
    z_occ = scale(Occupancy),
    z_energy = scale(Energy),
    Anomaly = ifelse(abs(z_occ) > 3 | abs(z_energy) > 3, "Anomaly", "Normal")
  )

ggplot(data_all, aes(x = Timestamp, y = Occupancy, color = Anomaly)) +
  geom_point(alpha = 0.7) +
  labs(title = "Deteksi Anomali Occupancy",
       x = "Timestamp", y = "Occupancy") +
  theme_minimal()

Scatterplot Occupancy vs Energy (Highlight Anomaly)

threshold <- 3  # sensitifitas
data_all$z_occ <- scale(data_all$Occupancy)
data_all$anomaly_occ <- abs(data_all$z_occ) > threshold

ggplot(data_all, aes(x = Occupancy, y = Energy)) +
  geom_point(data = subset(data_all, anomaly_occ == FALSE),
             aes(color = "Normal"), alpha = 0.7) +
  geom_point(data = subset(data_all, anomaly_occ == TRUE),
             aes(color = "Anomaly"), alpha = 0.7) +
  scale_color_manual(values = c("Normal" = "blue", "Anomaly" = "red")) +
  labs(
    title = "Occupancy vs Energy (Anomaly Highlight)",
    x = "Occupancy",
    y = "Energy Consumption",
    color = "Legend"
  ) +
  theme_minimal()


Kesimpulan