0. Project Tasks

packages <- c("readxl", "dplyr", "lubridate", "zoo", "ggplot2", "tidyr", "patchwork")
installed_packages <- packages %in% rownames(installed.packages())
if(any(!installed_packages)) {
  install.packages(packages[!installed_packages])
}
lapply(packages, library, character.only = TRUE)
## [[1]]
## [1] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "dplyr"     "readxl"    "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "dplyr"     "readxl"    "stats"     "graphics"  "grDevices"
##  [7] "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "zoo"       "lubridate" "dplyr"     "readxl"    "stats"     "graphics" 
##  [7] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"    "stats"    
##  [7] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "tidyr"     "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"   
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"     
## 
## [[7]]
##  [1] "patchwork" "tidyr"     "ggplot2"   "zoo"       "lubridate" "dplyr"    
##  [7] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [13] "methods"   "base"

1. Filter & Synchronize

# Import data
wifi <- read_excel("C:/Users/LENOVO/Downloads/wifi.xlsx")
lib1 <- read_excel("C:/Users/LENOVO/Downloads/library1.xlsx")
lib2 <- read_excel("C:/Users/LENOVO/Downloads/library2.xlsx")
lib3 <- read_excel("C:/Users/LENOVO/Downloads/library3.xlsx")

# --- Filter WiFi hanya untuk Library ---
wifi_lib <- wifi %>%
  filter(Building == "Library") %>%
  mutate(Timestamp = ymd_hms(time))

# --- Resample WiFi ke interval 10 menit ---
wifi_lib_10min <- wifi_lib %>%
  mutate(Timestamp = floor_date(Timestamp, "10 minutes")) %>%
  group_by(Timestamp) %>%
  summarise(Occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")

2. Cleaning

# Fungsi isi NA
fill_na <- function(x){
  if(any(is.na(x))){
    x[is.na(x)] <- mean(x, na.rm = TRUE)
  }
  return(x)
}

# Preprocessing energi
lib1 <- lib1 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))
lib2 <- lib2 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))
lib3 <- lib3 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"), Value = fill_na(rate))

# Gabung energi
energy <- lib1 %>%
  rename(E1 = Value) %>%
  inner_join(lib2 %>% rename(E2 = Value), by = "Timestamp") %>%
  inner_join(lib3 %>% rename(E3 = Value), by = "Timestamp") %>%
  mutate(Energy = E1 + E2 + E3) %>%
  select(Timestamp, Energy)

# Sinkronisasi dengan WiFi
data_all <- wifi_lib_10min %>%
  inner_join(energy, by = "Timestamp") %>%
  mutate(
    Date = as.Date(Timestamp),
    Time = format(Timestamp, "%H:%M"),
    Hour = hour(Timestamp),
    Day = weekdays(Timestamp),
    Category = ifelse(Day %in% c("Saturday", "Sunday"), "Weekend", "Weekday"),
    IsWeekend = factor(Category, levels = c("Weekday", "Weekend"))
  )

3. Visualisasi

Daily Profiles

daily_profiles <- data_all %>%
  group_by(Date, Time, IsWeekend) %>%
  summarise(
    Occupancy = mean(Occupancy, na.rm = TRUE),
    Energy = mean(Energy, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    Hour = hour(as.POSIXct(Time, format="%H:%M")),
    TimePOSIX = as.POSIXct(Time, format="%H:%M")
  )

Occupancy

ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy, group = Date)) +
  geom_line(alpha = 0.2, color = "red") +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "yellow", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Daily Profile Occupancy", x = "Jam", y = "Occupancy") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Energy

ggplot(daily_profiles, aes(x = TimePOSIX, y = Energy, group = Date)) +
  geom_line(alpha = 0.2, color = "blue") +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "green", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Daily Profile Energy", x = "Jam", y = "Energy Consumption") +
  theme_minimal()

## Time Series Occupancy vs Energy

ggplot(data_all, aes(x = Timestamp)) +
  geom_line(aes(y = Occupancy, color = "Occupancy (WiFi)"), alpha = 0.7) +
  geom_line(aes(y = Energy, color = "Energy Consumption (kWh)"), alpha = 0.7) +
  labs(title = "Time Series of Library Occupancy vs Energy Consumption",
       x = "Time", y = "Count / kWh", color = "") +
  theme_minimal() +
  theme(legend.position = "bottom")

4. Analisis

Barplot Occupancy per Jam

hourly_occ <- data_all %>%
  group_by(Hour) %>%
  summarise(Avg_Occupancy = mean(Occupancy, na.rm = TRUE), .groups = "drop")

ggplot(hourly_occ, aes(x = Hour, y = Avg_Occupancy)) +
  geom_col(fill = "orange", alpha = 0.8) +
  labs(title = "Rata-rata Occupancy per Jam", x = "Jam", y = "Rata-rata Occupancy") +
  theme_minimal()

Korelasi Occupancy vs Energy

cor_test <- cor.test(data_all$Occupancy, data_all$Energy)
cor_test
## 
##  Pearson's product-moment correlation
## 
## data:  data_all$Occupancy and data_all$Energy
## t = 79.064, df = 2053, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8565188 0.8779291
## sample estimates:
##       cor 
## 0.8676256

Anomaly Detection (Z-Score)

data_all <- data_all %>%
  mutate(
    z_occ = scale(Occupancy),
    z_energy = scale(Energy),
    Anomaly = ifelse(abs(z_occ) > 3 | abs(z_energy) > 3, "Anomaly", "Normal")
  )

ggplot(data_all, aes(x = Timestamp, y = Occupancy, color = Anomaly)) +
  geom_point(alpha = 0.7) +
  labs(title = "Deteksi Anomali Occupancy", x = "Timestamp", y = "Occupancy") +
  theme_minimal()


5. Weekend vs Weekday

Perbandingan Profil

p_occ <- ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy,
                                    group = IsWeekend, color = IsWeekend)) +
  stat_summary(fun = mean, geom = "line", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Weekday vs Weekend - Occupancy", x = "Jam", y = "Occupancy", color = "") +
  theme_minimal()

p_energy <- ggplot(daily_profiles, aes(x = TimePOSIX, y = Energy,
                                       group = IsWeekend, color = IsWeekend)) +
  stat_summary(fun = mean, geom = "line", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Weekday vs Weekend - Energy", x = "Jam", y = "Energy", color = "") +
  theme_minimal()

p_occ / p_energy

Scatterplot dengan Highlight Anomali

threshold <- 3
data_all$z_occ <- scale(data_all$Occupancy)
data_all$anomaly_occ <- abs(data_all$z_occ) > threshold

ggplot(data_all, aes(x = Occupancy, y = Energy)) +
  geom_point(data = subset(data_all, anomaly_occ == FALSE),
             aes(color = "Normal"), alpha = 0.7) +
  geom_point(data = subset(data_all, anomaly_occ == TRUE),
             aes(color = "Anomaly"), alpha = 0.7) +
  scale_color_manual(values = c("Normal" = "blue", "Anomaly" = "red")) +
  labs(title = "Occupancy vs Energy (Anomaly Highlight)",
       x = "Occupancy", y = "Energy Consumption", color = "Legend") +
  theme_minimal()