# =========================================================
# ANALISIS DATA WIFI & ENERGI PERPUSTAKAAN
# =========================================================

# List semua package yang dibutuhkan
packages <- c("readxl", "dplyr", "lubridate", "zoo", "ggplot2", "tidyr")

# Install package jika belum ada
installed_packages <- packages %in% rownames(installed.packages())
if(any(!installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Load semua library
lapply(packages, library, character.only = TRUE)
## Warning: package 'readxl' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## [[1]]
## [1] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "dplyr"     "readxl"    "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "dplyr"     "readxl"    "stats"     "graphics"  "grDevices"
##  [7] "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "zoo"       "lubridate" "dplyr"     "readxl"    "stats"     "graphics" 
##  [7] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"    "stats"    
##  [7] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "tidyr"     "ggplot2"   "zoo"       "lubridate" "dplyr"     "readxl"   
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"
# Atur working directory (opsional, kalau perlu)
# setwd("D:/COOLYAH TIARA/SEMESTER 7/Data Mining dan Visualisasi/Datmin Tiara/Tugas Kelompok 1/Data")

# =========================================================
# 1. Import Data
# =========================================================
wifi <- read_excel("C:/Users/LENOVO/Downloads/wifi.xlsx")
lib1 <- read_excel("C:/Users/LENOVO/Downloads/library1.xlsx")
lib2 <- read_excel("C:/Users/LENOVO/Downloads/library2.xlsx")
lib3 <- read_excel("C:/Users/LENOVO/Downloads/library3.xlsx")

# =========================================================
# 2. Filter WiFi hanya untuk Library
# =========================================================
wifi_lib <- wifi %>%
  filter(Building == "Library") %>%
  mutate(Timestamp = ymd_hms(time))

# Resample WiFi (interval 10 menit, rata-rata)
wifi_lib_10min <- wifi_lib %>%
  mutate(Timestamp = floor_date(Timestamp, "10 minutes")) %>%
  group_by(Timestamp) %>%
  summarise(Occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")

# =========================================================
# 3. Gabungkan data energi (lib1, lib2, lib3)
# =========================================================
lib1 <- lib1 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))
lib2 <- lib2 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))
lib3 <- lib3 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))

# Cleaning: isi NA dengan rata-rata 144 observasi pertama
fill_na <- function(x){
  if(any(is.na(x))){
    x[is.na(x)] <- mean(x[1:144], na.rm = TRUE)
  }
  return(x)
}

lib1$Value <- fill_na(lib1$rate)
lib2$Value <- fill_na(lib2$rate)
lib3$Value <- fill_na(lib3$rate)

# Gabungkan semua meteran jadi total energi
energy <- lib1 %>%
  rename(E1 = rate) %>%
  inner_join(lib2 %>% rename(E2 = rate), by = "Timestamp") %>%
  inner_join(lib3 %>% rename(E3 = rate), by = "Timestamp") %>%
  mutate(Energy = E1 + E2 + E3) %>%
  select(Timestamp, Energy)

# =========================================================
# 4. Sinkronisasi Occupancy & Energy
# =========================================================
data_all <- wifi_lib_10min %>%
  inner_join(energy, by = "Timestamp") %>%
  mutate(
    Day = weekdays(Timestamp),
    Category = ifelse(Day %in% c("Saturday", "Sunday"), "Weekend", "Weekday"),
    IsWeekend = factor(Category, levels = c("Weekday", "Weekend"))
  )

# =========================================================
# 5. Daily Profiles
# =========================================================
data_all <- data_all %>%
  mutate(Date = as.Date(Timestamp),
         Time = format(Timestamp, "%H:%M"))

daily_profiles <- data_all %>%
  group_by(Date, Time) %>%
  summarise(
    Occupancy = mean(Occupancy, na.rm = TRUE),
    Energy = mean(Energy, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    IsWeekend = factor(ifelse(wday(Date) %in% c(1,7), "Weekend", "Weekday"),
                       levels = c("Weekday", "Weekend")),
    Hour = hour(as.POSIXct(Time, format="%H:%M")),
    TimePOSIX = as.POSIXct(Time, format="%H:%M")  # tambahan untuk sumbu X jam-only
  )

# =========================================================
# 6. VISUALISASI
# =========================================================

# --- Time Series Plot Occupancy & Energy (pakai tanggal+jam)
ggplot(data_all, aes(x = Timestamp)) +
  geom_line(aes(y = Occupancy, color = "Occupancy")) +
  geom_line(aes(y = Energy, color = "Energy")) +
  scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
  labs(title = "Time Series: Occupancy vs Energy (Library)",
       x = "Tanggal & Jam", y = "Nilai", color = "Legend") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# --- Scatter Plot Occupancy vs Energy
ggplot(data_all, aes(x = Occupancy, y = Energy)) +
  geom_point(alpha = 0.5, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Scatter Plot: Occupancy vs Energy",
       x = "Occupancy (WiFi)", y = "Energy Consumption") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# --- Daily Profile Plot (pakai jam-only 00:00, 02:00, dst.)
ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy, group = Date)) +
  geom_line(alpha = 0.2, color = "red") +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "yellow", size = 1.2) +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
  labs(title = "Daily Profile Occupancy (merah=harian, kuning=rata-rata)",
       x = "Jam", y = "Occupancy") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# =========================================================
# 7. ANALISIS
# =========================================================

# Peak hours occupancy
peak_hours <- daily_profiles %>%
  group_by(Hour) %>%
  summarise(Avg_Occupancy = mean(Occupancy, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Avg_Occupancy))

print(peak_hours)
## # A tibble: 24 × 2
##     Hour Avg_Occupancy
##    <int>         <dbl>
##  1    15          362.
##  2    14          355.
##  3    16          334.
##  4    13          330.
##  5    12          298.
##  6    17          279.
##  7    11          258.
##  8    18          221.
##  9    19          184.
## 10    10          182.
## # ℹ 14 more rows
# Korelasi Occupancy vs Energy
cor_test <- cor.test(data_all$Occupancy, data_all$Energy)
print(cor_test)
## 
##  Pearson's product-moment correlation
## 
## data:  data_all$Occupancy and data_all$Energy
## t = 79.03, df = 2052, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8564723 0.8778939
## sample estimates:
##      cor 
## 0.867585
# =========================================================
# 8. WEEKDAY vs WEEKEND ANALYSIS
# =========================================================

# Scatter Plot
ggplot(data_all %>% filter(is.finite(Occupancy), is.finite(Energy)),
       aes(x = Occupancy, y = Energy, color = IsWeekend)) +
  geom_point(alpha = 0.4) +
  labs(title = "Occupancy vs Energy (Weekday vs Weekend)") +
  theme_minimal()

# Time Series Comparison (Energy)
ggplot(data_all %>% filter(is.finite(Energy)),
       aes(x = Timestamp, y = Energy, color = IsWeekend)) +
  geom_line(alpha = 0.7) +
  scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
  labs(title = "Perbandingan Energy (Weekday vs Weekend)") +
  theme_minimal()

# Time Series Plot (Occupancy & Energy, dengan secondary axis)
install.packages("ggplot2")
## Warning: package 'ggplot2' is in use and will not be installed
library(ggplot2)
library(dplyr)

ggplot(data_all %>% filter(is.finite(Occupancy), is.finite(Energy)),
       aes(x = Timestamp)) +
  geom_line(aes(y = Occupancy, color = "Occupancy"), alpha = 0.7) +
  geom_line(aes(y = Energy/mean(Energy, na.rm = TRUE)*mean(Occupancy, na.rm = TRUE),
                color = "Energy"), alpha = 0.7) +
  facet_wrap(~IsWeekend, ncol = 1, scales = "free_x") +
  scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
  scale_y_continuous(
    name = "Occupancy",
    sec.axis = sec_axis(~./mean(data_all$Occupancy, na.rm = TRUE)*mean(data_all$Energy, na.rm = TRUE),
                        name = "Energy Consumption")
  ) +
  scale_color_manual(values = c("Occupancy" = "blue", "Energy" = "green")) +
  labs(title = "Occupancy vs Energy Over Time (Weekday vs Weekend)",
       x = "Tanggal & Jam", color = "Metric") +
  theme_minimal()

# Boxplot Occupancy (Weekday vs Weekend)
ggplot(daily_profiles %>% filter(is.finite(Occupancy)), 
       aes(x = IsWeekend, y = Occupancy, fill = IsWeekend)) +
  geom_boxplot(alpha = 0.7) +
  labs(title = "Occupancy Distribution (Weekday vs Weekend)",
       x = "", y = "Occupancy") +
  theme_minimal()

# Boxplot Energy (Weekday vs Weekend)
ggplot(daily_profiles %>% filter(is.finite(Energy)), 
       aes(x = IsWeekend, y = Energy, fill = IsWeekend)) +
  geom_boxplot(alpha = 0.7) +
  labs(title = "Energy Consumption Distribution (Weekday vs Weekend)",
       x = "", y = "Energy") +
  theme_minimal()