# =========================
# Install & Load Packages
# =========================
packages <- c("tidyverse", "lubridate", "ggplot2", "readr")

installed_packages <- rownames(installed.packages())
for (p in packages) {
  if (!(p %in% installed_packages)) {
    install.packages(p)
  }
}


library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(readr)

# =========================
# 1. Load dataset
# =========================
library(tidyverse)

df_wifi     <- read_csv("D:/Documents/AAA IRFAN/wifi.csv")
## Rows: 1883844 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Event Time, Uni, Building, Floor
## dbl  (2): Associated Client Count, Authenticated Client Count
## dttm (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy1  <- read_csv("D:/Documents/AAA IRFAN/library1.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy2  <- read_csv("D:/Documents/AAA IRFAN/library2.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy3  <- read_csv("D:/Documents/AAA IRFAN/library3.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# =========================
# 2. Data WiFi Cleaning
# =========================
subset_cols <- c("time", "Building", "Floor", "Associated Client Count")

df_wifi_cleaned <- df_wifi %>%
  distinct(across(all_of(subset_cols)), .keep_all = TRUE) %>%
  filter(str_trim(tolower(Building)) == "library") %>%
  mutate(time = ymd_hms(time))

# =========================
# 3. Data Energi Cleaning
# =========================
df_energy1 <- df_energy1 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## !  131 failed to parse.
df_energy2 <- df_energy2 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## !  131 failed to parse.
df_energy3 <- df_energy3 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## !  131 failed to parse.
fill_missing <- function(df) {
  mean_first_144 <- mean(head(df$rate, 144), na.rm = TRUE)
  df$rate[is.na(df$rate)] <- mean_first_144
  df
}

df_energy1 <- fill_missing(df_energy1)
df_energy2 <- fill_missing(df_energy2)
df_energy3 <- fill_missing(df_energy3)

df_energy_total <- bind_rows(df_energy1, df_energy2, df_energy3) %>%
  group_by(ts) %>%
  summarise(total_rate = sum(rate, na.rm = TRUE), .groups = "drop")

# =========================
# 4. Resample WiFi (10 menit rata-rata)
# =========================
df_wifi_resampled <- df_wifi_cleaned %>%
  mutate(time_10min = floor_date(time, "10 minutes")) %>%
  group_by(time_10min) %>%
  summarise(occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")

# =========================
# 5. Gabung WiFi + Energi
# =========================
df_final <- inner_join(df_wifi_resampled, df_energy_total, 
                       by = c("time_10min" = "ts"))

# =========================
# 6. Visualisasi
# =========================

## Time Series
ggplot(df_final, aes(x = time_10min)) +
  geom_line(aes(y = occupancy, color = "WiFi Occupancy")) +
  geom_line(aes(y = total_rate, color = "Energy Consumption")) +
  labs(title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi",
       x = "Waktu", y = "Nilai") +
  scale_color_manual(values = c("WiFi Occupancy" = "blue", "Energy Consumption" = "red")) +
  theme_minimal()

## Scatter Plot
ggplot(df_final, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3, color = "darkgreen") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Hubungan Okupansi vs Konsumsi Energi",
       x = "Okupansi (WiFi)", y = "Energi")
## `geom_smooth()` using formula = 'y ~ x'

## Daily Profile
df_daily <- df_final %>%
  mutate(hour = hour(time_10min)) %>%
  group_by(hour) %>%
  summarise(occupancy = mean(occupancy, na.rm = TRUE),
            total_rate = mean(total_rate, na.rm = TRUE))

ggplot(df_daily, aes(x = hour)) +
  geom_line(aes(y = occupancy, color = "WiFi")) +
  geom_line(aes(y = total_rate, color = "Energy")) +
  labs(title = "Pola Harian Rata-rata",
       x = "Jam (0-23)", y = "Nilai") +
  theme_minimal()

# =========================
# 7. Weekday vs Weekend
# =========================
df_final <- df_final %>%
  mutate(day_of_week = wday(time_10min, label = TRUE),
         day_type = ifelse(wday(time_10min) %in% 2:6, "Weekday", "Weekend"),
         hour = hour(time_10min))

## Scatter comparison
ggplot(df_final, aes(x = occupancy, y = total_rate, color = day_type)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Okupansi vs Energi: Weekday vs Weekend")
## `geom_smooth()` using formula = 'y ~ x'

## Daily Profile comparison
df_daytype <- df_final %>%
  group_by(day_type, hour) %>%
  summarise(occupancy = mean(occupancy, na.rm = TRUE),
            total_rate = mean(total_rate, na.rm = TRUE), .groups = "drop")

ggplot(df_daytype, aes(x = hour, group = day_type)) +
  geom_line(aes(y = occupancy, color = day_type)) +
  geom_line(aes(y = total_rate, linetype = day_type), color = "red") +
  labs(title = "Perbandingan Pola Harian: Weekday vs Weekend",
       x = "Jam", y = "Rata-rata")

list.files("C:/Users/wdarm/Documents/College/Semester 5/Data Mining & Visualization/WEEK 2/Library Wifi Analysis Project")
## character(0)