#load data
data_dir <- "C:/Users/Lenovo/OneDrive/Documents/Kuliah/Datmin"
wifi_path <- file.path(data_dir, "wifi.csv")
lib_paths <- c(file.path(data_dir, "library1.csv"),
               file.path(data_dir, "library2.csv"),
               file.path(data_dir, "library3.csv"))

wifi <- read_csv(wifi_path)
## Rows: 1883844 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Event Time, Uni, Building, Floor
## dbl  (2): Associated Client Count, Authenticated Client Count
## dttm (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wifi_lib <- wifi %>%
  filter(str_detect(Building, regex("Perpustakaan|Library|Perpus|Book", ignore_case = TRUE)),
         !str_detect(Building, regex("Ruskin", ignore_case = TRUE)))

wifi_lib <- wifi_lib %>%
  mutate(time = as_datetime(time),
         time = floor_date(time, "10 minutes"))
wifi_lib_no_floor <- wifi_lib %>% select(-Floor)

wifi_lib_nonzero <- wifi_lib_no_floor %>%
  filter(`Associated Client Count` != 0,
         `Authenticated Client Count` != 0)
wifi_lib_nonzero <- wifi_lib_nonzero %>%
  mutate(Uni_Building = paste(Uni, Building, sep = " - "))
wifi_lib_agg <- wifi_lib_nonzero %>%
  group_by(time, Uni_Building) %>%
  summarise(
    `Associated Client Count` = mean(`Associated Client Count`, na.rm = TRUE),
    `Authenticated Client Count` = mean(`Authenticated Client Count`, na.rm = TRUE),
    .groups = "drop"
  )
lib1 <- read_csv(lib_paths[1]) %>% mutate(ts = as_datetime(ts))
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lib2 <- read_csv(lib_paths[2]) %>% mutate(ts = as_datetime(ts))
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lib3 <- read_csv(lib_paths[3]) %>% mutate(ts = as_datetime(ts))
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lib_all <- bind_rows(lib1, lib2, lib3) %>%
  mutate(ts = as_datetime(ts)) %>%
  arrange(ts)
lib_all_10min <- lib_all %>%
  mutate(ts = floor_date(ts, "10 minutes")) %>%
  group_by(ts) %>%
  summarise(across(where(is.numeric), ~sum(.x, na.rm = TRUE)), .groups = "drop") %>%
  select(-any_of(c("name", "units", "reading")))
combined_agg <- wifi_lib_agg %>%
  rename(index = time) %>%
  left_join(lib_all_10min, by = c("index" = "ts")) %>%
  drop_na() %>%
  select(-`Authenticated Client Count`)
ggplot(combined_agg, aes(x = index, y = `Associated Client Count`)) +
  geom_line(color = "darkblue") +
  labs(title = "Library Wifi Occupancy Over Time", x = "Time", y = "Client Count")

ggplot(combined_agg, aes(x = index, y = rate)) +
  geom_line(color = "orange") +
  labs(title = "Library Electricity Rate Over Time", x = "Time", y = "Rate")

ggplot(combined_agg, aes(x = `Associated Client Count`, y = rate)) +
  geom_point(alpha = 0.5) +
  labs(title = "Occupancy vs Energy Rate Scatterplot",
       x = "Associated Client Count (Occupancy)", y = "Energy Rate") +
  theme_minimal()

combined_agg <- combined_agg %>%
  mutate(date = as_date(index),
         hour = hour(index))

pivot_occ <- combined_agg %>%
  group_by(hour, date) %>%
  summarise(occ = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")

ggplot(pivot_occ, aes(x = hour, y = occ, group = date)) +
  geom_line(alpha = 0.3) +
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "black", size = 1) +
  labs(title = "Daily Library Wifi Occupancy by Hour (Each Day + Average)",
       x = "Hour of Day", y = "Associated Client Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

combined_agg <- combined_agg %>%
  mutate(weekday = wday(index, week_start = 1))  # Monday=1

weekday_data <- combined_agg %>% filter(weekday <= 5)
weekend_data <- combined_agg %>% filter(weekday > 5)
ggplot(weekday_data, aes(x = index, y = `Associated Client Count`)) +
  geom_line(color = "green") +
  labs(title = "Library Wifi Occupancy (Weekdays)", x = "Time", y = "Client Count")

ggplot(weekend_data, aes(x = index, y = `Associated Client Count`)) +
  geom_line(color = "purple") +
  labs(title = "Library Wifi Occupancy (Weekends)", x = "Time", y = "Client Count")

weekday_corr <- cor(weekday_data$`Associated Client Count`, weekday_data$rate, use="complete.obs")
weekend_corr <- cor(weekend_data$`Associated Client Count`, weekend_data$rate, use="complete.obs")

cat("Weekday correlation (occupancy vs rate):", round(weekday_corr, 3), "\n")
## Weekday correlation (occupancy vs rate): 0.893
cat("Weekend correlation (occupancy vs rate):", round(weekend_corr, 3), "\n")
## Weekend correlation (occupancy vs rate): 0.904
ggplot() +
  geom_point(data = weekday_data,
             aes(x = `Associated Client Count`, y = rate, color = "Weekday"),
             alpha = 0.5) +
  geom_point(data = weekend_data,
             aes(x = `Associated Client Count`, y = rate, color = "Weekend"),
             alpha = 0.5) +
  geom_smooth(data = weekday_data,
              aes(x = `Associated Client Count`, y = rate, color = "Weekday"),
              method = "lm", se = FALSE) +
  geom_smooth(data = weekend_data,
              aes(x = `Associated Client Count`, y = rate, color = "Weekend"),
              method = "lm", se = FALSE) +
  scale_color_manual(values = c("Weekday" = "blue", "Weekend" = "orange")) +
  labs(
    title = "Occupancy vs Energy Rate: Weekday vs Weekend",
    x = "Associated Client Count (Occupancy)",
    y = "Energy Rate",
    color = "Category"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'