library(tidyverse)
library(gridExtra)
library(readxl)
library(lubridate)
library(hms)
library(forecast)
library(fastDummies)
library(imputeTS)
library(scales)
library(kableExtra)
library(modeltime)
library(Metrics)
library(dplyr)
library(lubridate)
library(patchwork)
wifi <- read_csv('C:/Users/user/Downloads/wifi.csv',col_names = TRUE,show_col_types = FALSE)
library1 <- read_csv('C:/Users/user/Downloads/library1.csv',col_names = TRUE,show_col_types = FALSE)
library2 <- read_csv('C:/Users/user/Downloads/library2.csv',col_names = TRUE,show_col_types = FALSE)
library3 <- read_csv('C:/Users/user/Downloads/library3.csv',col_names = TRUE,show_col_types = FALSE)
head(wifi)
head(library1)
head(library2)
head(library3)
dim(wifi)
## [1] 1883844       7
dim(library1)
## [1] 18864     6
dim(library2)
## [1] 18864     6
dim(library3)
## [1] 18864     6
unique(wifi$Building)
##  [1] "Graduate College"                 "Management School"               
##  [3] "SW hse 32-33"                     "SW hse 29"                       
##  [5] "Furness outer"                    "Slaidburn House (LUSU)"          
##  [7] "SW hse 34"                        "Bowland hall"                    
##  [9] "Fylde"                            "SW hse 55-56"                    
## [11] "SW hse 40-42"                     "Bowland Twr (Old Bowland Annexe)"
## [13] "Pendle"                           "Faraday and cTAP"                
## [15] "Institute for Advanced Studies"   "University House"                
## [17] "SW hse 53-54"                     "Engineering"                     
## [19] "Field Station"                    "SW hse 36"                       
## [21] "Infolab"                          "SW hse 21-23"                    
## [23] "LICA"                             "FU Hse 71-74"                    
## [25] "Grizedale"                        "Charles Carter"                  
## [27] "Furness"                          "SW hse 43-45"                    
## [29] "SW hse 12-16"                     "Bowland Main"                    
## [31] "SW hse 35"                        "Human Resources"                 
## [33] "County"                           "FY Hse 65-70"                    
## [35] "Barker House Farm"                "SW hse 27-28"                    
## [37] "Bowland Annexe"                   "John Creed"                      
## [39] "grize-res"                        "SW hse 24-26"                    
## [41] "SW hse 20"                        "Ruskin Library"                  
## [43] "County South/Cartmel"             "Library"                         
## [45] "Conference Centre"                "Postgrad Stats (PSC)"            
## [47] "Bowland North"                    "George Fox"                      
## [49] "Hse 75 77"                        "Bailrigg House"                  
## [51] "Sports Centre"                    "SW hse 30-31"                    
## [53] "Bowland Ash"                      "Alex Square"                     
## [55] "LEC"                              "MDC"                             
## [57] "ISS Building"                     "Chaplaincy Centre"               
## [59] "CETAD"                            "SW hse 50-52"                    
## [61] "SW hse 17-19"                     "Central Workshops"               
## [63] "SW hse 46-49"                     "SW hse 39"                       
## [65] "Science and technology"           "Whewell"                         
## [67] "SW hse 37-38"                     "Lonsdale College (SW)"           
## [69] "Physics"                          "County Field"                    
## [71] "Great Hall"                       "SW hse-158-179"                  
## [73] "Preschool"                        "Reception"                       
## [75] "Hazelrigg"                        "Energy Centre"
wifi_library <- wifi[wifi$Building == "Library",]
head(wifi_library)
wifi_lib_10min <- wifi_library %>%
  mutate(time = floor_date(time, "10 minutes")) %>%
  group_by(time) %>%
  summarise(`Associated Client Count` = mean(`Associated Client Count`, na.rm = TRUE)) %>%
  ungroup()
head(wifi_lib_10min)
sum(duplicated(wifi_lib_10min))
## [1] 0
sum(duplicated(library1))
## [1] 0
sum(duplicated(library2))
## [1] 0
sum(duplicated(library3))
## [1] 0
colSums(is.na(wifi))
##                       time                 Event Time 
##                          0                          0 
##    Associated Client Count Authenticated Client Count 
##                          0                          0 
##                        Uni                   Building 
##                          0                          0 
##                      Floor 
##                          0
colSums(is.na(library1))
##         ts       name    reading      units cumulative       rate 
##          0       3041       3041       3041       3041       3047
colSums(is.na(library2))
##         ts       name    reading      units cumulative       rate 
##          0       3041       3041       3041       3041       3047
colSums(is.na(library3))
##         ts       name    reading      units cumulative       rate 
##          0       3041       3041       3041       3041       3047
library1 <- library1 %>%
  mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))

library2 <- library2 %>%
  mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))

library3 <- library3 %>%
  mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))

head(library1)
head(library2)
head(library3)
tail(library1)
tail(library2)
tail(library3)
sum(is.na(library1$rate))
## [1] 0
sum(is.na(library2$rate))
## [1] 0
sum(is.na(library3$rate))
## [1] 0
energy <- data.frame(
  time = library1$ts,
  total_rate = library1$rate + library2$rate + library3$rate
)

head(energy)
tail(energy)
min_time <- min(wifi_lib_10min$time, na.rm = TRUE)
max_time <- max(wifi_lib_10min$time, na.rm = TRUE)

energy_filt <- energy %>%
  filter(time >= min_time & time <= max_time)
head(energy_filt)
dim(wifi_lib_10min)
## [1] 3683    2
dim(energy_filt)
## [1] 3683    2
df <- wifi_lib_10min %>%
  select(time, occupancy = `Associated Client Count`) %>%
  inner_join(energy_filt %>% select(time, rate = total_rate), by = "time")

head(df)
ggplot(df, aes(x = time)) +
  geom_line(aes(y = occupancy, color = "Occupancy (WiFi)"), alpha = 0.7) +
  geom_line(aes(y = rate, color = "Energy Consumption (kWh)"), alpha = 0.7) +
  labs(
    title = "Time Series of Library Occupancy vs Energy Consumption",
    x = "Time",
    y = "Count / kWh",
    color = ""
  ) +
  theme_minimal(base_size = 15) +
  theme(aspect.ratio = 1/3, legend.position = "bottom")

ggplot(df, aes(x = occupancy, y = rate)) +
  geom_point(
    color = "black",
    fill = "blue",
    shape = 21
  ) +
  labs(
    title = "Scatterplot Occupancy vs Rate",
    x = "Occupancy",
    y = "Rate"
  ) +
  theme_minimal(base_size = 15)

lastdate <- max(as.Date(df$time))
df <- df %>% filter(as.Date(time) < lastdate)

df <- df %>%
  mutate(
    time = time,
    hour = hour(time) + minute(time) / 60,
    date = as.Date(time)
  )

df <- df %>%
  mutate(hour_of_day = floor_date(time, unit = "10 minutes"))

df_daily <- df %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    rate = mean(rate, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    hour_of_day = as_datetime(hour * 3600) %>% 
                  round_date(unit = "10 minutes")
  )
df$hour_of_day <- hms::as_hms(df$hour_of_day)
df_daily$hour_of_day <- hms::as_hms(df_daily$hour_of_day)
plot_occupancy <- ggplot() +
  geom_line(data = df, aes(x = hour_of_day, y = occupancy, group = date), 
            color = "blue", alpha = 0.2) +
  geom_line(data = df_daily, aes(x = hour_of_day, y = occupancy), 
            color = "red", linewidth = 1, linetype = "solid") +
  scale_x_time(breaks = hms::hms(hours = seq(0, 24, 4)),
               labels = scales::time_format("%H:%M")) +
  labs(title = "Daily Profiles of Occupancy (01 Feb 2020 - 25 Feb 2020)",
       y = "Occupancy", x = "") +
  theme_minimal()

plot_rate <- ggplot() +
  geom_line(data = df, aes(x = hour_of_day, y = rate, group = date), 
            color = "green", alpha = 0.2) +
  geom_line(data = df_daily, aes(x = hour_of_day, y = rate), 
            color = "orange", linewidth = 1, linetype = "solid") +
  scale_x_time(breaks = hms::hms(hours = seq(0, 24, 4)),
               labels = scales::time_format("%H:%M")) +
  labs(title = "Daily Profiles of Rate (01 Feb 2020 - 25 Feb 2020)",
       y = "Rate", x = "Hour of Day") +
  theme_minimal()

final_plot <- plot_occupancy / plot_rate
print(final_plot)

df$day_of_week <- lubridate::wday(df$time, week_start = 1)
df$weekend <- df$day_of_week >= 6
head(df)
df_weekday <- df %>%
  filter(!weekend) %>%
  group_by(hour = format(hour_of_day, "%H:%M:%S")) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    rate = mean(rate, na.rm = TRUE),
    .groups = "drop"
  )

df_weekend <- df %>%
  filter(weekend) %>%
  group_by(hour = format(hour_of_day, "%H:%M:%S")) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    rate = mean(rate, na.rm = TRUE),
    .groups = "drop"
  )

head(df_weekday)
head(df_weekend)
df_weekday <- df_weekday %>%
  mutate(hour_time = hms::as_hms(hour))

df_weekend <- df_weekend %>%
  mutate(hour_time = hms::as_hms(hour))

df_plot <- bind_rows(
  df_weekday %>% mutate(type = "Weekday"),
  df_weekend %>% mutate(type = "Weekend")
)

#Occupancy
p1 <- ggplot(df_plot, aes(x = hour_time, y = occupancy, color = type)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Weekday vs Weekend - Occupancy",
       x = "Hour of Day", y = "Occupancy") +
  scale_x_time(
    breaks = seq(0, 24*3600, by = 4*3600),  # tiap 4 jam
    labels = scales::time_format("%H:%M")
  ) +
  theme_minimal()

#Rate
p2 <- ggplot(df_plot, aes(x = hour_time, y = rate, color = type)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Weekday vs Weekend - Rate",
       x = "Hour of Day", y = "Rate") +
  scale_x_time(
    breaks = seq(0, 24*3600, by = 4*3600),
    labels = scales::time_format("%H:%M")
  ) +
  theme_minimal()

day_end <- p1/p2
print(day_end)

df_daily <- df_daily %>%
  mutate(hour_of_day = hour(hour_of_day)) %>%
  group_by(hour_of_day) %>%
  summarise(`occupancy` = mean(`occupancy`, na.rm = TRUE)) %>%
  ungroup()
head(df_daily)
ggplot(df_daily, aes(x = hour_of_day, y = occupancy)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Barplot Occupancy per Hour",
       x = "Hour",
       y = "Occupancy") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )

correlation <- cor(df$occupancy, df$rate, use = "complete.obs", method = "pearson")
cat("Korelasi Occupancy vs Rate :", correlation)
## Korelasi Occupancy vs Rate : 0.8777686
zdf <- df %>%
  mutate(
    z_rate = scale(rate)[,1],
    z_occupancy = scale(occupancy)[,1],
    anomaly = (abs(z_rate) > 2) & (abs(z_occupancy) < 2)
  )

head(zdf)
ggplot(zdf, aes(x = occupancy, y = rate)) +
  geom_point(data = subset(zdf, !anomaly), aes(color = "Normal"), alpha = 0.7) +
  geom_point(data = subset(zdf, anomaly), aes(color = "Anomaly"), alpha = 0.7) +
  scale_color_manual(values = c("Normal" = "blue", "Anomaly" = "red")) +
  labs(
    title = "Occupancy vs Rate (Anomaly Highlight)",
    x = "Occupancy",
    y = "Energy Rate",
    color = "Legend"
  ) +
  theme_minimal()