library(tidyverse)
library(gridExtra)
library(readxl)
library(lubridate)
library(hms)
library(forecast)
library(fastDummies)
library(imputeTS)
library(scales)
library(kableExtra)
library(modeltime)
library(Metrics)
library(dplyr)
library(lubridate)
library(patchwork)
wifi <- read_csv('C:/Users/user/Downloads/wifi.csv',col_names = TRUE,show_col_types = FALSE)
library1 <- read_csv('C:/Users/user/Downloads/library1.csv',col_names = TRUE,show_col_types = FALSE)
library2 <- read_csv('C:/Users/user/Downloads/library2.csv',col_names = TRUE,show_col_types = FALSE)
library3 <- read_csv('C:/Users/user/Downloads/library3.csv',col_names = TRUE,show_col_types = FALSE)
head(wifi)
head(library1)
head(library2)
head(library3)
dim(wifi)
## [1] 1883844 7
dim(library1)
## [1] 18864 6
dim(library2)
## [1] 18864 6
dim(library3)
## [1] 18864 6
unique(wifi$Building)
## [1] "Graduate College" "Management School"
## [3] "SW hse 32-33" "SW hse 29"
## [5] "Furness outer" "Slaidburn House (LUSU)"
## [7] "SW hse 34" "Bowland hall"
## [9] "Fylde" "SW hse 55-56"
## [11] "SW hse 40-42" "Bowland Twr (Old Bowland Annexe)"
## [13] "Pendle" "Faraday and cTAP"
## [15] "Institute for Advanced Studies" "University House"
## [17] "SW hse 53-54" "Engineering"
## [19] "Field Station" "SW hse 36"
## [21] "Infolab" "SW hse 21-23"
## [23] "LICA" "FU Hse 71-74"
## [25] "Grizedale" "Charles Carter"
## [27] "Furness" "SW hse 43-45"
## [29] "SW hse 12-16" "Bowland Main"
## [31] "SW hse 35" "Human Resources"
## [33] "County" "FY Hse 65-70"
## [35] "Barker House Farm" "SW hse 27-28"
## [37] "Bowland Annexe" "John Creed"
## [39] "grize-res" "SW hse 24-26"
## [41] "SW hse 20" "Ruskin Library"
## [43] "County South/Cartmel" "Library"
## [45] "Conference Centre" "Postgrad Stats (PSC)"
## [47] "Bowland North" "George Fox"
## [49] "Hse 75 77" "Bailrigg House"
## [51] "Sports Centre" "SW hse 30-31"
## [53] "Bowland Ash" "Alex Square"
## [55] "LEC" "MDC"
## [57] "ISS Building" "Chaplaincy Centre"
## [59] "CETAD" "SW hse 50-52"
## [61] "SW hse 17-19" "Central Workshops"
## [63] "SW hse 46-49" "SW hse 39"
## [65] "Science and technology" "Whewell"
## [67] "SW hse 37-38" "Lonsdale College (SW)"
## [69] "Physics" "County Field"
## [71] "Great Hall" "SW hse-158-179"
## [73] "Preschool" "Reception"
## [75] "Hazelrigg" "Energy Centre"
wifi_library <- wifi[wifi$Building == "Library",]
head(wifi_library)
wifi_lib_10min <- wifi_library %>%
mutate(time = floor_date(time, "10 minutes")) %>%
group_by(time) %>%
summarise(`Associated Client Count` = mean(`Associated Client Count`, na.rm = TRUE)) %>%
ungroup()
head(wifi_lib_10min)
sum(duplicated(wifi_lib_10min))
## [1] 0
sum(duplicated(library1))
## [1] 0
sum(duplicated(library2))
## [1] 0
sum(duplicated(library3))
## [1] 0
colSums(is.na(wifi))
## time Event Time
## 0 0
## Associated Client Count Authenticated Client Count
## 0 0
## Uni Building
## 0 0
## Floor
## 0
colSums(is.na(library1))
## ts name reading units cumulative rate
## 0 3041 3041 3041 3041 3047
colSums(is.na(library2))
## ts name reading units cumulative rate
## 0 3041 3041 3041 3041 3047
colSums(is.na(library3))
## ts name reading units cumulative rate
## 0 3041 3041 3041 3041 3047
library1 <- library1 %>%
mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))
library2 <- library2 %>%
mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))
library3 <- library3 %>%
mutate(rate = ifelse(is.na(rate), mean(rate[1:144], na.rm = TRUE), rate))
head(library1)
head(library2)
head(library3)
tail(library1)
tail(library2)
tail(library3)
sum(is.na(library1$rate))
## [1] 0
sum(is.na(library2$rate))
## [1] 0
sum(is.na(library3$rate))
## [1] 0
energy <- data.frame(
time = library1$ts,
total_rate = library1$rate + library2$rate + library3$rate
)
head(energy)
tail(energy)
min_time <- min(wifi_lib_10min$time, na.rm = TRUE)
max_time <- max(wifi_lib_10min$time, na.rm = TRUE)
energy_filt <- energy %>%
filter(time >= min_time & time <= max_time)
head(energy_filt)
dim(wifi_lib_10min)
## [1] 3683 2
dim(energy_filt)
## [1] 3683 2
df <- wifi_lib_10min %>%
select(time, occupancy = `Associated Client Count`) %>%
inner_join(energy_filt %>% select(time, rate = total_rate), by = "time")
head(df)
ggplot(df, aes(x = time)) +
geom_line(aes(y = occupancy, color = "Occupancy (WiFi)"), alpha = 0.7) +
geom_line(aes(y = rate, color = "Energy Consumption (kWh)"), alpha = 0.7) +
labs(
title = "Time Series of Library Occupancy vs Energy Consumption",
x = "Time",
y = "Count / kWh",
color = ""
) +
theme_minimal(base_size = 15) +
theme(aspect.ratio = 1/3, legend.position = "bottom")

ggplot(df, aes(x = occupancy, y = rate)) +
geom_point(
color = "black",
fill = "blue",
shape = 21
) +
labs(
title = "Scatterplot Occupancy vs Rate",
x = "Occupancy",
y = "Rate"
) +
theme_minimal(base_size = 15)

lastdate <- max(as.Date(df$time))
df <- df %>% filter(as.Date(time) < lastdate)
df <- df %>%
mutate(
time = time,
hour = hour(time) + minute(time) / 60,
date = as.Date(time)
)
df <- df %>%
mutate(hour_of_day = floor_date(time, unit = "10 minutes"))
df_daily <- df %>%
group_by(hour) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
rate = mean(rate, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
hour_of_day = as_datetime(hour * 3600) %>%
round_date(unit = "10 minutes")
)
df$hour_of_day <- hms::as_hms(df$hour_of_day)
df_daily$hour_of_day <- hms::as_hms(df_daily$hour_of_day)
plot_occupancy <- ggplot() +
geom_line(data = df, aes(x = hour_of_day, y = occupancy, group = date),
color = "blue", alpha = 0.2) +
geom_line(data = df_daily, aes(x = hour_of_day, y = occupancy),
color = "red", linewidth = 1, linetype = "solid") +
scale_x_time(breaks = hms::hms(hours = seq(0, 24, 4)),
labels = scales::time_format("%H:%M")) +
labs(title = "Daily Profiles of Occupancy (01 Feb 2020 - 25 Feb 2020)",
y = "Occupancy", x = "") +
theme_minimal()
plot_rate <- ggplot() +
geom_line(data = df, aes(x = hour_of_day, y = rate, group = date),
color = "green", alpha = 0.2) +
geom_line(data = df_daily, aes(x = hour_of_day, y = rate),
color = "orange", linewidth = 1, linetype = "solid") +
scale_x_time(breaks = hms::hms(hours = seq(0, 24, 4)),
labels = scales::time_format("%H:%M")) +
labs(title = "Daily Profiles of Rate (01 Feb 2020 - 25 Feb 2020)",
y = "Rate", x = "Hour of Day") +
theme_minimal()
final_plot <- plot_occupancy / plot_rate
print(final_plot)

df$day_of_week <- lubridate::wday(df$time, week_start = 1)
df$weekend <- df$day_of_week >= 6
head(df)
df_weekday <- df %>%
filter(!weekend) %>%
group_by(hour = format(hour_of_day, "%H:%M:%S")) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
rate = mean(rate, na.rm = TRUE),
.groups = "drop"
)
df_weekend <- df %>%
filter(weekend) %>%
group_by(hour = format(hour_of_day, "%H:%M:%S")) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
rate = mean(rate, na.rm = TRUE),
.groups = "drop"
)
head(df_weekday)
head(df_weekend)
df_weekday <- df_weekday %>%
mutate(hour_time = hms::as_hms(hour))
df_weekend <- df_weekend %>%
mutate(hour_time = hms::as_hms(hour))
df_plot <- bind_rows(
df_weekday %>% mutate(type = "Weekday"),
df_weekend %>% mutate(type = "Weekend")
)
#Occupancy
p1 <- ggplot(df_plot, aes(x = hour_time, y = occupancy, color = type)) +
geom_line(linewidth = 1.2) +
labs(title = "Weekday vs Weekend - Occupancy",
x = "Hour of Day", y = "Occupancy") +
scale_x_time(
breaks = seq(0, 24*3600, by = 4*3600), # tiap 4 jam
labels = scales::time_format("%H:%M")
) +
theme_minimal()
#Rate
p2 <- ggplot(df_plot, aes(x = hour_time, y = rate, color = type)) +
geom_line(linewidth = 1.2) +
labs(title = "Weekday vs Weekend - Rate",
x = "Hour of Day", y = "Rate") +
scale_x_time(
breaks = seq(0, 24*3600, by = 4*3600),
labels = scales::time_format("%H:%M")
) +
theme_minimal()
day_end <- p1/p2
print(day_end)

df_daily <- df_daily %>%
mutate(hour_of_day = hour(hour_of_day)) %>%
group_by(hour_of_day) %>%
summarise(`occupancy` = mean(`occupancy`, na.rm = TRUE)) %>%
ungroup()
head(df_daily)
ggplot(df_daily, aes(x = hour_of_day, y = occupancy)) +
geom_bar(stat = "identity", fill = "blue") +
labs(title = "Barplot Occupancy per Hour",
x = "Hour",
y = "Occupancy") +
theme_minimal() +
theme(
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10)
)

correlation <- cor(df$occupancy, df$rate, use = "complete.obs", method = "pearson")
cat("Korelasi Occupancy vs Rate :", correlation)
## Korelasi Occupancy vs Rate : 0.8777686
zdf <- df %>%
mutate(
z_rate = scale(rate)[,1],
z_occupancy = scale(occupancy)[,1],
anomaly = (abs(z_rate) > 2) & (abs(z_occupancy) < 2)
)
head(zdf)
ggplot(zdf, aes(x = occupancy, y = rate)) +
geom_point(data = subset(zdf, !anomaly), aes(color = "Normal"), alpha = 0.7) +
geom_point(data = subset(zdf, anomaly), aes(color = "Anomaly"), alpha = 0.7) +
scale_color_manual(values = c("Normal" = "blue", "Anomaly" = "red")) +
labs(
title = "Occupancy vs Rate (Anomaly Highlight)",
x = "Occupancy",
y = "Energy Rate",
color = "Legend"
) +
theme_minimal()
