Data Integration & Cleaning
# WiFi Dataset
wifi <- read.csv("C:/Users/L E N O V O/Downloads/wifi.csv")
wifi$time <- ymd_hms(wifi$time)
wifi <- wifi %>%
filter(Building == " Library ") %>%
select(time, Associated.Client.Count) %>%
mutate(time = floor_date(time, "10 minutes")) %>%
group_by(time) %>%
summarise(mean_client_count = mean(Associated.Client.Count, na.rm = TRUE), .groups="drop")
# Electricity Dataset
lib1 <- read.csv("C:/Users/L E N O V O/Downloads/library1.csv")
lib2 <- read.csv("C:/Users/L E N O V O/Downloads/library2.csv")
lib3 <- read.csv("C:/Users/L E N O V O/Downloads/library3.csv")
lib1$ts <- ymd_hms(lib1$ts)
lib2$ts <- ymd_hms(lib2$ts)
lib3$ts <- ymd_hms(lib3$ts)
fill_na <- function(x) {
x[is.na(x)] <- mean(head(x, 144), na.rm = TRUE)
return(x)
}
lib1$rate <- fill_na(lib1$rate)
lib2$rate <- fill_na(lib2$rate)
lib3$rate <- fill_na(lib3$rate)
electricity <- lib1 %>%
rename(rate_1 = rate) %>%
full_join(rename(lib2, rate_2 = rate), by = "ts") %>%
full_join(rename(lib3, rate_3 = rate), by = "ts") %>%
mutate(total_electricity = rate_1 + rate_2 + rate_3,
time = ts)
df <- inner_join(wifi, electricity %>% select(time, total_electricity), by = "time")
Time Series Occupancy dan Electricity
ggplot(df, aes(x = time)) +
geom_line(aes(y = mean_client_count, color = "Clients")) +
geom_line(aes(y = total_electricity, color = "Electricity")) +
labs(x = "Time", y = "Value", title = "Time Series of Occupancy and Electricity") +
scale_color_manual(values = c("Clients" = "blue", "Electricity" = "red"))

Korelasi Occupancy dan Electricity
ggplot(df, aes(x = mean_client_count, y = total_electricity)) +
geom_point(alpha = 0.6, color="black") +
labs(x = "WiFi Clients", y = "Electricity", title = "Occupancy vs Electricity")

Nilai korelasi Pearson yang diperoleh sebesar 0.878 (p < 0.001),
yang berarti hubungan keduanya sangat kuat dan signifikan.
ggplot(df, aes(x = mean_client_count, y = total_electricity)) +
geom_point(alpha=0.5, color="steelblue") +
geom_smooth(method="lm", se=FALSE, color="red", linetype="dashed") +
labs(title="Occupancy vs Energy Consumption",
x="WiFi Clients (Occupancy)", y="Total Electricity")
## `geom_smooth()` using formula = 'y ~ x'

Garis regresi linier menegaskan bahwa peningkatan jumlah pengguna
cenderung diikuti kenaikan konsumsi energi.
Daily Profile
daily_profile <- df %>%
mutate(date = as.Date(time),
hour_minute = format(time, "%H:%M")) %>%
group_by(date, hour_minute) %>%
summarise(mean_clients = mean(mean_client_count, na.rm=TRUE),
total_energy = mean(total_electricity, na.rm=TRUE), .groups="drop")
avg_daily_profile <- daily_profile %>%
group_by(hour_minute) %>%
summarise(mean_clients = mean(mean_clients, na.rm=TRUE),
total_energy = mean(total_energy, na.rm=TRUE), .groups="drop")
ggplot(daily_profile, aes(x = as.POSIXct(hour_minute, format="%H:%M"),
y = mean_clients, group = date)) +
geom_line(color="grey70", alpha=0.6) +
geom_line(data = avg_daily_profile, aes(x = as.POSIXct(hour_minute, format="%H:%M"),
y = mean_clients, group=1),
color="blue", size=1.2) +
labs(title="Daily Profile - Occupancy (WiFi)", x="Time of Day", y="Avg Client Count") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Analisis Peak Hour & Mismatch
peak_hour <- df %>%
group_by(date = as.Date(time), hour = hour(time)) %>%
summarise(mean_clients = mean(mean_client_count, na.rm=TRUE), .groups="drop") %>%
slice_max(mean_clients, n=1, with_ties = FALSE)
avg_peak_hour <- peak_hour %>%
count(hour) %>%
slice_max(n, n=1)
cat("Peak hour terbanyak terjadi pada jam:", avg_peak_hour$hour, "\n")
## Peak hour terbanyak terjadi pada jam: 15
Jam puncak (peak hour) terbanyak terjadi pada pukul 15.
Analisis Mismatch (Case Energi vs Occupancy Tidak Sinkron)
low_occ <- quantile(df$mean_client_count, 0.25, na.rm=TRUE)
high_energy <- quantile(df$total_electricity, 0.90, na.rm=TRUE)
high_occ <- quantile(df$mean_client_count, 0.90, na.rm=TRUE)
low_energy <- quantile(df$total_electricity, 0.25, na.rm=TRUE)
mismatch_highE_lowO <- df %>%
filter(mean_client_count <= low_occ & total_electricity >= high_energy)
mismatch_highO_lowE <- df %>%
filter(mean_client_count >= high_occ & total_electricity <= low_energy)
cat("Jumlah kasus Energi Tinggi - Occupancy Rendah:", nrow(mismatch_highE_lowO), "\n")
## Jumlah kasus Energi Tinggi - Occupancy Rendah: 0
cat("Jumlah kasus Energi Rendah - Occupancy Tinggi:", nrow(mismatch_highO_lowE), "\n")
## Jumlah kasus Energi Rendah - Occupancy Tinggi: 0
Tidak ditemukan kasus mismatch (energi tinggi tapi okupansi rendah,
atau sebaliknya).
Weekday VS Weekend
df <- df %>%
mutate(weekday = wday(time, label = TRUE),
weekend = ifelse(weekday %in% c("Sat", "Sun"), "Weekend", "Weekday"))
ggplot(df, aes(x = time, y = mean_client_count, color = weekend)) +
geom_line() +
facet_wrap(~weekend, scales="free_x", ncol=1) +
labs(title="Occupancy - Weekday vs Weekend", x="Time", y="Client Count") +
theme_minimal()

Sama halnya, konsumsi listrik di weekday jauh lebih tinggi dibanding
weekend. Meski begitu, pola kenaikan siang hari dan penurunan malam hari
tetap terlihat pada kedua kategori.
ggplot(df, aes(x=mean_client_count, y=total_electricity, color=weekend)) +
geom_point(alpha=0.5) +
labs(title="Occupancy vs Energy - Weekday vs Weekend",
x="WiFi Clients", y="Energy Consumption")

Scatter plot weekday vs weekend memperlihatkan pola hubungan serupa
(keduanya tetap berkorelasi positif). Namun, pada weekday, kepadatan
titik lebih tinggi di area okupansi besar, sedangkan pada weekend lebih
tersebar di okupansi rendah hingga menengah.
Daily Profile dengan Weekend vs Weekday
daily_profile_wd <- df %>%
mutate(date = as.Date(time),
hour_minute = format(time, "%H:%M"),
day_type = ifelse(weekdays(date) %in% c("Saturday", "Sunday"), "Weekend", "Weekday")) %>%
group_by(day_type, date, hour_minute) %>%
summarise(mean_clients = mean(mean_client_count, na.rm=TRUE),
total_energy = mean(total_electricity, na.rm=TRUE), .groups="drop")
avg_daily_profile_wd <- daily_profile_wd %>%
group_by(day_type, hour_minute) %>%
summarise(mean_clients = mean(mean_clients, na.rm=TRUE),
total_energy = mean(total_energy, na.rm=TRUE), .groups="drop")
# Plot Occupancy Daily Profile (Weekday vs Weekend)
ggplot(daily_profile_wd, aes(x = as.POSIXct(hour_minute, format="%H:%M"),
y = mean_clients, group = date, color=day_type)) +
geom_line(alpha=0.4) +
geom_line(data = avg_daily_profile_wd,
aes(x = as.POSIXct(hour_minute, format="%H:%M"),
y = mean_clients, color=day_type, group=day_type),
size=1.2) +
labs(title="Daily Profile - Occupancy (Weekday vs Weekend)",
x="Time of Day", y="Avg Client Count") +
theme_minimal()
