# =========================================================
# ANALISIS DATA WIFI & ENERGI PERPUSTAKAAN
# =========================================================
# List semua package yang dibutuhkan
packages <- c("readxl", "dplyr", "lubridate", "zoo", "ggplot2", "tidyr")
# Install package jika belum ada
installed_packages <- packages %in% rownames(installed.packages())
if(any(!installed_packages)) {
install.packages(packages[!installed_packages])
}
# Load semua library
lapply(packages, library, character.only = TRUE)
## Warning: package 'readxl' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'lubridate' was built under R version 4.4.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## [[1]]
## [1] "readxl" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## [[2]]
## [1] "dplyr" "readxl" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
##
## [[3]]
## [1] "lubridate" "dplyr" "readxl" "stats" "graphics" "grDevices"
## [7] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "zoo" "lubridate" "dplyr" "readxl" "stats" "graphics"
## [7] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "ggplot2" "zoo" "lubridate" "dplyr" "readxl" "stats"
## [7] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "tidyr" "ggplot2" "zoo" "lubridate" "dplyr" "readxl"
## [7] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [13] "base"
# Atur working directory (opsional, kalau perlu)
# setwd("D:/COOLYAH TIARA/SEMESTER 7/Data Mining dan Visualisasi/Datmin Tiara/Tugas Kelompok 1/Data")
# =========================================================
# 1. Import Data
# =========================================================
wifi <- read_excel("C:/Users/LENOVO/Downloads/wifi.xlsx")
lib1 <- read_excel("C:/Users/LENOVO/Downloads/library1.xlsx")
lib2 <- read_excel("C:/Users/LENOVO/Downloads/library2.xlsx")
lib3 <- read_excel("C:/Users/LENOVO/Downloads/library3.xlsx")
# =========================================================
# 2. Filter WiFi hanya untuk Library
# =========================================================
wifi_lib <- wifi %>%
filter(Building == "Library") %>%
mutate(Timestamp = ymd_hms(time))
# Resample WiFi (interval 10 menit, rata-rata)
wifi_lib_10min <- wifi_lib %>%
mutate(Timestamp = floor_date(Timestamp, "10 minutes")) %>%
group_by(Timestamp) %>%
summarise(Occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")
# =========================================================
# 3. Gabungkan data energi (lib1, lib2, lib3)
# =========================================================
lib1 <- lib1 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))
lib2 <- lib2 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))
lib3 <- lib3 %>% mutate(Timestamp = as.POSIXct(ts, tz = "UTC"))
# Cleaning: isi NA dengan rata-rata 144 observasi pertama
fill_na <- function(x){
if(any(is.na(x))){
x[is.na(x)] <- mean(x[1:144], na.rm = TRUE)
}
return(x)
}
lib1$Value <- fill_na(lib1$rate)
lib2$Value <- fill_na(lib2$rate)
lib3$Value <- fill_na(lib3$rate)
# Gabungkan semua meteran jadi total energi
energy <- lib1 %>%
rename(E1 = rate) %>%
inner_join(lib2 %>% rename(E2 = rate), by = "Timestamp") %>%
inner_join(lib3 %>% rename(E3 = rate), by = "Timestamp") %>%
mutate(Energy = E1 + E2 + E3) %>%
select(Timestamp, Energy)
# =========================================================
# 4. Sinkronisasi Occupancy & Energy
# =========================================================
data_all <- wifi_lib_10min %>%
inner_join(energy, by = "Timestamp") %>%
mutate(
Day = weekdays(Timestamp),
Category = ifelse(Day %in% c("Saturday", "Sunday"), "Weekend", "Weekday"),
IsWeekend = factor(Category, levels = c("Weekday", "Weekend"))
)
# =========================================================
# 5. Daily Profiles
# =========================================================
data_all <- data_all %>%
mutate(Date = as.Date(Timestamp),
Time = format(Timestamp, "%H:%M"))
daily_profiles <- data_all %>%
group_by(Date, Time) %>%
summarise(
Occupancy = mean(Occupancy, na.rm = TRUE),
Energy = mean(Energy, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
IsWeekend = factor(ifelse(wday(Date) %in% c(1,7), "Weekend", "Weekday"),
levels = c("Weekday", "Weekend")),
Hour = hour(as.POSIXct(Time, format="%H:%M")),
TimePOSIX = as.POSIXct(Time, format="%H:%M") # tambahan untuk sumbu X jam-only
)
# =========================================================
# 6. VISUALISASI
# =========================================================
# --- Time Series Plot Occupancy & Energy (pakai tanggal+jam)
ggplot(data_all, aes(x = Timestamp)) +
geom_line(aes(y = Occupancy, color = "Occupancy")) +
geom_line(aes(y = Energy, color = "Energy")) +
scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
labs(title = "Time Series: Occupancy vs Energy (Library)",
x = "Tanggal & Jam", y = "Nilai", color = "Legend") +
theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# --- Scatter Plot Occupancy vs Energy
ggplot(data_all, aes(x = Occupancy, y = Energy)) +
geom_point(alpha = 0.5, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Scatter Plot: Occupancy vs Energy",
x = "Occupancy (WiFi)", y = "Energy Consumption") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# --- Daily Profile Plot (pakai jam-only 00:00, 02:00, dst.)
ggplot(daily_profiles, aes(x = TimePOSIX, y = Occupancy, group = Date)) +
geom_line(alpha = 0.2, color = "red") +
stat_summary(fun = mean, geom = "line", aes(group = 1), color = "yellow", size = 1.2) +
scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hour") +
labs(title = "Daily Profile Occupancy (merah=harian, kuning=rata-rata)",
x = "Jam", y = "Occupancy") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# =========================================================
# 7. ANALISIS
# =========================================================
# Peak hours occupancy
peak_hours <- daily_profiles %>%
group_by(Hour) %>%
summarise(Avg_Occupancy = mean(Occupancy, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(Avg_Occupancy))
print(peak_hours)
## # A tibble: 24 × 2
## Hour Avg_Occupancy
## <int> <dbl>
## 1 15 362.
## 2 14 355.
## 3 16 334.
## 4 13 330.
## 5 12 298.
## 6 17 279.
## 7 11 258.
## 8 18 221.
## 9 19 184.
## 10 10 182.
## # ℹ 14 more rows
# Korelasi Occupancy vs Energy
cor_test <- cor.test(data_all$Occupancy, data_all$Energy)
print(cor_test)
##
## Pearson's product-moment correlation
##
## data: data_all$Occupancy and data_all$Energy
## t = 79.03, df = 2052, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8564723 0.8778939
## sample estimates:
## cor
## 0.867585
# =========================================================
# 8. WEEKDAY vs WEEKEND ANALYSIS
# =========================================================
# Scatter Plot
ggplot(data_all %>% filter(is.finite(Occupancy), is.finite(Energy)),
aes(x = Occupancy, y = Energy, color = IsWeekend)) +
geom_point(alpha = 0.4) +
labs(title = "Occupancy vs Energy (Weekday vs Weekend)") +
theme_minimal()

# Time Series Comparison (Energy)
ggplot(data_all %>% filter(is.finite(Energy)),
aes(x = Timestamp, y = Energy, color = IsWeekend)) +
geom_line(alpha = 0.7) +
scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
labs(title = "Perbandingan Energy (Weekday vs Weekend)") +
theme_minimal()

# Time Series Plot (Occupancy & Energy, dengan secondary axis)
install.packages("ggplot2")
## Warning: package 'ggplot2' is in use and will not be installed
library(ggplot2)
library(dplyr)
ggplot(data_all %>% filter(is.finite(Occupancy), is.finite(Energy)),
aes(x = Timestamp)) +
geom_line(aes(y = Occupancy, color = "Occupancy"), alpha = 0.7) +
geom_line(aes(y = Energy/mean(Energy, na.rm = TRUE)*mean(Occupancy, na.rm = TRUE),
color = "Energy"), alpha = 0.7) +
facet_wrap(~IsWeekend, ncol = 1, scales = "free_x") +
scale_x_datetime(date_labels = "%d-%m %H:%M", date_breaks = "6 hour") +
scale_y_continuous(
name = "Occupancy",
sec.axis = sec_axis(~./mean(data_all$Occupancy, na.rm = TRUE)*mean(data_all$Energy, na.rm = TRUE),
name = "Energy Consumption")
) +
scale_color_manual(values = c("Occupancy" = "blue", "Energy" = "green")) +
labs(title = "Occupancy vs Energy Over Time (Weekday vs Weekend)",
x = "Tanggal & Jam", color = "Metric") +
theme_minimal()

# Boxplot Occupancy (Weekday vs Weekend)
ggplot(daily_profiles %>% filter(is.finite(Occupancy)),
aes(x = IsWeekend, y = Occupancy, fill = IsWeekend)) +
geom_boxplot(alpha = 0.7) +
labs(title = "Occupancy Distribution (Weekday vs Weekend)",
x = "", y = "Occupancy") +
theme_minimal()

# Boxplot Energy (Weekday vs Weekend)
ggplot(daily_profiles %>% filter(is.finite(Energy)),
aes(x = IsWeekend, y = Energy, fill = IsWeekend)) +
geom_boxplot(alpha = 0.7) +
labs(title = "Energy Consumption Distribution (Weekday vs Weekend)",
x = "", y = "Energy") +
theme_minimal()
