# =========================
# Install & Load Packages
# =========================
packages <- c("tidyverse", "lubridate", "ggplot2", "readr")
installed_packages <- rownames(installed.packages())
for (p in packages) {
if (!(p %in% installed_packages)) {
install.packages(p)
}
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(readr)
# =========================
# 1. Load dataset
# =========================
library(tidyverse)
df_wifi <- read_csv("D:/Documents/AAA IRFAN/wifi.csv")
## Rows: 1883844 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Event Time, Uni, Building, Floor
## dbl (2): Associated Client Count, Authenticated Client Count
## dttm (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy1 <- read_csv("D:/Documents/AAA IRFAN/library1.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy2 <- read_csv("D:/Documents/AAA IRFAN/library2.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_energy3 <- read_csv("D:/Documents/AAA IRFAN/library3.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# =========================
# 2. Data WiFi Cleaning
# =========================
subset_cols <- c("time", "Building", "Floor", "Associated Client Count")
df_wifi_cleaned <- df_wifi %>%
distinct(across(all_of(subset_cols)), .keep_all = TRUE) %>%
filter(str_trim(tolower(Building)) == "library") %>%
mutate(time = ymd_hms(time))
# =========================
# 3. Data Energi Cleaning
# =========================
df_energy1 <- df_energy1 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## ! 131 failed to parse.
df_energy2 <- df_energy2 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## ! 131 failed to parse.
df_energy3 <- df_energy3 %>% mutate(ts = ymd_hms(ts))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ts = ymd_hms(ts)`.
## Caused by warning:
## ! 131 failed to parse.
fill_missing <- function(df) {
mean_first_144 <- mean(head(df$rate, 144), na.rm = TRUE)
df$rate[is.na(df$rate)] <- mean_first_144
df
}
df_energy1 <- fill_missing(df_energy1)
df_energy2 <- fill_missing(df_energy2)
df_energy3 <- fill_missing(df_energy3)
df_energy_total <- bind_rows(df_energy1, df_energy2, df_energy3) %>%
group_by(ts) %>%
summarise(total_rate = sum(rate, na.rm = TRUE), .groups = "drop")
# =========================
# 4. Resample WiFi (10 menit rata-rata)
# =========================
df_wifi_resampled <- df_wifi_cleaned %>%
mutate(time_10min = floor_date(time, "10 minutes")) %>%
group_by(time_10min) %>%
summarise(occupancy = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")
# =========================
# 5. Gabung WiFi + Energi
# =========================
df_final <- inner_join(df_wifi_resampled, df_energy_total,
by = c("time_10min" = "ts"))
# =========================
# 6. Visualisasi
# =========================
## Time Series
ggplot(df_final, aes(x = time_10min)) +
geom_line(aes(y = occupancy, color = "WiFi Occupancy")) +
geom_line(aes(y = total_rate, color = "Energy Consumption")) +
labs(title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi",
x = "Waktu", y = "Nilai") +
scale_color_manual(values = c("WiFi Occupancy" = "blue", "Energy Consumption" = "red")) +
theme_minimal()

## Scatter Plot
ggplot(df_final, aes(x = occupancy, y = total_rate)) +
geom_point(alpha = 0.3, color = "darkgreen") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Hubungan Okupansi vs Konsumsi Energi",
x = "Okupansi (WiFi)", y = "Energi")
## `geom_smooth()` using formula = 'y ~ x'

## Daily Profile
df_daily <- df_final %>%
mutate(hour = hour(time_10min)) %>%
group_by(hour) %>%
summarise(occupancy = mean(occupancy, na.rm = TRUE),
total_rate = mean(total_rate, na.rm = TRUE))
ggplot(df_daily, aes(x = hour)) +
geom_line(aes(y = occupancy, color = "WiFi")) +
geom_line(aes(y = total_rate, color = "Energy")) +
labs(title = "Pola Harian Rata-rata",
x = "Jam (0-23)", y = "Nilai") +
theme_minimal()

# =========================
# 7. Weekday vs Weekend
# =========================
df_final <- df_final %>%
mutate(day_of_week = wday(time_10min, label = TRUE),
day_type = ifelse(wday(time_10min) %in% 2:6, "Weekday", "Weekend"),
hour = hour(time_10min))
## Scatter comparison
ggplot(df_final, aes(x = occupancy, y = total_rate, color = day_type)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Okupansi vs Energi: Weekday vs Weekend")
## `geom_smooth()` using formula = 'y ~ x'

## Daily Profile comparison
df_daytype <- df_final %>%
group_by(day_type, hour) %>%
summarise(occupancy = mean(occupancy, na.rm = TRUE),
total_rate = mean(total_rate, na.rm = TRUE), .groups = "drop")
ggplot(df_daytype, aes(x = hour, group = day_type)) +
geom_line(aes(y = occupancy, color = day_type)) +
geom_line(aes(y = total_rate, linetype = day_type), color = "red") +
labs(title = "Perbandingan Pola Harian: Weekday vs Weekend",
x = "Jam", y = "Rata-rata")

list.files("C:/Users/wdarm/Documents/College/Semester 5/Data Mining & Visualization/WEEK 2/Library Wifi Analysis Project")
## character(0)