Anggota Kelompok:

  1. Nurralia Rayhan Naifah (5003231092)

  2. Adinda Putri Novianti (5003231107)

  3. Selvi Maulidyah (5003231112)

  4. Veronica Febriani Putri (5003231121)

Tujuan Analisis:

  1. Mengetahui pola okupansi perpustakaan dari data penggunaan Wi-Fi

  2. Melihat hubungan antara okupansi dan konsumsi energi

  3. Mengidentifikasi jam sibuk perpustakaan

  4. Membandingkan pola antara weekday vs weekend

Berikut adalah tahapan analisis:

  1. Menyiapkan Data
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
library(hms)
## Warning: package 'hms' was built under R version 4.4.3
## 
## Attaching package: 'hms'
## The following object is masked from 'package:lubridate':
## 
##     hms
# 1. Read all input files: the Wi-Fi log and the three library CSV files.
# NOTE(review): these paths are absolute and machine-specific, so the script
# only runs where the files exist in this exact location.

wifi     <- utils::read.csv(file = "C:/Users/acer/Downloads/wifi.csv")
library1 <- utils::read.csv(file = "C:/Users/acer/Downloads/library1.csv")
library2 <- utils::read.csv(file = "C:/Users/acer/Downloads/library2.csv")
library3 <- utils::read.csv(file = "C:/Users/acer/Downloads/library3.csv")

# 2. Keep only the columns that are needed and rename them.
#
# Wi-Fi: keep the rows whose Building equals " Library " and the two columns
# used later. `%in%` never returns NA, so rows with a missing Building are
# simply dropped; with `==` they would come back from the logical subset as
# rows filled entirely with NA.
# NOTE(review): the padded " Library " literal is kept from the original
# filter; presumably the raw values carry that padding -- verify against the
# CSV.
wifi <- wifi[wifi$Building %in% " Library ",
             c("time", "Associated.Client.Count")]

# Library files: keep the timestamp and the rate, and give each file's rate a
# distinct name so the three can sit side by side after merging.
library1 <- library1[, c("ts", "rate")]
colnames(library1)[2] <- "rate1"

library2 <- library2[, c("ts", "rate")]
colnames(library2)[2] <- "rate2"

library3 <- library3[, c("ts", "rate")]
colnames(library3)[2] <- "rate3"

# 3. Parse the timestamp columns into date-times.
wifi[["time"]]   <- ymd_hms(wifi[["time"]])
library1[["ts"]] <- ymd_hms(library1[["ts"]])
library2[["ts"]] <- ymd_hms(library2[["ts"]])
library3[["ts"]] <- ymd_hms(library3[["ts"]])

# 4. Resample the Wi-Fi data to 10-minute bins: floor every timestamp to the
#    start of its bin, then average the client count within each bin.
wifi[["time10"]] <- floor_date(wifi[["time"]], unit = "10 minutes")
wifi_agg <- aggregate(Associated.Client.Count ~ time10, data = wifi, FUN = mean)
names(wifi_agg)[2] <- "mean_client_count"

# 5. Combine all datasets: starting from the 10-minute Wi-Fi averages,
#    inner-join each library table in turn on its timestamp, so only time
#    bins present in every table remain.
merged <- Reduce(
  function(joined, library_tbl) {
    merge(joined, library_tbl, by.x = "time10", by.y = "ts", all = FALSE)
  },
  list(library1, library2, library3),
  wifi_agg
)

# 6. Compute the total rate as the sum of the three library rates.
merged[["Total_Rate"]] <- with(merged, rate1 + rate2 + rate3)

# 7. Cleaning NA: fill missing values with the mean of the first `n`
#    observations of the same column (144 by default).
#
# Arguments:
#   df  data frame (or list of columns); only numeric columns are touched.
#   n   number of leading observations used to compute the fill value.
# Returns `df` with the NA/NaN entries of its numeric columns replaced.
#
# If the first `n` observations of a column contain no observed value, their
# mean is NaN; in that case the mean of the whole column is used instead so
# that gaps are never filled with NaN. A column with no observed value at all
# is left unchanged.
no_missing <- function(df, n = 144) {
  stopifnot(is.numeric(n), length(n) == 1L, n >= 1)
  for (col in names(df)) {
    values <- df[[col]]
    if (!is.numeric(values)) {
      next
    }
    fill <- mean(head(values, n), na.rm = TRUE)
    if (is.nan(fill)) {
      # Nothing observed in the leading window: fall back to the column mean.
      fill <- mean(values, na.rm = TRUE)
    }
    if (is.nan(fill)) {
      # Nothing observed anywhere in this column: there is nothing to fill
      # with.
      next
    }
    df[[col]][is.na(values)] <- fill
  }
  df
}
merged <- no_missing(df = merged)

# 8. Result: show the first six rows of the cleaned, merged table.
head(x = merged, n = 6L)
##                time10 mean_client_count    rate1    rate2    rate3 Total_Rate
## 1 2020-02-01 00:00:00            22.750 15.83916 24.34965 82.60839   122.7972
## 2 2020-02-01 00:10:00            20.625 13.00000 11.00000 73.00000    97.0000
## 3 2020-02-01 00:20:00            17.250 13.00000 11.00000 72.00000    96.0000
## 4 2020-02-01 00:30:00            15.375 11.00000 15.00000 74.00000   100.0000
## 5 2020-02-01 00:40:00            12.375 11.00000 13.00000 65.00000    89.0000
## 6 2020-02-01 00:50:00            10.500 11.00000 16.00000 64.00000    91.0000
  2. Visualisasi Data
# Time series: occupancy and energy consumption drawn against the same time
# axis and the same y axis.
merged[["time10"]] <- as.POSIXct(merged[["time10"]])

ggplot(data = merged, mapping = aes(x = time10)) +
  geom_line(
    mapping = aes(y = mean_client_count, color = "Occupancy (WiFi)"),
    alpha = 0.7, linewidth = 1
  ) +
  geom_line(
    mapping = aes(y = Total_Rate, color = "Energy Consumption (kWh)"),
    alpha = 0.7, linewidth = 1
  ) +
  scale_color_manual(
    values = c("Energy Consumption (kWh)" = "red", "Occupancy (WiFi)" = "blue")
  ) +
  ggtitle("Time Series of Library Occupancy vs Energy Consumption") +
  xlab("Time") +
  ylab("Count / kWh") +
  # The complete theme comes first so the title tweak below is not overridden.
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold"))

Grafik time-series menunjukkan hubungan okupansi perpustakaan (biru) dan konsumsi energi (merah) pada Februari 2020. Terlihat pola harian berulang dengan okupansi dan konsumsi energi memuncak pada siang hingga sore hari dan menurun pada malam hari. Konsumsi energi mengikuti tren okupansi, tetapi tidak pernah mencapai nilai nol karena adanya beban dasar listrik. Hal ini menunjukkan adanya korelasi positif antara jumlah pengunjung dan penggunaan energi, dengan tambahan beban dasar listrik.

# Scatter plot of occupancy against energy consumption, annotated with their
# correlation (computed on complete observations only).
correlation <- with(
  merged,
  cor(mean_client_count, Total_Rate, use = "complete.obs")
)

ggplot(data = merged, mapping = aes(x = mean_client_count, y = Total_Rate)) +
  geom_point(color = "green", size = 2, alpha = 0.6) +
  labs(
    title = "Scatter Plot: Occupancy vs Energy Consumption",
    x = "Occupancy (Client Count)",
    y = "Energy Consumption (Total Rate)"
  ) +
  # The label sits at 30% of the x range and 90% of the y range, left-aligned.
  annotate(
    "text",
    x = 0.3 * max(merged$mean_client_count, na.rm = TRUE),
    y = 0.9 * max(merged$Total_Rate, na.rm = TRUE),
    label = paste0("Correlation: ", round(correlation, 3)),
    hjust = 0, size = 5, color = "black",
    fontface = "bold", alpha = 0.8
  ) +
  theme_minimal(base_size = 14)

Scatter plot menunjukkan hubungan non-linear positif yang kuat antara jumlah klien (okupansi) dan konsumsi energi dengan korelasi 0,878. Semakin banyak klien, konsumsi energi meningkat. Tetapi, konsumsi energi akan tumbuh secara melandai seiring bertambahnya jumlah klien. Hal ini menandakan bahwa okupansi sangat memengaruhi konsumsi energi, meski ada faktor lain yang ikut berperan.

# Derive time-of-day and day-of-week features from the 10-minute timestamp.
merged <- mutate(
  merged,
  hour = hour(time10),
  minute = minute(time10),
  time_of_day = hour + minute / 60,
  # Weeks start on Monday, so Monday = 0 and Sunday = 6.
  day_of_week = wday(time10, week_start = 1) - 1,
  day_name = wday(time10, label = TRUE, abbr = FALSE, week_start = 1)
)

# Daily profiles: mean value per (day of week, time of day) cell.
daily_occupancy <- summarise(
  group_by(merged, day_of_week, time_of_day),
  mean_client = mean(mean_client_count, na.rm = TRUE),
  .groups = "drop"
)

daily_energy <- summarise(
  group_by(merged, day_of_week, time_of_day),
  mean_energy = mean(Total_Rate, na.rm = TRUE),
  .groups = "drop"
)

# Overall averages: mean value per time of day across every day.
avg_occupancy <- summarise(
  group_by(merged, time_of_day),
  mean_client = mean(mean_client_count, na.rm = TRUE)
)

avg_energy <- summarise(
  group_by(merged, time_of_day),
  mean_energy = mean(Total_Rate, na.rm = TRUE)
)

# Occupancy: one coloured line per day of week, plus the overall average per
# time of day as a black dashed line.
# Line thickness is set with `linewidth`; `size` for lines has been deprecated
# since ggplot2 3.4.0 and triggers a lifecycle warning.
# NOTE(review): the seven labels are matched to the day_of_week levels by
# position, so this assumes every day of the week (0-6) occurs in the data.
ggplot(daily_occupancy, aes(x = time_of_day, y = mean_client, color = factor(day_of_week))) +
  geom_line(alpha = 0.7, linewidth = 1) +
  geom_line(data = avg_occupancy, aes(x = time_of_day, y = mean_client),
            color = "black", linetype = "dashed", linewidth = 1.2, inherit.aes = FALSE) +
  scale_color_brewer(palette = "Set1", name = "Day of Week",
                     labels = c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")) +
  labs(title = "Daily Occupancy Profiles by Day of Week",
       x = "Time of Day (Hours)", y = "Average Occupancy") +
  theme_minimal(base_size = 14)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Energy: one coloured line per day of week, plus the overall average per
# time of day as a black dashed line.
# Line thickness is set with `linewidth`; `size` for lines has been deprecated
# since ggplot2 3.4.0.
# NOTE(review): the seven labels are matched to the day_of_week levels by
# position, so this assumes every day of the week (0-6) occurs in the data.
ggplot(daily_energy, aes(x = time_of_day, y = mean_energy, color = factor(day_of_week))) +
  geom_line(alpha = 0.7, linewidth = 1) +
  geom_line(data = avg_energy, aes(x = time_of_day, y = mean_energy),
            color = "black", linetype = "dashed", linewidth = 1.2, inherit.aes = FALSE) +
  scale_color_brewer(palette = "Set1", name = "Day of Week",
                     labels = c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")) +
  labs(title = "Daily Energy Consumption Profiles by Day of Week",
       x = "Time of Day (Hours)", y = "Average Energy Consumption") +
  theme_minimal(base_size = 14)

Grafik tersebut memperlihatkan bahwa okupansi dan konsumsi energi sama-sama meningkat sejak pagi sekitar pukul 08.00, mencapai puncak pada siang hingga sore hari (sekitar pukul 13.00 - 15.00), lalu menurun secara bertahap hingga malam. Pada hari kerja, terutama Senin hingga Kamis, tingkat okupansi dan konsumsi energi cenderung lebih tinggi dan stabil, sedangkan Jumat sedikit lebih rendah. Sementara itu, akhir pekan menunjukkan angka yang jauh lebih rendah, mencerminkan minimnya aktivitas. Hal ini menegaskan adanya hubungan yang kuat antara jumlah pengguna dengan tingkat konsumsi energi harian.

# Split the data into weekdays (Monday-Friday) and weekends
# (Saturday-Sunday); with week_start = 1, wday() returns 1 for Monday.
weekday <- merged %>% filter(wday(time10, week_start = 1) < 6)
weekend <- merged %>% filter(wday(time10, week_start = 1) >= 6)

# Time-series plots for the weekday and weekend subsets.
# Each subset has gaps where the other days were filtered out. Grouping by the
# Monday that starts each week draws one line per week, so geom_line() no
# longer stretches a straight segment across the removed days.
# The colours are named, as in the full-month plot, so the mapping does not
# depend on the alphabetical order of the legend labels.
ts_weekday <- ggplot(
  weekday,
  aes(x = time10,
      group = format(floor_date(time10, "week", week_start = 1)))
) +
  geom_line(aes(y = mean_client_count, color = "Associated Client Count")) +
  geom_line(aes(y = Total_Rate, color = "Total Rate")) +
  labs(title = "Pola Weekday", x = "Waktu", y = "Nilai") +
  scale_color_manual(values = c("Associated Client Count" = "blue",
                                "Total Rate" = "red")) +
  theme_minimal()

ts_weekend <- ggplot(
  weekend,
  aes(x = time10,
      group = format(floor_date(time10, "week", week_start = 1)))
) +
  geom_line(aes(y = mean_client_count, color = "Associated Client Count")) +
  geom_line(aes(y = Total_Rate, color = "Total Rate")) +
  labs(title = "Pola Weekend", x = "Waktu", y = "Nilai") +
  scale_color_manual(values = c("Associated Client Count" = "blue",
                                "Total Rate" = "red")) +
  theme_minimal()

ts_weekday

ts_weekend

Setelah melihat pola okupansi dan konsumsi energi selama sebulan penuh, analisis dilanjutkan dengan melihat pola okupansi dan konsumsi energi berdasarkan weekdays dan weekend. Dari grafik time-series tersebut, terlihat pola mingguan yang relatif sama, baik pada weekday maupun weekend. Pengguna Wi-Fi perpustakaan pada saat weekday mampu mencapai 500 pengguna, sedangkan pada saat weekend hanya mampu mencapai 250 hingga 300 orang. Saat weekday, jumlah pengguna Wi-Fi perpustakaan cenderung menurun ketika mendekati weekend. Tetapi, saat weekend, jumlah pengguna Wi-Fi cenderung naik ketika mendekati weekday.

# Compare client count and energy consumption within a single day: one
# weekday (3 February 2020) and one weekend day (1 February 2020).
weekday_feb3 <- filter(weekday, as_date(time10) == ymd("2020-02-03"))
weekend_feb1 <- filter(weekend, as_date(time10) == ymd("2020-02-01"))

ggplot(data = weekday_feb3, mapping = aes(x = time10)) +
  geom_line(mapping = aes(y = mean_client_count, color = "Associated Client Count")) +
  geom_line(mapping = aes(y = Total_Rate, color = "Total Rate")) +
  ggtitle("Pola Weekday (3 Februari 2020)") +
  xlab("Waktu (Jam:Menit)") +
  ylab("Nilai") +
  scale_color_manual(
    values = c("Associated Client Count" = "blue", "Total Rate" = "red")
  ) +
  theme_minimal()

ggplot(data = weekend_feb1, mapping = aes(x = time10)) +
  geom_line(mapping = aes(y = mean_client_count, color = "Associated Client Count")) +
  geom_line(mapping = aes(y = Total_Rate, color = "Total Rate")) +
  ggtitle("Pola Weekend (1 Februari 2020)") +
  xlab("Waktu (Jam:Menit)") +
  ylab("Nilai") +
  scale_color_manual(
    values = c("Associated Client Count" = "blue", "Total Rate" = "red")
  ) +
  theme_minimal()

Mari lihat pola okupansi dan konsumsi energi dalam satu hari. Baik saat weekday maupun weekend, jumlah pengguna Wi-Fi perpustakaan mulai naik sekitar pukul 09.00 dan mulai turun sekitar pukul 15.00. Meskipun pengguna Wi-Fi saat weekday lebih besar daripada saat weekend, jumlah konsumsi energi pada kedua kelompok hampir sama. Hal ini bisa saja terkait dengan aktivitas yang dilakukan oleh para pengguna selama mengakses jaringan.

# Scatter plots of occupancy (x) against energy consumption (y), drawn
# separately for the weekday and weekend subsets.
ggplot(data = weekday, mapping = aes(x = mean_client_count, y = Total_Rate)) +
  geom_point(color = "blue", alpha = 0.6) +
  ggtitle("Scatter Plot For Weekday: Occupancy (X) vs Energy Consumption (Y)") +
  xlab("Occupancy (X)") +
  ylab("Energy Consumption (Y)") +
  theme_minimal()

ggplot(data = weekend, mapping = aes(x = mean_client_count, y = Total_Rate)) +
  geom_point(color = "blue", alpha = 0.6) +
  ggtitle("Scatter Plot For Weekend: Occupancy (X) vs Energy Consumption (Y)") +
  xlab("Occupancy (X)") +
  ylab("Energy Consumption (Y)") +
  theme_minimal()

Berdasarkan scatter plot yang telah dibentuk, konsumsi energi meningkat secara cepat ketika jumlah pengguna masih sedikit. Setelah jumlah pengguna mencapai titik tertentu, konsumsi energi masih mengalami peningkatan tetapi tidak cepat seperti sebelumnya.

Pada saat weekday, titik-titik data berkumpul hingga membentuk kurva yang sangat jelas dan padat. Pola ini menunjukkan bahwa konsumsi energi sangat konsisten dan dapat diprediksi saat ada banyak orang. Berbeda dengan saat weekend, titik-titik data terlihat lebih menyebar. Hal ini mengindikasikan bahwa konsumsi energi mungkin lebih tidak stabil.

# Split each timestamp into its calendar date and its time of day.
weekday <- mutate(
  weekday,
  date = as_date(time10),
  time_only = as_hms(time10)
)

weekend <- mutate(
  weekend,
  date = as_date(time10),
  time_only = as_hms(time10)
)

# Occupancy per date: mean client count for every (time of day, date) pair.
weekday_dp_ACC <- summarise(
  group_by(weekday, time_only, date),
  acc = mean(mean_client_count, na.rm = TRUE),
  .groups = "drop"
)

weekend_dp_ACC <- summarise(
  group_by(weekend, time_only, date),
  acc = mean(mean_client_count, na.rm = TRUE),
  .groups = "drop"
)

# Average over all weekday dates and over all weekend dates, per time of day.
weekday_avg_ACC <- summarise(
  group_by(weekday_dp_ACC, time_only),
  avg_acc = mean(acc, na.rm = TRUE)
)

weekend_avg_ACC <- summarise(
  group_by(weekend_dp_ACC, time_only),
  avg_acc = mean(acc, na.rm = TRUE)
)
# Daily occupancy profiles (weekdays, then weekends): one coloured line per
# date, plus the average across dates as a black dashed line.
# Line thickness is set with `linewidth`; `size` for lines has been deprecated
# since ggplot2 3.4.0.
ggplot(weekday_dp_ACC, aes(x = time_only, y = acc, group = date, color = factor(date))) +
  geom_line(alpha = 0.7) +
  geom_line(data = weekday_avg_ACC,
            aes(x = time_only, y = avg_acc, group = 1),
            color = "black", linewidth = 1.2, linetype = "dashed", inherit.aes = FALSE) +
  labs(title = "Daily Profiles Occupancy di Weekdays",
       x = "Waktu dalam Sehari", y = "Jumlah Klien Terkait", color = "Tanggal") +
  theme_minimal() +
  theme(legend.position = "right")

ggplot(weekend_dp_ACC, aes(x = time_only, y = acc, group = date, color = factor(date))) +
  geom_line(alpha = 0.7) +
  geom_line(data = weekend_avg_ACC,
            aes(x = time_only, y = avg_acc, group = 1),
            color = "black", linewidth = 1.2, linetype = "dashed", inherit.aes = FALSE) +
  labs(title = "Daily Profiles Occupancy di Weekends",
       x = "Waktu dalam Sehari", y = "Jumlah Klien Terkait", color = "Tanggal") +
  theme_minimal() +
  theme(legend.position = "right")

Pada pukul 11.06 hingga 16.40, penggunaan Wi-Fi perpustakaan pada saat weekdays dan weekends menunjukkan tingkat aktivitas yang sangat tinggi. Pada saat weekdays, jumlah pengguna berkisar dari 300 hingga 500 orang, sedangkan pada saat weekends, jumlah pengguna berkisar dari 200 hingga 350 orang.

Pola penggunaan Wi-Fi perpustakaan pada saat weekdays relatif seragam untuk tiap harinya. Hal ini berbanding terbalik dengan pola penggunaan Wi-Fi perpustakaan pada saat weekend yang bervariasi dan kurang terstruktur.

# Daily energy-consumption profiles (weekday + weekend): mean Total_Rate for
# every (time of day, date) pair.
weekday_dp_TR <- summarise(
  group_by(weekday, time_only, date),
  tr = mean(Total_Rate, na.rm = TRUE),
  .groups = "drop"
)

weekend_dp_TR <- summarise(
  group_by(weekend, time_only, date),
  tr = mean(Total_Rate, na.rm = TRUE),
  .groups = "drop"
)

# Average across dates, per time of day.
weekday_avg_TR <- summarise(
  group_by(weekday_dp_TR, time_only),
  avg_tr = mean(tr, na.rm = TRUE)
)

weekend_avg_TR <- summarise(
  group_by(weekend_dp_TR, time_only),
  avg_tr = mean(tr, na.rm = TRUE)
)

# Daily energy-consumption profiles (weekdays, then weekends): one coloured
# line per date, plus the average across dates as a black dashed line.
# Line thickness is set with `linewidth`; `size` for lines has been deprecated
# since ggplot2 3.4.0.
ggplot(weekday_dp_TR, aes(x = time_only, y = tr, group = date, color = factor(date))) +
  geom_line(alpha = 0.7) +
  geom_line(data = weekday_avg_TR,
            aes(x = time_only, y = avg_tr, group = 1),
            color = "black", linewidth = 1.2, linetype = "dashed", inherit.aes = FALSE) +
  labs(title = "Daily Profiles Energy Consumptions di Weekdays",
       x = "Waktu dalam Sehari", 
       y = "Total Rate Terkait", 
       color = "Tanggal") +
  theme_minimal() +
  theme(legend.position = "right")

ggplot(weekend_dp_TR, aes(x = time_only, y = tr, group = date, color = factor(date))) +
  geom_line(alpha = 0.7) +
  geom_line(data = weekend_avg_TR,
            aes(x = time_only, y = avg_tr, group = 1),
            color = "black", linewidth = 1.2, linetype = "dashed", inherit.aes = FALSE) +
  labs(title = "Daily Profiles Energy Consumptions di Weekends",
       x = "Waktu dalam Sehari", 
       y = "Total Rate Terkait", 
       color = "Tanggal") +
  theme_minimal() +
  theme(legend.position = "right")

Berbeda dengan pola okupansi sebelumnya, konsumsi energi pada saat weekdays dimulai dari level yang cukup tinggi pada dini hari, kemudian mengalami penurunan hingga pagi, dan setelah itu meningkat tajam hingga mencapai puncaknya pada sore hari. Setelah puncak, nilai kembali menurun. Rata-rata harian mencapai puncaknya di sekitar 200, menunjukkan tingkat konsumsi yang tinggi pada sore hari. Pola yang sama masih terlihat pada konsumsi energi pada saat weekends, tetapi dengan nilai keseluruhan yang jauh lebih rendah. Rata-rata hariannya berada di level yang lebih rendah dan puncaknya tidak setinggi weekdays.