Kelompok 3
Anggota:
- Fikri Rizal Dhiya Ul Haq (5003221105)
- Ghifari Muammar Abimanyu (5003221127)
- Ardhi Nugroho (5003221174)
- Muhammad Faiz Maulana (5003221188)
Link RPubs: https://rpubs.com/KripikPedas/LibraryOccupancyEnergyConsumption

Library

library(tidyverse)
library(lubridate)

Pre-Processing Data

# Load the raw Wi-Fi log and preview its first rows.
wifi_df <- read_csv("wifi.csv")
head(wifi_df)

# Trim surrounding whitespace in every character column, keep only the rows
# whose Building is "Library", then drop exact duplicate rows.
library_df <- wifi_df %>%
  mutate(across(where(is.character), ~ str_trim(.x))) %>%
  filter(Building == "Library") %>%
  distinct()

head(library_df)

Mengubah data menjadi per 10 menit

# Parse the timestamp column and assign every reading to its 10-minute bin.
# The time zone is pinned to UTC: without `tz`, as.POSIXct() interprets text
# in the session's local zone, so the bins (and the later join against the
# meter `ts` column) would depend on the machine running the script.
# NOTE(review): assumes the meter timestamps are read as UTC (readr's
# default) -- confirm against the CSV files.
library_df <- library_df %>%
  mutate(
    time = as.POSIXct(time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    time_floored = floor_date(time, "10 minutes")
  )

library_df

Resampling data dengan menggabungkan baris dengan rata-rata

# Collapse all readings that share a 10-minute bin into their mean client
# count (missing counts are ignored).
library_df_clean <- library_df %>%
  group_by(time_floored) %>%
  summarise(
    AssociatedClientCount_mean = mean(
      `Associated Client Count`,
      na.rm = TRUE
    ),
    .groups = "drop"
  )

head(library_df_clean)

Mengekstrak data library 1, 2, dan 3

# Read the three library electricity files, one table per file.
elec1 <- read_csv("library1.csv")
elec2 <- read_csv("library2.csv")
elec3 <- read_csv("library3.csv")

# Print each table for inspection.
elec1
elec2
elec3

Data Integration

Menggabungkan semua dataset menjadi 1

# Attach the three electricity tables to the occupancy series, matching the
# 10-minute bin against each table's `ts` column. Every occupancy row is kept
# (left joins). Column names that clash with ones already present receive the
# suffix "_elec2" / "_elec3" for the second / third table.
merged_library <- left_join(
  library_df_clean,
  elec1,
  by = c(time_floored = "ts")
)
merged_library <- left_join(
  merged_library,
  elec2,
  by = c(time_floored = "ts"),
  suffix = c("", "_elec2")
)
merged_library <- left_join(
  merged_library,
  elec3,
  by = c(time_floored = "ts"),
  suffix = c("", "_elec3")
)

merged_library

Menghitung total pemakaian listrik

# Total consumption per row = sum of the three `rate` columns.
# With na.rm = TRUE missing readings are skipped, so a row in which all three
# are missing gets a total of 0.
merged_library <- merged_library %>%
  mutate(
    total_rate = rowSums(
      cbind(rate, rate_elec2, rate_elec3),
      na.rm = TRUE
    )
  )

merged_library

Memilih variabel yang akan digunakan

# Keep only the columns used by the rest of the analysis.
merged_library <- select(
  merged_library,
  all_of(c("time_floored", "AssociatedClientCount_mean", "total_rate"))
)

merged_library

Menghitung rata-rata hari pertama untuk mengisi baris pertama total_rate

# Replace the first row's total_rate with the mean of rows 2..144.
# NOTE(review): 144 is the number of 10-minute bins in one day, so this is
# the "first day" mean only if the series starts at midnight and has no
# gaps -- confirm.
mean_rate_first_day <- merged_library %>%
  slice(2:144) %>%
  pull(total_rate) %>%
  mean(na.rm = TRUE)

merged_library$total_rate[1] <- mean_rate_first_day

merged_library

Visualisasi

Plot Time Series

# Overlay occupancy and total energy consumption on a shared time axis; the
# constant colour labels create the two legend entries.
ggplot(data = merged_library, mapping = aes(x = time_floored)) +
  geom_line(
    mapping = aes(y = AssociatedClientCount_mean, colour = "Occupancy")
  ) +
  geom_line(
    mapping = aes(y = total_rate, colour = "Energy Consumption")
  ) +
  ggtitle("Time Series of Occupancy & Energy Consumption") +
  xlab("Time") +
  ylab("Value") +
  labs(colour = "Legend") +
  theme_minimal()

Scatter Plot

# One semi-transparent point per 10-minute bin: occupancy on x, total energy
# consumption on y.
ggplot(
  data = merged_library,
  mapping = aes(x = AssociatedClientCount_mean, y = total_rate)
) +
  geom_point(alpha = 0.6) +
  ggtitle("Scatter Plot of Occupancy vs. Energy Consumption") +
  xlab("Occupancy") +
  ylab("Energy Consumption") +
  theme_minimal()

Daily Profile Cycle

# Add the weekday label (week starting on Monday) and the "HH:MM" clock time
# of every 10-minute bin.
merged_library <- merged_library %>%
  mutate(day_of_week = wday(time_floored, label = TRUE, week_start = 1)) %>%
  mutate(time_of_day = format(time_floored, "%H:%M"))

# The clock labels "00:00", "00:10", ..., "23:50" in chronological order,
# used as factor levels so plots order the x axis by time of day.
# Parsing "%H:%M" attaches today's date; the sequence is therefore built in
# UTC so that a daylight-saving transition in the local zone can never drop
# or duplicate a label (duplicated levels would make factor() fail).
time_order <- format(
  seq(
    from = as.POSIXct("00:00", format = "%H:%M", tz = "UTC"),
    to = as.POSIXct("23:50", format = "%H:%M", tz = "UTC"),
    by = "10 min"
  ),
  "%H:%M"
)

# Mean occupancy and mean energy for every (weekday, clock time) pair; the
# result is returned ungrouped.
avg_per_10min_per_day <- merged_library %>%
  group_by(day_of_week, time_of_day) %>%
  summarise(
    avg_occupancy = mean(AssociatedClientCount_mean, na.rm = TRUE),
    avg_energy = mean(total_rate, na.rm = TRUE),
    .groups = "drop"
  )

# Turn the clock time into a factor whose levels follow time_order, so the
# plots order it chronologically.
avg_per_10min_per_day$time_of_day <- factor(
  avg_per_10min_per_day$time_of_day,
  levels = time_order
)

# Axis labels to display: every 6th entry of time_order (one per hour).
time_breaks <- time_order[seq(from = 1, to = length(time_order), by = 6)]

# Mean occupancy by clock time, one coloured line per weekday.
occ_day <- ggplot(
  data = avg_per_10min_per_day,
  mapping = aes(
    x = time_of_day,
    y = avg_occupancy,
    colour = day_of_week,
    group = day_of_week
  )
) +
  geom_line(linewidth = 1) +
  # Only the entries of time_breaks are labelled on the x axis.
  scale_x_discrete(breaks = time_breaks) +
  labs(
    title = "Daily Occupancy (Monday-Sunday)",
    x = "Time",
    y = "Occupancy",
    colour = "Day"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
occ_day


# Mean energy consumption by clock time, one coloured line per weekday.
rate_day <- ggplot(
  data = avg_per_10min_per_day,
  mapping = aes(
    x = time_of_day,
    y = avg_energy,
    colour = day_of_week,
    group = day_of_week
  )
) +
  geom_line(linewidth = 1) +
  # Only the entries of time_breaks are labelled on the x axis.
  scale_x_discrete(breaks = time_breaks) +
  labs(
    title = "Daily Energy Consumption (Monday-Sunday)",
    x = "Time",
    y = "Energy Consumption",
    colour = "Day"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
rate_day

Average Daily Profile

# Mean occupancy and mean energy for every 10-minute clock slot, pooled over
# all days.
avg_per_10min <- merged_library %>%
  group_by(time_of_day) %>%
  summarise(
    `Rata-rata Okupansi` = mean(AssociatedClientCount_mean, na.rm = TRUE),
    `Rata-rata Konsumsi Energi` = mean(total_rate, na.rm = TRUE),
    .groups = "drop"
  )
avg_per_10min$time_of_day <- factor(
  avg_per_10min$time_of_day,
  levels = time_order
)

# Reshape from wide to long (one row per slot and metric) for ggplot.
avg_per_10min_long <- pivot_longer(
  avg_per_10min,
  cols = all_of(c("Rata-rata Okupansi", "Rata-rata Konsumsi Energi")),
  names_to = "TipeMetrik",
  values_to = "NilaiRataRata"
)

# Axis labels to display: every 6th entry of time_order.
time_breaks <- time_order[seq(from = 1, to = length(time_order), by = 6)]

# Average daily profile: one line per metric, with fixed colours per metric.
ggplot(
  data = avg_per_10min_long,
  mapping = aes(
    x = time_of_day,
    y = NilaiRataRata,
    colour = TipeMetrik,
    group = TipeMetrik
  )
) +
  geom_line(linewidth = 1) +
  scale_x_discrete(breaks = time_breaks) +
  scale_colour_manual(
    values = c(
      "Rata-rata Okupansi" = "#1f77b4",
      "Rata-rata Konsumsi Energi" = "darkorange"
    )
  ) +
  labs(
    title = "Average Occupancy & Energy Consumption",
    x = "Time",
    y = "Average Value",
    colour = "Legend"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Analysis

Peak Hour

# Hour of day (0-23) of every 10-minute bin.
merged_library <- merged_library %>%
  mutate(hour = hour(time_floored))

# Mean occupancy per hour of day, pooled over all days.
hourly_avg_all_days <- merged_library %>%
  group_by(hour) %>%
  summarise(Avg_Occupancy = mean(AssociatedClientCount_mean, na.rm = TRUE))

# Pick exactly one peak row. Filtering on `== max(...)` returned several rows
# when hours tied and no row when the maximum was NaN, which broke the scalar
# message below and the `hour == peak_hour` comparison used for plotting.
peak_hour_data <- hourly_avg_all_days %>%
  slice_max(Avg_Occupancy, n = 1, with_ties = FALSE)
peak_hour <- peak_hour_data$hour
peak_value <- peak_hour_data$Avg_Occupancy

cat(sprintf("Jam puncak okupansi adalah pada jam %d:00 dengan rata-rata %.0f orang.\n", peak_hour, peak_value))
Jam puncak okupansi adalah pada jam 15:00 dengan rata-rata 370 orang.
# Bar chart of mean occupancy per hour; the bar whose hour equals peak_hour
# is drawn in orange, all others in blue, and each bar carries its rounded
# value as a label above it.
ggplot(
  data = hourly_avg_all_days,
  mapping = aes(x = hour, y = Avg_Occupancy)
) +
  geom_col(
    mapping = aes(fill = if_else(hour == peak_hour, "Peak", "Normal"))
  ) +
  geom_text(mapping = aes(label = round(Avg_Occupancy, 0)), vjust = -0.5) +
  scale_fill_manual(
    values = c("Peak" = "#ff7f0e", "Normal" = "#1f77b4"),
    guide = "none"
  ) +
  scale_x_continuous(breaks = 0:23) +
  labs(
    title = "Average Occupancy",
    x = "Hour",
    y = "Average Occupancy"
  ) +
  theme_minimal()

Occupancy Influence on Energy Consumption

# Correlation test (cor.test defaults) between occupancy and total energy
# consumption; keep the estimate and p-value and report both.
correlation_test <- cor.test(
  x = merged_library$AssociatedClientCount_mean,
  y = merged_library$total_rate
)
corr_value <- correlation_test[["estimate"]]
p_value <- correlation_test[["p.value"]]

cat(
  sprintf(
    "Korelasi Pearson antara okupansi dan konsumsi energi: %.3f (p-value: %.3e)\n",
    corr_value,
    p_value
  )
)
Korelasi Pearson antara okupansi dan konsumsi energi: 0.878 (p-value: 0.000e+00)
# Describe a correlation coefficient `r` in words (Indonesian).
# Strength comes from |r|: > 0.5 strong ("kuat"), > 0.3 moderate ("sedang"),
# otherwise weak ("lemah"); direction comes from the sign of `r`.
# A coefficient of exactly 0 is reported as zero correlation; the previous
# if/else chain fell through and labelled it a weak negative correlation.
describe_correlation <- function(r) {
  if (r == 0) {
    return("korelasi nol")
  }
  direction <- if (r > 0) "positif" else "negatif"
  magnitude <- abs(r)
  strength <- if (magnitude > 0.5) {
    "kuat"
  } else if (magnitude > 0.3) {
    "sedang"
  } else {
    "lemah"
  }
  sprintf("korelasi %s yang %s", direction, strength)
}

influence <- describe_correlation(corr_value)
cat(sprintf("Ini menunjukkan adanya %s.\n", influence))
Ini menunjukkan adanya korelasi positif yang kuat.

Cases where energy consumption does not align with occupancy

# Find time bins where energy consumption does not track occupancy.
# Both series are standardised to z-scores first so they are comparable.
merged_library <- merged_library %>%
  mutate(occ_z = as.vector(scale(AssociatedClientCount_mean))) %>%
  mutate(energy_z = as.vector(scale(total_rate)))

# A bin counts as misaligned when the two z-scores differ by more than
# 2 standard deviations.
misaligned_cases <- filter(merged_library, abs(occ_z - energy_z) > 2)

if (nrow(misaligned_cases) > 0) {
  print(
    select(
      misaligned_cases,
      time_floored, AssociatedClientCount_mean, total_rate, occ_z, energy_z
    )
  )
} else {
  cat("Tidak ada kasus yang tidak selaras antara okupansi dan konsumsi energi.\n")
}
Tidak ada kasus yang tidak selaras antara okupansi dan konsumsi energi.

Weekday Vs. Weekend

# Label every bin as "Weekday" (Monday-Friday) or "Weekend" (Saturday/Sunday)
# using Monday-based weekday numbers, and (re)compute the "HH:MM" clock time.
merged_library <- merged_library %>%
  mutate(
    TipeHari = if_else(
      wday(time_floored, week_start = 1) >= 6,
      "Weekend",
      "Weekday"
    )
  ) %>%
  mutate(time_of_day = format(time_floored, "%H:%M"))

# The clock labels "00:00", "00:10", ..., "23:50" in chronological order,
# used as factor levels so plots order the x axis by time of day.
# Parsing "%H:%M" attaches today's date; the sequence is therefore built in
# UTC so that a daylight-saving transition in the local zone can never drop
# or duplicate a label (duplicated levels would make factor() fail).
time_order <- format(
  seq(
    from = as.POSIXct("00:00", format = "%H:%M", tz = "UTC"),
    to = as.POSIXct("23:50", format = "%H:%M", tz = "UTC"),
    by = "10 min"
  ),
  "%H:%M"
)

# Mean occupancy and mean energy for every (day type, clock time) pair; the
# result is returned ungrouped.
avg_per_tipe_hari <- merged_library %>%
  group_by(TipeHari, time_of_day) %>%
  summarise(
    `Okupansi Rata-rata` = mean(AssociatedClientCount_mean, na.rm = TRUE),
    `Konsumsi Energi Rata-rata` = mean(total_rate, na.rm = TRUE),
    .groups = "drop"
  )
avg_per_tipe_hari$time_of_day <- factor(
  avg_per_tipe_hari$time_of_day,
  levels = time_order
)

# Axis labels shared by both plots: every 6th entry of time_order.
time_breaks <- time_order[seq(from = 1, to = length(time_order), by = 6)]

# Occupancy: average daily profile, one line per day type.
ggplot(
  data = avg_per_tipe_hari,
  mapping = aes(
    x = time_of_day,
    y = `Okupansi Rata-rata`,
    colour = TipeHari,
    group = TipeHari
  )
) +
  geom_line(linewidth = 1) +
  scale_x_discrete(breaks = time_breaks) +
  scale_colour_brewer(palette = "Set1") +
  ggtitle("Average Occupancy Weekday vs. Weekend") +
  xlab("Time") +
  ylab("Average Occupancy") +
  labs(colour = "Day Type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Energy consumption: average daily profile, one line per day type.
ggplot(
  data = avg_per_tipe_hari,
  mapping = aes(
    x = time_of_day,
    y = `Konsumsi Energi Rata-rata`,
    colour = TipeHari,
    group = TipeHari
  )
) +
  geom_line(linewidth = 1) +
  scale_x_discrete(breaks = time_breaks) +
  scale_colour_brewer(palette = "Dark2") +
  ggtitle("Average Energy Consumption Weekday Vs. Weekend") +
  xlab("Time") +
  ylab("Average Energy Consumption") +
  labs(colour = "Day Type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

