##Instalasi dan Load Library

install.packages('tidyverse')
install.packages('lubridate')
install.packages('ggplot2')
install.packages('zoo')
install.packages('gridExtra')

library(tidyverse)
library(lubridate)
library(ggplot2)
library(zoo)       # untuk resample (rollapply)
library(gridExtra) # untuk plot berdampingan
library(reshape2)  # untuk melt data frame

##Memuat Dataset

df_wifi <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/wifi.csv")
df_energy1 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library1.csv")
df_energy2 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library2.csv")
df_energy3 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library3.csv")

head(df_wifi)

##Menghapus Duplikat Sempurna dari Data WiFi

cat("Jumlah baris awal di df_wifi:", nrow(df_wifi), "\n")

# Cek nama kolom
colnames(df_wifi)

# Sesuaikan nama kolom yang akan dipakai untuk distinct
subset_cols <- c('time', 'Event.Time', 'Associated.Client.Count', 'Authenticated.Client.Count', 'Uni', 'Building', 'Floor')

df_wifi_cleaned <- df_wifi %>%
  distinct(across(all_of(subset_cols)), .keep_all = TRUE)

cat("Jumlah baris di df_wifi setelah duplikat sempurna dihapus:", nrow(df_wifi_cleaned), "\n")

##Filter Data Gedung “Library” (case insensitive)

df_wifi_library <- df_wifi_cleaned %>%
  filter(str_to_lower(str_trim(Building)) == "library")

head(df_wifi_library)

##Konversi Kolom Waktu ke Datetime

df_wifi_library <- df_wifi_library %>%
  mutate(time = ymd_hms(time))

df_energy1 <- df_energy1 %>% mutate(ts = ymd_hms(ts))
df_energy2 <- df_energy2 %>% mutate(ts = ymd_hms(ts))
df_energy3 <- df_energy3 %>% mutate(ts = ymd_hms(ts))

##Cek Duplikat Berdasarkan Waktu (Timestamp)

cat("Jumlah duplikat di df_wifi_library berdasarkan time:", sum(duplicated(df_wifi_library$time)), "\n")
cat("Jumlah duplikat di df_energy1 berdasarkan ts:", sum(duplicated(df_energy1$ts)), "\n")
cat("Jumlah duplikat di df_energy2 berdasarkan ts:", sum(duplicated(df_energy2$ts)), "\n")
cat("Jumlah duplikat di df_energy3 berdasarkan ts:", sum(duplicated(df_energy3$ts)), "\n")

# Hapus duplikat timestamp di data energi
df_energy1 <- df_energy1 %>% distinct(ts, .keep_all = TRUE)
df_energy2 <- df_energy2 %>% distinct(ts, .keep_all = TRUE)
df_energy3 <- df_energy3 %>% distinct(ts, .keep_all = TRUE)

##Cek Missing Values di df_energy1

missing_df1 <- colSums(is.na(df_energy1))
print(missing_df1)

df_energy1_melt <- melt(is.na(df_energy1))
ggplot(df_energy1_melt, aes(x=Var2, y=Var1, fill=value)) +
  geom_tile() +
  scale_fill_manual(values=c("white", "black")) +
  labs(title="Peta Data Hilang pada df_energy1", x="", y="") +
  theme(axis.text.x=element_text(angle=90, hjust=1))

##Mengisi Nilai Hilang dengan Rata-rata 144 Baris Pertama

fill_missing_energy <- function(df) {
  mean_first_144 <- mean(head(df$rate, 144), na.rm = TRUE)
  df$rate[is.na(df$rate)] <- mean_first_144
  return(df)
}

df_energy1 <- fill_missing_energy(df_energy1)
df_energy2 <- fill_missing_energy(df_energy2)
df_energy3 <- fill_missing_energy(df_energy3)

##Menjumlahkan Konsumsi Energi dari Ketiga Dataframe

all_rates <- reduce(list(df_energy1$rate, df_energy2$rate, df_energy3$rate), `+`)

df_energy_total <- tibble(ts = df_energy1$ts, total_rate = all_rates)

head(df_energy_total)

##Resampling Data WiFi menjadi Interval 10 Menit dengan Rata-rata

df_wifi_library <- df_wifi_library %>%
  mutate(time_10min = floor_date(time, unit = "10 minutes"))

df_wifi_resampled <- df_wifi_library %>%
  group_by(time_10min) %>%
  summarise(occupancy = mean(`Associated.Client.Count`, na.rm = TRUE)) %>%
  ungroup()

##Menggabungkan Data WiFi dan Energi Berdasarkan Waktu

df_final <- df_wifi_resampled %>%
  inner_join(df_energy_total, by = c("time_10min" = "ts"))

head(df_final)
glimpse(df_final)

##Visualisasi Hubungan Okupansi dan Energi

library(scales)
library(grid)

ggplot(df_final, aes(x = time_10min)) +
  geom_line(aes(y = occupancy, color = "Okupansi (WiFi)"), linewidth = 1) +
  geom_line(aes(y = total_rate, color = "Konsumsi Energi"), linewidth = 1) +
  scale_y_continuous(
    name = "Okupansi (WiFi)",
    sec.axis = sec_axis(~ ., name = "Total Konsumsi Energi (rate)")
  ) +
  scale_color_manual(values = c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")) +
  labs(title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi", x = "Tanggal", color = "") +
  theme_minimal()

##Scatter Plot (Korelasi)

ggplot(df_final, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Hubungan Korelasi antara Okupansi dan Konsumsi Energi",
       x = "Okupansi (Rata-rata Pengguna WiFi)",
       y = "Total Konsumsi Energi (rate)") +
  theme_minimal()

##Pola Harian 24 Jam

df_final <- df_final %>%
  mutate(hour = hour(time_10min))

daily_profile <- df_final %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    total_rate = mean(total_rate, na.rm = TRUE)
  )

p1 <- ggplot(daily_profile, aes(x = hour)) +
  geom_line(aes(y = occupancy), color = "blue", linewidth = 1) +
  geom_point(aes(y = occupancy), color = "blue") +
  scale_x_continuous(breaks = 0:23) +
  labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Jumlah Pengguna WiFi", title = "Pola Harian Okupansi (WiFi)") +
  theme_minimal()

p2 <- ggplot(daily_profile, aes(x = hour)) +
  geom_line(aes(y = total_rate), color = "red", linewidth = 1) +
  geom_point(aes(y = total_rate), color = "red") +
  scale_x_continuous(breaks = 0:23) +
  labs(x = "Jam dalam Sehari (0-23)", y = "Total Konsumsi Energi (rate)", title = "Pola Harian Konsumsi Energi") +
  theme_minimal()

grid.arrange(p1, p2, ncol = 1)

##Analisis

peak_hour <- daily_profile$hour[which.max(daily_profile$occupancy)]
cat("Jam tersibuk di perpustakaan (puncak okupansi) rata-rata terjadi pada jam:", peak_hour, ":00\n")

correlation <- cor(df_final$occupancy, df_final$total_rate, use = "complete.obs")
cat(sprintf("Koefisien korelasi antara okupansi dan konsumsi energi adalah: %.4f\n", correlation))

##Perbandingan Weekend vs Weekday

df_final <- df_final %>%
  mutate(
    day_of_week = wday(time_10min, week_start = 1) - 1, # Senin=0, Minggu=6
    day_type = if_else(day_of_week < 5, "Weekday", "Weekend")
  )

df_weekday <- filter(df_final, day_type == "Weekday")
df_weekend <- filter(df_final, day_type == "Weekend")

cat("Data berhasil dipisahkan.\n")
cat("Jumlah data hari kerja (Weekday):", nrow(df_weekday), "baris\n")
cat("Jumlah data akhir pekan (Weekend):", nrow(df_weekend), "baris\n")

##Scatter Plot Hari Kerja dan Akhir Pekan

p_weekday <- ggplot(df_weekday, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "green") +
  labs(title = "Hubungan Okupansi vs. Energi (Hari Kerja)",
       x = "Okupansi (Pengguna WiFi)", y = "Total Konsumsi Energi") +
  theme_minimal()

p_weekend <- ggplot(df_weekend, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "orange") +
  labs(title = "Hubungan Okupansi vs. Energi (Akhir Pekan)",
       x = "Okupansi (Pengguna WiFi)", y = "") +
  theme_minimal()

grid.arrange(p_weekday, p_weekend, ncol = 2)

##Perbandingan Pola Harian Weekday vs Weekend

weekday_profile <- df_weekday %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    total_rate = mean(total_rate, na.rm = TRUE)
  )

weekend_profile <- df_weekend %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    total_rate = mean(total_rate, na.rm = TRUE)
  )

p1 <- ggplot() +
  geom_line(data = weekday_profile, aes(x = hour, y = occupancy, color = "Hari Kerja"), linewidth = 1) +
  geom_point(data = weekday_profile, aes(x = hour, y = occupancy, color = "Hari Kerja")) +
  geom_line(data = weekend_profile, aes(x = hour, y = occupancy, color = "Akhir Pekan"), linewidth = 1, linetype = "dashed") +
  geom_point(data = weekend_profile, aes(x = hour, y = occupancy, color = "Akhir Pekan")) +
  scale_color_manual(values = c("Hari Kerja" = "blue", "Akhir Pekan" = "cyan")) +
  scale_x_continuous(breaks = seq(0, 23, 2)) +
  labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Jumlah Pengguna WiFi", color = "Tipe Hari",
       title = "Perbandingan Pola Harian Okupansi") +
  theme_minimal()

p2 <- ggplot() +
  geom_line(data = weekday_profile, aes(x = hour, y = total_rate, color = "Hari Kerja"), linewidth = 1) +
  geom_point(data = weekday_profile, aes(x = hour, y = total_rate, color = "Hari Kerja")) +
  geom_line(data = weekend_profile, aes(x = hour, y = total_rate, color = "Akhir Pekan"), linewidth = 1, linetype = "dashed") +
  geom_point(data = weekend_profile, aes(x = hour, y = total_rate, color = "Akhir Pekan")) +
  scale_color_manual(values = c("Hari Kerja" = "red", "Akhir Pekan" = "magenta")) +
  scale_x_continuous(breaks = seq(0, 23, 2)) +
  labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Konsumsi Energi", color = "Tipe Hari",
       title = "Perbandingan Pola Harian Konsumsi Energi") +
  theme_minimal()

grid.arrange(p1, p2, ncol = 1)