##Instalasi dan Load Library
install.packages('tidyverse')
install.packages('lubridate')
install.packages('ggplot2')
install.packages('zoo')
install.packages('gridExtra')
library(tidyverse)
library(lubridate)
library(ggplot2)
library(zoo) # untuk resample (rollapply)
library(gridExtra) # untuk plot berdampingan
library(reshape2) # untuk melt data frame
##Memuat Dataset
df_wifi <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/wifi.csv")
df_energy1 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library1.csv")
df_energy2 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library2.csv")
df_energy3 <- read.csv("C:/Users/ilham/OneDrive/Documents/Datmin/library3.csv")
head(df_wifi)
##Menghapus Duplikat Sempurna dari Data WiFi
cat("Jumlah baris awal di df_wifi:", nrow(df_wifi), "\n")
# Cek nama kolom
colnames(df_wifi)
# Sesuaikan nama kolom yang akan dipakai untuk distinct
subset_cols <- c('time', 'Event.Time', 'Associated.Client.Count', 'Authenticated.Client.Count', 'Uni', 'Building', 'Floor')
df_wifi_cleaned <- df_wifi %>%
distinct(across(all_of(subset_cols)), .keep_all = TRUE)
cat("Jumlah baris di df_wifi setelah duplikat sempurna dihapus:", nrow(df_wifi_cleaned), "\n")
##Filter Data Gedung “Library” (case insensitive)
df_wifi_library <- df_wifi_cleaned %>%
filter(str_to_lower(str_trim(Building)) == "library")
head(df_wifi_library)
##Konversi Kolom Waktu ke Datetime
df_wifi_library <- df_wifi_library %>%
mutate(time = ymd_hms(time))
df_energy1 <- df_energy1 %>% mutate(ts = ymd_hms(ts))
df_energy2 <- df_energy2 %>% mutate(ts = ymd_hms(ts))
df_energy3 <- df_energy3 %>% mutate(ts = ymd_hms(ts))
##Cek Duplikat Berdasarkan Waktu (Timestamp)
cat("Jumlah duplikat di df_wifi_library berdasarkan time:", sum(duplicated(df_wifi_library$time)), "\n")
cat("Jumlah duplikat di df_energy1 berdasarkan ts:", sum(duplicated(df_energy1$ts)), "\n")
cat("Jumlah duplikat di df_energy2 berdasarkan ts:", sum(duplicated(df_energy2$ts)), "\n")
cat("Jumlah duplikat di df_energy3 berdasarkan ts:", sum(duplicated(df_energy3$ts)), "\n")
# Hapus duplikat timestamp di data energi
df_energy1 <- df_energy1 %>% distinct(ts, .keep_all = TRUE)
df_energy2 <- df_energy2 %>% distinct(ts, .keep_all = TRUE)
df_energy3 <- df_energy3 %>% distinct(ts, .keep_all = TRUE)
##Cek Missing Values di df_energy1
missing_df1 <- colSums(is.na(df_energy1))
print(missing_df1)
df_energy1_melt <- melt(is.na(df_energy1))
ggplot(df_energy1_melt, aes(x=Var2, y=Var1, fill=value)) +
geom_tile() +
scale_fill_manual(values=c("white", "black")) +
labs(title="Peta Data Hilang pada df_energy1", x="", y="") +
theme(axis.text.x=element_text(angle=90, hjust=1))
##Mengisi Nilai Hilang dengan Rata-rata 144 Baris Pertama
fill_missing_energy <- function(df) {
mean_first_144 <- mean(head(df$rate, 144), na.rm = TRUE)
df$rate[is.na(df$rate)] <- mean_first_144
return(df)
}
df_energy1 <- fill_missing_energy(df_energy1)
df_energy2 <- fill_missing_energy(df_energy2)
df_energy3 <- fill_missing_energy(df_energy3)
##Menjumlahkan Konsumsi Energi dari Ketiga Dataframe
all_rates <- reduce(list(df_energy1$rate, df_energy2$rate, df_energy3$rate), `+`)
df_energy_total <- tibble(ts = df_energy1$ts, total_rate = all_rates)
head(df_energy_total)
##Resampling Data WiFi menjadi Interval 10 Menit dengan Rata-rata
df_wifi_library <- df_wifi_library %>%
mutate(time_10min = floor_date(time, unit = "10 minutes"))
df_wifi_resampled <- df_wifi_library %>%
group_by(time_10min) %>%
summarise(occupancy = mean(`Associated.Client.Count`, na.rm = TRUE)) %>%
ungroup()
##Menggabungkan Data WiFi dan Energi Berdasarkan Waktu
df_final <- df_wifi_resampled %>%
inner_join(df_energy_total, by = c("time_10min" = "ts"))
head(df_final)
glimpse(df_final)
##Visualisasi Hubungan Okupansi dan Energi
library(scales)
library(grid)
ggplot(df_final, aes(x = time_10min)) +
geom_line(aes(y = occupancy, color = "Okupansi (WiFi)"), linewidth = 1) +
geom_line(aes(y = total_rate, color = "Konsumsi Energi"), linewidth = 1) +
scale_y_continuous(
name = "Okupansi (WiFi)",
sec.axis = sec_axis(~ ., name = "Total Konsumsi Energi (rate)")
) +
scale_color_manual(values = c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")) +
labs(title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi", x = "Tanggal", color = "") +
theme_minimal()
##Scatter Plot (Korelasi)
ggplot(df_final, aes(x = occupancy, y = total_rate)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", color = "red") +
labs(title = "Hubungan Korelasi antara Okupansi dan Konsumsi Energi",
x = "Okupansi (Rata-rata Pengguna WiFi)",
y = "Total Konsumsi Energi (rate)") +
theme_minimal()
##Pola Harian 24 Jam
df_final <- df_final %>%
mutate(hour = hour(time_10min))
daily_profile <- df_final %>%
group_by(hour) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
total_rate = mean(total_rate, na.rm = TRUE)
)
p1 <- ggplot(daily_profile, aes(x = hour)) +
geom_line(aes(y = occupancy), color = "blue", linewidth = 1) +
geom_point(aes(y = occupancy), color = "blue") +
scale_x_continuous(breaks = 0:23) +
labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Jumlah Pengguna WiFi", title = "Pola Harian Okupansi (WiFi)") +
theme_minimal()
p2 <- ggplot(daily_profile, aes(x = hour)) +
geom_line(aes(y = total_rate), color = "red", linewidth = 1) +
geom_point(aes(y = total_rate), color = "red") +
scale_x_continuous(breaks = 0:23) +
labs(x = "Jam dalam Sehari (0-23)", y = "Total Konsumsi Energi (rate)", title = "Pola Harian Konsumsi Energi") +
theme_minimal()
grid.arrange(p1, p2, ncol = 1)
##Analisis
peak_hour <- daily_profile$hour[which.max(daily_profile$occupancy)]
cat("Jam tersibuk di perpustakaan (puncak okupansi) rata-rata terjadi pada jam:", peak_hour, ":00\n")
correlation <- cor(df_final$occupancy, df_final$total_rate, use = "complete.obs")
cat(sprintf("Koefisien korelasi antara okupansi dan konsumsi energi adalah: %.4f\n", correlation))
##Perbandingan Weekend vs Weekday
df_final <- df_final %>%
mutate(
day_of_week = wday(time_10min, week_start = 1) - 1, # Senin=0, Minggu=6
day_type = if_else(day_of_week < 5, "Weekday", "Weekend")
)
df_weekday <- filter(df_final, day_type == "Weekday")
df_weekend <- filter(df_final, day_type == "Weekend")
cat("Data berhasil dipisahkan.\n")
cat("Jumlah data hari kerja (Weekday):", nrow(df_weekday), "baris\n")
cat("Jumlah data akhir pekan (Weekend):", nrow(df_weekend), "baris\n")
##Scatter Plot Hari Kerja dan Akhir Pekan
p_weekday <- ggplot(df_weekday, aes(x = occupancy, y = total_rate)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", color = "green") +
labs(title = "Hubungan Okupansi vs. Energi (Hari Kerja)",
x = "Okupansi (Pengguna WiFi)", y = "Total Konsumsi Energi") +
theme_minimal()
p_weekend <- ggplot(df_weekend, aes(x = occupancy, y = total_rate)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", color = "orange") +
labs(title = "Hubungan Okupansi vs. Energi (Akhir Pekan)",
x = "Okupansi (Pengguna WiFi)", y = "") +
theme_minimal()
grid.arrange(p_weekday, p_weekend, ncol = 2)
##Perbandingan Pola Harian Weekday vs Weekend
weekday_profile <- df_weekday %>%
group_by(hour) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
total_rate = mean(total_rate, na.rm = TRUE)
)
weekend_profile <- df_weekend %>%
group_by(hour) %>%
summarise(
occupancy = mean(occupancy, na.rm = TRUE),
total_rate = mean(total_rate, na.rm = TRUE)
)
p1 <- ggplot() +
geom_line(data = weekday_profile, aes(x = hour, y = occupancy, color = "Hari Kerja"), linewidth = 1) +
geom_point(data = weekday_profile, aes(x = hour, y = occupancy, color = "Hari Kerja")) +
geom_line(data = weekend_profile, aes(x = hour, y = occupancy, color = "Akhir Pekan"), linewidth = 1, linetype = "dashed") +
geom_point(data = weekend_profile, aes(x = hour, y = occupancy, color = "Akhir Pekan")) +
scale_color_manual(values = c("Hari Kerja" = "blue", "Akhir Pekan" = "cyan")) +
scale_x_continuous(breaks = seq(0, 23, 2)) +
labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Jumlah Pengguna WiFi", color = "Tipe Hari",
title = "Perbandingan Pola Harian Okupansi") +
theme_minimal()
p2 <- ggplot() +
geom_line(data = weekday_profile, aes(x = hour, y = total_rate, color = "Hari Kerja"), linewidth = 1) +
geom_point(data = weekday_profile, aes(x = hour, y = total_rate, color = "Hari Kerja")) +
geom_line(data = weekend_profile, aes(x = hour, y = total_rate, color = "Akhir Pekan"), linewidth = 1, linetype = "dashed") +
geom_point(data = weekend_profile, aes(x = hour, y = total_rate, color = "Akhir Pekan")) +
scale_color_manual(values = c("Hari Kerja" = "red", "Akhir Pekan" = "magenta")) +
scale_x_continuous(breaks = seq(0, 23, 2)) +
labs(x = "Jam dalam Sehari (0-23)", y = "Rata-rata Konsumsi Energi", color = "Tipe Hari",
title = "Perbandingan Pola Harian Konsumsi Energi") +
theme_minimal()
grid.arrange(p1, p2, ncol = 1)