# Load library yang dibutuhkan
library(data.table)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(patchwork)

# Read the input datasets ----
# Plain string literals are used for the paths: an R raw string needs an
# explicit delimiter pair, e.g. r"(...)", so r"C:/..." does not parse, and
# forward slashes need no escaping anyway.
data_dir <- "C:/Users/DHEA CANTIK/Downloads"

# check.names = FALSE keeps the header "Associated Client Count" as written.
# By default read.csv() rewrites it to "Associated.Client.Count", which would
# break every later reference to that column.
wifi_data   <- read.csv(file.path(data_dir, "wifi.csv"), check.names = FALSE)
energy_lib1 <- read.csv(file.path(data_dir, "library1.csv"))
energy_lib2 <- read.csv(file.path(data_dir, "library2.csv"))
energy_lib3 <- read.csv(file.path(data_dir, "library3.csv"))

# Columns used to decide whether two WiFi rows are duplicates
keep_cols <- c("time", "Building", "Floor", "Associated Client Count")

# Report the initial number of observations
message("Jumlah baris awal wifi_data: ", nrow(wifi_data))

# Drop rows that repeat the same combination of keep_cols; .keep_all = TRUE
# retains the remaining columns of the first row in each combination.
# NOTE(review): rows that differ only in columns outside keep_cols are dropped
# as well -- confirm this is intended.
wifi_clean <- wifi_data %>%
  distinct(across(all_of(keep_cols)), .keep_all = TRUE)

message("Jumlah baris setelah duplikat dihapus: ", nrow(wifi_clean))

# Keep only the Library building (case- and whitespace-insensitive match)
wifi_library <- wifi_clean %>%
  filter(tolower(trimws(Building)) == "library")

# Parse the time column into date-times. quiet = TRUE silences lubridate's
# messages, so check for NA afterwards if the input format is uncertain.
wifi_library$time <- ymd_hms(wifi_library$time, quiet = TRUE)
# Standardise one energy table: parse its `ts` column as a date-time, then
# rename `ts` to `time` and `kWh` to `Energy` so the three sources line up.
standardise_energy <- function(df) {
  df$ts <- ymd_hms(df$ts, quiet = TRUE)
  names(df)[names(df) == "ts"] <- "time"
  names(df)[names(df) == "kWh"] <- "Energy"
  df
}

energy_lib1 <- standardise_energy(energy_lib1)
energy_lib2 <- standardise_energy(energy_lib2)
energy_lib3 <- standardise_energy(energy_lib3)

# Stack the three sources, tagging every row with its origin, sorted by time
energy_all <- arrange(
  bind_rows(
    mutate(energy_lib1, Source = "Lib1"),
    mutate(energy_lib2, Source = "Lib2"),
    mutate(energy_lib3, Source = "Lib3")
  ),
  time
)

message("Ukuran data energi gabungan: ", nrow(energy_all), " baris")

# Add an `hour` column (timestamp truncated to the hour) to both tables
wifi_library <- mutate(wifi_library, hour = floor_date(time, unit = "hour"))
energy_all <- mutate(energy_all, hour = floor_date(time, unit = "hour"))

# Hourly WiFi summary: mean client count per hour and floor
wifi_hourly <- summarise(
  group_by(wifi_library, hour, Floor),
  avg_clients = mean(`Associated Client Count`, na.rm = TRUE),
  .groups = "drop"
)

# Hourly energy summary: energy summed over all sources per hour
energy_hourly <- summarise(
  group_by(energy_all, hour),
  total_energy = sum(Energy, na.rm = TRUE),
  .groups = "drop"
)
# Hourly average WiFi clients, one coloured line per floor.
# `linewidth` sets the line thickness: using `size` for lines is deprecated
# since ggplot2 3.4.0, and the per-floor plot further down already uses
# `linewidth`.
plot_wifi <- ggplot(wifi_hourly, aes(x = hour, y = avg_clients, color = Floor)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Rata-rata Jumlah Client WiFi per Jam di Perpustakaan",
    x = "Waktu (Jam)",
    y = "Jumlah Client"
  ) +
  theme_minimal()

# Hourly total energy consumption
plot_energy <- ggplot(energy_hourly, aes(x = hour, y = total_energy)) +
  geom_line(color = "steelblue", linewidth = 1) +
  labs(
    title = "Total Konsumsi Energi per Jam",
    x = "Waktu (Jam)",
    y = "Energi (kWh)"
  ) +
  theme_light()

# Show both plots side by side (patchwork `+`) for comparison
plot_wifi + plot_energy

# Simple correlation between client count and energy consumption.
# Per-floor hourly averages are summed into one value per hour, then joined to
# the hourly energy totals; hours without an energy match get NA.
merged_data <- wifi_hourly %>%
  group_by(hour) %>%
  summarise(total_clients = sum(avg_clients, na.rm = TRUE), .groups = "drop") %>%
  left_join(energy_hourly, by = "hour")

# use = "complete.obs" drops the hours where either value is NA
correlation_value <- cor(merged_data$total_clients, merged_data$total_energy, use = "complete.obs")
message("Nilai korelasi antara jumlah client wifi dan energi: ", round(correlation_value, 3))
# Per-floor detail: mean client count for every floor and hour
wifi_floor_summary <- wifi_library %>%
  group_by(Floor, hour) %>%
  summarise(mean_clients = mean(`Associated Client Count`, na.rm = TRUE),
            .groups = "drop")

# One panel per floor, each with its own y scale
ggplot(wifi_floor_summary, aes(x = hour, y = mean_clients, color = Floor)) +
  geom_line(linewidth = 1) +
  facet_wrap(~Floor, scales = "free_y") +
  labs(
    title = "Tren Rata-rata Pengguna WiFi per Lantai",
    x = "Jam",
    y = "Rata-rata Client"
  ) +
  theme_bw()

# Energy readings per source, one panel per source.
# NOTE(review): this plots the raw rows of energy_all against the truncated
# `hour`; if a source reports more than once per hour the line will zig-zag
# within that hour -- confirm the data resolution.
ggplot(energy_all, aes(x = hour, y = Energy, color = Source)) +
  geom_line() +
  facet_wrap(~Source, scales = "free_y") +
  labs(
    title = "Distribusi Pemakaian Energi per Sumber",
    x = "Waktu (Jam)",
    y = "Energi (kWh)"
  ) +
  theme_minimal()

# Patchwork layout: WiFi trend stacked above the energy trend.
# wifi_hourly holds one row per hour AND floor, so the lines are grouped by
# Floor; without the grouping a single line would jump between floors at every
# hour and draw a sawtooth.
p1 <- ggplot(wifi_hourly, aes(x = hour, y = avg_clients, group = Floor)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Tren Rata-rata Client WiFi",
    x = "Jam",
    y = "Jumlah Client"
  )

p2 <- ggplot(energy_hourly, aes(x = hour, y = total_energy)) +
  geom_line(color = "darkred") +
  labs(
    title = "Tren Total Energi",
    x = "Jam",
    y = "Energi (kWh)"
  )

# Stack the two plots vertically (patchwork `/`)
p1 / p2

# Scatter plot of hourly clients against hourly energy with a linear fit
ggplot(merged_data, aes(x = total_clients, y = total_energy)) +
  geom_point(alpha = 0.5, color = "purple") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Hubungan antara Jumlah Client WiFi dan Konsumsi Energi",
    x = "Total Client",
    y = "Total Energi (kWh)"
  ) +
  theme_classic()
# Summary of the analysis, printed verbatim to the console.
# NOTE(review): this text is a fixed string and is not derived from
# correlation_value, so point 3 (positive correlation) should be checked
# against the value actually computed above.
cat("
Dari hasil eksplorasi data, terlihat bahwa:
1. Jumlah pengguna WiFi di perpustakaan bervariasi menurut jam dan lantai, dengan pola tertentu yang muncul di jam sibuk.
2. Konsumsi energi juga menunjukkan tren fluktuatif seiring dengan aktivitas pengguna.
3. Korelasi positif terdeteksi antara jumlah client WiFi dengan pemakaian energi, yang artinya semakin banyak pengguna, semakin tinggi pula konsumsi energi.
4. Visualisasi gabungan memperkuat hubungan ini dan memberikan gambaran menyeluruh mengenai interaksi antara penggunaan fasilitas jaringan dan energi listrik.
")
---
title: "Redo Assignment Data Madness (Revised)"
output: html_notebook

Name : Dhea Ary Shofyan
NRP : 5003231210
Class : Data Mining and Visualization K
---

```{r}
# Load the required packages
library(data.table)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(patchwork)

# Read the input datasets ----
# Plain string literals are used for the paths: an R raw string needs an
# explicit delimiter pair, e.g. r"(...)", so r"C:/..." does not parse, and
# forward slashes need no escaping anyway.
data_dir <- "C:/Users/DHEA CANTIK/Downloads"

# check.names = FALSE keeps the header "Associated Client Count" as written.
# By default read.csv() rewrites it to "Associated.Client.Count", which would
# break every later reference to that column.
wifi_data   <- read.csv(file.path(data_dir, "wifi.csv"), check.names = FALSE)
energy_lib1 <- read.csv(file.path(data_dir, "library1.csv"))
energy_lib2 <- read.csv(file.path(data_dir, "library2.csv"))
energy_lib3 <- read.csv(file.path(data_dir, "library3.csv"))

# Columns used to decide whether two WiFi rows are duplicates
keep_cols <- c("time", "Building", "Floor", "Associated Client Count")

# Report the initial number of observations
message("Jumlah baris awal wifi_data: ", nrow(wifi_data))

# Drop rows that repeat the same combination of keep_cols; .keep_all = TRUE
# retains the remaining columns of the first row in each combination.
# NOTE(review): rows that differ only in columns outside keep_cols are dropped
# as well -- confirm this is intended.
wifi_clean <- wifi_data %>%
  distinct(across(all_of(keep_cols)), .keep_all = TRUE)

message("Jumlah baris setelah duplikat dihapus: ", nrow(wifi_clean))

# Keep only the Library building (case- and whitespace-insensitive match)
wifi_library <- wifi_clean %>%
  filter(tolower(trimws(Building)) == "library")

# Parse the time column into date-times. quiet = TRUE silences lubridate's
# messages, so check for NA afterwards if the input format is uncertain.
wifi_library$time <- ymd_hms(wifi_library$time, quiet = TRUE)
```

```{r}
# Standardise one energy table: parse its `ts` column as a date-time, then
# rename `ts` to `time` and `kWh` to `Energy` so the three sources line up.
standardise_energy <- function(df) {
  df$ts <- ymd_hms(df$ts, quiet = TRUE)
  names(df)[names(df) == "ts"] <- "time"
  names(df)[names(df) == "kWh"] <- "Energy"
  df
}

energy_lib1 <- standardise_energy(energy_lib1)
energy_lib2 <- standardise_energy(energy_lib2)
energy_lib3 <- standardise_energy(energy_lib3)

# Stack the three sources, tagging every row with its origin, sorted by time
energy_all <- arrange(
  bind_rows(
    mutate(energy_lib1, Source = "Lib1"),
    mutate(energy_lib2, Source = "Lib2"),
    mutate(energy_lib3, Source = "Lib3")
  ),
  time
)

message("Ukuran data energi gabungan: ", nrow(energy_all), " baris")

# Add an `hour` column (timestamp truncated to the hour) to both tables
wifi_library <- mutate(wifi_library, hour = floor_date(time, unit = "hour"))
energy_all <- mutate(energy_all, hour = floor_date(time, unit = "hour"))

# Hourly WiFi summary: mean client count per hour and floor
wifi_hourly <- summarise(
  group_by(wifi_library, hour, Floor),
  avg_clients = mean(`Associated Client Count`, na.rm = TRUE),
  .groups = "drop"
)

# Hourly energy summary: energy summed over all sources per hour
energy_hourly <- summarise(
  group_by(energy_all, hour),
  total_energy = sum(Energy, na.rm = TRUE),
  .groups = "drop"
)
```

```{r}
# Hourly average WiFi clients, one coloured line per floor.
# `linewidth` sets the line thickness: using `size` for lines is deprecated
# since ggplot2 3.4.0, and the per-floor plot further down already uses
# `linewidth`.
plot_wifi <- ggplot(wifi_hourly, aes(x = hour, y = avg_clients, color = Floor)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Rata-rata Jumlah Client WiFi per Jam di Perpustakaan",
    x = "Waktu (Jam)",
    y = "Jumlah Client"
  ) +
  theme_minimal()

# Hourly total energy consumption
plot_energy <- ggplot(energy_hourly, aes(x = hour, y = total_energy)) +
  geom_line(color = "steelblue", linewidth = 1) +
  labs(
    title = "Total Konsumsi Energi per Jam",
    x = "Waktu (Jam)",
    y = "Energi (kWh)"
  ) +
  theme_light()

# Show both plots side by side (patchwork `+`) for comparison
plot_wifi + plot_energy

# Simple correlation between client count and energy consumption.
# Per-floor hourly averages are summed into one value per hour, then joined to
# the hourly energy totals; hours without an energy match get NA.
merged_data <- wifi_hourly %>%
  group_by(hour) %>%
  summarise(total_clients = sum(avg_clients, na.rm = TRUE), .groups = "drop") %>%
  left_join(energy_hourly, by = "hour")

# use = "complete.obs" drops the hours where either value is NA
correlation_value <- cor(merged_data$total_clients, merged_data$total_energy, use = "complete.obs")
message("Nilai korelasi antara jumlah client wifi dan energi: ", round(correlation_value, 3))
```

```{r}
# Per-floor detail: mean client count for every floor and hour
wifi_floor_summary <- wifi_library %>%
  group_by(Floor, hour) %>%
  summarise(mean_clients = mean(`Associated Client Count`, na.rm = TRUE),
            .groups = "drop")

# One panel per floor, each with its own y scale
ggplot(wifi_floor_summary, aes(x = hour, y = mean_clients, color = Floor)) +
  geom_line(linewidth = 1) +
  facet_wrap(~Floor, scales = "free_y") +
  labs(
    title = "Tren Rata-rata Pengguna WiFi per Lantai",
    x = "Jam",
    y = "Rata-rata Client"
  ) +
  theme_bw()

# Energy readings per source, one panel per source.
# NOTE(review): this plots the raw rows of energy_all against the truncated
# `hour`; if a source reports more than once per hour the line will zig-zag
# within that hour -- confirm the data resolution.
ggplot(energy_all, aes(x = hour, y = Energy, color = Source)) +
  geom_line() +
  facet_wrap(~Source, scales = "free_y") +
  labs(
    title = "Distribusi Pemakaian Energi per Sumber",
    x = "Waktu (Jam)",
    y = "Energi (kWh)"
  ) +
  theme_minimal()

# Patchwork layout: WiFi trend stacked above the energy trend.
# wifi_hourly holds one row per hour AND floor, so the lines are grouped by
# Floor; without the grouping a single line would jump between floors at every
# hour and draw a sawtooth.
p1 <- ggplot(wifi_hourly, aes(x = hour, y = avg_clients, group = Floor)) +
  geom_line(color = "darkgreen") +
  labs(
    title = "Tren Rata-rata Client WiFi",
    x = "Jam",
    y = "Jumlah Client"
  )

p2 <- ggplot(energy_hourly, aes(x = hour, y = total_energy)) +
  geom_line(color = "darkred") +
  labs(
    title = "Tren Total Energi",
    x = "Jam",
    y = "Energi (kWh)"
  )

# Stack the two plots vertically (patchwork `/`)
p1 / p2

# Scatter plot of hourly clients against hourly energy with a linear fit
ggplot(merged_data, aes(x = total_clients, y = total_energy)) +
  geom_point(alpha = 0.5, color = "purple") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Hubungan antara Jumlah Client WiFi dan Konsumsi Energi",
    x = "Total Client",
    y = "Total Energi (kWh)"
  ) +
  theme_classic()
```

```{r}
# Summary of the analysis, printed verbatim to the console.
# NOTE(review): this text is a fixed string and is not derived from
# correlation_value, so point 3 (positive correlation) should be checked
# against the value actually computed earlier.
cat("
Dari hasil eksplorasi data, terlihat bahwa:
1. Jumlah pengguna WiFi di perpustakaan bervariasi menurut jam dan lantai, dengan pola tertentu yang muncul di jam sibuk.
2. Konsumsi energi juga menunjukkan tren fluktuatif seiring dengan aktivitas pengguna.
3. Korelasi positif terdeteksi antara jumlah client WiFi dengan pemakaian energi, yang artinya semakin banyak pengguna, semakin tinggi pula konsumsi energi.
4. Visualisasi gabungan memperkuat hubungan ini dan memberikan gambaran menyeluruh mengenai interaksi antara penggunaan fasilitas jaringan dan energi listrik.
")
```



