library(data.table)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(patchwork)
# Absolute locations of the raw CSV exports: one WiFi log and three energy
# meter files. These paths are machine-specific.
wifi_csv <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/wifi.csv"
energy_csv_1 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library1.csv"
energy_csv_2 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library2.csv"
energy_csv_3 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library3.csv"

# Read each file with data.table::fread().
df_wifi <- fread(wifi_csv)
df_energy1 <- fread(energy_csv_1)
df_energy2 <- fread(energy_csv_2)
df_energy3 <- fread(energy_csv_3)
# Columns that together identify a WiFi record when looking for duplicates.
subset_cols <- c(
  "time",
  "Building",
  "Floor",
  "Associated Client Count"
)

# Report the row count before any cleaning.
n_rows_raw <- nrow(df_wifi)
cat("Jumlah baris awal di df_wifi:", n_rows_raw, "\n")

# Keep one row per combination of the key columns; .keep_all = TRUE retains
# the remaining columns of the kept row.
df_wifi_cleaned <- distinct(
  df_wifi,
  across(all_of(subset_cols)),
  .keep_all = TRUE
)

# Report the row count after de-duplication.
n_rows_dedup <- nrow(df_wifi_cleaned)
cat("Jumlah baris di df_wifi setelah duplikat sempurna dihapus:", n_rows_dedup, "\n")
# Keep only WiFi rows whose Building label equals "library" once surrounding
# whitespace and letter case are ignored, then parse the timestamp column.
# quiet = TRUE silences lubridate's messages about values it cannot parse.
df_wifi_library <- df_wifi_cleaned %>%
  filter(tolower(trimws(Building)) == "library") %>%
  mutate(time = ymd_hms(time, quiet = TRUE))

# For each energy meter: parse the timestamp, then keep a single row per
# timestamp (all other columns of the kept row are retained).
df_energy1 <- df_energy1 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)

df_energy2 <- df_energy2 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)

df_energy3 <- df_energy3 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)
#' Fill missing energy readings with a baseline mean
#'
#' Replaces every `NA` in `df$rate` with the mean of the non-missing values
#' found in the first `baseline_n` rows of that column.
#'
#' @param df A data frame (or data.table) with a numeric `rate` column.
#' @param baseline_n Number of leading rows used to compute the fill value.
#'   Defaults to 144, the window the analysis has always used (presumably one
#'   day of 10-minute readings -- confirm against the meter data).
#' @return `df` with the `NA` entries of `rate` replaced. When the baseline
#'   window holds no usable value, `df` is returned unchanged with a warning.
fill_missing_energy <- function(df, baseline_n = 144L) {
  baseline_mean <- mean(head(df$rate, baseline_n), na.rm = TRUE)

  # mean() yields NaN when the window is empty or entirely NA; writing that
  # into the column would only trade NA for NaN, so leave the data untouched.
  if (is.nan(baseline_mean)) {
    warning(
      "No non-missing rate values in the first ", baseline_n,
      " rows; NA values were not filled.",
      call. = FALSE
    )
    return(df)
  }

  df$rate[is.na(df$rate)] <- baseline_mean
  df
}
# Impute missing readings in each meter with that meter's own baseline mean.
df_energy1 <- fill_missing_energy(df_energy1)
df_energy2 <- fill_missing_energy(df_energy2)
df_energy3 <- fill_missing_energy(df_energy3)

# Line the three meters up on their timestamp instead of on row position, so
# a gap or an extra reading in one file cannot shift its values onto the wrong
# time (or abort data.frame() with a row-count mismatch). Meter 1 still
# defines the set of timestamps; a timestamp absent from meter 2 or 3 leaves
# NA there, which rowSums(na.rm = TRUE) below treats as zero. The joins rely
# on each meter having one row per timestamp (de-duplicated earlier).
aligned_rates <- data.frame(ts = df_energy1$ts, rate1 = df_energy1$rate) %>%
  left_join(data.frame(ts = df_energy2$ts, rate2 = df_energy2$rate), by = "ts") %>%
  left_join(data.frame(ts = df_energy3$ts, rate3 = df_energy3$rate), by = "ts")

# Same three-column layout as before, now guaranteed to be time-aligned.
all_rates <- aligned_rates[, c("rate1", "rate2", "rate3")]

# Total energy rate per timestamp across the three meters.
total_rate <- rowSums(all_rates, na.rm = TRUE)
df_energy_total <- data.frame(ts = aligned_rates$ts, total_rate = total_rate)
# Sort the library WiFi rows chronologically and label each with the start of
# its 10-minute bucket.
df_wifi_library <- arrange(df_wifi_library, time)
df_wifi_library <- mutate(
  df_wifi_library,
  time_10min = floor_date(time, unit = "10 minutes")
)

# Occupancy per bucket = mean associated-client count over the rows in it.
df_wifi_resampled <- df_wifi_library %>%
  group_by(time_10min) %>%
  summarise(
    occupancy = mean(`Associated Client Count`, na.rm = TRUE),
    .groups = "drop"
  )

# Combine WiFi and energy data on the 10-minute timestamp (only timestamps
# present in both are kept), then rename the time column, which is the first
# column of the joined result.
df_final <- df_wifi_resampled %>%
  inner_join(df_energy_total, by = c("time_10min" = "ts")) %>%
  rename(time = time_10min)
library(scales)

# Legend label -> line colour for the two series.
series_colours <- c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")

# Both series are drawn against the same y scale: the secondary axis uses the
# identity transform (~ .), so it mirrors the primary axis under another title.
shared_y_axis <- scale_y_continuous(
  name = "Rata-rata Jumlah Pengguna WiFi",
  sec.axis = sec_axis(~ ., name = "Total Konsumsi Energi (rate)")
)

# Time series of occupancy and total energy rate.
p1 <- ggplot(data = df_final, mapping = aes(x = time)) +
  geom_line(mapping = aes(y = occupancy, color = "Okupansi (WiFi)")) +
  geom_line(mapping = aes(y = total_rate, color = "Konsumsi Energi")) +
  shared_y_axis +
  scale_color_manual(values = series_colours) +
  labs(
    title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi",
    x = "Tanggal",
    color = ""
  ) +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("time_series_plot.png", plot = p1, width = 15, height = 7)
print(p1)
# Axis and title text for the occupancy-vs-energy scatter plot.
scatter_labels <- labs(
  title = "Hubungan Korelasi antara Okupansi dan Konsumsi Energi",
  x = "Okupansi (Rata-rata Pengguna WiFi)",
  y = "Total Konsumsi Energi (rate)"
)

# One semi-transparent point per 10-minute record, with a fitted linear
# regression line (method = "lm") drawn in red.
p2 <- ggplot(data = df_final, mapping = aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "red") +
  scatter_labels +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("scatter_plot.png", plot = p2, width = 10, height = 6)
print(p2)
# Tag every 10-minute record with its hour of day.
df_final <- df_final %>%
  mutate(hour = hour(time))

# Mean occupancy and mean total energy rate for each hour across all records.
daily_profile <- df_final %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    total_rate = mean(total_rate, na.rm = TRUE),
    .groups = "drop"
  )

# Legend label -> colour for the two series.
profile_colours <- c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")

# Occupancy is drawn solid with round markers; energy is drawn dashed with
# cross markers (shape 4). Both share the same y scale.
p3 <- ggplot(data = daily_profile, mapping = aes(x = hour)) +
  geom_line(
    mapping = aes(y = occupancy, color = "Okupansi (WiFi)"),
    size = 1
  ) +
  geom_point(
    mapping = aes(y = occupancy, color = "Okupansi (WiFi)"),
    size = 2
  ) +
  geom_line(
    mapping = aes(y = total_rate, color = "Konsumsi Energi"),
    size = 1,
    linetype = "dashed"
  ) +
  geom_point(
    mapping = aes(y = total_rate, color = "Konsumsi Energi"),
    shape = 4,
    size = 2
  ) +
  scale_x_continuous(breaks = 0:23) +
  scale_color_manual(values = profile_colours) +
  labs(
    title = "Pola Harian Rata-rata: Okupansi vs. Konsumsi Energi",
    x = "Jam dalam Sehari (0-23)",
    y = NULL,
    color = ""
  ) +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("daily_profile_plot.png", plot = p3, width = 12, height = 6)
print(p3)
# Hour with the highest average occupancy in the daily profile.
busiest_row <- which.max(daily_profile$occupancy)
peak_hour <- daily_profile$hour[busiest_row]
peak_msg <- sprintf(
  "Jam tersibuk di perpustakaan (puncak okupansi) rata-rata terjadi pada jam: %02d:00\n",
  peak_hour
)
cat(peak_msg)

# Correlation between occupancy and total energy rate over the joined
# 10-minute records, using only rows where both values are present.
correlation <- with(
  df_final,
  cor(occupancy, total_rate, use = "complete.obs")
)
correlation_msg <- sprintf(
  "Koefisien korelasi antara okupansi dan konsumsi energi adalah: %.4f\n",
  correlation
)
cat(correlation_msg)
# Day index with Monday = 0 ... Sunday = 6. week_start = 1 is passed
# explicitly so the numbering does not depend on the session-wide
# `lubridate.week.start` option, which the previous `(wday() + 5) %% 7`
# arithmetic silently assumed to be Sunday.
df_final$day_of_week <- wday(df_final$time, week_start = 1) - 1

# Indices 0-4 are Monday-Friday; 5 and 6 are Saturday and Sunday.
df_final$day_type <- ifelse(df_final$day_of_week < 5, "Weekday", "Weekend")

df_weekday <- filter(df_final, day_type == "Weekday")
df_weekend <- filter(df_final, day_type == "Weekend")

# The status line used to be a bare parenthesised string (no cat()), so it was
# never written as a message; emit it like the two counts that follow.
cat("Data berhasil dipisahkan.\n")
cat("Jumlah data hari kerja (Weekday):", nrow(df_weekday), "baris\n")
cat("Jumlah data akhir pekan (Weekend):", nrow(df_weekend), "baris\n")
# Weekday scatter plot: occupancy vs. total energy with a linear fit.
p_weekday <- ggplot(df_weekday, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "green") +
  labs(
    title = "Hubungan Okupansi vs. Energi (Hari Kerja)",
    x = "Okupansi (Pengguna WiFi)",
    y = "Total Konsumsi Energi"
  ) +
  theme_minimal()

# Weekend scatter plot; the y-axis title is omitted (y = NULL).
p_weekend <- ggplot(df_weekend, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "orange") +
  labs(
    title = "Hubungan Okupansi vs. Energi (Akhir Pekan)",
    x = "Okupansi (Pengguna WiFi)",
    y = NULL
  ) +
  theme_minimal()

# `+` binds tighter than `|`, so without the parentheses the annotation was
# attached to p_weekend alone instead of the side-by-side layout, and
# patchwork only honours plot_annotation() on the top-level composition.
combined_plot <- (p_weekday | p_weekend) +
  plot_annotation(title = "Perbandingan Korelasi: Hari Kerja vs. Akhir Pekan")

print(combined_plot)
ggsave("scatter_comparison.png", combined_plot, width = 16, height = 6)
# Average hourly profile per day type.
# Helper: mean occupancy and mean total energy rate for each hour of day.
hourly_means <- function(records) {
  records %>%
    group_by(hour = hour(time)) %>%
    summarise(
      occupancy = mean(occupancy, na.rm = TRUE),
      total_rate = mean(total_rate, na.rm = TRUE)
    )
}

weekday_profile <- hourly_means(df_weekday)
weekend_profile <- hourly_means(df_weekend)

# Stack both profiles, labelling each row with its day type.
profile_combined <- bind_rows(
  mutate(weekday_profile, day_type = "Hari Kerja"),
  mutate(weekend_profile, day_type = "Akhir Pekan")
)

# Per-day-type styling, shared by the colour, linetype and shape legends.
day_colours <- c("Hari Kerja" = "blue", "Akhir Pekan" = "cyan4")
day_linetypes <- c("Hari Kerja" = "solid", "Akhir Pekan" = "dashed")
day_shapes <- c("Hari Kerja" = 16, "Akhir Pekan" = 17)

# Occupancy and energy are both drawn for each day type on the same y scale;
# the energy layers are slightly transparent (alpha = 0.7).
p_daily_compare <- ggplot(data = profile_combined, mapping = aes(x = hour)) +
  geom_line(
    mapping = aes(y = occupancy, color = day_type, linetype = day_type),
    size = 1
  ) +
  geom_point(
    mapping = aes(y = occupancy, color = day_type, shape = day_type),
    size = 2
  ) +
  geom_line(
    mapping = aes(y = total_rate, color = day_type, linetype = day_type),
    size = 1,
    alpha = 0.7
  ) +
  geom_point(
    mapping = aes(y = total_rate, color = day_type, shape = day_type),
    size = 2,
    alpha = 0.7
  ) +
  scale_x_continuous(breaks = seq(0, 23, 2)) +
  scale_color_manual(values = day_colours) +
  scale_linetype_manual(values = day_linetypes) +
  scale_shape_manual(values = day_shapes) +
  labs(
    title = "Perbandingan Pola Harian Rata-rata: Hari Kerja vs. Akhir Pekan",
    x = "Jam dalam Sehari (0-23)",
    y = NULL,
    color = "Tipe Hari",
    linetype = "Tipe Hari",
    shape = "Tipe Hari"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Display the figure, then write it to the working directory.
print(p_daily_compare)
ggsave("daily_profile_comparison.png", plot = p_daily_compare, width = 14, height = 7)
---
title: "Redo assignment"
output: html_notebook

Nama   : Naufal Mahdy Nashrullah
NRP    : 5003231209
Kelas  : DATA MINING DAN VISUALISASI K
---

```{r}
library(data.table)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(patchwork)
```
```{r}
# Absolute locations of the raw CSV exports: one WiFi log and three energy
# meter files. These paths are machine-specific.
wifi_csv <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/wifi.csv"
energy_csv_1 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library1.csv"
energy_csv_2 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library2.csv"
energy_csv_3 <- "C:/Users/Noval/Documents/Kuliah/SEMESTER 5/Data Mining dan Visualisasi/library3.csv"

# Read each file with data.table::fread().
df_wifi <- fread(wifi_csv)
df_energy1 <- fread(energy_csv_1)
df_energy2 <- fread(energy_csv_2)
df_energy3 <- fread(energy_csv_3)
```
```{r}
# Columns that together identify a WiFi record when looking for duplicates.
subset_cols <- c(
  "time",
  "Building",
  "Floor",
  "Associated Client Count"
)

# Report the row count before any cleaning.
n_rows_raw <- nrow(df_wifi)
cat("Jumlah baris awal di df_wifi:", n_rows_raw, "\n")
```
```{r}
# Keep one row per combination of the key columns listed in subset_cols;
# .keep_all = TRUE retains the remaining columns of the kept row.
df_wifi_cleaned <- distinct(
  df_wifi,
  across(all_of(subset_cols)),
  .keep_all = TRUE
)

# Report the row count after de-duplication.
n_rows_dedup <- nrow(df_wifi_cleaned)
cat("Jumlah baris di df_wifi setelah duplikat sempurna dihapus:", n_rows_dedup, "\n")
```
```{r}
# Keep only WiFi rows whose Building label equals "library" once surrounding
# whitespace and letter case are ignored, then parse the timestamp column.
# quiet = TRUE silences lubridate's messages about values it cannot parse.
df_wifi_library <- df_wifi_cleaned %>%
  filter(tolower(trimws(Building)) == "library") %>%
  mutate(time = ymd_hms(time, quiet = TRUE))

# For each energy meter: parse the timestamp, then keep a single row per
# timestamp (all other columns of the kept row are retained).
df_energy1 <- df_energy1 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)

df_energy2 <- df_energy2 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)

df_energy3 <- df_energy3 %>%
  mutate(ts = ymd_hms(ts, quiet = TRUE)) %>%
  distinct(ts, .keep_all = TRUE)
```
```{r}
#' Fill missing energy readings with a baseline mean
#'
#' Replaces every `NA` in `df$rate` with the mean of the non-missing values
#' found in the first `baseline_n` rows of that column.
#'
#' @param df A data frame (or data.table) with a numeric `rate` column.
#' @param baseline_n Number of leading rows used to compute the fill value.
#'   Defaults to 144, the window the analysis has always used (presumably one
#'   day of 10-minute readings -- confirm against the meter data).
#' @return `df` with the `NA` entries of `rate` replaced. When the baseline
#'   window holds no usable value, `df` is returned unchanged with a warning.
fill_missing_energy <- function(df, baseline_n = 144L) {
  baseline_mean <- mean(head(df$rate, baseline_n), na.rm = TRUE)

  # mean() yields NaN when the window is empty or entirely NA; writing that
  # into the column would only trade NA for NaN, so leave the data untouched.
  if (is.nan(baseline_mean)) {
    warning(
      "No non-missing rate values in the first ", baseline_n,
      " rows; NA values were not filled.",
      call. = FALSE
    )
    return(df)
  }

  df$rate[is.na(df$rate)] <- baseline_mean
  df
}

# Impute missing readings in each meter with that meter's own baseline mean.
df_energy1 <- fill_missing_energy(df_energy1)
df_energy2 <- fill_missing_energy(df_energy2)
df_energy3 <- fill_missing_energy(df_energy3)

# Line the three meters up on their timestamp instead of on row position, so
# a gap or an extra reading in one file cannot shift its values onto the wrong
# time (or abort data.frame() with a row-count mismatch). Meter 1 still
# defines the set of timestamps; a timestamp absent from meter 2 or 3 leaves
# NA there, which rowSums(na.rm = TRUE) below treats as zero. The joins rely
# on each meter having one row per timestamp (de-duplicated earlier).
aligned_rates <- data.frame(ts = df_energy1$ts, rate1 = df_energy1$rate) %>%
  left_join(data.frame(ts = df_energy2$ts, rate2 = df_energy2$rate), by = "ts") %>%
  left_join(data.frame(ts = df_energy3$ts, rate3 = df_energy3$rate), by = "ts")

# Same three-column layout as before, now guaranteed to be time-aligned.
all_rates <- aligned_rates[, c("rate1", "rate2", "rate3")]

# Total energy rate per timestamp across the three meters.
total_rate <- rowSums(all_rates, na.rm = TRUE)
df_energy_total <- data.frame(ts = aligned_rates$ts, total_rate = total_rate)
```
```{r}
# Sort the library WiFi rows chronologically and label each with the start of
# its 10-minute bucket.
df_wifi_library <- arrange(df_wifi_library, time)
df_wifi_library <- mutate(
  df_wifi_library,
  time_10min = floor_date(time, unit = "10 minutes")
)

# Occupancy per bucket = mean associated-client count over the rows in it.
df_wifi_resampled <- df_wifi_library %>%
  group_by(time_10min) %>%
  summarise(
    occupancy = mean(`Associated Client Count`, na.rm = TRUE),
    .groups = "drop"
  )

# Combine WiFi and energy data on the 10-minute timestamp (only timestamps
# present in both are kept), then rename the time column, which is the first
# column of the joined result.
df_final <- df_wifi_resampled %>%
  inner_join(df_energy_total, by = c("time_10min" = "ts")) %>%
  rename(time = time_10min)
```
```{r}
library(scales)

# Legend label -> line colour for the two series.
series_colours <- c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")

# Both series are drawn against the same y scale: the secondary axis uses the
# identity transform (~ .), so it mirrors the primary axis under another title.
shared_y_axis <- scale_y_continuous(
  name = "Rata-rata Jumlah Pengguna WiFi",
  sec.axis = sec_axis(~ ., name = "Total Konsumsi Energi (rate)")
)

# Time series of occupancy and total energy rate.
p1 <- ggplot(data = df_final, mapping = aes(x = time)) +
  geom_line(mapping = aes(y = occupancy, color = "Okupansi (WiFi)")) +
  geom_line(mapping = aes(y = total_rate, color = "Konsumsi Energi")) +
  shared_y_axis +
  scale_color_manual(values = series_colours) +
  labs(
    title = "Tren Okupansi Perpustakaan vs. Konsumsi Energi",
    x = "Tanggal",
    color = ""
  ) +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("time_series_plot.png", plot = p1, width = 15, height = 7)
print(p1)
```
```{r}
# Axis and title text for the occupancy-vs-energy scatter plot.
scatter_labels <- labs(
  title = "Hubungan Korelasi antara Okupansi dan Konsumsi Energi",
  x = "Okupansi (Rata-rata Pengguna WiFi)",
  y = "Total Konsumsi Energi (rate)"
)

# One semi-transparent point per 10-minute record, with a fitted linear
# regression line (method = "lm") drawn in red.
p2 <- ggplot(data = df_final, mapping = aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "red") +
  scatter_labels +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("scatter_plot.png", plot = p2, width = 10, height = 6)
print(p2)
```
```{r}
# Tag every 10-minute record with its hour of day.
df_final <- df_final %>%
  mutate(hour = hour(time))

# Mean occupancy and mean total energy rate for each hour across all records.
daily_profile <- df_final %>%
  group_by(hour) %>%
  summarise(
    occupancy = mean(occupancy, na.rm = TRUE),
    total_rate = mean(total_rate, na.rm = TRUE),
    .groups = "drop"
  )

# Legend label -> colour for the two series.
profile_colours <- c("Okupansi (WiFi)" = "blue", "Konsumsi Energi" = "red")

# Occupancy is drawn solid with round markers; energy is drawn dashed with
# cross markers (shape 4). Both share the same y scale.
p3 <- ggplot(data = daily_profile, mapping = aes(x = hour)) +
  geom_line(
    mapping = aes(y = occupancy, color = "Okupansi (WiFi)"),
    size = 1
  ) +
  geom_point(
    mapping = aes(y = occupancy, color = "Okupansi (WiFi)"),
    size = 2
  ) +
  geom_line(
    mapping = aes(y = total_rate, color = "Konsumsi Energi"),
    size = 1,
    linetype = "dashed"
  ) +
  geom_point(
    mapping = aes(y = total_rate, color = "Konsumsi Energi"),
    shape = 4,
    size = 2
  ) +
  scale_x_continuous(breaks = 0:23) +
  scale_color_manual(values = profile_colours) +
  labs(
    title = "Pola Harian Rata-rata: Okupansi vs. Konsumsi Energi",
    x = "Jam dalam Sehari (0-23)",
    y = NULL,
    color = ""
  ) +
  theme_minimal()

# Write the figure to the working directory, then display it.
ggsave("daily_profile_plot.png", plot = p3, width = 12, height = 6)
print(p3)
```

```{r}
# Hour with the highest average occupancy in the daily profile.
busiest_row <- which.max(daily_profile$occupancy)
peak_hour <- daily_profile$hour[busiest_row]
peak_msg <- sprintf(
  "Jam tersibuk di perpustakaan (puncak okupansi) rata-rata terjadi pada jam: %02d:00\n",
  peak_hour
)
cat(peak_msg)
```
```{r}
# Correlation between occupancy and total energy rate over the joined
# 10-minute records, using only rows where both values are present.
correlation <- with(
  df_final,
  cor(occupancy, total_rate, use = "complete.obs")
)
correlation_msg <- sprintf(
  "Koefisien korelasi antara okupansi dan konsumsi energi adalah: %.4f\n",
  correlation
)
cat(correlation_msg)
```
```{r}
# Day index with Monday = 0 ... Sunday = 6. week_start = 1 is passed
# explicitly so the numbering does not depend on the session-wide
# `lubridate.week.start` option, which the previous `(wday() + 5) %% 7`
# arithmetic silently assumed to be Sunday.
df_final$day_of_week <- wday(df_final$time, week_start = 1) - 1

# Indices 0-4 are Monday-Friday; 5 and 6 are Saturday and Sunday.
df_final$day_type <- ifelse(df_final$day_of_week < 5, "Weekday", "Weekend")

df_weekday <- filter(df_final, day_type == "Weekday")
df_weekend <- filter(df_final, day_type == "Weekend")
```
```{r}
# The status line used to be a bare parenthesised string (no cat()), so the
# chunk echoed it as a quoted R value with a literal "\n" instead of printing
# a message; emit it like the two counts that follow.
cat("Data berhasil dipisahkan.\n")
cat("Jumlah data hari kerja (Weekday):", nrow(df_weekday), "baris\n")
cat("Jumlah data akhir pekan (Weekend):", nrow(df_weekend), "baris\n")
```
```{r}
# Weekday scatter plot: occupancy vs. total energy with a linear fit.
p_weekday <- ggplot(df_weekday, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "green") +
  labs(
    title = "Hubungan Okupansi vs. Energi (Hari Kerja)",
    x = "Okupansi (Pengguna WiFi)",
    y = "Total Konsumsi Energi"
  ) +
  theme_minimal()

# Weekend scatter plot; the y-axis title is omitted (y = NULL).
p_weekend <- ggplot(df_weekend, aes(x = occupancy, y = total_rate)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", color = "orange") +
  labs(
    title = "Hubungan Okupansi vs. Energi (Akhir Pekan)",
    x = "Okupansi (Pengguna WiFi)",
    y = NULL
  ) +
  theme_minimal()

# `+` binds tighter than `|`, so without the parentheses the annotation was
# attached to p_weekend alone instead of the side-by-side layout, and
# patchwork only honours plot_annotation() on the top-level composition.
combined_plot <- (p_weekday | p_weekend) +
  plot_annotation(title = "Perbandingan Korelasi: Hari Kerja vs. Akhir Pekan")

print(combined_plot)
ggsave("scatter_comparison.png", combined_plot, width = 16, height = 6)

# Average hourly profile per day type.
# Helper: mean occupancy and mean total energy rate for each hour of day.
hourly_means <- function(records) {
  records %>%
    group_by(hour = hour(time)) %>%
    summarise(
      occupancy = mean(occupancy, na.rm = TRUE),
      total_rate = mean(total_rate, na.rm = TRUE)
    )
}

weekday_profile <- hourly_means(df_weekday)
weekend_profile <- hourly_means(df_weekend)

# Stack both profiles, labelling each row with its day type.
profile_combined <- bind_rows(
  mutate(weekday_profile, day_type = "Hari Kerja"),
  mutate(weekend_profile, day_type = "Akhir Pekan")
)

# Per-day-type styling, shared by the colour, linetype and shape legends.
day_colours <- c("Hari Kerja" = "blue", "Akhir Pekan" = "cyan4")
day_linetypes <- c("Hari Kerja" = "solid", "Akhir Pekan" = "dashed")
day_shapes <- c("Hari Kerja" = 16, "Akhir Pekan" = 17)

# Occupancy and energy are both drawn for each day type on the same y scale;
# the energy layers are slightly transparent (alpha = 0.7).
p_daily_compare <- ggplot(data = profile_combined, mapping = aes(x = hour)) +
  geom_line(
    mapping = aes(y = occupancy, color = day_type, linetype = day_type),
    size = 1
  ) +
  geom_point(
    mapping = aes(y = occupancy, color = day_type, shape = day_type),
    size = 2
  ) +
  geom_line(
    mapping = aes(y = total_rate, color = day_type, linetype = day_type),
    size = 1,
    alpha = 0.7
  ) +
  geom_point(
    mapping = aes(y = total_rate, color = day_type, shape = day_type),
    size = 2,
    alpha = 0.7
  ) +
  scale_x_continuous(breaks = seq(0, 23, 2)) +
  scale_color_manual(values = day_colours) +
  scale_linetype_manual(values = day_linetypes) +
  scale_shape_manual(values = day_shapes) +
  labs(
    title = "Perbandingan Pola Harian Rata-rata: Hari Kerja vs. Akhir Pekan",
    x = "Jam dalam Sehari (0-23)",
    y = NULL,
    color = "Tipe Hari",
    linetype = "Tipe Hari",
    shape = "Tipe Hari"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Display the figure, then write it to the working directory.
print(p_daily_compare)
ggsave("daily_profile_comparison.png", plot = p_daily_compare, width = 14, height = 7)
```
