Load Library

library(writexl) 
library(dplyr) 
library(lubridate) 
library(readr)
library(tidyverse) 
library(ggplot2)

Data Load wifi

wifi <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\wifi.csv")
head(wifi)

lib1 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library1.csv")
head(lib1)

lib2 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library2.csv")
head(lib2)

lib3 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library3.csv")
head(lib3)
NA

Cek data Building

table(wifi$Building)

                      Alex Square                     Bailrigg House                  Barker House Farm                     Bowland Annexe  
                             14328                              21230                               7164                              28654 
                      Bowland Ash                       Bowland hall                       Bowland Main                      Bowland North  
                             28652                              28652                              28656                              28655 
 Bowland Twr (Old Bowland Annexe)                  Central Workshops                              CETAD                  Chaplaincy Centre  
                            107447                              14326                              14328                              14328 
                   Charles Carter                  Conference Centre                             County                       County Field  
                             28656                               7164                              35816                              28656 
             County South/Cartmel                      Energy Centre                        Engineering                   Faraday and cTAP  
                             35819                               7164                              35980                              28656 
                    Field Station                       FU Hse 71-74                            Furness                      Furness outer  
                              7164                              42877                              35817                              28656 
                     FY Hse 65-70                              Fylde                         George Fox                   Graduate College  
                             42978                              35819                              28654                              28653 
                       Great Hall                          grize-res                          Grizedale                          Hazelrigg  
                             14328                              28652                              14326                               7164 
                        Hse 75 77                    Human Resources                            Infolab     Institute for Advanced Studies  
                             35820                              14328                              28656                               7164 
                     ISS Building                         John Creed                                LEC                            Library  
                             21492                              28652                              28656                              28652 
                             LICA              Lonsdale College (SW)                  Management School                                MDC  
                             21491                              14328                              28655                               7164 
                           Pendle                            Physics               Postgrad Stats (PSC)                          Preschool  
                             28652                              28656                              14326                               7163 
                        Reception                     Ruskin Library             Science and technology             Slaidburn House (LUSU)  
                             14328                              21491                              21492                              35820 
                    Sports Centre                     SW hse-158-179                       SW hse 12-16                       SW hse 17-19  
                             14326                               7163                              21489                              21489 
                        SW hse 20                       SW hse 21-23                       SW hse 24-26                       SW hse 27-28  
                             21489                              24926                              32408                              28655 
                        SW hse 29                       SW hse 30-31                       SW hse 32-33                          SW hse 34  
                             28655                              28656                              28652                              14326 
                        SW hse 35                          SW hse 36                       SW hse 37-38                          SW hse 39  
                             21489                              28652                              28652                              28656 
                     SW hse 40-42                       SW hse 43-45                       SW hse 46-49                       SW hse 50-52  
                             28656                              28656                              28656                              28656 
                     SW hse 53-54                       SW hse 55-56                   University House                            Whewell  
                             28656                              21492                              28656                              14328 

Hapus spasi dan ubah ke lowercase

wifi <- wifi[tolower(trimws(wifi$Building)) == "library", ] 
# jumlah baris 
cat("Jumlah baris untuk Library:", nrow(wifi), "\n")
Jumlah baris untuk Library: 28652 

Wifi

cat("Wifi missing values:\n")
Wifi missing values:
colSums(is.na(wifi))
                      time                 Event.Time    Associated.Client.Count Authenticated.Client.Count                        Uni                   Building 
                         0                          0                          0                          0                          0                          0 
                     Floor 
                         0 

Library1

cat("Library1 missing values:\n")
Library1 missing values:
colSums(is.na(lib1))
        ts       name    reading      units cumulative       rate 
         0          0       3041          0       3041       3047 

Library2

cat("Library2 missing values:\n")
Library2 missing values:
colSums(is.na(lib2))
        ts       name    reading      units cumulative       rate 
         0          0       3041          0       3041       3047 

Library3

cat("Library3 missing values:\n")
Library3 missing values:
colSums(is.na(lib3))
        ts       name    reading      units cumulative       rate 
         0          0       3041          0       3041       3047 

Isi kolom missing values

dfs <- list(lib1 = lib1, lib2 = lib2, lib3 = lib3)

for (name in names(dfs)) {
  df <- dfs[[name]]
  
  # kolom numerik yang mau diisi
  numeric_cols <- c("reading", "cumulative", "rate")
  
  # hitung rata-rata dari 144 baris pertama
  means <- colMeans(df[1:144, numeric_cols], na.rm = TRUE)
  
  # ganti NA dengan mean
  for (col in numeric_cols) {
    na_idx <- is.na(df[[col]])
    df[na_idx, col] <- means[col]
  }
  
  dfs[[name]] <- df
}

Kembalikan ke variabel asli

lib1 <- dfs$lib1
lib2 <- dfs$lib2
lib3 <- dfs$lib3

Cek kembali missing value

# Wifi
cat("Wifi missing values:\n")
Wifi missing values:
colSums(is.na(wifi))
                      time                 Event.Time    Associated.Client.Count Authenticated.Client.Count                        Uni                   Building 
                         0                          0                          0                          0                          0                          0 
                     Floor 
                         0 
# Library1
cat("Library1 missing values:\n")
Library1 missing values:
colSums(is.na(lib1))
        ts       name    reading      units cumulative       rate 
         0          0          0          0          0          0 
# Library2
cat("Library2 missing values:\n")
Library2 missing values:
colSums(is.na(lib2))
        ts       name    reading      units cumulative       rate 
         0          0          0          0          0          0 
# Library3
cat("Library3 missing values:\n")
Library3 missing values:
colSums(is.na(lib3))
        ts       name    reading      units cumulative       rate 
         0          0          0          0          0          0 

Cek Duplikat

cat("\nJumlah duplikat WiFi:", sum(duplicated(wifi)), "\n")

Jumlah duplikat WiFi: 0 
cat("Jumlah duplikat Library1:", sum(duplicated(lib1)), "\n")
Jumlah duplikat Library1: 0 
cat("Jumlah duplikat Library2:", sum(duplicated(lib2)), "\n")
Jumlah duplikat Library2: 0 
cat("Jumlah duplikat Library3:", sum(duplicated(lib3)), "\n")
Jumlah duplikat Library3: 0 
sapply(wifi, class)
                      time                 Event.Time    Associated.Client.Count Authenticated.Client.Count                        Uni                   Building 
               "character"                "character"                  "integer"                  "integer"                "character"                "character" 
                     Floor 
               "character" 
sapply(lib1, class)
         ts        name     reading       units  cumulative        rate 
"character" "character"   "numeric" "character"   "numeric"   "numeric" 
sapply(lib2, class)
         ts        name     reading       units  cumulative        rate 
"character" "character"   "numeric" "character"   "numeric"   "numeric" 
sapply(lib3, class)
         ts        name     reading       units  cumulative        rate 
"character" "character"   "numeric" "character"   "numeric"   "numeric" 
wifi <- wifi %>%
  mutate(
    time = ymd_hms(time))          # otomatis parse jadi datetime

lib1 <- lib1 %>%
  mutate(ts = ymd_hms(ts))

lib2 <- lib2 %>%
  mutate(ts = ymd_hms(ts))

lib3 <- lib3 %>%
  mutate(ts = ymd_hms(ts))



sapply(wifi, class)
$time
[1] "POSIXct" "POSIXt" 

$Event.Time
[1] "character"

$Associated.Client.Count
[1] "integer"

$Authenticated.Client.Count
[1] "integer"

$Uni
[1] "character"

$Building
[1] "character"

$Floor
[1] "character"
sapply(lib1, class)
$ts
[1] "POSIXct" "POSIXt" 

$name
[1] "character"

$reading
[1] "numeric"

$units
[1] "character"

$cumulative
[1] "numeric"

$rate
[1] "numeric"
sapply(lib2, class)
$ts
[1] "POSIXct" "POSIXt" 

$name
[1] "character"

$reading
[1] "numeric"

$units
[1] "character"

$cumulative
[1] "numeric"

$rate
[1] "numeric"
sapply(lib3, class)
$ts
[1] "POSIXct" "POSIXt" 

$name
[1] "character"

$reading
[1] "numeric"

$units
[1] "character"

$cumulative
[1] "numeric"

$rate
[1] "numeric"

Ambil kolom ts dan rate masing-masing energi

lib1_clean <- lib1 %>%
  select(ts, rate) %>%
  rename(energy1 = rate)

lib2_clean <- lib2 %>%
  select(ts, rate) %>%
  rename(energy2 = rate)

lib3_clean <- lib3 %>%
  select(ts, rate) %>%
  rename(energy3 = rate)

energy_df <- lib1_clean %>%
  inner_join(lib2_clean, by = "ts") %>%
  inner_join(lib3_clean, by = "ts")

Hitung total konsumsi energi

energy_df <- energy_df %>%
  mutate(Total_Energy = energy1 + energy2 + energy3) %>%
  arrange(ts)
head(energy_df,10)

Pilih hanya kolom yang dibutuhkan di wifi

wifi <- wifi %>%
  select(time, `Associated.Client.Count`) %>%
  rename(Associated_Client_Count = `Associated.Client.Count`)

head(wifi,10)

Resampling per 10 menit (ambil rata-rata jumlah koneksi)

wifi_resampled <- wifi %>%
  mutate(time = floor_date(time, "10 minutes")) %>%
  group_by(time) %>%
  summarise(Associated_Client_Count = mean(Associated_Client_Count, na.rm = TRUE)) %>%
  ungroup()

head(wifi_resampled,10)

Gabungkan berdasarkan timestamp

final_df <- wifi_resampled %>%
  inner_join(energy_df, by = c("time" = "ts"))

head(final_df,10)

Simpan sebagai CSV

write_csv(final_df, "final_wifi_energy.csv")
getwd()
[1] "C:/Users/Diana Eka Justitia/Downloads"
##jika excel
#write_xlsx(final_df, "final_wifi_energy.xlsx")

Visualisasi

Time Series Plot

final_df <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\final_wifi_energy.csv")
head(final_df) 

Ubah format variabel time

final_df$time <- as.POSIXct(final_df$time, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
str(final_df$time)
 POSIXct[1:3683], format: "2020-02-01 00:00:00" "2020-02-01 00:10:00" "2020-02-01 00:20:00" "2020-02-01 00:30:00" "2020-02-01 00:40:00" "2020-02-01 00:50:00" "2020-02-01 01:00:00" ...
head(final_df$time)
[1] "2020-02-01 00:00:00 UTC" "2020-02-01 00:10:00 UTC" "2020-02-01 00:20:00 UTC" "2020-02-01 00:30:00 UTC" "2020-02-01 00:40:00 UTC" "2020-02-01 00:50:00 UTC"

Plot Time Series

ggplot(final_df) +
  geom_line(aes(x = time, y = Associated_Client_Count, color = "Associated_Client_Count")) + 
  geom_line(aes(x = time, y = Total_Energy, color = "Total Energy")) +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%b-%d") +
  xlab("") +
  ylab("Value") +
  ggtitle("Time Series: Associated_Client_Count vs Total Energy - Februari")  +
  scale_color_manual(
    name = "Variable",
    values = c("Associated_Client_Count" = "blue", "Total Energy" = "red")
  )+theme(axis.text.x = element_text(angle = 90, hjust = 1))

Scatter Plot Occupancy vs Total Energy

ggplot(final_df, aes(x =Associated_Client_Count , y = Total_Energy)) +
  geom_point(color = "blue", size = 2, alpha = 0.6) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Scatter Plot: Occupancy vs Total Energy") +
  theme_minimal()

Daily Profile Occupancy

tambah variabel time_of_day

final_df <- final_df %>%
  mutate(time_of_day = as.numeric(format(time, "%H")) + as.numeric(format(time, "%M"))/60,
 date = as.factor(as.Date(time)))

Hitung rata-rata per 10 menit

daily_avg <- final_df %>%
  group_by(time_of_day) %>%
  summarise(
    avg_occupancy = mean(Associated_Client_Count, na.rm = TRUE),
    avg_energy = mean(Total_Energy, na.rm = TRUE)
  )

Plot Daily Occupancy Profile

ggplot(final_df, aes(x = time_of_day, y = Associated_Client_Count)) +
  geom_line(aes(group = date), color = "gray", alpha = 0.3) +  # tiap hari
  geom_line(data = daily_avg, aes(x = time_of_day, y = avg_occupancy), color = "red", size = 1.5) +
  xlab("Hour of Day") +
  ylab("Occupancy") +
  ggtitle("Daily Occupancy Profiles with Average (26 Days)") +
  scale_x_continuous(breaks = seq(0, 24, by = 1)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Analisis

Peak

Peak Occupancy

peak_occupancy_time <- daily_avg$time_of_day[which.max(daily_avg$avg_occupancy)]

Peak Total Energy

peak_energy_time <- daily_avg$time_of_day[which.max(daily_avg$avg_energy)]

Plot Peak Time

ggplot(daily_avg, aes(x = time_of_day)) +
  geom_line(aes(y = avg_occupancy, color = "Occupancy"), linewidth = 1.2) +
  geom_line(aes(y = avg_energy, color = "Total Energy"), linewidth = 1.2) +
  # Garis vertikal peak otomatis
  geom_vline(xintercept = peak_occupancy_time, color = "blue", linetype = "dashed", linewidth = 1) +
  geom_vline(xintercept = peak_energy_time, color = "red", linetype = "dashed", linewidth = 1) +
  xlab("Jam") +
  ylab("Rata-rata") +
  ggtitle("Profil Harian (10 Menit) - Occupancy & Total Energy dengan Peak Otomatis") +
  scale_x_continuous(
    breaks = seq(0, 24, by = 10/60),
    labels = function(x) sprintf("%02d:%02d", floor(x), round((x - floor(x))*60))
  ) +
  scale_color_manual(values = c("Occupancy" = "blue", "Total Energy" = "red")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank()
  )

Regres Polinomial

# orde 2
model_poly2 <- lm(Total_Energy ~ poly(Associated_Client_Count, 2, raw = TRUE), data = final_df)

# ringkasan model
summary(model_poly2)

Call:
lm(formula = Total_Energy ~ poly(Associated_Client_Count, 2, 
    raw = TRUE), data = final_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-45.619  -9.043  -0.504   8.922  50.936 

Coefficients:
                                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                    8.784e+01  4.173e-01  210.51   <2e-16 ***
poly(Associated_Client_Count, 2, raw = TRUE)1  6.288e-01  5.821e-03  108.02   <2e-16 ***
poly(Associated_Client_Count, 2, raw = TRUE)2 -8.855e-04  1.367e-05  -64.78   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 14.29 on 3680 degrees of freedom
Multiple R-squared:  0.8931,    Adjusted R-squared:  0.8931 
F-statistic: 1.538e+04 on 2 and 3680 DF,  p-value: < 2.2e-16

Anomalies

#menghitung residual 
final_df <- final_df %>%
  mutate(predicted_energy = predict(model_poly2, newdata = ),
         residual = Total_Energy - predicted_energy)

# menghitung batas toleransi (+-2σ)
res_sd <- sd(final_df$residual, na.rm = TRUE)
upper_bound <- 2 * res_sd
lower_bound <- -2 * res_sd

# Menandai anomali berdasarkan 
final_df <- final_df %>%
  mutate(anomaly = case_when(
    residual > upper_bound ~ "High Energy",
    residual < lower_bound ~ "Low Energy",
    TRUE ~ "Normal"
  ))

Plot Anomali

ggplot(final_df, aes(x = Associated_Client_Count, y = Total_Energy, color = anomaly)) +
  geom_point(alpha = 0.6) +
  geom_line(aes(y = predicted_energy), color = "green", linewidth = 1) +
  scale_color_manual(values = c("Normal" = "gray", "High Energy" = "red", "Low Energy" = "blue")) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Deteksi Anomali Energy vs Occupancy (±2σ Residual)") +
  theme_minimal()

Weekend VS Weekday

Memberi label untuk hari

final_df$day_name <- weekdays(final_df$time)
head(final_df)

Memberi label weekend dan weekday

final_df$is_weekend <- ifelse(final_df$day_name %in% c("Saturday", "Sunday"), "Weekend", "Weekday")
head(final_df)

Plot

daily_avg <- final_df %>%
  group_by(time_of_day, is_weekend) %>%
  summarise(avg_occupancy = mean(Associated_Client_Count, na.rm = TRUE),
            .groups = "drop")


ggplot(daily_avg, aes(x = time_of_day, y = avg_occupancy, color = is_weekend)) +
  geom_line(linewidth = 1.2) +
  scale_x_continuous(
    breaks = seq(0, 24, by = 2),
    labels = function(x) sprintf("%02d:%02d", floor(x), round((x - floor(x))*60))
  ) +
  labs(title = "Profil Harian Occupancy: Weekday vs Weekend",
       x = "Jam", y = "Rata-rata Occupancy", color = "Kategori") +
  theme_minimal()

NA
NA

Plot Time Series Weekend dan Weekday

final_df_hourly <- final_df %>%
  mutate(hour = lubridate::floor_date(time, "hour")) %>%
  group_by(hour, is_weekend, day_name) %>%
  summarise(avg_occupancy = mean (Associated_Client_Count, na.rm = TRUE), .group = "drop")
`summarise()` has grouped output by 'hour', 'is_weekend'. You can override using the `.groups` argument.
ggplot(final_df_hourly, aes(x = hour, y = avg_occupancy,
                             color = is_weekend,
                             group = is_weekend)) +
  geom_line(linewidth = 0.9) +
  labs(title = "Time Series Occupancy per Jam (Weekday vs Weekend)",
       x = "Waktu", y = "Rata-rata Occupancy", color = "Kategori") +
  theme_minimal()

NA
NA

Scatter Plot Weekend dan Weekday

ggplot(final_df, aes(x = Associated_Client_Count, 
                     y = Total_Energy, 
                     color = is_weekend)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "loess", se = FALSE, color = "green", linewidth = 1) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Occupancy vs Total Energy: Weekend vs Weekday") +
  theme_minimal()

NA
NA
---
title: "Data Mining AdventureR"
output: html_notebook
---

## Load Library

```{r}
library(writexl) 
library(dplyr) 
library(lubridate) 
library(readr)
library(tidyverse) 
library(ggplot2)
```
### Data Load wifi

```{r}
wifi <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\wifi.csv")
head(wifi)

lib1 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library1.csv")
head(lib1)

lib2 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library2.csv")
head(lib2)

lib3 <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\library3.csv")
head(lib3)

```

## Cek data Building
```{r}
table(wifi$Building)
```
## Hapus spasi dan ubah ke lowercase 

```{r}
wifi <- wifi[tolower(trimws(wifi$Building)) == "library", ] 
# jumlah baris 
cat("Jumlah baris untuk Library:", nrow(wifi), "\n")
```
### Wifi

```{r}
cat("Wifi missing values:\n")
colSums(is.na(wifi))
```
### Library1

```{r}
cat("Library1 missing values:\n")
colSums(is.na(lib1))
```

### Library2

```{r}
cat("Library2 missing values:\n")
colSums(is.na(lib2))
```

### Library3

```{r}
cat("Library3 missing values:\n")
colSums(is.na(lib3))
```

### Isi kolom missing values
```{r}
dfs <- list(lib1 = lib1, lib2 = lib2, lib3 = lib3)

for (name in names(dfs)) {
  df <- dfs[[name]]
  
  # kolom numerik yang mau diisi
  numeric_cols <- c("reading", "cumulative", "rate")
  
  # hitung rata-rata dari 144 baris pertama
  means <- colMeans(df[1:144, numeric_cols], na.rm = TRUE)
  
  # ganti NA dengan mean
  for (col in numeric_cols) {
    na_idx <- is.na(df[[col]])
    df[na_idx, col] <- means[col]
  }
  
  dfs[[name]] <- df
}
```
### Kembalikan ke variabel asli

```{r}
lib1 <- dfs$lib1
lib2 <- dfs$lib2
lib3 <- dfs$lib3
```

### Cek kembali missing value 

```{r}
# Wifi
cat("Wifi missing values:\n")
colSums(is.na(wifi))

# Library1
cat("Library1 missing values:\n")
colSums(is.na(lib1))

# Library2
cat("Library2 missing values:\n")
colSums(is.na(lib2))

# Library3
cat("Library3 missing values:\n")
colSums(is.na(lib3))
```

### Cek Duplikat

```{r}
cat("\nJumlah duplikat WiFi:", sum(duplicated(wifi)), "\n")
cat("Jumlah duplikat Library1:", sum(duplicated(lib1)), "\n")
cat("Jumlah duplikat Library2:", sum(duplicated(lib2)), "\n")
cat("Jumlah duplikat Library3:", sum(duplicated(lib3)), "\n")

sapply(wifi, class)
sapply(lib1, class)
sapply(lib2, class)
sapply(lib3, class)

wifi <- wifi %>%
  mutate(
    time = ymd_hms(time))          # otomatis parse jadi datetime

lib1 <- lib1 %>%
  mutate(ts = ymd_hms(ts))

lib2 <- lib2 %>%
  mutate(ts = ymd_hms(ts))

lib3 <- lib3 %>%
  mutate(ts = ymd_hms(ts))



sapply(wifi, class)
sapply(lib1, class)
sapply(lib2, class)
sapply(lib3, class)
```


### Ambil kolom ts dan rate masing-masing energi

```{r}
lib1_clean <- lib1 %>%
  select(ts, rate) %>%
  rename(energy1 = rate)

lib2_clean <- lib2 %>%
  select(ts, rate) %>%
  rename(energy2 = rate)

lib3_clean <- lib3 %>%
  select(ts, rate) %>%
  rename(energy3 = rate)
```

###

```{r}
energy_df <- lib1_clean %>%
  inner_join(lib2_clean, by = "ts") %>%
  inner_join(lib3_clean, by = "ts")
```
### Hitung total konsumsi energi

```{r}
energy_df <- energy_df %>%
  mutate(Total_Energy = energy1 + energy2 + energy3) %>%
  arrange(ts)
head(energy_df,10)
```
### Pilih hanya kolom yang dibutuhkan di wifi

```{r}
wifi <- wifi %>%
  select(time, `Associated.Client.Count`) %>%
  rename(Associated_Client_Count = `Associated.Client.Count`)

head(wifi,10)
```

### Resampling per 10 menit (ambil rata-rata jumlah koneksi)

```{r}
wifi_resampled <- wifi %>%
  mutate(time = floor_date(time, "10 minutes")) %>%
  group_by(time) %>%
  summarise(Associated_Client_Count = mean(Associated_Client_Count, na.rm = TRUE)) %>%
  ungroup()

head(wifi_resampled,10)
```

### Gabungkan berdasarkan timestamp

```{r}
final_df <- wifi_resampled %>%
  inner_join(energy_df, by = c("time" = "ts"))

head(final_df,10)
```

### Simpan sebagai CSV
```{r}
write_csv(final_df, "final_wifi_energy.csv")
getwd()

##jika excel
#write_xlsx(final_df, "final_wifi_energy.xlsx")
```
## **Visualisasi**
### *Time Series Plot*
```{r}
final_df <- read.csv("C:\\Users\\Diana Eka Justitia\\Downloads\\final_wifi_energy.csv")
head(final_df) 
```

### Ubah format variabel time
```{r}
final_df$time <- as.POSIXct(final_df$time, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
str(final_df$time)
head(final_df$time)
```
### Plot Time Series
```{r}
ggplot(final_df) +
  geom_line(aes(x = time, y = Associated_Client_Count, color = "Associated_Client_Count")) + 
  geom_line(aes(x = time, y = Total_Energy, color = "Total Energy")) +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%b-%d") +
  xlab("") +
  ylab("Value") +
  ggtitle("Time Series: Associated_Client_Count vs Total Energy - Februari")  +
  scale_color_manual(
    name = "Variable",
    values = c("Associated_Client_Count" = "blue", "Total Energy" = "red")
  )+theme(axis.text.x = element_text(angle = 90, hjust = 1))
```
### Scatter Plot Occupancy vs Total Energy
```{r}
ggplot(final_df, aes(x =Associated_Client_Count , y = Total_Energy)) +
  geom_point(color = "blue", size = 2, alpha = 0.6) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Scatter Plot: Occupancy vs Total Energy") +
  theme_minimal()
```
### Daily Profile Occupancy
#### *tambah variabel time_of_day*
```{r}
final_df <- final_df %>%
  mutate(time_of_day = as.numeric(format(time, "%H")) + as.numeric(format(time, "%M"))/60,
 date = as.factor(as.Date(time)))
```
#### *Hitung rata-rata per 10 menit*
```{r}
daily_avg <- final_df %>%
  group_by(time_of_day) %>%
  summarise(
    avg_occupancy = mean(Associated_Client_Count, na.rm = TRUE),
    avg_energy = mean(Total_Energy, na.rm = TRUE)
  )
```

### Plot Daily Occupancy Profile
```{r}
ggplot(final_df, aes(x = time_of_day, y = Associated_Client_Count)) +
  geom_line(aes(group = date), color = "gray", alpha = 0.3) +  # tiap hari
  geom_line(data = daily_avg, aes(x = time_of_day, y = avg_occupancy), color = "red", size = 1.5) +
  xlab("Hour of Day") +
  ylab("Occupancy") +
  ggtitle("Daily Occupancy Profiles with Average (26 Days)") +
  scale_x_continuous(breaks = seq(0, 24, by = 1)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
```
## **Analisis**
### *Peak*
#### Peak Occupancy
```{r}
peak_occupancy_time <- daily_avg$time_of_day[which.max(daily_avg$avg_occupancy)]
```
#### Peak Total Energy

```{r}
peak_energy_time <- daily_avg$time_of_day[which.max(daily_avg$avg_energy)]
```
### Plot Peak Time
```{r}
ggplot(daily_avg, aes(x = time_of_day)) +
  geom_line(aes(y = avg_occupancy, color = "Occupancy"), linewidth = 1.2) +
  geom_line(aes(y = avg_energy, color = "Total Energy"), linewidth = 1.2) +
  # Garis vertikal peak otomatis
  geom_vline(xintercept = peak_occupancy_time, color = "blue", linetype = "dashed", linewidth = 1) +
  geom_vline(xintercept = peak_energy_time, color = "red", linetype = "dashed", linewidth = 1) +
  xlab("Jam") +
  ylab("Rata-rata") +
  ggtitle("Profil Harian (10 Menit) - Occupancy & Total Energy dengan Peak Otomatis") +
  scale_x_continuous(
    breaks = seq(0, 24, by = 10/60),
    labels = function(x) sprintf("%02d:%02d", floor(x), round((x - floor(x))*60))
  ) +
  scale_color_manual(values = c("Occupancy" = "blue", "Total Energy" = "red")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank()
  )
```
### Regres Polinomial 
```{r}
# orde 2
model_poly2 <- lm(Total_Energy ~ poly(Associated_Client_Count, 2, raw = TRUE), data = final_df)

# ringkasan model
summary(model_poly2)
```
### Anomalies

```{r}
#menghitung residual 
final_df <- final_df %>%
  mutate(predicted_energy = predict(model_poly2, newdata = ),
         residual = Total_Energy - predicted_energy)

# menghitung batas toleransi (+-2σ)
res_sd <- sd(final_df$residual, na.rm = TRUE)
upper_bound <- 2 * res_sd
lower_bound <- -2 * res_sd

# Menandai anomali berdasarkan 
final_df <- final_df %>%
  mutate(anomaly = case_when(
    residual > upper_bound ~ "High Energy",
    residual < lower_bound ~ "Low Energy",
    TRUE ~ "Normal"
  ))
```
### Plot Anomali

```{r}
ggplot(final_df, aes(x = Associated_Client_Count, y = Total_Energy, color = anomaly)) +
  geom_point(alpha = 0.6) +
  geom_line(aes(y = predicted_energy), color = "green", linewidth = 1) +
  scale_color_manual(values = c("Normal" = "gray", "High Energy" = "red", "Low Energy" = "blue")) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Deteksi Anomali Energy vs Occupancy (±2σ Residual)") +
  theme_minimal()
```
## Weekend VS Weekday
### Memberi label untuk hari

```{r}
final_df$day_name <- weekdays(final_df$time)
head(final_df)
```
### Memberi label weekend dan weekday

```{r}
final_df$is_weekend <- ifelse(final_df$day_name %in% c("Saturday", "Sunday"), "Weekend", "Weekday")
head(final_df)
```
### Plot
```{r}
daily_avg <- final_df %>%
  group_by(time_of_day, is_weekend) %>%
  summarise(avg_occupancy = mean(Associated_Client_Count, na.rm = TRUE),
            .groups = "drop")


ggplot(daily_avg, aes(x = time_of_day, y = avg_occupancy, color = is_weekend)) +
  geom_line(linewidth = 1.2) +
  scale_x_continuous(
    breaks = seq(0, 24, by = 2),
    labels = function(x) sprintf("%02d:%02d", floor(x), round((x - floor(x))*60))
  ) +
  labs(title = "Profil Harian Occupancy: Weekday vs Weekend",
       x = "Jam", y = "Rata-rata Occupancy", color = "Kategori") +
  theme_minimal()


```
### Plot Time Series Weekend dan Weekday
```{r}
final_df_hourly <- final_df %>%
  mutate(hour = lubridate::floor_date(time, "hour")) %>%
  group_by(hour, is_weekend, day_name) %>%
  summarise(avg_occupancy = mean (Associated_Client_Count, na.rm = TRUE), .group = "drop")

ggplot(final_df_hourly, aes(x = hour, y = avg_occupancy,
                             color = is_weekend,
                             group = is_weekend)) +
  geom_line(linewidth = 0.9) +
  labs(title = "Time Series Occupancy per Jam (Weekday vs Weekend)",
       x = "Waktu", y = "Rata-rata Occupancy", color = "Kategori") +
  theme_minimal()


```
### Scatter Plot Weekend dan Weekday
```{r}
ggplot(final_df, aes(x = Associated_Client_Count, 
                     y = Total_Energy, 
                     color = is_weekend)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "loess", se = FALSE, color = "green", linewidth = 1) +
  xlab("Occupancy") +
  ylab("Total Energy") +
  ggtitle("Occupancy vs Total Energy: Weekend vs Weekday") +
  theme_minimal()


```