knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(tidyverse)
library(lubridate)
library(zoo)    
library(broom)  
library(scales) 
library(ggplot2)

1. Filter and Synchronize the Data

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

wifi  <- read_csv("C:/Users/Muhammad Nisar/Downloads/wifi.csv")
## Rows: 1048575 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): time, Event Time, Uni, Building, Floor
## dbl (2): Associated Client Count, Authenticated Client Count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(wifi)
## # A tibble: 6 × 7
##   time          `Event Time` Associated Client Co…¹ Authenticated Client…² Uni  
##   <chr>         <chr>                         <dbl>                  <dbl> <chr>
## 1 2/1/2020 0:02 Sat Feb 01 …                    184                    182 Lanc…
## 2 2/1/2020 0:02 Sat Feb 01 …                      6                      6 Lanc…
## 3 2/1/2020 0:02 Sat Feb 01 …                     18                     18 Lanc…
## 4 2/1/2020 0:02 Sat Feb 01 …                     23                     23 Lanc…
## 5 2/1/2020 0:02 Sat Feb 01 …                     45                     45 Lanc…
## 6 2/1/2020 0:02 Sat Feb 01 …                     16                     16 Lanc…
## # ℹ abbreviated names: ¹​`Associated Client Count`,
## #   ²​`Authenticated Client Count`
## # ℹ 2 more variables: Building <chr>, Floor <chr>
str(wifi)
## spc_tbl_ [1,048,575 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ time                      : chr [1:1048575] "2/1/2020 0:02" "2/1/2020 0:02" "2/1/2020 0:02" "2/1/2020 0:02" ...
##  $ Event Time                : chr [1:1048575] "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
##  $ Associated Client Count   : num [1:1048575] 184 6 18 23 45 16 32 6 73 5 ...
##  $ Authenticated Client Count: num [1:1048575] 182 6 18 23 45 16 32 6 73 5 ...
##  $ Uni                       : chr [1:1048575] "Lancaster University" "Lancaster University" "Lancaster University" "Lancaster University" ...
##  $ Building                  : chr [1:1048575] "Graduate College" "Management School" "SW hse 32-33" "SW hse 29" ...
##  $ Floor                     : chr [1:1048575] "A Floor" "C Floor" "D Floor" "B floor" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   time = col_character(),
##   ..   `Event Time` = col_character(),
##   ..   `Associated Client Count` = col_double(),
##   ..   `Authenticated Client Count` = col_double(),
##   ..   Uni = col_character(),
##   ..   Building = col_character(),
##   ..   Floor = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
lib1  <- read_csv("C:/Users/Muhammad Nisar/Downloads/library1.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lib2  <- read_csv("C:/Users/Muhammad Nisar/Downloads/library2.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lib3  <- read_csv("C:/Users/Muhammad Nisar/Downloads/library3.csv")
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): name, units
## dbl  (3): reading, cumulative, rate
## dttm (1): ts
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(lib1); head(lib2); head(lib3)
## # A tibble: 6 × 6
##   ts                  name              reading units cumulative  rate
##   <dttm>              <chr>               <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh      1489442    NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh      1489449     7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh      1489456     7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh      1489464     8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh      1489471     7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh      1489479     8
## # A tibble: 6 × 6
##   ts                  name               reading units cumulative  rate
##   <dttm>              <chr>                <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M11R2056 2129016 KWh      2129016    NA
## 2 2020-01-01 00:10:00 MC065-L01/M11R2056 2129034 KWh      2129034    18
## 3 2020-01-01 00:20:00 MC065-L01/M11R2056 2129054 KWh      2129054    20
## 4 2020-01-01 00:30:00 MC065-L01/M11R2056 2129071 KWh      2129071    17
## 5 2020-01-01 00:40:00 MC065-L01/M11R2056 2129086 KWh      2129086    15
## 6 2020-01-01 00:50:00 MC065-L01/M11R2056 2129103 KWh      2129103    17
## # A tibble: 6 × 6
##   ts                  name               reading units cumulative  rate
##   <dttm>              <chr>                <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M13R2064 6914209 KWh      6914209    NA
## 2 2020-01-01 00:10:00 MC065-L01/M13R2064 6914266 KWh      6914266    57
## 3 2020-01-01 00:20:00 MC065-L01/M13R2064 6914310 KWh      6914310    44
## 4 2020-01-01 00:30:00 MC065-L01/M13R2064 6914376 KWh      6914376    66
## 5 2020-01-01 00:40:00 MC065-L01/M13R2064 6914439 KWh      6914439    63
## 6 2020-01-01 00:50:00 MC065-L01/M13R2064 6914474 KWh      6914474    35
str(lib1); str(lib2); str(lib3)
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ts        : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
##  $ name      : chr [1:18864] "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" ...
##  $ reading   : num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
##  $ units     : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
##  $ cumulative: num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
##  $ rate      : num [1:18864] NA 7 7 8 7 8 7 8 7 8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ts = col_datetime(format = ""),
##   ..   name = col_character(),
##   ..   reading = col_double(),
##   ..   units = col_character(),
##   ..   cumulative = col_double(),
##   ..   rate = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ts        : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
##  $ name      : chr [1:18864] "MC065-L01/M11R2056" "MC065-L01/M11R2056" "MC065-L01/M11R2056" "MC065-L01/M11R2056" ...
##  $ reading   : num [1:18864] 2129016 2129034 2129054 2129071 2129086 ...
##  $ units     : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
##  $ cumulative: num [1:18864] 2129016 2129034 2129054 2129071 2129086 ...
##  $ rate      : num [1:18864] NA 18 20 17 15 17 17 17 17 17 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ts = col_datetime(format = ""),
##   ..   name = col_character(),
##   ..   reading = col_double(),
##   ..   units = col_character(),
##   ..   cumulative = col_double(),
##   ..   rate = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ts        : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
##  $ name      : chr [1:18864] "MC065-L01/M13R2064" "MC065-L01/M13R2064" "MC065-L01/M13R2064" "MC065-L01/M13R2064" ...
##  $ reading   : num [1:18864] 6914209 6914266 6914310 6914376 6914439 ...
##  $ units     : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
##  $ cumulative: num [1:18864] 6914209 6914266 6914310 6914376 6914439 ...
##  $ rate      : num [1:18864] NA 57 44 66 63 35 47 55 34 41 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ts = col_datetime(format = ""),
##   ..   name = col_character(),
##   ..   reading = col_double(),
##   ..   units = col_character(),
##   ..   cumulative = col_double(),
##   ..   rate = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
#1a. Filter: gunakan hanya data WiFi dari Library building
wifi_lib <- wifi %>%
  filter(Building == "Library")

#1b. Standardisasi waktu ke interval 10 menit
wifi_lib <- wifi_lib %>%
  mutate(
    time = mdy_hm(time),  # ubah dari character ke POSIXct
    time = floor_date(time, unit = "10 minutes")  # bulatkan ke 10 menit
    )
str(wifi_lib$time)
##  POSIXct[1:15948], format: "2020-02-01 00:00:00" "2020-02-01 00:00:00" "2020-02-01 00:00:00" ...
head(wifi_lib$time, 10)
##  [1] "2020-02-01 00:00:00 UTC" "2020-02-01 00:00:00 UTC"
##  [3] "2020-02-01 00:00:00 UTC" "2020-02-01 00:00:00 UTC"
##  [5] "2020-02-01 00:00:00 UTC" "2020-02-01 00:00:00 UTC"
##  [7] "2020-02-01 00:00:00 UTC" "2020-02-01 00:00:00 UTC"
##  [9] "2020-02-01 00:10:00 UTC" "2020-02-01 00:10:00 UTC"
# 1c. Resample WiFi data: rata-rata jumlah client tiap 10 menit
colnames(wifi_lib)
## [1] "time"                       "Event Time"                
## [3] "Associated Client Count"    "Authenticated Client Count"
## [5] "Uni"                        "Building"                  
## [7] "Floor"
wifi_10min <- wifi_lib %>%
  group_by(time) %>%
  summarise(occupancy = mean(`Associated Client Count`, na.rm = TRUE)) %>%
  arrange(time)
head(wifi_10min, 10)
## # A tibble: 10 × 2
##    time                occupancy
##    <dttm>                  <dbl>
##  1 2020-02-01 00:00:00     22.8 
##  2 2020-02-01 00:10:00     20.6 
##  3 2020-02-01 00:20:00     17.2 
##  4 2020-02-01 00:30:00     15.4 
##  5 2020-02-01 00:40:00     12.4 
##  6 2020-02-01 00:50:00     10.5 
##  7 2020-02-01 01:00:00      9.25
##  8 2020-02-01 01:10:00      8.67
##  9 2020-02-01 01:20:00      8.5 
## 10 2020-02-01 01:30:00      8.38
#1d. Gabungkan library meters → total electricity per 10 menit
# memastikan timestamp di tiap dataset (lib1, lib2, lib3) diparse dulu
colnames(lib1)
## [1] "ts"         "name"       "reading"    "units"      "cumulative"
## [6] "rate"
head(lib1, 5)
## # A tibble: 5 × 6
##   ts                  name              reading units cumulative  rate
##   <dttm>              <chr>               <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh      1489442    NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh      1489449     7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh      1489456     7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh      1489464     8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh      1489471     7
lib1 <- lib1 %>%
  mutate(time = floor_date(ts, "10 minutes"))
lib2 <- lib2 %>%
  mutate(time = floor_date(ts, "10 minutes"))
lib3 <- lib3 %>%
  mutate(time = floor_date(ts, "10 minutes"))

#Gabung total Energy
energy_10min <- lib1 %>%
  select(time, rate1 = rate) %>%
  full_join(lib2 %>% select(time, rate2 = rate), by = "time") %>%
  full_join(lib3 %>% select(time, rate3 = rate), by = "time") %>%
  mutate(total_energy = rowSums(across(starts_with("rate")), na.rm = TRUE)) %>%
  arrange(time)

head(energy_10min, 10)
## # A tibble: 10 × 5
##    time                rate1 rate2 rate3 total_energy
##    <dttm>              <dbl> <dbl> <dbl>        <dbl>
##  1 2020-01-01 00:00:00    NA    NA    NA            0
##  2 2020-01-01 00:10:00     7    18    57           82
##  3 2020-01-01 00:20:00     7    20    44           71
##  4 2020-01-01 00:30:00     8    17    66           91
##  5 2020-01-01 00:40:00     7    15    63           85
##  6 2020-01-01 00:50:00     8    17    35           60
##  7 2020-01-01 01:00:00     7    17    47           71
##  8 2020-01-01 01:10:00     8    17    55           80
##  9 2020-01-01 01:20:00     7    17    34           58
## 10 2020-01-01 01:30:00     8    17    41           66

##2. Data Cleaning

#2a. Isi Missing Values rata-rata 144 observasi pertama
fill_mean <- function(x) {
  avg <- mean(head(x, 144), na.rm = TRUE)
  zoo::na.fill(x, fill = avg)
}

energy_10min <- energy_10min %>%
  mutate(across(starts_with("rate"), fill_mean),
         total_energy = rowSums(across(starts_with("rate")), na.rm = TRUE))
#2b. Standarisasi
wifi_10min <- wifi_10min %>%
  distinct(time, .keep_all = TRUE)

energy_10min <- energy_10min %>%
  distinct(time, .keep_all = TRUE)

#2c. Dokumentasi Cleaning
cat("Cleaning Steps:\n")
## Cleaning Steps:
cat("1. Missing energy readings diisi dengan mean dari 144 observasi pertama.\n")
## 1. Missing energy readings diisi dengan mean dari 144 observasi pertama.
cat("2. Timestamp dibulatkan ke 10 menit (wifi & energy).\n")
## 2. Timestamp dibulatkan ke 10 menit (wifi & energy).
cat("3. Duplicate records dihapus.\n")
## 3. Duplicate records dihapus.
data_final <- wifi_10min %>%
  inner_join(energy_10min %>% select(time, total_energy), by = "time")

head(data_final, 10)
## # A tibble: 10 × 3
##    time                occupancy total_energy
##    <dttm>                  <dbl>        <dbl>
##  1 2020-02-01 00:00:00     22.8          64.8
##  2 2020-02-01 00:10:00     20.6          97  
##  3 2020-02-01 00:20:00     17.2          96  
##  4 2020-02-01 00:30:00     15.4         100  
##  5 2020-02-01 00:40:00     12.4          89  
##  6 2020-02-01 00:50:00     10.5          91  
##  7 2020-02-01 01:00:00      9.25         90  
##  8 2020-02-01 01:10:00      8.67         92  
##  9 2020-02-01 01:20:00      8.5          87  
## 10 2020-02-01 01:30:00      8.38         83

##3. Visualisasi

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_smooth()` using formula = 'y ~ x'

## 
##  Pearson's product-moment correlation
## 
## data:  data_final$occupancy and data_final$total_energy
## t = 79.016, df = 2053, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8563791 0.8778089
## sample estimates:
##       cor 
## 0.8674959

##4. Analisis

#4a. Identifikasi peak hours of occupancy
# Peak hours occupancy
peak_hours <- data_final %>%
  mutate(hour = hour(time)) %>%
  group_by(hour) %>%
  summarise(avg_occ = mean(occupancy, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_occ))
peak_hours %>% head(5)  # top (5) jam tertinggi
## # A tibble: 5 × 2
##    hour avg_occ
##   <int>   <dbl>
## 1    15    362.
## 2    14    355.
## 3    16    334.
## 4    13    330.
## 5    12    298.
ggplot(peak_hours, aes(x = hour, y = avg_occ)) +
  geom_col(fill = "violet") +
  labs(title = "Average Occupancy by Hour",
       x = "Hour of Day", y = "Average Occupancy") +
  theme_minimal()

#4b. Examine whether occupancy significantly influences energy consumption
# Scatter occupancy vs energy + regresi
ggplot(data_final, aes(x = occupancy, y = total_energy)) +
  geom_point(alpha = 0.3, color = "green") +
  geom_smooth(method = "lm", se = TRUE, color = "darkgreen") +
  labs(title = "Relationship between Occupancy and Energy Consumption",
       x = "Occupancy", y = "Total Energy") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

#Uji Signifikan
model <- lm(total_energy ~ occupancy, data = data_final)
summary(model)
## 
## Call:
## lm(formula = total_energy ~ occupancy, data = data_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.455 -15.955   1.376  16.868  50.975 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.039e+02  6.677e-01  155.60   <2e-16 ***
## occupancy   2.626e-01  3.324e-03   79.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.36 on 2053 degrees of freedom
## Multiple R-squared:  0.7525, Adjusted R-squared:  0.7524 
## F-statistic:  6244 on 1 and 2053 DF,  p-value: < 2.2e-16
#4c. Highlight cases where energy consumption does not align with occupancy
colnames(data_final)
## [1] "time"         "occupancy"    "total_energy"
# Deteksi anomali: energi tinggi, occupancy rendah
threshold_occ <- quantile(data_final$occupancy, 0.25, na.rm = TRUE)   # 25% occupancy terendah
threshold_energy <- quantile(data_final$total_energy, 0.75, na.rm = TRUE) # 25% energi tertinggi

anomalies <- data_final %>%
  filter(occupancy < threshold_occ, total_energy > threshold_energy)
head(anomalies, 10)
## # A tibble: 0 × 3
## # ℹ 3 variables: time <dttm>, occupancy <dbl>, total_energy <dbl>
ggplot(data_final, aes(x = occupancy, y = total_energy)) +
  geom_point(alpha = 0.3, color = "gray") +
  geom_point(data = anomalies, aes(x = occupancy, y = total_energy),
             color = "darkred", size = 2) +
  labs(title = "Cases where Energy vs Occupancy (Anomalies Highlighted)",
       x = "Occupancy", y = "Total Energy") +
  theme_minimal()

#Lebih lanjut
# Hitung threshold
threshold_occ <- quantile(data_final$occupancy, 0.25, na.rm = TRUE)
threshold_energy <- quantile(data_final$total_energy, 0.75, na.rm = TRUE)

# Tandai anomali
data_final <- data_final %>%
  mutate(anomaly = ifelse(occupancy < threshold_occ & total_energy > threshold_energy, "Anomaly", "Normal"))

# Plot dengan highlight
ggplot(data_final, aes(x = occupancy, y = total_energy, color = anomaly)) +
  geom_point(alpha = 0.6) +
  scale_color_manual(values = c("Normal" = "chocolate", "Anomaly" = "red")) +
  labs(title = "Cases where Energy Vs Occupancy (Anomalies Highlighted)",
       x = "Occupancy", y = "Total Energy") +
  theme_minimal()

##5. Weekday vs Weekend

#5a. Separate weekdays vs weekends
library(dplyr)
library(lubridate)

data_final <- data_final %>%
  mutate(day_type = if_else(wday(time, week_start = 1) <= 5, "Weekday", "Weekend"))

#5b. Visualisasi Ulanh
library(ggplot2)

# Scatter plot dibandingkan
ggplot(data_final, aes(x = occupancy, y = total_energy, color = day_type)) +
  geom_point(alpha = 0.3) +
  scale_color_manual(values = c("Weekday" = "orange", "Weekend" = "darkblue")) +
  labs(title = "Occupancy vs Energy: Weekday vs Weekend",
       x = "Occupancy", y = "Total Energy", color = "Day Type") +
  theme_minimal()

# Daily profiles (average overlay)
avg_profile_daytype <- data_final %>%
  mutate(tod = hour(time) + minute(time)/60) %>%
  group_by(day_type, tod) %>%
  summarise(avg_occ = mean(occupancy, na.rm = TRUE),
            avg_energy = mean(total_energy, na.rm = TRUE), .groups = "drop")

ggplot(avg_profile_daytype, aes(x = tod)) +
  geom_line(aes(y = avg_occ, color = day_type), size = 1) +
  labs(title = "Average Daily Profile: Occupancy (Weekday vs Weekend)",
       x = "Hour of Day", y = "Occupancy", color = "Day Type") +
  theme_minimal()

ggplot(avg_profile_daytype, aes(x = tod)) +
  geom_line(aes(y = avg_energy, color = day_type), size = 1) +
  labs(title = "Average Daily Profile: Energy (Weekday vs Weekend)",
       x = "Hour of Day", y = "Total Energy", color = "Day Type") +
  theme_minimal()

# Gabungan Occupancy & Energy (Weekday vs Weekend)
# Pastikan ada kolom time of day & day_type
library(lubridate)
data_final <- data_final %>%
  mutate(
    day_type = if_else(wday(time, week_start = 1) <= 5, "Weekday", "Weekend"),
    tod = hour(time) + minute(time) / 60
  )
avg_profile_bytype <- data_final %>%
  group_by(day_type, tod) %>%
  summarise(
    avg_occ = mean(occupancy, na.rm = TRUE),
    avg_energy = mean(total_energy, na.rm = TRUE),
    .groups = "drop"
  )
ggplot(avg_profile_bytype, aes(x = tod)) +
  geom_line(aes(y = avg_occ, color = paste(day_type, "Occupancy")), size = 1.2) +
  geom_line(aes(y = avg_energy, color = paste(day_type, "Energy")), size = 1.2) +
  scale_color_manual(
    values = c(
      "Weekday Occupancy" = "blue",
      "Weekday Energy" = "darkorange",
      "Weekend Occupancy" = "darkblue",
      "Weekend Energy" = "darkred"
    )
  ) +
  labs(
    title = "Average Daily Profiles: Occupancy vs Energy (Weekday vs Weekend)",
    x = "Hour of Day", y = "Value", color = "Legend"
  ) +
  theme_minimal()

# Ringkasan
summary_stats <- data_final %>%
  group_by(day_type) %>%
  summarise(
    avg_occ = mean(occupancy, na.rm = TRUE),
    avg_energy = mean(total_energy, na.rm = TRUE),
    median_occ = median(occupancy, na.rm = TRUE),
    median_energy = median(total_energy, na.rm = TRUE),
    .groups = "drop"
  )

print(summary_stats)
## # A tibble: 2 × 5
##   day_type avg_occ avg_energy median_occ median_energy
##   <chr>      <dbl>      <dbl>      <dbl>         <dbl>
## 1 Weekday    169.        146.      118.           158.
## 2 Weekend     80.0       130.       47.1          129