–>

1. Import Library

library(tidyverse)
library(lubridate)
library(zoo)    
library(broom)  
library(scales)

2. Loading data

# Wifi
wifi <- read_csv("C:/Users/ASUS/Downloads/wifi.csv")
# Preview 
head(wifi)

## # A tibble: 6 × 7
##   time                `Event Time` Associated Client Co…¹ Authenticated Client…²
##   <dttm>              <chr>                         <dbl>                  <dbl>
## 1 2020-02-01 00:02:12 Sat Feb 01 …                    184                    182
## 2 2020-02-01 00:02:12 Sat Feb 01 …                      6                      6
## 3 2020-02-01 00:02:12 Sat Feb 01 …                     18                     18
## 4 2020-02-01 00:02:12 Sat Feb 01 …                     23                     23
## 5 2020-02-01 00:02:12 Sat Feb 01 …                     45                     45
## 6 2020-02-01 00:02:12 Sat Feb 01 …                     16                     16
## # ℹ abbreviated names: ¹`Associated Client Count`,
## #   ²`Authenticated Client Count`
## # ℹ 3 more variables: Uni <chr>, Building <chr>, Floor <chr>

str(wifi)

## spc_tbl_ [1,883,844 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ time                      : POSIXct[1:1883844], format: "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
##  $ Event Time                : chr [1:1883844] "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
##  $ Associated Client Count   : num [1:1883844] 184 6 18 23 45 16 32 6 73 5 ...
##  $ Authenticated Client Count: num [1:1883844] 182 6 18 23 45 16 32 6 73 5 ...
##  $ Uni                       : chr [1:1883844] "Lancaster University" "Lancaster University" "Lancaster University" "Lancaster University" ...
##  $ Building                  : chr [1:1883844] "Graduate College" "Management School" "SW hse 32-33" "SW hse 29" ...
##  $ Floor                     : chr [1:1883844] "A Floor" "C Floor" "D Floor" "B floor" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   time = col_datetime(format = ""),
##   ..   `Event Time` = col_character(),
##   ..   `Associated Client Count` = col_double(),
##   ..   `Authenticated Client Count` = col_double(),
##   ..   Uni = col_character(),
##   ..   Building = col_character(),
##   ..   Floor = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Library energy datasets
lib1 <- read_csv("C:/Users/ASUS/Downloads/library1.csv")
lib2 <- read_csv("C:/Users/ASUS/Downloads/library2.csv")
lib3 <- read_csv("C:/Users/Asus/Downloads/library3.csv")

head(lib1); head(lib2); head(lib3)

## # A tibble: 6 × 6
##   ts                  name              reading units cumulative  rate
##   <dttm>              <chr>               <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh      1489442    NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh      1489449     7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh      1489456     7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh      1489464     8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh      1489471     7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh      1489479     8

## # A tibble: 6 × 6
##   ts                  name               reading units cumulative  rate
##   <dttm>              <chr>                <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M11R2056 2129016 KWh      2129016    NA
## 2 2020-01-01 00:10:00 MC065-L01/M11R2056 2129034 KWh      2129034    18
## 3 2020-01-01 00:20:00 MC065-L01/M11R2056 2129054 KWh      2129054    20
## 4 2020-01-01 00:30:00 MC065-L01/M11R2056 2129071 KWh      2129071    17
## 5 2020-01-01 00:40:00 MC065-L01/M11R2056 2129086 KWh      2129086    15
## 6 2020-01-01 00:50:00 MC065-L01/M11R2056 2129103 KWh      2129103    17

## # A tibble: 6 × 6
##   ts                  name               reading units cumulative  rate
##   <dttm>              <chr>                <dbl> <chr>      <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M13R2064 6914209 KWh      6914209    NA
## 2 2020-01-01 00:10:00 MC065-L01/M13R2064 6914266 KWh      6914266    57
## 3 2020-01-01 00:20:00 MC065-L01/M13R2064 6914310 KWh      6914310    44
## 4 2020-01-01 00:30:00 MC065-L01/M13R2064 6914376 KWh      6914376    66
## 5 2020-01-01 00:40:00 MC065-L01/M13R2064 6914439 KWh      6914439    63
## 6 2020-01-01 00:50:00 MC065-L01/M13R2064 6914474 KWh      6914474    35

str(lib1)

## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ts        : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
##  $ name      : chr [1:18864] "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" ...
##  $ reading   : num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
##  $ units     : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
##  $ cumulative: num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
##  $ rate      : num [1:18864] NA 7 7 8 7 8 7 8 7 8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ts = col_datetime(format = ""),
##   ..   name = col_character(),
##   ..   reading = col_double(),
##   ..   units = col_character(),
##   ..   cumulative = col_double(),
##   ..   rate = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

3. Data Cleaning and Integration

3.1 Wifi Dataset

# Convert 'time' to datetime
wifi <- wifi %>% mutate(time = ymd_hms(time))
str(wifi)

## tibble [1,883,844 × 7] (S3: tbl_df/tbl/data.frame)
##  $ time                      : POSIXct[1:1883844], format: "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
##  $ Event Time                : chr [1:1883844] "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
##  $ Associated Client Count   : num [1:1883844] 184 6 18 23 45 16 32 6 73 5 ...
##  $ Authenticated Client Count: num [1:1883844] 182 6 18 23 45 16 32 6 73 5 ...
##  $ Uni                       : chr [1:1883844] "Lancaster University" "Lancaster University" "Lancaster University" "Lancaster University" ...
##  $ Building                  : chr [1:1883844] "Graduate College" "Management School" "SW hse 32-33" "SW hse 29" ...
##  $ Floor                     : chr [1:1883844] "A Floor" "C Floor" "D Floor" "B floor" ...

# Lihat unique Building
unique(wifi$Building)

##  [1] "Graduate College"                 "Management School"               
##  [3] "SW hse 32-33"                     "SW hse 29"                       
##  [5] "Furness outer"                    "Slaidburn House (LUSU)"          
##  [7] "SW hse 34"                        "Bowland hall"                    
##  [9] "Fylde"                            "SW hse 55-56"                    
## [11] "SW hse 40-42"                     "Bowland Twr (Old Bowland Annexe)"
## [13] "Pendle"                           "Faraday and cTAP"                
## [15] "Institute for Advanced Studies"   "University House"                
## [17] "SW hse 53-54"                     "Engineering"                     
## [19] "Field Station"                    "SW hse 36"                       
## [21] "Infolab"                          "SW hse 21-23"                    
## [23] "LICA"                             "FU Hse 71-74"                    
## [25] "Grizedale"                        "Charles Carter"                  
## [27] "Furness"                          "SW hse 43-45"                    
## [29] "SW hse 12-16"                     "Bowland Main"                    
## [31] "SW hse 35"                        "Human Resources"                 
## [33] "County"                           "FY Hse 65-70"                    
## [35] "Barker House Farm"                "SW hse 27-28"                    
## [37] "Bowland Annexe"                   "John Creed"                      
## [39] "grize-res"                        "SW hse 24-26"                    
## [41] "SW hse 20"                        "Ruskin Library"                  
## [43] "County South/Cartmel"             "Library"                         
## [45] "Conference Centre"                "Postgrad Stats (PSC)"            
## [47] "Bowland North"                    "George Fox"                      
## [49] "Hse 75 77"                        "Bailrigg House"                  
## [51] "Sports Centre"                    "SW hse 30-31"                    
## [53] "Bowland Ash"                      "Alex Square"                     
## [55] "LEC"                              "MDC"                             
## [57] "ISS Building"                     "Chaplaincy Centre"               
## [59] "CETAD"                            "SW hse 50-52"                    
## [61] "SW hse 17-19"                     "Central Workshops"               
## [63] "SW hse 46-49"                     "SW hse 39"                       
## [65] "Science and technology"           "Whewell"                         
## [67] "SW hse 37-38"                     "Lonsdale College (SW)"           
## [69] "Physics"                          "County Field"                    
## [71] "Great Hall"                       "SW hse-158-179"                  
## [73] "Preschool"                        "Reception"                       
## [75] "Hazelrigg"                        "Energy Centre"

# Filter hanya rekaman Library
wifi <- wifi %>% mutate(Building = str_trim(Building))
wifi1 <- wifi %>% filter(Building == "Library") %>%
  select(time, `Associated Client Count`)

# Resample / aggregate ke interval 10 menit (rata-rata Associated Client Count)
wifi1 <- wifi1 %>%
  mutate(time = floor_date(time, unit = "10 minutes")) %>%
  group_by(time) %>%
  summarise(Mean_Client = mean(`Associated Client Count`, na.rm = TRUE)) %>%
  ungroup()

# Cek missing dan duplicate
wifi1 %>% summarise_all(~ sum(is.na(.)))

## # A tibble: 1 × 2
##    time Mean_Client
##   <int>       <int>
## 1     0           0

sum(duplicated(wifi1))

## [1] 0

head(wifi1)

## # A tibble: 6 × 2
##   time                Mean_Client
##   <dttm>                    <dbl>
## 1 2020-02-01 00:00:00        22.8
## 2 2020-02-01 00:10:00        20.6
## 3 2020-02-01 00:20:00        17.2
## 4 2020-02-01 00:30:00        15.4
## 5 2020-02-01 00:40:00        12.4
## 6 2020-02-01 00:50:00        10.5

3.2 Library Energy Dataset

# Convert 'ts' ke datetime
lib1 <- lib1 %>% mutate(ts = ymd_hms(ts))
lib2 <- lib2 %>% mutate(ts = ymd_hms(ts))
lib3 <- lib3 %>% mutate(ts = ymd_hms(ts))

# Cek persentase missing
for(i in list(lib1, lib2, lib3)){
  df <- i
  print(100 * sum(is.na(df$rate)) / nrow(df))
}

## [1] 16.15246
## [1] 16.15246
## [1] 16.15246

# Hitung mean dari 144 pengamatan pertama per meter dan isi NA
mean_lib1 <- mean(lib1$rate[1:144], na.rm = TRUE)
mean_lib2 <- mean(lib2$rate[1:144], na.rm = TRUE)
mean_lib3 <- mean(lib3$rate[1:144], na.rm = TRUE)

lib1 <- lib1 %>% mutate(rate = if_else(is.na(rate), mean_lib1, rate))
lib2 <- lib2 %>% mutate(rate = if_else(is.na(rate), mean_lib2, rate))
lib3 <- lib3 %>% mutate(rate = if_else(is.na(rate), mean_lib3, rate))

# Cek lagi missing
for(i in list(lib1, lib2, lib3)){
  df <- i
  print(100 * sum(is.na(df$rate)) / nrow(df))
}

## [1] 0
## [1] 0
## [1] 0

# Cek duplicate
cat("Duplicates lib1:", sum(duplicated(lib1)), "\n")

## Duplicates lib1: 21

cat("Duplicates lib2:", sum(duplicated(lib2)), "\n")

## Duplicates lib2: 21

cat("Duplicates lib3:", sum(duplicated(lib3)), "\n")

## Duplicates lib3: 21

3.3 Merge tiga dataset listrik

# Pilih ts & rate lalu gabung pada ts
elec12 <- lib1 %>% select(ts, rate) %>%
  rename(rate_1 = rate) %>%
  inner_join(lib2 %>% select(ts, rate) %>% rename(rate_2 = rate), by = "ts")

electricity <- elec12 %>%
  inner_join(lib3 %>% select(ts, rate) %>% rename(rate_3 = rate), by = "ts") %>%
  mutate(total_electricity = rate_1 + rate_2 + rate_3) %>%
  rename(time = ts)

# Cek missing & duplicates
sum(is.na(electricity))

## [1] 2248091

sum(duplicated(electricity))

## [1] 2225430

head(electricity)

## # A tibble: 6 × 5
##   time   rate_1 rate_2 rate_3 total_electricity
##   <dttm>  <dbl>  <dbl>  <dbl>             <dbl>
## 1 NA       7.38   17.9   39.6              64.8
## 2 NA       7.38   17.9   39                64.3
## 3 NA       7.38   17.9   35                60.3
## 4 NA       7.38   17.9   70                95.3
## 5 NA       7.38   17.9   40                65.3
## 6 NA       7.38   17.9   38                63.3

3.4 Standardisasi interval 10 menit untuk listrik (jika perlu)

electricity <- electricity %>%
  mutate(time = floor_date(time, unit = "10 minutes")) %>%
  group_by(time) %>%
  summarise(rate_1 = mean(rate_1, na.rm = TRUE),
            rate_2 = mean(rate_2, na.rm = TRUE),
            rate_3 = mean(rate_3, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(total_electricity = rate_1 + rate_2 + rate_3)

head(electricity)

## # A tibble: 6 × 5
##   time                rate_1 rate_2 rate_3 total_electricity
##   <dttm>               <dbl>  <dbl>  <dbl>             <dbl>
## 1 2020-01-01 00:10:00      7     18     57                82
## 2 2020-01-01 00:20:00      7     20     44                71
## 3 2020-01-01 00:30:00      8     17     66                91
## 4 2020-01-01 00:40:00      7     15     63                85
## 5 2020-01-01 00:50:00      8     17     35                60
## 6 2020-01-01 01:00:00      7     17     47                71

3.5 Gabungkan Wifi dan Electricity

# Inner join pada waktu yang sama
data <- inner_join(wifi1, electricity, by = "time")

# Pilih kolom yang relevan
df <- data %>% select(time, Mean_Client, total_electricity)

head(df)

## # A tibble: 6 × 3
##   time                Mean_Client total_electricity
##   <dttm>                    <dbl>             <dbl>
## 1 2020-02-01 00:10:00       20.6                 97
## 2 2020-02-01 00:20:00       17.2                 96
## 3 2020-02-01 00:30:00       15.4                100
## 4 2020-02-01 00:40:00       12.4                 89
## 5 2020-02-01 00:50:00       10.5                 91
## 6 2020-02-01 01:00:00        9.25                90

4. Visualisasi

4.1 Time Series Plot of Occupancy and Electricity Consumption

library(ggplot2)

ggplot(df) +
  geom_line(aes(x = time, y = Mean_Client, colour = "Occupancy (WiFi)")) +
  geom_line(aes(x = time, y = total_electricity, colour = "Electricity")) +
  labs(title = "Library Occupancy vs Electricity Consumption (Time Series)",
       x = "Time", y = "Counts / Electricity Rate") +
  scale_colour_manual(name = "Legend", values = c("Occupancy (WiFi)" = "blue", "Electricity" = "red")) +
  theme_minimal()

4.2 Scatter Plot: Occupancy (X) vs Electricity (Y)

ggplot(df, aes(x = Mean_Client, y = total_electricity)) +
  geom_point(alpha = 0.5) +
  labs(title = "Scatter Plot: Occupancy vs Electricity Consumption",
       x = "Library Occupancy (Mean Client Count)",
       y = "Electricity Consumption (Total Rate)") +
  theme_minimal()

## 4.3 Average Daily Profile (24h)

library(tidyverse)
library(lubridate)

df <- df %>%
  mutate(hour = hour(time))

daily_profile <- df %>%
  group_by(hour) %>%
  summarise(
    Mean_Client      = mean(Mean_Client, na.rm = TRUE),
    total_electricity = mean(total_electricity, na.rm = TRUE),
    .groups = "drop"
  )

p <- ggplot(daily_profile, aes(x = hour)) +
  geom_line(aes(y = Mean_Client, colour = "Avg Occupancy"), linewidth = 1) +
  geom_line(aes(y = total_electricity, colour = "Avg Electricity"), linewidth = 1) +
  scale_x_continuous(
    breaks = 0:23,
    labels = sprintf("%02d:00", 0:23)
  ) +
  labs(
    title = "Average Daily Profile: Occupancy vs Electricity",
    x = "Hour of Day",
    y = "Average Value"
  ) +
  scale_colour_manual(
    name   = "",
    values = c("Avg Occupancy" = "blue", "Avg Electricity" = "red")
  ) +
  theme_minimal(base_size = 13)

print(p)

5. Analisis

5.1 Peak hours of occupancy

library(dplyr)

peak_hours <- df %>%
  mutate(hour = hour(time)) %>%               # ambil angka jam
  group_by(hour) %>%
  summarise(avg_client = mean(Mean_Client, na.rm = TRUE),
            .groups = "drop") %>%
  arrange(desc(avg_client)) %>%
  mutate(hour_label = sprintf("%02d:00", hour))   # ubah jadi "15:00"

cat("Peak hours of occupancy (top 5):\n")

## Peak hours of occupancy (top 5):

print(head(peak_hours[, c("hour_label", "avg_client")], 5))

## # A tibble: 5 × 2
##   hour_label avg_client
##   <chr>           <dbl>
## 1 15:00            370.
## 2 14:00            357.
## 3 16:00            346.
## 4 13:00            331.
## 5 12:00            300.

# Peak occupancy pada timestamp spesifik
peak_timestamp <- df %>%
  filter(Mean_Client == max(Mean_Client, na.rm = TRUE)) %>%
  pull(time)

cat("\nPeak occupancy at specific timestamp:\n")

## 
## Peak occupancy at specific timestamp:

print(peak_timestamp)

## [1] "2020-02-25 15:20:00 UTC"

5.2 Korelasi & Regresi Linear

# Siapkan data untuk regresi
df_reg <- df %>% drop_na(Mean_Client, total_electricity)

model <- lm(total_electricity ~ Mean_Client, data = df_reg)
summary(model)

## 
## Call:
## lm(formula = total_electricity ~ Mean_Client, data = df_reg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57.17 -15.63   1.72  16.24  51.08 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.036e+02  4.965e-01   208.6   <2e-16 ***
## Mean_Client 2.673e-01  2.399e-03   111.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.89 on 3655 degrees of freedom
## Multiple R-squared:  0.7726, Adjusted R-squared:  0.7725 
## F-statistic: 1.241e+04 on 1 and 3655 DF,  p-value: < 2.2e-16

# fitted & residual
df_reg <- df_reg %>%
  mutate(fitted = fitted(model), residual = total_electricity - fitted)

threshold <- 2 * sd(df_reg$residual, na.rm = TRUE)
df_reg <- df_reg %>% mutate(unusual = abs(residual) > threshold)

# Plot observed vs fitted time series
p1 <- ggplot(df_reg) +
  geom_line(aes(x = time, y = total_electricity, colour = "Observed"), alpha = 0.7) +
  geom_line(aes(x = time, y = fitted, colour = "Fitted (Regression)"), linetype = "dashed") +
  geom_line(aes(x = time, y = Mean_Client, colour = "Occupancy (WiFi)"), alpha = 0.7) +
  scale_colour_manual(values = c("Observed" = "red", "Fitted (Regression)" = "brown", "Occupancy (WiFi)" = "blue")) +
  labs(title = "Regresi Linear: Konsumsi Listrik ~ Okupansi", x = "Waktu", y = "Nilai") +
  theme_minimal()

print(p1)

# Scatter plot highlight 'unusual'
p2 <- ggplot(df_reg, aes(x = Mean_Client,
                         y = total_electricity,
                         colour = unusual)) +
  geom_point(alpha = 0.6) +
  scale_colour_manual(
    name   = "Status",
    values = c("TRUE" = "red", "FALSE" = "blue"),
    labels = c("TRUE" = "Tidak Sejalan", "FALSE" = "Sejalan")
  ) +
  labs(
    title = "Okupansi vs Konsumsi Listrik",
    subtitle = "Merah = Kasus Tidak Sejalan",
    x = "Mean Client (Okupansi)",
    y = "Total Electricity"
  ) +
  theme_minimal()

print(p2)

— 6. Weekend vs Weekday Comparison —

df <- df %>% 
  mutate(dayofweek = wday(time) - 1,   # 0 = Senin .. 6 = Minggu
         day_type = if_else(dayofweek < 5, "Weekday", "Weekend"))

# Time series comparison (overlay)
p_ts <- ggplot() +
  geom_line(data = df, aes(x = time, y = total_electricity, colour = day_type), alpha = 0.6) +
  geom_line(data = df, aes(x = time, y = Mean_Client, colour = day_type), alpha = 0.6, linetype = "dashed") +
  labs(title = "Perbandingan Time Series: Weekday vs Weekend",
       x = "Waktu", y = "Nilai") +
  theme_minimal()
print(p_ts)

# Scatter plot per kategori
p_sc <- ggplot(df, aes(x = Mean_Client, y = total_electricity, colour = day_type)) +
  geom_point(alpha = 0.5) +
  labs(title = "Scatter Plot Okupansi vs Konsumsi Listrik\nWeekday vs Weekend") +
  theme_minimal()
print(p_sc)

# --- Profil harian rata-rata per kategori ---
# 1) Hitung nilai maksimum dari semua Mean_Client & total_electricity
max_y <- df %>%
  summarise(
    max_occ = max(Mean_Client, na.rm = TRUE),
    max_ele = max(total_electricity, na.rm = TRUE)
  ) %>%
  as.numeric() %>%
  max()

# 2) Plot per kategori dengan Y limit yang sama
for(cat in unique(df$day_type)){
  profile <- df %>% 
    filter(day_type == cat) %>%
    mutate(hour_num  = hour(time),
           hour_lbl  = sprintf("%02d:00", hour_num)) %>%
    group_by(hour_num, hour_lbl) %>%
    summarise(
      Mean_Client = mean(Mean_Client, na.rm = TRUE),
      total_electricity = mean(total_electricity, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(hour_num)

  p <- ggplot(profile, aes(x = hour_num)) +
    geom_line(aes(y = Mean_Client, colour = "Okupansi (WiFi)")) +
    geom_line(aes(y = total_electricity, colour = "Konsumsi Listrik")) +
    scale_x_continuous(
      breaks = 0:23,
      labels = sprintf("%02d:00", 0:23)
    ) +
    scale_y_continuous(limits = c(0, max_y)) +
    labs(title = paste0("Profil Harian Rata-rata (", cat, ")"),
         x = "Jam", y = "Rata-rata") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(p)
}

Data Mining AdeventureR

Kelompok 4 | Institut Teknologi Sepuluh Nopember 1. Nurul Fadhila Y.S. (5003231086) 2. Dian Astralia Damayanti (5003231183) 3. Eren Lutfimorea (5003231084) 4. Revalina Putri Nailah (5003231076)

1. Import Library

2. Loading data

3. Data Cleaning and Integration

3.1 Wifi Dataset

3.2 Library Energy Dataset

3.3 Merge tiga dataset listrik

3.4 Standardisasi interval 10 menit untuk listrik (jika perlu)

3.5 Gabungkan Wifi dan Electricity

4. Visualisasi

4.1 Time Series Plot of Occupancy and Electricity Consumption

4.2 Scatter Plot: Occupancy (X) vs Electricity (Y)

5. Analisis

5.1 Peak hours of occupancy

5.2 Korelasi & Regresi Linear

— 6. Weekend vs Weekday Comparison —