Import Library

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.4     ✔ readr   2.1.5
## ✔ forcats 1.0.0     ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0     ✔ tibble  3.3.0
## ✔ purrr   1.1.0     ✔ tidyr   1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)
library(dplyr)

Membaca data dari file CSV

wifi_data <- read.csv("wifi.csv") # nolint
head(wifi_data)

##                  time                   Event.Time Associated.Client.Count
## 1 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                     184
## 2 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                       6
## 3 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      18
## 4 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      23
## 5 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      45
## 6 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      16
##   Authenticated.Client.Count                   Uni                 Building
## 1                        182 Lancaster University         Graduate College 
## 2                          6 Lancaster University        Management School 
## 3                         18 Lancaster University             SW hse 32-33 
## 4                         23 Lancaster University                SW hse 29 
## 5                         45 Lancaster University            Furness outer 
## 6                         16 Lancaster University   Slaidburn House (LUSU) 
##      Floor
## 1  A Floor
## 2  C Floor
## 3  D Floor
## 4  B floor
## 5  C floor
## 6  E floor

str(wifi_data)

## 'data.frame':    1883844 obs. of  7 variables:
##  $ time                      : chr  "2020-02-01 00:02:12" "2020-02-01 00:02:12" "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
##  $ Event.Time                : chr  "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
##  $ Associated.Client.Count   : int  184 6 18 23 45 16 32 6 73 5 ...
##  $ Authenticated.Client.Count: int  182 6 18 23 45 16 32 6 73 5 ...
##  $ Uni                       : chr  "Lancaster University " "Lancaster University " "Lancaster University " "Lancaster University " ...
##  $ Building                  : chr  " Graduate College " " Management School " " SW hse 32-33 " " SW hse 29 " ...
##  $ Floor                     : chr  " A Floor" " C Floor" " D Floor" " B floor" ...

Mengubah kolom ‘time’ menjadi format POSIXct

wifi_data$time <- ymd_hms(wifi_data$time)
str(wifi_data)

## 'data.frame':    1883844 obs. of  7 variables:
##  $ time                      : POSIXct, format: "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
##  $ Event.Time                : chr  "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
##  $ Associated.Client.Count   : int  184 6 18 23 45 16 32 6 73 5 ...
##  $ Authenticated.Client.Count: int  182 6 18 23 45 16 32 6 73 5 ...
##  $ Uni                       : chr  "Lancaster University " "Lancaster University " "Lancaster University " "Lancaster University " ...
##  $ Building                  : chr  " Graduate College " " Management School " " SW hse 32-33 " " SW hse 29 " ...
##  $ Floor                     : chr  " A Floor" " C Floor" " D Floor" " B floor" ...

Menampilkan shape data

dim(wifi_data)

## [1] 1883844       7

Mengecek berapa banyak nilai unik pada setiap variabel

sapply(wifi_data, function(x) length(unique(x)))

##                       time                 Event.Time 
##                       7165                       7165 
##    Associated.Client.Count Authenticated.Client.Count 
##                        895                        878 
##                        Uni                   Building 
##                          1                         76 
##                      Floor 
##                         42

Mengecek nilai unik pada kolom Building

unique(wifi_data$Building)

##  [1] " Graduate College "                 " Management School "               
##  [3] " SW hse 32-33 "                     " SW hse 29 "                       
##  [5] " Furness outer "                    " Slaidburn House (LUSU) "          
##  [7] " SW hse 34 "                        " Bowland hall "                    
##  [9] " Fylde "                            " SW hse 55-56 "                    
## [11] " SW hse 40-42 "                     " Bowland Twr (Old Bowland Annexe) "
## [13] " Pendle "                           " Faraday and cTAP "                
## [15] " Institute for Advanced Studies "   " University House "                
## [17] " SW hse 53-54 "                     " Engineering "                     
## [19] " Field Station "                    " SW hse 36 "                       
## [21] " Infolab "                          " SW hse 21-23 "                    
## [23] " LICA "                             " FU Hse 71-74 "                    
## [25] " Grizedale "                        " Charles Carter "                  
## [27] " Furness "                          " SW hse 43-45 "                    
## [29] " SW hse 12-16 "                     " Bowland Main "                    
## [31] " SW hse 35 "                        " Human Resources "                 
## [33] " County "                           " FY Hse 65-70 "                    
## [35] " Barker House Farm "                " SW hse 27-28 "                    
## [37] " Bowland Annexe "                   " John Creed "                      
## [39] " grize-res "                        " SW hse 24-26 "                    
## [41] " SW hse 20 "                        " Ruskin Library "                  
## [43] " County South/Cartmel "             " Library "                         
## [45] " Conference Centre "                " Postgrad Stats (PSC) "            
## [47] " Bowland North "                    " George Fox "                      
## [49] " Hse 75 77 "                        " Bailrigg House "                  
## [51] " Sports Centre "                    " SW hse 30-31 "                    
## [53] " Bowland Ash "                      " Alex Square "                     
## [55] " LEC "                              " MDC "                             
## [57] " ISS Building "                     " Chaplaincy Centre "               
## [59] " CETAD "                            " SW hse 50-52 "                    
## [61] " SW hse 17-19 "                     " Central Workshops "               
## [63] " SW hse 46-49 "                     " SW hse 39 "                       
## [65] " Science and technology "           " Whewell "                         
## [67] " SW hse 37-38 "                     " Lonsdale College (SW) "           
## [69] " Physics "                          " County Field "                    
## [71] " Great Hall "                       " SW hse-158-179 "                  
## [73] " Preschool "                        " Reception "                       
## [75] " Hazelrigg "                        " Energy Centre "

Filter data hanya mengambil data library dari building

library_data <- subset(wifi_data, Building == " Library ")
head(library_data)

##                    time                   Event.Time Associated.Client.Count
## 74  2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      29
## 158 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                       0
## 235 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      21
## 260 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020                      38
## 291 2020-02-01 00:07:34 Sat Feb 01 00:07:34 UTC 2020                       0
## 497 2020-02-01 00:07:34 Sat Feb 01 00:07:34 UTC 2020                      33
##     Authenticated.Client.Count                   Uni  Building     Floor
## 74                          28 Lancaster University   Library    A floor
## 158                          0 Lancaster University   Library   LG floor
## 235                         20 Lancaster University   Library    C floor
## 260                         37 Lancaster University   Library    B floor
## 291                          0 Lancaster University   Library   LG floor
## 497                         31 Lancaster University   Library    A floor

Melakukan resample library_data menjadi interval 10 menit

Resample Associated Client Count menggunakan mean

library_data_resampled <- library_data %>%
  arrange(time) %>%
  group_by(interval = floor_date(time, "10 minutes")) %>%
  summarise(mean_client_count = mean(Associated.Client.Count, na.rm = TRUE)) %>%
  ungroup()
tail(library_data_resampled)

## # A tibble: 6 × 2
##   interval            mean_client_count
##   <dttm>                          <dbl>
## 1 2020-02-26 12:50:00              374.
## 2 2020-02-26 13:00:00              377.
## 3 2020-02-26 13:10:00              392.
## 4 2020-02-26 13:20:00              411.
## 5 2020-02-26 13:30:00              430.
## 6 2020-02-26 13:40:00              432.

Membaca data library1 dari file CSV

df1 <- read.csv("library1.csv") # nolint
head(df1)

##                    ts              name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442   KWh    1489442   NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449   KWh    1489449    7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456   KWh    1489456    7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464   KWh    1489464    8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471   KWh    1489471    7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479   KWh    1489479    8

Membaca data library2 dari file CSV

df2 <- read.csv("library2.csv") # nolint
head(df2)

##                    ts               name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M11R2056 2129016   KWh    2129016   NA
## 2 2020-01-01 00:10:00 MC065-L01/M11R2056 2129034   KWh    2129034   18
## 3 2020-01-01 00:20:00 MC065-L01/M11R2056 2129054   KWh    2129054   20
## 4 2020-01-01 00:30:00 MC065-L01/M11R2056 2129071   KWh    2129071   17
## 5 2020-01-01 00:40:00 MC065-L01/M11R2056 2129086   KWh    2129086   15
## 6 2020-01-01 00:50:00 MC065-L01/M11R2056 2129103   KWh    2129103   17

Membaca data library3 dari file CSV

df3 <- read.csv("library3.csv") # nolint
head(df3)

##                    ts               name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M13R2064 6914209   KWh    6914209   NA
## 2 2020-01-01 00:10:00 MC065-L01/M13R2064 6914266   KWh    6914266   57
## 3 2020-01-01 00:20:00 MC065-L01/M13R2064 6914310   KWh    6914310   44
## 4 2020-01-01 00:30:00 MC065-L01/M13R2064 6914376   KWh    6914376   66
## 5 2020-01-01 00:40:00 MC065-L01/M13R2064 6914439   KWh    6914439   63
## 6 2020-01-01 00:50:00 MC065-L01/M13R2064 6914474   KWh    6914474   35

Mengubah kolom ‘time’ menjadi format POSIXct

df1$ts <- ymd_hms(df1$ts)
df2$ts <- ymd_hms(df2$ts)
df3$ts <- ymd_hms(df3$ts)

Menggabungkan ketiga data frame berdasarkan kolom ‘ts’ dan Menjumlahkan kolom rate pada setiap interval waktu; nolint

combined_data <- df1 %>%
  full_join(df2, by = "ts") %>%
  full_join(df3, by = "ts") %>%
  mutate(total_rate = rowSums(select(., starts_with("rate")), na.rm = TRUE)) %>%
  select(ts, total_rate)
head(combined_data)

##                    ts total_rate
## 1 2020-01-01 00:00:00          0
## 2 2020-01-01 00:10:00         82
## 3 2020-01-01 00:20:00         71
## 4 2020-01-01 00:30:00         91
## 5 2020-01-01 00:40:00         85
## 6 2020-01-01 00:50:00         60

imputasi nilai 0 pada kolom total_rate dengan rata-rata 144 pengamatan pertama

combined_data$total_rate[combined_data$total_rate == 0] <- mean(combined_data$total_rate[1:144], na.rm = TRUE) # nolint
head(combined_data)

##                    ts total_rate
## 1 2020-01-01 00:00:00   64.36806
## 2 2020-01-01 00:10:00   82.00000
## 3 2020-01-01 00:20:00   71.00000
## 4 2020-01-01 00:30:00   91.00000
## 5 2020-01-01 00:40:00   85.00000
## 6 2020-01-01 00:50:00   60.00000

Menggabungkan library_data_resampled dengan combined_data berdasarkan interval waktu # nolint

final_data <- library_data_resampled %>%
  inner_join(combined_data, by = c("interval" = "ts"))
head(final_data)

## # A tibble: 6 × 3
##   interval            mean_client_count total_rate
##   <dttm>                          <dbl>      <dbl>
## 1 2020-02-01 00:00:00              22.8       64.4
## 2 2020-02-01 00:10:00              20.6       97  
## 3 2020-02-01 00:20:00              17.2       96  
## 4 2020-02-01 00:30:00              15.4      100  
## 5 2020-02-01 00:40:00              12.4       89  
## 6 2020-02-01 00:50:00              10.5       91

Melihat shape final_data

dim(final_data)

## [1] 3683    3

Mengecek missing value pada final_data

sum(is.na(final_data))

## [1] 0

Mengecek data duplikat pada final_data

sum(duplicated(final_data))

## [1] 0

Visualisasi Data

Time Series Plot

time_series_plot <- ggplot(final_data, aes(x = interval)) +
  geom_line(aes(y = mean_client_count, color = "Occupancy")) +
  geom_line(aes(y = total_rate, color = "Energy Consumption")) +
  labs(title = "Time Series Plot of Occcupancy and Energy Consumption",
       x = "Interval", y = "Value", color = "Legend") +
  theme_minimal()
print(time_series_plot)

# Scatter Plot

scatter_plot <- ggplot(final_data, aes(x = mean_client_count, y = total_rate)) +
  geom_point(alpha = 0.5, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Scatter Plot: Occupancy vs Energy Consumption",
       x = "Occupancy", y = "Energy Consumption") +
  theme_minimal()
print(scatter_plot)

## `geom_smooth()` using formula = 'y ~ x'

# Daily Profile

daily_profiles <- final_data %>%
  mutate(date = as.Date(interval),
         hour = hour(interval) + minute(interval)/60) %>%
  group_by(date, hour) %>%
  summarise(occupancy = mean(mean_client_count, na.rm = TRUE),
            energy = mean(total_rate, na.rm = TRUE))

## `summarise()` has grouped output by 'date'. You can override using the
## `.groups` argument.

daily1 <- ggplot(daily_profiles, aes(x = hour, y = energy, group = date)) +
  geom_line(alpha = 0.3, color = "blue") +
  labs(title = "Daily Energy Profiles (24h)",
       x = "Hour of Day", y = "Energy Consumption") +
  theme_minimal()
print(daily_profiles)

## # A tibble: 3,683 × 4
## # Groups:   date [26]
##    date        hour occupancy energy
##    <date>     <dbl>     <dbl>  <dbl>
##  1 2020-02-01 0         22.8    64.4
##  2 2020-02-01 0.167     20.6    97  
##  3 2020-02-01 0.333     17.2    96  
##  4 2020-02-01 0.5       15.4   100  
##  5 2020-02-01 0.667     12.4    89  
##  6 2020-02-01 0.833     10.5    91  
##  7 2020-02-01 1          9.25   90  
##  8 2020-02-01 1.17       8.67   92  
##  9 2020-02-01 1.33       8.5    87  
## 10 2020-02-01 1.5        8.38   83  
## # ℹ 3,673 more rows

print(daily1)

# Overall Average Daily Profile

# Tambahkan kolom jam-menit dalam sehari
final_data <- final_data %>%
  mutate(time_of_day = format(interval, "%H"))

# Buat profil rata-rata harian
avg_daily <- final_data %>%
  group_by(time_of_day) %>%
  summarise(
    avg_occupancy = mean(mean_client_count, na.rm = TRUE),
    avg_energy = mean(total_rate, na.rm = TRUE)
  )
# Visualisasi profil rata-rata harian
ggplot(avg_daily, aes(x = time_of_day)) +
  geom_line(aes(y = avg_occupancy, color = "Occupancy", group = 1), linewidth = 1) +
  geom_line(aes(y = avg_energy, color = "Energy Consumption", group = 1), linewidth = 1) +
  labs(title = "Overall Average Daily Profile",
       x = "Time of Day", y = "Average Value", color = "Legend") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Profil rata-rata harian berdasarkan weekend vs weekday

# Tambahkan kolom hari
final_data <- final_data %>%
  mutate(
    day_of_week = wday(interval, label = TRUE),  # Hari dalam format factor (Sun, Mon, ...)
    day_type = ifelse(day_of_week %in% c("Sat", "Sun"), "Weekend", "Weekday")
  )
avg_daily_daytype <- final_data %>%
  mutate(hour = hour(interval) + minute(interval)/60) %>%
  group_by(day_type, hour) %>%
  summarise(
    avg_occupancy = mean(mean_client_count, na.rm = TRUE),
    avg_energy = mean(total_rate, na.rm = TRUE)
  ) %>%
  ungroup()

## `summarise()` has grouped output by 'day_type'. You can override using the
## `.groups` argument.

Visualisasi Occupancy

ggplot(avg_daily_daytype, aes(x = hour, y = avg_occupancy, color = day_type)) +
  geom_line(linewidth = 1) +
  labs(title = "Average Occupancy: Weekend vs Weekday",
       x = "Hour of Day", y = "Average Occupancy", color = "Day Type") +
  theme_minimal()

# Visualisasi Energy Consumption

ggplot(avg_daily_daytype, aes(x = hour, y = avg_energy, color = day_type)) +
  geom_line(linewidth = 1) +
  labs(title = "Average Energy Consumption: Weekend vs Weekday",
       x = "Hour of Day", y = "Average Energy Consumption", color = "Day Type") +
  theme_minimal()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

AdventureR

Ahmad Rizal Saputra(062), Multazam Zidan(071), Khafid Alfian Saputra(082), Happy Dwi Ramadhany Muhammad(083)

2025-09-14

Membaca data dari file CSV

Mengubah kolom ‘time’ menjadi format POSIXct

Menampilkan shape data

Mengecek berapa banyak nilai unik pada setiap variabel

Mengecek nilai unik pada kolom Building

Filter data hanya mengambil data library dari building

Melakukan resample library_data menjadi interval 10 menit

Resample Associated Client Count menggunakan mean

Membaca data library1 dari file CSV

Membaca data library2 dari file CSV

Membaca data library3 dari file CSV

Mengubah kolom ‘time’ menjadi format POSIXct

Menggabungkan ketiga data frame berdasarkan kolom ‘ts’ dan Menjumlahkan kolom rate pada setiap interval waktu; nolint

imputasi nilai 0 pada kolom total_rate dengan rata-rata 144 pengamatan pertama

Menggabungkan library_data_resampled dengan combined_data berdasarkan interval waktu # nolint

Melihat shape final_data

Mengecek missing value pada final_data

Mengecek data duplikat pada final_data

Visualisasi Data

Time Series Plot

Visualisasi Occupancy