Import Library
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
wifi_data <- read.csv("wifi.csv") # nolint
head(wifi_data)
## time Event.Time Associated.Client.Count
## 1 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 184
## 2 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 6
## 3 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 18
## 4 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 23
## 5 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 45
## 6 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 16
## Authenticated.Client.Count Uni Building
## 1 182 Lancaster University Graduate College
## 2 6 Lancaster University Management School
## 3 18 Lancaster University SW hse 32-33
## 4 23 Lancaster University SW hse 29
## 5 45 Lancaster University Furness outer
## 6 16 Lancaster University Slaidburn House (LUSU)
## Floor
## 1 A Floor
## 2 C Floor
## 3 D Floor
## 4 B floor
## 5 C floor
## 6 E floor
str(wifi_data)
## 'data.frame': 1883844 obs. of 7 variables:
## $ time : chr "2020-02-01 00:02:12" "2020-02-01 00:02:12" "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
## $ Event.Time : chr "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
## $ Associated.Client.Count : int 184 6 18 23 45 16 32 6 73 5 ...
## $ Authenticated.Client.Count: int 182 6 18 23 45 16 32 6 73 5 ...
## $ Uni : chr "Lancaster University " "Lancaster University " "Lancaster University " "Lancaster University " ...
## $ Building : chr " Graduate College " " Management School " " SW hse 32-33 " " SW hse 29 " ...
## $ Floor : chr " A Floor" " C Floor" " D Floor" " B floor" ...
wifi_data$time <- ymd_hms(wifi_data$time)
str(wifi_data)
## 'data.frame': 1883844 obs. of 7 variables:
## $ time : POSIXct, format: "2020-02-01 00:02:12" "2020-02-01 00:02:12" ...
## $ Event.Time : chr "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
## $ Associated.Client.Count : int 184 6 18 23 45 16 32 6 73 5 ...
## $ Authenticated.Client.Count: int 182 6 18 23 45 16 32 6 73 5 ...
## $ Uni : chr "Lancaster University " "Lancaster University " "Lancaster University " "Lancaster University " ...
## $ Building : chr " Graduate College " " Management School " " SW hse 32-33 " " SW hse 29 " ...
## $ Floor : chr " A Floor" " C Floor" " D Floor" " B floor" ...
dim(wifi_data)
## [1] 1883844 7
sapply(wifi_data, function(x) length(unique(x)))
## time Event.Time
## 7165 7165
## Associated.Client.Count Authenticated.Client.Count
## 895 878
## Uni Building
## 1 76
## Floor
## 42
unique(wifi_data$Building)
## [1] " Graduate College " " Management School "
## [3] " SW hse 32-33 " " SW hse 29 "
## [5] " Furness outer " " Slaidburn House (LUSU) "
## [7] " SW hse 34 " " Bowland hall "
## [9] " Fylde " " SW hse 55-56 "
## [11] " SW hse 40-42 " " Bowland Twr (Old Bowland Annexe) "
## [13] " Pendle " " Faraday and cTAP "
## [15] " Institute for Advanced Studies " " University House "
## [17] " SW hse 53-54 " " Engineering "
## [19] " Field Station " " SW hse 36 "
## [21] " Infolab " " SW hse 21-23 "
## [23] " LICA " " FU Hse 71-74 "
## [25] " Grizedale " " Charles Carter "
## [27] " Furness " " SW hse 43-45 "
## [29] " SW hse 12-16 " " Bowland Main "
## [31] " SW hse 35 " " Human Resources "
## [33] " County " " FY Hse 65-70 "
## [35] " Barker House Farm " " SW hse 27-28 "
## [37] " Bowland Annexe " " John Creed "
## [39] " grize-res " " SW hse 24-26 "
## [41] " SW hse 20 " " Ruskin Library "
## [43] " County South/Cartmel " " Library "
## [45] " Conference Centre " " Postgrad Stats (PSC) "
## [47] " Bowland North " " George Fox "
## [49] " Hse 75 77 " " Bailrigg House "
## [51] " Sports Centre " " SW hse 30-31 "
## [53] " Bowland Ash " " Alex Square "
## [55] " LEC " " MDC "
## [57] " ISS Building " " Chaplaincy Centre "
## [59] " CETAD " " SW hse 50-52 "
## [61] " SW hse 17-19 " " Central Workshops "
## [63] " SW hse 46-49 " " SW hse 39 "
## [65] " Science and technology " " Whewell "
## [67] " SW hse 37-38 " " Lonsdale College (SW) "
## [69] " Physics " " County Field "
## [71] " Great Hall " " SW hse-158-179 "
## [73] " Preschool " " Reception "
## [75] " Hazelrigg " " Energy Centre "
library_data <- subset(wifi_data, Building == " Library ")
head(library_data)
## time Event.Time Associated.Client.Count
## 74 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 29
## 158 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 0
## 235 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 21
## 260 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 38
## 291 2020-02-01 00:07:34 Sat Feb 01 00:07:34 UTC 2020 0
## 497 2020-02-01 00:07:34 Sat Feb 01 00:07:34 UTC 2020 33
## Authenticated.Client.Count Uni Building Floor
## 74 28 Lancaster University Library A floor
## 158 0 Lancaster University Library LG floor
## 235 20 Lancaster University Library C floor
## 260 37 Lancaster University Library B floor
## 291 0 Lancaster University Library LG floor
## 497 31 Lancaster University Library A floor
library_data_resampled <- library_data %>%
arrange(time) %>%
group_by(interval = floor_date(time, "10 minutes")) %>%
summarise(mean_client_count = mean(Associated.Client.Count, na.rm = TRUE)) %>%
ungroup()
tail(library_data_resampled)
## # A tibble: 6 × 2
## interval mean_client_count
## <dttm> <dbl>
## 1 2020-02-26 12:50:00 374.
## 2 2020-02-26 13:00:00 377.
## 3 2020-02-26 13:10:00 392.
## 4 2020-02-26 13:20:00 411.
## 5 2020-02-26 13:30:00 430.
## 6 2020-02-26 13:40:00 432.
df1 <- read.csv("library1.csv") # nolint
head(df1)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh 1489442 NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh 1489449 7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh 1489456 7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh 1489464 8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh 1489471 7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh 1489479 8
df2 <- read.csv("library2.csv") # nolint
head(df2)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M11R2056 2129016 KWh 2129016 NA
## 2 2020-01-01 00:10:00 MC065-L01/M11R2056 2129034 KWh 2129034 18
## 3 2020-01-01 00:20:00 MC065-L01/M11R2056 2129054 KWh 2129054 20
## 4 2020-01-01 00:30:00 MC065-L01/M11R2056 2129071 KWh 2129071 17
## 5 2020-01-01 00:40:00 MC065-L01/M11R2056 2129086 KWh 2129086 15
## 6 2020-01-01 00:50:00 MC065-L01/M11R2056 2129103 KWh 2129103 17
df3 <- read.csv("library3.csv") # nolint
head(df3)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M13R2064 6914209 KWh 6914209 NA
## 2 2020-01-01 00:10:00 MC065-L01/M13R2064 6914266 KWh 6914266 57
## 3 2020-01-01 00:20:00 MC065-L01/M13R2064 6914310 KWh 6914310 44
## 4 2020-01-01 00:30:00 MC065-L01/M13R2064 6914376 KWh 6914376 66
## 5 2020-01-01 00:40:00 MC065-L01/M13R2064 6914439 KWh 6914439 63
## 6 2020-01-01 00:50:00 MC065-L01/M13R2064 6914474 KWh 6914474 35
df1$ts <- ymd_hms(df1$ts)
df2$ts <- ymd_hms(df2$ts)
df3$ts <- ymd_hms(df3$ts)
combined_data <- df1 %>%
full_join(df2, by = "ts") %>%
full_join(df3, by = "ts") %>%
mutate(total_rate = rowSums(select(., starts_with("rate")), na.rm = TRUE)) %>%
select(ts, total_rate)
head(combined_data)
## ts total_rate
## 1 2020-01-01 00:00:00 0
## 2 2020-01-01 00:10:00 82
## 3 2020-01-01 00:20:00 71
## 4 2020-01-01 00:30:00 91
## 5 2020-01-01 00:40:00 85
## 6 2020-01-01 00:50:00 60
combined_data$total_rate[combined_data$total_rate == 0] <- mean(combined_data$total_rate[1:144], na.rm = TRUE) # nolint
head(combined_data)
## ts total_rate
## 1 2020-01-01 00:00:00 64.36806
## 2 2020-01-01 00:10:00 82.00000
## 3 2020-01-01 00:20:00 71.00000
## 4 2020-01-01 00:30:00 91.00000
## 5 2020-01-01 00:40:00 85.00000
## 6 2020-01-01 00:50:00 60.00000
final_data <- library_data_resampled %>%
inner_join(combined_data, by = c("interval" = "ts"))
head(final_data)
## # A tibble: 6 × 3
## interval mean_client_count total_rate
## <dttm> <dbl> <dbl>
## 1 2020-02-01 00:00:00 22.8 64.4
## 2 2020-02-01 00:10:00 20.6 97
## 3 2020-02-01 00:20:00 17.2 96
## 4 2020-02-01 00:30:00 15.4 100
## 5 2020-02-01 00:40:00 12.4 89
## 6 2020-02-01 00:50:00 10.5 91
dim(final_data)
## [1] 3683 3
sum(is.na(final_data))
## [1] 0
sum(duplicated(final_data))
## [1] 0
time_series_plot <- ggplot(final_data, aes(x = interval)) +
geom_line(aes(y = mean_client_count, color = "Occupancy")) +
geom_line(aes(y = total_rate, color = "Energy Consumption")) +
labs(title = "Time Series Plot of Occcupancy and Energy Consumption",
x = "Interval", y = "Value", color = "Legend") +
theme_minimal()
print(time_series_plot)
# Scatter Plot
scatter_plot <- ggplot(final_data, aes(x = mean_client_count, y = total_rate)) +
geom_point(alpha = 0.5, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Scatter Plot: Occupancy vs Energy Consumption",
x = "Occupancy", y = "Energy Consumption") +
theme_minimal()
print(scatter_plot)
## `geom_smooth()` using formula = 'y ~ x'
# Daily Profile
daily_profiles <- final_data %>%
mutate(date = as.Date(interval),
hour = hour(interval) + minute(interval)/60) %>%
group_by(date, hour) %>%
summarise(occupancy = mean(mean_client_count, na.rm = TRUE),
energy = mean(total_rate, na.rm = TRUE))
## `summarise()` has grouped output by 'date'. You can override using the
## `.groups` argument.
daily1 <- ggplot(daily_profiles, aes(x = hour, y = energy, group = date)) +
geom_line(alpha = 0.3, color = "blue") +
labs(title = "Daily Energy Profiles (24h)",
x = "Hour of Day", y = "Energy Consumption") +
theme_minimal()
print(daily_profiles)
## # A tibble: 3,683 × 4
## # Groups: date [26]
## date hour occupancy energy
## <date> <dbl> <dbl> <dbl>
## 1 2020-02-01 0 22.8 64.4
## 2 2020-02-01 0.167 20.6 97
## 3 2020-02-01 0.333 17.2 96
## 4 2020-02-01 0.5 15.4 100
## 5 2020-02-01 0.667 12.4 89
## 6 2020-02-01 0.833 10.5 91
## 7 2020-02-01 1 9.25 90
## 8 2020-02-01 1.17 8.67 92
## 9 2020-02-01 1.33 8.5 87
## 10 2020-02-01 1.5 8.38 83
## # ℹ 3,673 more rows
print(daily1)
# Overall Average Daily Profile
# Tambahkan kolom jam-menit dalam sehari
final_data <- final_data %>%
mutate(time_of_day = format(interval, "%H"))
# Buat profil rata-rata harian
avg_daily <- final_data %>%
group_by(time_of_day) %>%
summarise(
avg_occupancy = mean(mean_client_count, na.rm = TRUE),
avg_energy = mean(total_rate, na.rm = TRUE)
)
# Visualisasi profil rata-rata harian
ggplot(avg_daily, aes(x = time_of_day)) +
geom_line(aes(y = avg_occupancy, color = "Occupancy", group = 1), linewidth = 1) +
geom_line(aes(y = avg_energy, color = "Energy Consumption", group = 1), linewidth = 1) +
labs(title = "Overall Average Daily Profile",
x = "Time of Day", y = "Average Value", color = "Legend") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Profil rata-rata harian berdasarkan weekend vs weekday
# Tambahkan kolom hari
final_data <- final_data %>%
mutate(
day_of_week = wday(interval, label = TRUE), # Hari dalam format factor (Sun, Mon, ...)
day_type = ifelse(day_of_week %in% c("Sat", "Sun"), "Weekend", "Weekday")
)
avg_daily_daytype <- final_data %>%
mutate(hour = hour(interval) + minute(interval)/60) %>%
group_by(day_type, hour) %>%
summarise(
avg_occupancy = mean(mean_client_count, na.rm = TRUE),
avg_energy = mean(total_rate, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'day_type'. You can override using the
## `.groups` argument.
ggplot(avg_daily_daytype, aes(x = hour, y = avg_occupancy, color = day_type)) +
geom_line(linewidth = 1) +
labs(title = "Average Occupancy: Weekend vs Weekday",
x = "Hour of Day", y = "Average Occupancy", color = "Day Type") +
theme_minimal()
# Visualisasi Energy Consumption
ggplot(avg_daily_daytype, aes(x = hour, y = avg_energy, color = day_type)) +
geom_line(linewidth = 1) +
labs(title = "Average Energy Consumption: Weekend vs Weekday",
x = "Hour of Day", y = "Average Energy Consumption", color = "Day Type") +
theme_minimal()
Note that the
echo = FALSE
parameter was added to the code
chunk to prevent printing of the R code that generated the plot.