Use only the WiFi records from the Library building.
Ensure all data are standardised to 10-minute intervals:
url <- "https://drive.usercontent.google.com/download?id=1E-vNdxENsEnkhOmbW3qMsmdkmt_2NL-G&export=download&authuser=0&confirm=t&uuid=d09da2d1-26ee-44b7-8432-e5e55ba9c4b7&at=AN8xHopp75gU1rWvlXb9avWJn3IQ%3A1756903057866"
wifi <- read.csv(url)
head(wifi)
## time Event.Time Associated.Client.Count
## 1 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 184
## 2 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 6
## 3 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 18
## 4 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 23
## 5 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 45
## 6 2020-02-01 00:02:12 Sat Feb 01 00:02:12 UTC 2020 16
## Authenticated.Client.Count Uni Building
## 1 182 Lancaster University Graduate College
## 2 6 Lancaster University Management School
## 3 18 Lancaster University SW hse 32-33
## 4 23 Lancaster University SW hse 29
## 5 45 Lancaster University Furness outer
## 6 16 Lancaster University Slaidburn House (LUSU)
## Floor
## 1 A Floor
## 2 C Floor
## 3 D Floor
## 4 B floor
## 5 C floor
## 6 E floor
Inspect the unique building names:
unique(wifi$Building)
## [1] " Graduate College " " Management School "
## [3] " SW hse 32-33 " " SW hse 29 "
## [5] " Furness outer " " Slaidburn House (LUSU) "
## [7] " SW hse 34 " " Bowland hall "
## [9] " Fylde " " SW hse 55-56 "
## [11] " SW hse 40-42 " " Bowland Twr (Old Bowland Annexe) "
## [13] " Pendle " " Faraday and cTAP "
## [15] " Institute for Advanced Studies " " University House "
## [17] " SW hse 53-54 " " Engineering "
## [19] " Field Station " " SW hse 36 "
## [21] " Infolab " " SW hse 21-23 "
## [23] " LICA " " FU Hse 71-74 "
## [25] " Grizedale " " Charles Carter "
## [27] " Furness " " SW hse 43-45 "
## [29] " SW hse 12-16 " " Bowland Main "
## [31] " SW hse 35 " " Human Resources "
## [33] " County " " FY Hse 65-70 "
## [35] " Barker House Farm " " SW hse 27-28 "
## [37] " Bowland Annexe " " John Creed "
## [39] " grize-res " " SW hse 24-26 "
## [41] " SW hse 20 " " Ruskin Library "
## [43] " County South/Cartmel " " Library "
## [45] " Conference Centre " " Postgrad Stats (PSC) "
## [47] " Bowland North " " George Fox "
## [49] " Hse 75 77 " " Bailrigg House "
## [51] " Sports Centre " " SW hse 30-31 "
## [53] " Bowland Ash " " Alex Square "
## [55] " LEC " " MDC "
## [57] " ISS Building " " Chaplaincy Centre "
## [59] " CETAD " " SW hse 50-52 "
## [61] " SW hse 17-19 " " Central Workshops "
## [63] " SW hse 46-49 " " SW hse 39 "
## [65] " Science and technology " " Whewell "
## [67] " SW hse 37-38 " " Lonsdale College (SW) "
## [69] " Physics " " County Field "
## [71] " Great Hall " " SW hse-158-179 "
## [73] " Preschool " " Reception "
## [75] " Hazelrigg " " Energy Centre "
library_keys <- unique(wifi$Building)
library_keys <- library_keys[grepl("library", library_keys, ignore.case = TRUE)]
library_keys
## [1] " Ruskin Library " " Library "
wifi_library <- wifi[wifi$Building == " Library ", ]
dim(wifi_library)
## [1] 28652 7
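Note that the building names carry leading and trailing spaces, which is why the filter matches the padded string " Library ". A slightly more robust alternative (a sketch, not part of the original pipeline) is to trim the whitespace before matching:
# Sketch: trim padded building names before filtering; the result should match wifi_library above
wifi_library_alt <- wifi[trimws(wifi$Building) == "Library", ]
dim(wifi_library_alt)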
library1 <- read.csv("https://drive.google.com/uc?export=download&id=14Kx3ySk0hYJ4GpMWGdncrjzsMJnoGGEb")
head(library1)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh 1489442 NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh 1489449 7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh 1489456 7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh 1489464 8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh 1489471 7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh 1489479 8
library2 <- read.csv("https://drive.google.com/uc?export=download&id=1iVN6vQ2kTLBfieeQpuGl-26zR6URJxos")
head(library2)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M11R2056 2129016 KWh 2129016 NA
## 2 2020-01-01 00:10:00 MC065-L01/M11R2056 2129034 KWh 2129034 18
## 3 2020-01-01 00:20:00 MC065-L01/M11R2056 2129054 KWh 2129054 20
## 4 2020-01-01 00:30:00 MC065-L01/M11R2056 2129071 KWh 2129071 17
## 5 2020-01-01 00:40:00 MC065-L01/M11R2056 2129086 KWh 2129086 15
## 6 2020-01-01 00:50:00 MC065-L01/M11R2056 2129103 KWh 2129103 17
library3 <- read.csv("https://drive.google.com/uc?export=download&id=1mzE25o2PLvM_NHRqsGkaTorIlV0mKoNJ")
head(library3)
## ts name reading units cumulative rate
## 1 2020-01-01 00:00:00 MC065-L01/M13R2064 6914209 KWh 6914209 NA
## 2 2020-01-01 00:10:00 MC065-L01/M13R2064 6914266 KWh 6914266 57
## 3 2020-01-01 00:20:00 MC065-L01/M13R2064 6914310 KWh 6914310 44
## 4 2020-01-01 00:30:00 MC065-L01/M13R2064 6914376 KWh 6914376 66
## 5 2020-01-01 00:40:00 MC065-L01/M13R2064 6914439 KWh 6914439 63
## 6 2020-01-01 00:50:00 MC065-L01/M13R2064 6914474 KWh 6914474 35
Ensure all data are standardised to 10-minute intervals: resample the WiFi data (mean Associated Client Count), and combine the library meters into a total electricity rate per 10 minutes.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.5.1
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# 1. Keep only rows where Building == " Library "
wifi_library <- wifi %>% filter(Building == " Library ")
# 2. Convert the time column to POSIXct (datetime)
wifi_library$time <- as.POSIXct(wifi_library$time, format = "%Y-%m-%d %H:%M:%S")
# 3. Resample to 10-minute intervals and compute the mean Associated.Client.Count
wifi_resampled <- wifi_library %>%
mutate(time_10min = floor_date(time, "10 minutes")) %>%
group_by(time_10min) %>%
summarise(mean_client = mean(Associated.Client.Count, na.rm = TRUE))
wifi_resampled
## # A tibble: 3,683 × 2
## time_10min mean_client
## <dttm> <dbl>
## 1 2020-02-01 00:00:00 22.8
## 2 2020-02-01 00:10:00 20.6
## 3 2020-02-01 00:20:00 17.2
## 4 2020-02-01 00:30:00 15.4
## 5 2020-02-01 00:40:00 12.4
## 6 2020-02-01 00:50:00 10.5
## 7 2020-02-01 01:00:00 9.25
## 8 2020-02-01 01:10:00 8.67
## 9 2020-02-01 01:20:00 8.5
## 10 2020-02-01 01:30:00 8.38
## # ℹ 3,673 more rows
library(dplyr)
library(lubridate)
# 1. Combine the library meter data
library_combined <- bind_rows(library1, library2, library3)
# 2. Convert the ts column to datetime
library_combined <- library_combined %>%
mutate(ts = as.POSIXct(ts, format = "%Y-%m-%d %H:%M:%S"))
# 3. Resample to 10 minutes and sum rate across meters
library_aggregated <- library_combined %>%
mutate(ts_10min = floor_date(ts, "10 minutes")) %>%
group_by(ts_10min) %>%
summarise(rate_sum = sum(rate, na.rm = TRUE)) %>%
ungroup()
# 4. Merge with wifi_resampled (inner join on the time index)
merged_df <- wifi_resampled %>%
inner_join(library_aggregated, by = c("time_10min" = "ts_10min"))
# 5. Inspect the result
head(merged_df)
## # A tibble: 6 × 3
## time_10min mean_client rate_sum
## <dttm> <dbl> <dbl>
## 1 2020-02-01 00:00:00 22.8 0
## 2 2020-02-01 00:10:00 20.6 97
## 3 2020-02-01 00:20:00 17.2 96
## 4 2020-02-01 00:30:00 15.4 100
## 5 2020-02-01 00:40:00 12.4 89
## 6 2020-02-01 00:50:00 10.5 91
str(merged_df)
## tibble [3,683 × 3] (S3: tbl_df/tbl/data.frame)
## $ time_10min : POSIXct[1:3683], format: "2020-02-01 00:00:00" "2020-02-01 00:10:00" ...
## $ mean_client: num [1:3683] 22.8 20.6 17.2 15.4 12.4 ...
## $ rate_sum : num [1:3683] 0 97 96 100 89 91 90 92 87 83 ...
Handle missing electricity readings by filling them with the mean of the first 144 observations (per energy meter).
fill_missing_with_initial_average <- function(df) {
avg_144 <- mean(head(na.omit(df$rate), 144)) # mean of the first 144 non-NA observations
df$rate[is.na(df$rate)] <- avg_144 # fill NA values with that mean
return(df)
}
library1 <- fill_missing_with_initial_average(library1)
library2 <- fill_missing_with_initial_average(library2)
library3 <- fill_missing_with_initial_average(library3)
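A quick sanity check (a sketch; output not reproduced here) to confirm that no missing rates remain in any of the three meters after filling:
# Sketch: remaining NA counts per meter; all three should be 0
sapply(list(library1 = library1, library2 = library2, library3 = library3),
       function(d) sum(is.na(d$rate)))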
# Standardise timestamps & drop duplicates: re-combine the library data with the filled-in rates
library_combined_cleaned <- bind_rows(library1, library2, library3) %>%
mutate(ts = as.POSIXct(ts, format = "%Y-%m-%d %H:%M:%S"))
library_aggregated_cleaned <- library_combined_cleaned %>%
mutate(ts_10min = floor_date(ts, "10 minutes")) %>%
group_by(ts_10min) %>%
summarise(rate = sum(rate, na.rm = TRUE), .groups = "drop")
initial_rows <- nrow(library_aggregated_cleaned)
library_aggregated_cleaned <- library_aggregated_cleaned %>%
distinct(ts_10min, .keep_all = TRUE)
rows_after <- nrow(library_aggregated_cleaned)
# Check whether any duplicate timestamps remain (none are expected, since group_by() + summarise() already collapses each 10-minute bin to a single row)
cat("Initial rows:", initial_rows, "\n")
## Initial rows: 18864
cat("Rows after dropping duplicates:", rows_after, "\n")
## Rows after dropping duplicates: 18864
if (rows_after < initial_rows) {
cat("Duplicates were found and removed.\n")
} else {
cat("No duplicates found.\n")
}
## No duplicates found.
merged_df_cleaned <- wifi_resampled %>%
inner_join(library_aggregated_cleaned,
by = c("time_10min" = "ts_10min"))
head(merged_df_cleaned)
## # A tibble: 6 × 3
## time_10min mean_client rate
## <dttm> <dbl> <dbl>
## 1 2020-02-01 00:00:00 22.8 64.8
## 2 2020-02-01 00:10:00 20.6 97
## 3 2020-02-01 00:20:00 17.2 96
## 4 2020-02-01 00:30:00 15.4 100
## 5 2020-02-01 00:40:00 12.4 89
## 6 2020-02-01 00:50:00 10.5 91
str(merged_df_cleaned)
## tibble [3,683 × 3] (S3: tbl_df/tbl/data.frame)
## $ time_10min : POSIXct[1:3683], format: "2020-02-01 00:00:00" "2020-02-01 00:10:00" ...
## $ mean_client: num [1:3683] 22.8 20.6 17.2 15.4 12.4 ...
## $ rate : num [1:3683] 64.8 97 96 100 89 ...
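Because the inner join keeps only timestamps present in both sources, it is worth confirming that the merged series still sits on a regular 10-minute grid. A minimal sketch, assuming merged_df_cleaned is ordered by time_10min:
# Sketch: distribution of gaps between consecutive timestamps (in minutes);
# any value other than 10 indicates a missing interval
gaps <- diff(merged_df_cleaned$time_10min)
table(as.numeric(gaps, units = "mins"))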
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
# Time series plot: WiFi clients vs electricity
ggplot(merged_df_cleaned, aes(x = time_10min)) +
geom_line(aes(y = mean_client, color = "Mean Client")) +
geom_line(aes(y = rate, color = "Electricity Rate")) +
labs(title = "Time Series: WiFi Clients & Electricity Usage",
x = "Time (10-min interval)",
y = "Value") +
scale_color_manual(name = "Legend",
values = c("Mean Client" = "blue",
"Electricity Rate" = "red")) +
theme_minimal()
ggplot(merged_df_cleaned, aes(x = mean_client, y = rate)) +
geom_point(alpha = 0.6, color = "darkgreen") +
labs(title = "Scatter Plot: WiFi Clients vs Electricity Usage",
x = "Mean Client Count",
y = "Electricity Rate") +
theme_minimal()
library(dplyr)
library(ggplot2)
library(lubridate)
# Add date and time-of-day columns
daily_profiles <- merged_df_cleaned %>%
mutate(date = as.Date(time_10min),
tod = format(time_10min, "%H:%M:%S"))
# Daily profile plot for mean_client
ggplot(daily_profiles, aes(x = tod, y = mean_client, group = date, color = date)) +
geom_line(alpha = 0.6) +
labs(title = "Daily Profiles of WiFi Clients (Per Day)",
x = "Time of Day",
y = "Mean Client Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = "none")
ggplot(daily_profiles, aes(x = tod, y = rate, group = date, color = date)) +
geom_line(alpha = 0.6) +
labs(title = "Daily Profiles of Electricity Usage (Per Day)",
x = "Time of Day",
y = "Electricity Rate") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = "none")
library(dplyr)
library(ggplot2)
library(lubridate)
library(hms)
##
## Attaching package: 'hms'
## The following object is masked from 'package:lubridate':
##
## hms
library(scales)
# Add date and time-of-day columns
daily_profiles <- merged_df_cleaned %>%
mutate(
date = as.Date(time_10min),
tod = hms::as_hms(time_10min) # time of day (hours:minutes:seconds)
)
# Daily profile plot for WiFi clients
p1 <- ggplot(daily_profiles, aes(x = tod, y = mean_client, group = date, color = date)) +
geom_line(alpha = 0.6) +
scale_x_time(
breaks = seq(0, 24*3600, by = 2*3600), # breaks every 2 hours
labels = scales::time_format("%H:%M") # format HH:MM
) +
labs(title = "Daily Profiles of WiFi Clients",
x = "Time of Day",
y = "Mean Client Count") +
theme_minimal() +
guides(color = "none")
# Daily profile plot for electricity usage
p2 <- ggplot(daily_profiles, aes(x = tod, y = rate, group = date, color = date)) +
geom_line(alpha = 0.6) +
scale_x_time(
breaks = seq(0, 24*3600, by = 2*3600),
labels = scales::time_format("%H:%M")
) +
labs(title = "Daily Profiles of Electricity Usage",
x = "Time of Day",
y = "Electricity Rate") +
theme_minimal() +
guides(color = "none")
p1
# Daily profile of electricity consumption
p2
# Plot time series weekday vs weekend
weekend_shading <- merged_df_cleaned %>%
# Filter to keep Saturday (6) and Sunday (7),
# using the convention where Monday = 1 and Sunday = 7
filter(wday(time_10min, week_start = 1) %in% c(6, 7)) %>%
# Create a unique ID for each weekend block
group_by(weekend_id = floor_date(time_10min, unit = "week", week_start = 1)) %>%
# Compute the start (xmin) and end (xmax) of each block
summarise(
xmin = min(time_10min),
xmax = max(time_10min),
.groups = 'drop'
)
ggplot(merged_df_cleaned, aes(x = time_10min)) +
# Background layer (covers both Saturday and Sunday)
geom_rect(
data = weekend_shading,
aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
fill = "grey80", alpha = 0.4, inherit.aes = FALSE
) +
# Data line layers
geom_line(aes(y = mean_client, color = "Mean Client")) +
geom_line(aes(y = rate, color = "Electricity Rate")) +
# Remaining settings (labels, colours, axes, theme)
labs(title = "Time Series: WiFi Clients & Electricity Usage",
x = "Tanggal di Bulan Februari",
y = "Value") +
scale_color_manual(name = "Legend",
values = c("Mean Client" = "blue", "Electricity Rate" = "red")) +
scale_x_datetime(date_breaks = "1 day", date_labels = "%d") +
theme_minimal() +
theme(
# --- Tidied legend settings ---
legend.position = c(0.01, 0.99),
legend.justification = c("left", "top"),
legend.title = element_blank(), # remove the "Legend" title
legend.text = element_text(size = 7), # shrink the legend text
legend.key.size = unit(0.5, "cm"), # shrink the legend key symbols
legend.background = element_rect(fill = alpha("white", 0.6), color = NA),
# --- Grid line settings ---
panel.grid.major.x = element_line(color = "grey85", linewidth = 0.5),
panel.grid.minor.x = element_blank()
)
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Scatter plot: WiFi clients vs electricity usage
library(dplyr)
library(lubridate)
library(ggplot2)
merged_df_cleaned <- merged_df_cleaned %>%
mutate(
day_type = if_else(wday(time_10min, week_start = 1) %in% c(6, 7),
"Weekend",
"Weekday")
)
ggplot(merged_df_cleaned, aes(x = mean_client, y = rate, color = day_type)) +
geom_point(alpha = 0.6) +
# Set the colours manually
scale_color_manual(
name = "Tipe Hari",
values = c("Weekday" = "steelblue", "Weekend" = "orangered")
) +
# Labels and theme
labs(title = "Scatter Plot: WiFi Clients vs Electricity Usage",
x = "Mean Client Count",
y = "Electricity Rate") +
theme_minimal() +
# (Optional) tidy the legend position
theme(legend.position = "top")
# Average daily profile: WiFi clients
library(dplyr)
library(lubridate)
library(ggplot2)
daily_profiles <- merged_df_cleaned %>%
# Make sure the day_type column exists
mutate(
day_type = if_else(wday(time_10min, week_start = 1) %in% c(6, 7), "Weekend", "Weekday"),
# Extract only the time (hour:minute) from the timestamp
time_of_day = as.POSIXct(format(time_10min, "%H:%M"), format = "%H:%M")
) %>%
# Group by day type and time of day
group_by(day_type, time_of_day) %>%
# Compute the mean for each group
summarise(
avg_client = mean(mean_client, na.rm = TRUE),
avg_rate = mean(rate, na.rm = TRUE),
.groups = 'drop'
)
ggplot(daily_profiles, aes(x = time_of_day, y = avg_client, color = day_type, group = day_type)) +
geom_line(linewidth = 1) +
# Use colours consistent with the previous plot
scale_color_manual(
name = "Day Type",
values = c("Weekday" = "steelblue", "Weekend" = "orangered")
) +
# Format the x-axis to show hour:minute
scale_x_datetime(
date_breaks = "2 hours",
date_labels = "%H:%M"
) +
labs(
title = "Profil Harian Rata-rata: WiFi Clients",
subtitle = "Perbandingan antara Hari Kerja (Weekday) dan Akhir Pekan (Weekend)",
x = "Waktu",
y = "Rata-rata Jumlah Client"
) +
theme_minimal() +
theme(legend.position = "top.right")
# Average daily profile: electricity usage
ggplot(daily_profiles, aes(x = time_of_day, y = avg_rate, color = day_type, group = day_type)) +
geom_line(linewidth = 1) +
# Set the colours
scale_color_manual(
name = "Day Type",
values = c("Weekday" = "steelblue", "Weekend" = "orangered")
) +
# Format the x-axis
scale_x_datetime(
date_breaks = "2 hours",
date_labels = "%H:%M"
) +
labs(
title = "Profil Harian Rata-rata: Electricity Usage",
subtitle = "Perbandingan antara Hari Kerja (Weekday) dan Akhir Pekan (Weekend)",
x = "Waktu",
y = "Rata-rata Penggunaan Listrik"
) +
theme_minimal() +
theme(legend.position = "top")
p1
Based on the daily-profile plots, the library's peak busy period is around 14:00 to 16:00.
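As a quick numerical check of that visual reading, a minimal sketch using the daily_profiles summary built above:
# Sketch: time of day with the highest average client count, per day type
daily_profiles %>%
  group_by(day_type) %>%
  slice_max(avg_client, n = 1) %>%
  select(day_type, time_of_day, avg_client)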
# Import libraries
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.1
library(ggplot2)
plot.ts(merged_df_cleaned$rate)
adf.test(merged_df_cleaned$rate)
## Warning in adf.test(merged_df_cleaned$rate): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: merged_df_cleaned$rate
## Dickey-Fuller = -9.9961, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
The Augmented Dickey-Fuller test rejects H0, meaning the series is stationary.
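As a complementary check (a sketch, not part of the original analysis; output not reproduced), the KPSS test in the same tseries package takes stationarity as its null hypothesis, so failing to reject there would agree with the ADF result:
# Sketch: KPSS test (H0: the series is level-stationary)
kpss.test(merged_df_cleaned$rate, null = "Level")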
# Split the data into training and test sets (95:5)
n <- length(merged_df_cleaned$rate)
train_size <- floor(n * 0.95)
train_rate <- merged_df_cleaned$rate[1:train_size]
test_rate <- merged_df_cleaned$rate[(train_size+1):n]
train_xreg <- merged_df_cleaned$mean_client[1:train_size]
test_xreg <- merged_df_cleaned$mean_client[(train_size+1):n]
# Fit the model with auto.arima
fit_train <- auto.arima(
train_rate,
seasonal = TRUE,
d = 0, D = 0,
xreg = train_xreg,
max.p = 5, max.q = 5,
max.P = 3, max.Q = 3,
max.order = 10,
stepwise = FALSE,
approximation = FALSE
)
# Forecast over the test period
fcast_test <- forecast(fit_train, xreg=test_xreg, h=length(test_rate))
# Model summary
summary(fit_train)
## Series: train_rate
## Regression with ARIMA(2,0,4) errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 intercept xreg
## 1.9776 -0.9796 -1.4287 0.3360 0.1072 -0.0070 114.3995 0.1964
## s.e. 0.0041 0.0041 0.0176 0.0293 0.0296 0.0172 1.7597 0.0112
##
## sigma^2 = 83.32: log likelihood = -12696.11
## AIC=25410.22 AICc=25410.27 BIC=25465.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07654337 9.117398 6.914468 -0.7983373 5.823245 0.9226434
## ACF1
## Training set 0.012779
# Compute p-values for the coefficient estimates
coefs <- coef(fit_train)
ses <- sqrt(diag(fit_train$var.coef))
z_stats <- coefs / ses
p_values <- 2 * pnorm(-abs(z_stats))
significance_table <- data.frame(
Parameter = names(coefs),
Estimate = round(coefs, 4),
Std_Error = round(ses, 4),
Z_Statistic = round(z_stats, 4),
P_Value = round(p_values, 4),
Significant = ifelse(p_values < 0.05, "Yes", "No")
)
print(significance_table, digits = 4)
## Parameter Estimate Std_Error Z_Statistic P_Value Significant
## ar1 ar1 1.9776 0.0041 482.6470 0.0000 Yes
## ar2 ar2 -0.9796 0.0041 -239.9360 0.0000 Yes
## ma1 ma1 -1.4287 0.0176 -81.2411 0.0000 Yes
## ma2 ma2 0.3360 0.0293 11.4695 0.0000 Yes
## ma3 ma3 0.1072 0.0296 3.6236 0.0003 Yes
## ma4 ma4 -0.0070 0.0172 -0.4063 0.6845 No
## intercept intercept 114.3995 1.7597 65.0093 0.0000 Yes
## xreg xreg 0.1964 0.0112 17.4930 0.0000 Yes
The p-value for xreg (occupancy) is below 0.05, meaning occupancy significantly affects electricity consumption.
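The same standard errors give approximate 95% Wald confidence intervals for each coefficient; a minimal sketch using the coefs and ses objects computed above:
# Sketch: approximate 95% confidence intervals for the fitted coefficients
data.frame(
  Parameter = names(coefs),
  Lower95 = round(coefs - 1.96 * ses, 4),
  Upper95 = round(coefs + 1.96 * ses, 4)
)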
# Ljung-Box test
checkresiduals(fit_train)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,4) errors
## Q* = 7.1625, df = 4, p-value = 0.1275
##
## Model df: 6. Total lags used: 10
The Ljung-Box test fails to reject H0, meaning the residuals show no remaining autocorrelation.
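The same check can be repeated at other lags directly on the residuals. A minimal sketch, using the 6 ARMA coefficients reported by checkresiduals() as the fitted degrees of freedom:
# Sketch: Ljung-Box test on the residuals at a longer lag (fitdf = number of ARMA coefficients)
Box.test(residuals(fit_train), lag = 20, type = "Ljung-Box", fitdf = 6)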
# Plot the forecast
autoplot(fcast_test) +
labs(title = "Forecast with Regression + ARIMA(2,0,4) Errors") +
theme_minimal()
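Since 5% of the series was held out, the forecast can also be scored on that test window. A minimal sketch (output not reproduced here), using forecast::accuracy():
# Sketch: out-of-sample error measures for the hold-out period
accuracy(fcast_test, test_rate)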
library(ggplot2)
# Create the plot with proper legend handling
p <- ggplot(merged_df_cleaned, aes(x = time_10min)) +
geom_line(aes(y = mean_client, color = "Estimated Occupancy (Avg WiFi Clients)"), size = 1) +
geom_line(aes(y = rate, color = "Total Electricity Rate (KWh)"), size = 1) +
geom_hline(yintercept = 50, color = "red", linetype = "dashed", linewidth = 1.2) +
geom_hline(yintercept = 200, color = "red", linetype = "dashed", linewidth = 1.2) +
labs(
title = "Time Series of Estimated Occupancy and Electricity Rate (10-minute intervals)",
x = "Time",
y = "Count / Rate",
color = NULL # Removes the default "color" label from legend title
) +
scale_color_manual(
values = c(
"Estimated Occupancy (Avg WiFi Clients)" = "blue",
"Total Electricity Rate (KWh)" = "orange"
)
) +
theme_minimal() +
theme(
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)
)
print(p)
According to the plot above, occupancy and electricity consumption fall out of step in two situations: when occupancy exceeds 200, electricity consumption does not rise above 200 kWh (the maximum capacity of the library's electrical system), and when occupancy drops below 50, consumption stays at around 50 kWh (facilities remain switched on even when the library has few visitors).
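Those two mismatch cases can also be flagged programmatically. A minimal sketch, using the 200-client and 50 kWh thresholds read off the plot (the exact cut-offs are judgement calls):
# Sketch: intervals where occupancy and electricity consumption diverge
mismatch <- merged_df_cleaned %>%
  mutate(case = case_when(
    mean_client > 200 & rate <= 200 ~ "High occupancy, capped electricity",
    mean_client < 50 & rate >= 50 ~ "Low occupancy, baseline electricity",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(case))
head(mismatch)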