library(readr)
library(lubridate)
library(dplyr)
library(magrittr)
library(ggplot2)
library(corrplot)
library(tidyr)
library(cowplot) # For arranging plots
#import file
data_wifi <- read_csv("C:/Users/litat/Downloads/wifi.csv")
data_library1 <- read_csv("C:/Users/litat/Downloads/library1.csv")
data_library2 <- read_csv("C:/Users/litat/Downloads/library2.csv")
data_library3 <- read_csv("C:/Users/litat/Downloads/library3.csv")
#WIFI
#kolom di file wifi
colnames(data_wifi)
## [1] "time" "Event Time"
## [3] "Associated Client Count" "Authenticated Client Count"
## [5] "Uni" "Building"
## [7] "Floor"
#data di kolom Building
unique(data_wifi$Building)
## [1] "Graduate College" "Management School"
## [3] "SW hse 32-33" "SW hse 29"
## [5] "Furness outer" "Slaidburn House (LUSU)"
## [7] "SW hse 34" "Bowland hall"
## [9] "Fylde" "SW hse 55-56"
## [11] "SW hse 40-42" "Bowland Twr (Old Bowland Annexe)"
## [13] "Pendle" "Faraday and cTAP"
## [15] "Institute for Advanced Studies" "University House"
## [17] "SW hse 53-54" "Engineering"
## [19] "Field Station" "SW hse 36"
## [21] "Infolab" "SW hse 21-23"
## [23] "LICA" "FU Hse 71-74"
## [25] "Grizedale" "Charles Carter"
## [27] "Furness" "SW hse 43-45"
## [29] "SW hse 12-16" "Bowland Main"
## [31] "SW hse 35" "Human Resources"
## [33] "County" "FY Hse 65-70"
## [35] "Barker House Farm" "SW hse 27-28"
## [37] "Bowland Annexe" "John Creed"
## [39] "grize-res" "SW hse 24-26"
## [41] "SW hse 20" "Ruskin Library"
## [43] "County South/Cartmel" "Library"
## [45] "Conference Centre" "Postgrad Stats (PSC)"
## [47] "Bowland North" "George Fox"
## [49] "Hse 75 77" "Bailrigg House"
## [51] "Sports Centre" "SW hse 30-31"
## [53] "Bowland Ash" "Alex Square"
## [55] "LEC" "MDC"
## [57] "ISS Building" "Chaplaincy Centre"
## [59] "CETAD" "SW hse 50-52"
## [61] "SW hse 17-19" "Central Workshops"
## [63] "SW hse 46-49" "SW hse 39"
## [65] "Science and technology" "Whewell"
## [67] "SW hse 37-38" "Lonsdale College (SW)"
## [69] "Physics" "County Field"
## [71] "Great Hall" "SW hse-158-179"
## [73] "Preschool" "Reception"
## [75] "Hazelrigg" "Energy Centre"
#filter building=library
wifi_library <- data_wifi[data_wifi$Building == "Library", ]
head(wifi_library)
## # A tibble: 6 × 7
## time `Event Time` Associated Client Co…¹ Authenticated Client…² Uni
## <chr> <chr> <dbl> <dbl> <chr>
## 1 2/1/2020 0:02 Sat Feb 01 … 29 28 Lanc…
## 2 2/1/2020 0:02 Sat Feb 01 … 0 0 Lanc…
## 3 2/1/2020 0:02 Sat Feb 01 … 21 20 Lanc…
## 4 2/1/2020 0:02 Sat Feb 01 … 38 37 Lanc…
## 5 2/1/2020 0:07 Sat Feb 01 … 0 0 Lanc…
## 6 2/1/2020 0:07 Sat Feb 01 … 33 31 Lanc…
## # ℹ abbreviated names: ¹​`Associated Client Count`,
## # ²​`Authenticated Client Count`
## # ℹ 2 more variables: Building <chr>, Floor <chr>
str(wifi_library)
## tibble [15,948 × 7] (S3: tbl_df/tbl/data.frame)
## $ time : chr [1:15948] "2/1/2020 0:02" "2/1/2020 0:02" "2/1/2020 0:02" "2/1/2020 0:02" ...
## $ Event Time : chr [1:15948] "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" "Sat Feb 01 00:02:12 UTC 2020" ...
## $ Associated Client Count : num [1:15948] 29 0 21 38 0 33 22 39 30 0 ...
## $ Authenticated Client Count: num [1:15948] 28 0 20 37 0 31 20 37 27 0 ...
## $ Uni : chr [1:15948] "Lancaster University" "Lancaster University" "Lancaster University" "Lancaster University" ...
## $ Building : chr [1:15948] "Library" "Library" "Library" "Library" ...
## $ Floor : chr [1:15948] "A floor" "LG floor" "C floor" "B floor" ...
#ubah ke datetime
wifi_library$time <- as.POSIXct(wifi_library$time, format = "%m/%d/%Y %H:%M")
#interval 10 menit
wifi_library <- wifi_library %>%
mutate(time = floor_date(time, "10 minutes")) %>%
group_by(time) %>% # kelompokkan per 10 menit
summarise(across(where(is.numeric), mean, na.rm = TRUE), .groups = "drop")
#ENERGY
#cek type data
str(data_library1)
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ts : chr [1:18864] "1/1/2020 0:00" "1/1/2020 0:10" "1/1/2020 0:20" "1/1/2020 0:30" ...
## $ name : chr [1:18864] "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" "MC065-L01/M9R2048" ...
## $ reading : num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
## $ units : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
## $ cumulative: num [1:18864] 1489442 1489449 1489456 1489464 1489471 ...
## $ rate : num [1:18864] NA 7 7 8 7 8 7 8 7 8 ...
## - attr(*, "spec")=
## .. cols(
## .. ts = col_character(),
## .. name = col_character(),
## .. reading = col_double(),
## .. units = col_character(),
## .. cumulative = col_double(),
## .. rate = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
str(data_library2)
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ts : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
## $ name : chr [1:18864] "MC065-L01/M11R2056" "MC065-L01/M11R2056" "MC065-L01/M11R2056" "MC065-L01/M11R2056" ...
## $ reading : num [1:18864] 2129016 2129034 2129054 2129071 2129086 ...
## $ units : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
## $ cumulative: num [1:18864] 2129016 2129034 2129054 2129071 2129086 ...
## $ rate : num [1:18864] NA 18 20 17 15 17 17 17 17 17 ...
## - attr(*, "spec")=
## .. cols(
## .. ts = col_datetime(format = ""),
## .. name = col_character(),
## .. reading = col_double(),
## .. units = col_character(),
## .. cumulative = col_double(),
## .. rate = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
str(data_library3)
## spc_tbl_ [18,864 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ts : POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" ...
## $ name : chr [1:18864] "MC065-L01/M13R2064" "MC065-L01/M13R2064" "MC065-L01/M13R2064" "MC065-L01/M13R2064" ...
## $ reading : num [1:18864] 6914209 6914266 6914310 6914376 6914439 ...
## $ units : chr [1:18864] "KWh" "KWh" "KWh" "KWh" ...
## $ cumulative: num [1:18864] 6914209 6914266 6914310 6914376 6914439 ...
## $ rate : num [1:18864] NA 57 44 66 63 35 47 55 34 41 ...
## - attr(*, "spec")=
## .. cols(
## .. ts = col_datetime(format = ""),
## .. name = col_character(),
## .. reading = col_double(),
## .. units = col_character(),
## .. cumulative = col_double(),
## .. rate = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
#ubah ke datetime
data_library1$ts <- as.POSIXct(data_library1$ts, format = "%m/%d/%Y %H:%M")
data_library2$ts <- as.POSIXct(data_library2$ts, format = "%m/%d/%Y %H:%M")
data_library3$ts <- as.POSIXct(data_library3$ts, format = "%m/%d/%Y %H:%M")
#rename kolom ts
names(data_library1)[names(data_library1) == "ts"] <- "time"
names(data_library2)[names(data_library2) == "ts"] <- "time"
names(data_library3)[names(data_library3) == "ts"] <- "time"
#gabungin file library
gabung12 <- merge(data_library1, data_library2, by = "time", all = TRUE) # full join
all_data_library <- merge(gabung12, data_library3, by = "time", all = TRUE)
head(all_data_library)
## time name.x reading.x units.x cumulative.x rate.x
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh 1489442 NA
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh 1489449 7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh 1489456 7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh 1489464 8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh 1489471 7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh 1489479 8
## name.y reading.y units.y cumulative.y rate.y name
## 1 MC065-L01/M11R2056 2129016 KWh 2129016 NA MC065-L01/M13R2064
## 2 MC065-L01/M11R2056 2129034 KWh 2129034 18 MC065-L01/M13R2064
## 3 MC065-L01/M11R2056 2129054 KWh 2129054 20 MC065-L01/M13R2064
## 4 MC065-L01/M11R2056 2129071 KWh 2129071 17 MC065-L01/M13R2064
## 5 MC065-L01/M11R2056 2129086 KWh 2129086 15 MC065-L01/M13R2064
## 6 MC065-L01/M11R2056 2129103 KWh 2129103 17 MC065-L01/M13R2064
## reading units cumulative rate
## 1 6914209 KWh 6914209 NA
## 2 6914266 KWh 6914266 57
## 3 6914310 KWh 6914310 44
## 4 6914376 KWh 6914376 66
## 5 6914439 KWh 6914439 63
## 6 6914474 KWh 6914474 35
energy <- all_data_library %>%
select(time, rate.x, rate.y, rate) %>% # ambil kolom yang diinginkan
mutate(total_rate = rate.x + rate.y + rate) # jumlahkan
head(energy)
## time rate.x rate.y rate total_rate
## 1 2020-01-01 00:00:00 NA NA NA NA
## 2 2020-01-01 00:10:00 7 18 57 82
## 3 2020-01-01 00:20:00 7 20 44 71
## 4 2020-01-01 00:30:00 8 17 66 91
## 5 2020-01-01 00:40:00 7 15 63 85
## 6 2020-01-01 00:50:00 8 17 35 60
#gabungin wifi dan energy
final_data<- merge(wifi_library, energy, by = "time", all = TRUE)
head(final_data)
## time Associated Client Count Authenticated Client Count rate.x
## 1 2020-01-01 00:00:00 NA NA NA
## 2 2020-01-01 00:10:00 NA NA 7
## 3 2020-01-01 00:20:00 NA NA 7
## 4 2020-01-01 00:30:00 NA NA 8
## 5 2020-01-01 00:40:00 NA NA 7
## 6 2020-01-01 00:50:00 NA NA 8
## rate.y rate total_rate
## 1 NA NA NA
## 2 18 57 82
## 3 20 44 71
## 4 17 66 91
## 5 15 63 85
## 6 17 35 60
write_csv(final_data, "C:/Users/litat/Downloads/final_data.csv")
# =============== BAGIAN 2: DATA CLEANING ===============
# 1. Tangani Missing Values pada Energi
# → isi NA dengan rata-rata dari 144 observasi pertama untuk tiap meter
for (col in c("rate.x", "rate.y", "rate")) {
if (col %in% names(final_data)) {
mean_144 <- mean(final_data[[col]][1:144], na.rm = TRUE)
final_data[[col]][is.na(final_data[[col]])] <- mean_144
}
}
# Energi total juga diperiksa
if ("total_rate" %in% names(final_data)) {
mean_144_total <- mean(final_data$total_rate[1:144], na.rm = TRUE)
final_data$total_rate[is.na(final_data$total_rate)] <- mean_144_total
}
# 2. Standarisasi Timestamp (kolom time)
final_data$time <- as.POSIXct(final_data$time, format="%Y-%m-%d %H:%M:%S", tz="UTC")
# 3. Hapus baris duplikat berdasarkan kolom time
final_data <- final_data[!duplicated(final_data$time), ]
# 4. Urutkan data berdasarkan waktu
final_data <- final_data[order(final_data$time), ]
# 5. Drop NA
final_data <- final_data %>% drop_na()
# 6. Simpan hasil cleaning
write_csv(final_data, "C:/Users/litat/Downloads/cleaned_library_data.csv")
# Cek hasil
head(final_data)
## time Associated Client Count Authenticated Client Count
## 1 2020-01-31 17:00:00 22.750 21.625
## 2 2020-01-31 17:10:00 20.625 19.250
## 3 2020-01-31 17:20:00 17.250 17.000
## 4 2020-01-31 17:30:00 15.375 15.250
## 5 2020-01-31 17:40:00 12.375 12.000
## 6 2020-01-31 17:50:00 10.500 10.500
## rate.x rate.y rate total_rate
## 1 7.384615 17.87413 39.55944 64.81818
## 2 13.000000 11.00000 73.00000 97.00000
## 3 13.000000 11.00000 72.00000 96.00000
## 4 11.000000 15.00000 74.00000 100.00000
## 5 11.000000 13.00000 65.00000 89.00000
## 6 11.000000 16.00000 64.00000 91.00000
# ---- VISUALISASI ----
# 1. Time Series
# TIME SERIES - OCCUPANCY
ggplot(final_data, aes(x = time, y = `Associated Client Count`)) +
geom_line(color = "blue", linewidth = 0.8) +
labs(title = "Time Series of Occupancy in Library",
x = "Time",
y = "Occupancy (Associated Clients)") +
theme_minimal()
# TIME SERIES - ENERGY
ggplot(final_data, aes(x = time, y = total_rate)) +
geom_line(color = "red", linewidth = 0.8) +
labs(title = "Time Series of Energy Consumption in Library",
x = "Time",
y = "Energy Consumption (kWh)") +
theme_minimal()
# 2. SCATTER PLOT
ggplot(final_data, aes(x = `Associated Client Count`, y = total_rate)) +
geom_point(alpha = 0.4, color = "darkgreen") +
geom_smooth(method = "lm", se = TRUE, color = "pink") +
labs(title = "Scatter Plot: Occupancy vs Energy Consumption",
x = "Occupancy (WiFi Clients)",
y = "Energy Consumption (kWh)") +
theme_minimal()
# 3. DAILY PROFILES
# Tambahkan kolom jam & tanggal
final_data <- final_data %>%
mutate(hour = hour(time),
date = as.Date(time))
# Plot siklus harian occupancy
ggplot(final_data, aes(x = hour, y = `Associated Client Count`, group = date)) +
geom_line(alpha = 0.7, color = "black") +
labs(title = "Daily Occupancy Profiles (per Day)",
x = "Hour of Day",
y = "Occupancy (WiFi Clients)") +
theme_minimal()
# Plot siklus harian energi
ggplot(final_data, aes(x = hour, y = total_rate, group = date)) +
geom_line(alpha = 0.7, color = "maroon") +
labs(title = "Daily Energy Consumption Profiles (per Day)",
x = "Hour of Day",
y = "Energy Consumption (kWh)") +
theme_minimal()
# AVERAGE DAILY PROFILE
# Hitung rata-rata per jam (gabungan semua hari)
daily_avg <- final_data %>%
group_by(hour) %>%
summarise(avg_occ = mean(`Associated Client Count`, na.rm = TRUE),
avg_energy = mean(total_rate, na.rm = TRUE))
# Plot Okupansi Rata-rata
p_avg_occ <- ggplot(daily_avg, aes(x = hour, y = avg_occ)) +
geom_line(color = "orange", linewidth = 1.2) +
geom_point(color = "orange", size = 2) +
labs(title = "Average Daily Profile",
x = "hour",
y = "Average Occupancy") +
theme_minimal()
# Plot Energi Rata-rata
p_avg_energy <- ggplot(daily_avg, aes(x = hour, y = avg_energy)) +
geom_line(color = "purple", linewidth = 1.2) +
geom_point(color = "purple", size = 2) +
labs(x = "hour",
y = "Average Energy Consumption (kWh)") +
theme_minimal()
# Gabungkan kedua plot di atas satu sama lain
plot_grid(p_avg_occ, p_avg_energy, ncol = 1, align = 'v')
# 5. Analisis ## 5.1 Peak hours of occupancy
# ---- Analisis ----
# 1. MENENTUKAN JAM PUNCAK KUNJUNGAN
# Mengurutkan data rata-rata kunjungan per jam dari yang tertinggi
peak_hours <- daily_avg %>%
arrange(desc(avg_occ))
# Menampilkan 5 jam tersibuk
print("Lima jam puncak kunjungan:")
## [1] "Lima jam puncak kunjungan:"
print(head(peak_hours, 5))
## # A tibble: 5 × 3
## hour avg_occ avg_energy
## <int> <dbl> <dbl>
## 1 8 362. 190.
## 2 7 355. 188.
## 3 9 334. 190.
## 4 6 330. 187.
## 5 5 298. 181.
# Visualisasi jam puncak dengan bar chart untuk lebih jelas
ggplot(daily_avg, aes(x = reorder(hour, -avg_occ), y = avg_occ)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_text(aes(label = round(avg_occ, 1)), vjust = -0.5, size = 3.5) +
labs(title = "Rata-Rata Kunjungan per Jam (Jam Puncak)",
x = "Jam dalam Sehari",
y = "Rata-Rata Jumlah Pengunjung (Klien WiFi)") +
theme_minimal()
# 2. MELIHAT PENGARUH JUMLAH KUNJUNGAN TERHADAP KONSUMSI LISTRIK
# Menghitung koefisien korelasi Pearson
# 'use = "complete.obs"' digunakan untuk mengabaikan baris dengan nilai NA
correlation_value <- cor(final_data$`Associated Client Count`, final_data$total_rate, use = "complete.obs")
# Menampilkan hasil korelasi
print(paste("Koefisien korelasi antara jumlah pengunjung dan konsumsi energi adalah:", round(correlation_value, 4)))
## [1] "Koefisien korelasi antara jumlah pengunjung dan konsumsi energi adalah: 0.8675"
# 3. MENCARI KONDISI ANOMALI
# Menentukan batas kuantil (persentil)
# Q3 untuk nilai "tinggi" (75%) dan Q1 untuk nilai "rendah" (25%)
high_energy_threshold <- quantile(final_data$total_rate, 0.75, na.rm = TRUE)
low_occupancy_threshold <- quantile(final_data$`Associated Client Count`, 0.25, na.rm = TRUE)
high_occupancy_threshold <- quantile(final_data$`Associated Client Count`, 0.75, na.rm = TRUE)
low_energy_threshold <- quantile(final_data$total_rate, 0.25, na.rm = TRUE)
# Skenario 1: Listrik tinggi, padahal orang sedikit
high_energy_low_occ <- final_data %>%
filter(total_rate > high_energy_threshold & `Associated Client Count` < low_occupancy_threshold)
print("Kondisi Anomali: Listrik Tinggi, Pengunjung Sedikit")
## [1] "Kondisi Anomali: Listrik Tinggi, Pengunjung Sedikit"
print(head(high_energy_low_occ))
## [1] time Associated Client Count
## [3] Authenticated Client Count rate.x
## [5] rate.y rate
## [7] total_rate hour
## [9] date
## <0 rows> (or 0-length row.names)
# Skenario 2: Listrik rendah, padahal orang banyak (efisiensi energi)
low_energy_high_occ <- final_data %>%
filter(total_rate < low_energy_threshold & `Associated Client Count` > high_occupancy_threshold)
print("Kondisi Anomali: Listrik Rendah, Pengunjung Banyak")
## [1] "Kondisi Anomali: Listrik Rendah, Pengunjung Banyak"
print(head(low_energy_high_occ))
## [1] time Associated Client Count
## [3] Authenticated Client Count rate.x
## [5] rate.y rate
## [7] total_rate hour
## [9] date
## <0 rows> (or 0-length row.names)
# Visualisasi anomali pada scatter plot
ggplot(final_data, aes(x = `Associated Client Count`, y = total_rate)) +
geom_point(alpha = 0.3, color = "grey") + # Data asli sebagai background
geom_point(data = high_energy_low_occ, aes(color = "Listrik Tinggi, Org Sedikit"), size = 2.5) +
geom_point(data = low_energy_high_occ, aes(color = "Listrik Rendah, Org Banyak"), size = 2.5) +
labs(title = "Identifikasi Anomali pada Scatter Plot",
x = "Jumlah Pengunjung (Klien WiFi)",
y = "Konsumsi Energi (kWh)",
color = "Kondisi Anomali") +
theme_minimal()
# — 6. Weekend vs Weekday Comparison —
#WEEKDAY VS WEEKEND
final_data <- final_data %>%
mutate(
date = as.Date(time),
hour = hour(time),
weekday_num = wday(date, week_start = 1), # 1=Mon … 7=Sun
weekday = wday(date, label = TRUE, abbr = FALSE, week_start = 1),
day_type = if_else(weekday_num %in% c(6,7), "Weekend", "Weekday") %>%
factor(levels = c("Weekday","Weekend"))
)
# jumlah pengunjung & energi per hari
daily_counts <- final_data %>%
group_by(date, day_type) %>%
summarise(
occ = mean(`Associated Client Count`, na.rm = TRUE),
energy = mean(total_rate, na.rm = TRUE),
.groups = "drop"
)
# rata-rata per jam
hourly_avg <- final_data %>%
group_by(hour, day_type) %>%
summarise(
avg_occ = mean(`Associated Client Count`, na.rm = TRUE),
avg_energy = mean(total_rate, na.rm = TRUE),
.groups = "drop"
)
ggplot(daily_counts, aes(x = date, y = occ, color = day_type)) +
geom_line() +
labs(title = "Occupancy per Day: Weekday vs Weekend",
x = "Date", y = "Occupancy") +
theme_minimal()
ggplot(daily_counts, aes(x = date, y = energy, color = day_type)) +
geom_line() +
labs(title = "Energy per Day: Weekday vs Weekend",
x = "Date", y = "Energy (kWh)") +
theme_minimal()
ggplot(hourly_avg, aes(x = hour, y = avg_occ, color = day_type)) +
geom_line(linewidth = 1.1) +
geom_point() +
scale_x_continuous(breaks = 0:23) +
labs(title = "Average Occupancy per Hour",
x = "Hour", y = "Avg Occupancy") +
theme_minimal()
ggplot(hourly_avg, aes(x = hour, y = avg_energy, color = day_type)) +
geom_line(linewidth = 1.1) +
geom_point() +
scale_x_continuous(breaks = 0:23) +
labs(title = "Average Energy per Hour",
x = "Hour", y = "Avg Energy (kWh)") +
theme_minimal()
heat_occ <- final_data %>%
group_by(weekday, hour, day_type) %>%
summarise(avg_occ = mean(`Associated Client Count`, na.rm = TRUE), .groups = "drop")
ggplot(heat_occ, aes(x = hour, y = weekday, fill = avg_occ)) +
geom_tile() +
facet_wrap(~day_type) +
scale_x_continuous(breaks = 0:23) +
labs(title = "Heatmap Occupancy", x = "Hour", y = "Day") +
theme_minimal()
# rata-rata occupancy per hari
weekday_occ <- daily_counts %>% filter(day_type=="Weekday") %>% pull(occ)
weekend_occ <- daily_counts %>% filter(day_type=="Weekend") %>% pull(occ)
t.test(weekday_occ, weekend_occ) # kalau datanya normal
##
## Welch Two Sample t-test
##
## data: weekday_occ and weekend_occ
## t = 3.9403, df = 12.2, p-value = 0.001902
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.45667 105.49457
## sample estimates:
## mean of x mean of y
## 154.10207 86.12645
# kalau data tidak normal: wilcox.test(weekday_occ, weekend_occ)
# distribusi jam occupancy
tab_hour <- final_data %>%
mutate(hour = factor(hour)) %>%
count(day_type, hour) %>%
pivot_wider(names_from = day_type, values_from = n, values_fill = 0)
chisq.test(as.matrix(tab_hour[, -1]))
##
## Pearson's Chi-squared test
##
## data: as.matrix(tab_hour[, -1])
## X-squared = 0.71355, df = 23, p-value = 1