This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(hms)
## Warning: package 'hms' was built under R version 4.3.3
##
## Attaching package: 'hms'
##
## The following object is masked from 'package:lubridate':
##
## hms
# Load the WiFi log, then keep only the rows whose (whitespace-trimmed)
# building name is "Library", retaining the timestamp and the
# associated-client count.
wifi <- read_csv(file = "C:/Users/ZUMROTUS SOLICHA/Documents/MK SEMESTER 5/DATMIN/wifi.csv")
wifi <- wifi %>%
  filter(str_trim(Building) == "Library") %>%
  select(time, `Associated Client Count`)
## Rows: 1883844 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Event Time, Uni, Building, Floor
## dbl (2): Associated Client Count, Authenticated Client Count
## dttm (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print("WiFi data (Library only):")
## [1] "WiFi data (Library only):"
print(head(wifi))
## # A tibble: 6 × 2
## time `Associated Client Count`
## <dttm> <dbl>
## 1 2020-02-01 00:02:12 29
## 2 2020-02-01 00:02:12 0
## 3 2020-02-01 00:02:12 21
## 4 2020-02-01 00:02:12 38
## 5 2020-02-01 00:07:34 0
## 6 2020-02-01 00:07:34 33
# Snap every observation to the start of its 10-minute interval.
# read_csv() already delivers `time` as a date-time, so it is floored as is.
# Re-parsing a date-time with ymd_hms() goes through a character conversion
# that can drop a "00:00:00" time part, turning exact-midnight timestamps
# into NA; parsing is therefore only applied when `time` is not yet POSIXct.
wifi <- wifi %>%
  mutate(
    time = if (inherits(time, "POSIXct")) time else ymd_hms(time),
    time = floor_date(time, "10 minutes")
  )
print("WiFi data after standardizing to 10-minute intervals:")
## [1] "WiFi data after standardizing to 10-minute intervals:"
print(head(wifi))
## # A tibble: 6 × 2
## time `Associated Client Count`
## <dttm> <dbl>
## 1 2020-02-01 00:00:00 29
## 2 2020-02-01 00:00:00 0
## 3 2020-02-01 00:00:00 21
## 4 2020-02-01 00:00:00 38
## 5 2020-02-01 00:00:00 0
## 6 2020-02-01 00:00:00 33
# Collapse all readings that share a 10-minute interval into one row holding
# their mean client count (missing counts are ignored); the result is
# returned ungrouped.
wifi <- wifi %>%
  group_by(time) %>%
  summarise(
    mean_client_count = mean(`Associated Client Count`, na.rm = TRUE),
    .groups = "drop"
  )
print("WiFi resampled (mean client count per 10 minutes):")
## [1] "WiFi resampled (mean client count per 10 minutes):"
print(head(wifi))
## # A tibble: 6 × 2
## time mean_client_count
## <dttm> <dbl>
## 1 2020-02-01 00:00:00 22.8
## 2 2020-02-01 00:10:00 20.6
## 3 2020-02-01 00:20:00 17.2
## 4 2020-02-01 00:30:00 15.4
## 5 2020-02-01 00:40:00 12.4
## 6 2020-02-01 00:50:00 10.5
library(readr)
# Load the first of the three library electricity files.
lib1 <- read_csv(
  file = "C:/Users/ZUMROTUS SOLICHA/Documents/MK SEMESTER 5/DATMIN/library1.csv"
)
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the second of the three library electricity files.
lib2 <- read_csv(
  file = "C:/Users/ZUMROTUS SOLICHA/Documents/MK SEMESTER 5/DATMIN/library2.csv"
)
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the third of the three library electricity files.
lib3 <- read_csv(
  file = "C:/Users/ZUMROTUS SOLICHA/Documents/MK SEMESTER 5/DATMIN/library3.csv"
)
## Rows: 18864 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, units
## dbl (3): reading, cumulative, rate
## dttm (1): ts
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print("Struktur timestamp dari masing-masing listrik:")
## [1] "Struktur timestamp dari masing-masing listrik:"
# Inspect the timestamp column of each electricity table.
# str() prints its summary as a side effect and returns NULL invisibly, so it
# is called directly; wrapping it in print() only appended a stray "NULL".
str(lib1$ts)
## POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" "2020-01-01 00:20:00" ...
str(lib2$ts)
## POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" "2020-01-01 00:20:00" ...
str(lib3$ts)
## POSIXct[1:18864], format: "2020-01-01 00:00:00" "2020-01-01 00:10:00" "2020-01-01 00:20:00" ...
#' Fill missing values in one column with the mean of its leading window
#'
#' Computes the mean of the first `n` values of `df[[col_name]]` (ignoring
#' `NA`s) and writes that value into every `NA` position of the column.
#'
#' @param df A data frame or tibble.
#' @param col_name Name of the numeric column to impute, as a single string.
#' @param n Number of leading rows used to compute the fill value. Defaults
#'   to 144, the window size that used to be hard-coded.
#' @return `df` with the `NA`s of `col_name` replaced by the window mean. If
#'   the leading window holds no usable value, `df` is returned unchanged and
#'   a warning is raised.
impute_with_first144 <- function(df, col_name, n = 144L) {
  stopifnot(
    is.character(col_name), length(col_name) == 1L,
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 1
  )
  if (!col_name %in% names(df)) {
    stop("Column '", col_name, "' not found in `df`.", call. = FALSE)
  }

  values <- df[[col_name]]
  # head() stops at the end of the column, so short inputs are not padded
  # with extra NAs the way `values[1:n]` would be.
  fill_value <- mean(head(values, n), na.rm = TRUE)

  # An all-missing window yields NaN; writing that back would only swap one
  # kind of missing value for another, so report it instead.
  if (!is.finite(fill_value)) {
    warning(
      "No usable values in the first ", n, " rows of '", col_name,
      "'; missing values left as is.",
      call. = FALSE
    )
    return(df)
  }

  values[is.na(values)] <- fill_value
  df[[col_name]] <- values
  df
}
# Impute the missing `rate` values of each of the three electricity tables.
lib1 <- impute_with_first144(df = lib1, col_name = "rate")
lib2 <- impute_with_first144(df = lib2, col_name = "rate")
lib3 <- impute_with_first144(df = lib3, col_name = "rate")
print("Contoh data listrik setelah imputasi missing value:")
## [1] "Contoh data listrik setelah imputasi missing value:"
print(head(lib1))
## # A tibble: 6 × 6
## ts name reading units cumulative rate
## <dttm> <chr> <dbl> <chr> <dbl> <dbl>
## 1 2020-01-01 00:00:00 MC065-L01/M9R2048 1489442 KWh 1489442 7.38
## 2 2020-01-01 00:10:00 MC065-L01/M9R2048 1489449 KWh 1489449 7
## 3 2020-01-01 00:20:00 MC065-L01/M9R2048 1489456 KWh 1489456 7
## 4 2020-01-01 00:30:00 MC065-L01/M9R2048 1489464 KWh 1489464 8
## 5 2020-01-01 00:40:00 MC065-L01/M9R2048 1489471 KWh 1489471 7
## 6 2020-01-01 00:50:00 MC065-L01/M9R2048 1489479 KWh 1489479 8
library(dplyr)
# Merge the three tables on their shared timestamp. Columns of the first two
# tables are suffixed "_1" / "_2"; those of the third keep their original
# names. Exact duplicate rows are dropped, then the three rate columns are
# added up into the total electricity.
electricity <- lib1 %>%
  full_join(lib2, by = "ts", suffix = c("_1", "_2")) %>%
  full_join(lib3, by = "ts") %>%
  distinct() %>%
  mutate(total_electricity = rate_1 + rate_2 + rate)
print("2b. Data listrik gabungan + total:")
## [1] "2b. Data listrik gabungan + total:"
print(head(electricity))
## # A tibble: 6 × 17
## ts name_1 reading_1 units_1 cumulative_1 rate_1 name_2
## <dttm> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 2020-01-01 00:00:00 MC065-L01/M9… 1489442 KWh 1489442 7.38 MC065…
## 2 2020-01-01 00:10:00 MC065-L01/M9… 1489449 KWh 1489449 7 MC065…
## 3 2020-01-01 00:20:00 MC065-L01/M9… 1489456 KWh 1489456 7 MC065…
## 4 2020-01-01 00:30:00 MC065-L01/M9… 1489464 KWh 1489464 8 MC065…
## 5 2020-01-01 00:40:00 MC065-L01/M9… 1489471 KWh 1489471 7 MC065…
## 6 2020-01-01 00:50:00 MC065-L01/M9… 1489479 KWh 1489479 8 MC065…
## # ℹ 10 more variables: reading_2 <dbl>, units_2 <chr>, cumulative_2 <dbl>,
## # rate_2 <dbl>, name <chr>, reading <dbl>, units <chr>, cumulative <dbl>,
## # rate <dbl>, total_electricity <dbl>
library(dplyr)
# Give the WiFi timestamp the same column name as the electricity one, then
# keep only the intervals that are present in both tables.
wifi <- rename(wifi, ts = time)
df <- wifi %>%
  inner_join(electricity, by = "ts")
print("Data gabungan WiFi dan listrik:")
## [1] "Data gabungan WiFi dan listrik:"
print(head(df))
## # A tibble: 6 × 18
## ts mean_client_count name_1 reading_1 units_1 cumulative_1
## <dttm> <dbl> <chr> <dbl> <chr> <dbl>
## 1 2020-02-01 00:00:00 22.8 MC065-L0… 1558154 KWh 1558154
## 2 2020-02-01 00:10:00 20.6 MC065-L0… 1558167 KWh 1558167
## 3 2020-02-01 00:20:00 17.2 MC065-L0… 1558180 KWh 1558180
## 4 2020-02-01 00:30:00 15.4 MC065-L0… 1558191 KWh 1558191
## 5 2020-02-01 00:40:00 12.4 MC065-L0… 1558202 KWh 1558202
## 6 2020-02-01 00:50:00 10.5 MC065-L0… 1558213 KWh 1558213
## # ℹ 12 more variables: rate_1 <dbl>, name_2 <chr>, reading_2 <dbl>,
## # units_2 <chr>, cumulative_2 <dbl>, rate_2 <dbl>, name <chr>, reading <dbl>,
## # units <chr>, cumulative <dbl>, rate <dbl>, total_electricity <dbl>
print("Plot Time Series Occupancy vs Electricity")
## [1] "Plot Time Series Occupancy vs Electricity"
library(ggplot2)
# Overlay both series on one time axis; the constant colour aesthetics exist
# only to generate the two legend entries.
ggplot(data = df, mapping = aes(x = ts)) +
  geom_line(mapping = aes(y = mean_client_count, color = "Occupancy")) +
  geom_line(mapping = aes(y = total_electricity, color = "Electricity")) +
  theme_minimal() +
  labs(
    title = "Time Series: Occupancy vs Electricity",
    x = "Time",
    y = "Value",
    color = "Legend"
  )
print("Scatter Plot Occupancy vs Electricity")
## [1] "Scatter Plot Occupancy vs Electricity"
library(ggplot2)
# One semi-transparent point per 10-minute interval: occupancy on the x axis,
# total electricity on the y axis.
ggplot(
  data = df,
  mapping = aes(x = mean_client_count, y = total_electricity)
) +
  geom_point(color = "blue", alpha = 0.5) +
  theme_minimal() +
  labs(
    title = "Scatterplot: Occupancy vs Electricity",
    x = "Mean Client Count",
    y = "Total Electricity"
  )
library(dplyr)
library(lubridate)
library(ggplot2)
# Split each timestamp into its calendar date and its clock time so that
# separate days can be drawn against a common time-of-day axis.
df <- mutate(
  df,
  date = as_date(ts),
  time_of_day = as_hms(ts)
)
print("Plot Daily Profile of Occupancy")
## [1] "Plot Daily Profile of Occupancy"
# One faint blue line per calendar day, with the across-day mean for each
# time of day drawn on top in red (group = 1 pools all days for the summary).
ggplot(
  data = df,
  mapping = aes(x = time_of_day, y = mean_client_count, group = date)
) +
  geom_line(color = "blue", alpha = 0.2) +
  stat_summary(
    mapping = aes(group = 1),
    fun = mean,
    geom = "line",
    color = "red",
    linewidth = 1.2
  ) +
  theme_minimal() +
  labs(
    title = "Daily Profile of Occupancy",
    x = "Time of Day",
    y = "Mean Client Count"
  )
# Average occupancy for each hour of the day, busiest hour first.
peak_hours <- df %>%
  group_by(hour = hour(ts)) %>%
  summarise(
    avg_occupancy = mean(mean_client_count, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_occupancy))
print("Peak hours of occupancy:")
## [1] "Peak hours of occupancy:"
print(peak_hours)
## # A tibble: 24 × 2
## hour avg_occupancy
## <int> <dbl>
## 1 15 370.
## 2 14 357.
## 3 16 346.
## 4 13 331.
## 5 12 300.
## 6 17 292.
## 7 11 260.
## 8 18 233.
## 9 19 193.
## 10 10 183.
## # ℹ 14 more rows
# Correlation between occupancy and total electricity, computed only on the
# rows where both values are present.
correlation <- cor(
  x = df[["mean_client_count"]],
  y = df[["total_electricity"]],
  use = "complete.obs"
)
print("Korelasi Occupancy vs Electricity:")
## [1] "Korelasi Occupancy vs Electricity:"
print(correlation)
## [1] 0.8782117
print("Kasus di mana listrik tinggi tapi occupancy rendah:")
## [1] "Kasus di mana listrik tinggi tapi occupancy rendah:"
# Rows whose occupancy lies below its first quartile while total electricity
# lies above its third quartile; show at most the first six of them.
df %>%
  filter(
    mean_client_count < quantile(mean_client_count, probs = 0.25, na.rm = TRUE),
    total_electricity > quantile(total_electricity, probs = 0.75, na.rm = TRUE)
  ) %>%
  head(n = 6)
## # A tibble: 0 × 20
## # ℹ 20 variables: ts <dttm>, mean_client_count <dbl>, name_1 <chr>,
## # reading_1 <dbl>, units_1 <chr>, cumulative_1 <dbl>, rate_1 <dbl>,
## # name_2 <chr>, reading_2 <dbl>, units_2 <chr>, cumulative_2 <dbl>,
## # rate_2 <dbl>, name <chr>, reading <dbl>, units <chr>, cumulative <dbl>,
## # rate <dbl>, total_electricity <dbl>, date <date>, time_of_day <time>
library(lubridate)
library(dplyr)
# Tag each row with its day of the week and a weekday/weekend flag.
# The flag is derived from the numeric day index (week_start = 1 makes
# Monday 1, so Saturday and Sunday are 6 and 7). Matching the text labels
# "Sat"/"Sun" only works in an English locale: wday(label = TRUE) returns
# locale-specific abbreviations, so under any other locale every row would
# silently be classified as "Weekday".
df <- df %>%
  mutate(
    weekday = wday(ts, label = TRUE),
    is_weekend = ifelse(
      wday(ts, week_start = 1) %in% c(6, 7),
      "Weekend",
      "Weekday"
    )
  )
head(df)
## # A tibble: 6 × 22
## ts mean_client_count name_1 reading_1 units_1 cumulative_1
## <dttm> <dbl> <chr> <dbl> <chr> <dbl>
## 1 2020-02-01 00:00:00 22.8 MC065-L0… 1558154 KWh 1558154
## 2 2020-02-01 00:10:00 20.6 MC065-L0… 1558167 KWh 1558167
## 3 2020-02-01 00:20:00 17.2 MC065-L0… 1558180 KWh 1558180
## 4 2020-02-01 00:30:00 15.4 MC065-L0… 1558191 KWh 1558191
## 5 2020-02-01 00:40:00 12.4 MC065-L0… 1558202 KWh 1558202
## 6 2020-02-01 00:50:00 10.5 MC065-L0… 1558213 KWh 1558213
## # ℹ 16 more variables: rate_1 <dbl>, name_2 <chr>, reading_2 <dbl>,
## # units_2 <chr>, cumulative_2 <dbl>, rate_2 <dbl>, name <chr>, reading <dbl>,
## # units <chr>, cumulative <dbl>, rate <dbl>, total_electricity <dbl>,
## # date <date>, time_of_day <time>, weekday <ord>, is_weekend <chr>
library(ggplot2)
# Occupancy over time, one stacked panel per weekday/weekend flag, each panel
# with its own x-axis range.
ggplot(data = df, mapping = aes(x = ts, y = mean_client_count)) +
  geom_line(alpha = 0.6, color = "blue") +
  facet_wrap(facets = ~is_weekend, ncol = 1, scales = "free_x") +
  theme_minimal() +
  labs(
    title = "Occupancy: Weekday vs Weekend (Split)",
    x = "Time",
    y = "Mean Client Count"
  )
library(ggplot2)
# Total electricity over time, one stacked panel per weekday/weekend flag,
# each panel with its own x-axis range.
ggplot(data = df, mapping = aes(x = ts, y = total_electricity)) +
  geom_line(alpha = 0.6, color = "red") +
  facet_wrap(facets = ~is_weekend, ncol = 1, scales = "free_x") +
  theme_minimal() +
  labs(
    title = "Electricity: Weekday vs Weekend (Split)",
    x = "Time",
    y = "Total Electricity"
  )
library(ggplot2)
# Occupancy against total electricity, drawn separately for each
# weekday/weekend flag.
ggplot(
  data = df,
  mapping = aes(x = mean_client_count, y = total_electricity)
) +
  geom_point(color = "darkgreen", alpha = 0.5) +
  facet_wrap(facets = ~is_weekend) +
  theme_minimal() +
  labs(
    title = "Scatter: Occupancy vs Electricity (Split by Weekday/Weekend)",
    x = "Mean Client Count",
    y = "Total Electricity"
  )
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.