WebScraping from 2021/11/26
# library
library(rvest)
library(magrittr)
library(jsonlite)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openxlsx)
# create date and variables
date <- as.character(seq(date("2021-11-16"), Sys.Date(), by = 1))
var <- c("datetime",
"srBid", "srBidQse", "srPrice",
"supBid", "supBidQse", "supPrice")
df <- NULL
# loop
for (i in date){
url <- paste0("https://etp.taipower.com.tw/api/infoboard/settle_value/query?startDate=",
i)
# JSON file to data frame
data <- read_html(url) %>%
html_nodes("p") %>%
html_text %>%
fromJSON
data <- data$data
# datetime form
data <- data %>%
mutate(datetime = ymd_hm(paste0(tranDate, " ", tranHour)))
data <- data %>%
select(!!var)
df <- rbind(df, data)
}
head(df)
## datetime srBid srBidQse srPrice supBid supBidQse supPrice
## 1 2021-11-16 00:00:00 376.0 1 400 869.7 0 350
## 2 2021-11-16 01:00:00 346.0 1 400 823.1 0 350
## 3 2021-11-16 02:00:00 254.0 1 400 809.4 0 350
## 4 2021-11-16 03:00:00 363.0 1 400 759.3 0 350
## 5 2021-11-16 04:00:00 346.0 1 400 739.9 0 350
## 6 2021-11-16 05:00:00 361.2 1 400 844.5 0 350
# sr: 即時備轉, sup: 補充備轉
# Bid: 得標容量(國營)(MW), BidQse: 得標容量(民營)(MW), Price: 結清價格(元 / MW·h)
# Quant = Bid + BidQse
df <- df %>%
mutate(month = format(df$datetime, "%Y-%m"),
srQuant = srBid + srBidQse,
supQuant = supBid + supBidQse)
# monthly weighted average price, weight = Quant
month_w_avg_price <- df %>%
group_by(month) %>%
summarise(
w.avg.srPrice = weighted.mean(srPrice, w = srQuant, na.rm = TRUE),
w.avg.supPrice = weighted.mean(supPrice, w = supQuant, na.rm = TRUE))
# mean without weight
#df %>%
# group_by(month) %>%
# summarise(
# across(ends_with("Price"), mean, na.rm = TRUE, .names = "avg.{.col}"))
# output
output <- list(歷史結清價格與交易量 = df, 月均價 = month_w_avg_price)
#write.xlsx(output, file = "歷史結清價格與交易量.xlsx")