Taipower historical settlement trading

WebScraping from 2021/11/26

# library
library(rvest)
library(magrittr)
library(jsonlite)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(openxlsx)

# create date and variables
date <- as.character(seq(date("2021-11-16"), Sys.Date(), by = 1))

var <- c("datetime",
    "srBid", "srBidQse", "srPrice",
    "supBid", "supBidQse", "supPrice")

df <- NULL

# loop
for (i in date){

url <- paste0("https://etp.taipower.com.tw/api/infoboard/settle_value/query?startDate=",
    i)

# JSON file to data frame
data <- read_html(url) %>%
    html_nodes("p") %>%
    html_text %>%
    fromJSON

data <- data$data

# datetime form
data <- data %>%
    mutate(datetime = ymd_hm(paste0(tranDate, " ", tranHour)))

data <- data %>%
    select(!!var)

df <- rbind(df, data)

}

head(df)
##              datetime srBid srBidQse srPrice supBid supBidQse supPrice
## 1 2021-11-16 00:00:00 376.0        1     400  869.7         0      350
## 2 2021-11-16 01:00:00 346.0        1     400  823.1         0      350
## 3 2021-11-16 02:00:00 254.0        1     400  809.4         0      350
## 4 2021-11-16 03:00:00 363.0        1     400  759.3         0      350
## 5 2021-11-16 04:00:00 346.0        1     400  739.9         0      350
## 6 2021-11-16 05:00:00 361.2        1     400  844.5         0      350
# sr: 即時備轉, sup: 補充備轉
# Bid: 得標容量(國營)(MW), BidQse: 得標容量(民營)(MW), Price: 結清價格(元 / MW·h)

# Quant = Bid + BidQse
df <- df %>%
    mutate(month = format(df$datetime, "%Y-%m"),
        srQuant = srBid + srBidQse,
        supQuant = supBid + supBidQse) 

# monthly weighted average price, weight = Quant
month_w_avg_price <- df %>%
    group_by(month) %>%
    summarise(
        w.avg.srPrice = weighted.mean(srPrice, w = srQuant, na.rm = TRUE),
        w.avg.supPrice = weighted.mean(supPrice, w = supQuant, na.rm = TRUE))

# mean without weight
#df %>%
#   group_by(month) %>%
#   summarise(
#       across(ends_with("Price"), mean, na.rm = TRUE, .names = "avg.{.col}")) 

# output


output <- list(歷史結清價格與交易量 = df, 月均價 = month_w_avg_price)

#write.xlsx(output, file = "歷史結清價格與交易量.xlsx")