library(solarf,quietly = TRUE)
library(knitr,quietly = TRUE)
library(ggplot2,quietly = TRUE)
library(klaR,quietly= TRUE) #Naive Bayes Library
library(GGally)
library(ggplot2)
library(plyr)
library(zoo)
#Theme for Plot
science_theme = theme(panel.background=element_blank(), panel.grid.major = element_line(size = 0.5, color = "grey"),
axis.line = element_line(size = 0.7, color = "black"), legend.position = c(0.9,
0.7), text = element_text(size = 14))
#Load data
SCBH1 <- readRDS("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/SCBH1.rds")
Vectorize SCBH1 over the training and test partitions.
SCBH1$SOLR <- SCBH1$SOLR
SCBH1.vector <- station.vectorize(SCBH1,"SOLR")
SCBH1.vector <- SCBH1.vector[,c(1:3,12:21)]
SCBH1.vector.filter <- station.vectorize(SCBH1[!is.na(SCBH1$SOLR),],"SOLR")
SCBH1.vector.filter <- SCBH1.vector.filter[complete.cases(SCBH1.vector.filter),c(1:3,12:21)]
#Separate the year of 2014 for prediction testing
SCBH1.vector.filter.test <- SCBH1.vector.filter[SCBH1.vector.filter$YEAR==2014,]
SCBH1.vector.filter.train <- SCBH1.vector.filter[SCBH1.vector.filter$YEAR!=2014,]
#SCBH1.vector.filter.train <- SCBH1.vector.filter[SCBH1.vector.filter$MON==11 |
# SCBH1.vector.filter$MON==12 |
# SCBH1.vector.filter$MON==1 |
# SCBH1.vector.filter$MON==2 |
# SCBH1.vector.filter$MON==3 |
# SCBH1.vector.filter$MON==4
# ,]
Cluster with K-means = nclusters the training dataset and assign clusters to the daily profiles.
nclusters <- 5
set.seed(1234)
SCBH1.filter.train.clusters <- kmeans(SCBH1.vector.filter.train[,4:ncol(SCBH1.vector.filter)],nclusters)
SCBH1.vector.filter.train$clusterid <- SCBH1.filter.train.clusters[[1]]
SCBH1.vector.filter.train.centroids <- SCBH1.filter.train.clusters[[2]]
intensity.crescent.ordered.clusterid <- as.numeric(names(sort(rowSums(SCBH1.vector.filter.train.centroids))))
#Order the centroids in crescent order of ipntensity and reset rows
SCBH1.vector.filter.train.centroids <- SCBH1.vector.filter.train.centroids[intensity.crescent.ordered.clusterid,]
rownames(SCBH1.vector.filter.train.centroids) <- seq(1,nclusters)
#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
SCBH1.vector.filter.train$clusterid <- intensity.crescent.ordered.clusterid[SCBH1.vector.filter.train$clusterid]
centroids_plot <- data.frame(SCBH1.vector.filter.train.centroids)
centroids_plot$color <-rownames(SCBH1.vector.filter.train.centroids)
colnames(centroids_plot) <- c(colnames(SCBH1.vector.filter.train.centroids),"Profile")
kable(SCBH1.vector.filter.train.centroids)
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
---|---|---|---|---|---|---|---|---|---|
93.25782 | 143.7437 | 182.2846 | 205.3249 | 220.7571 | 234.0164 | 240.3055 | 239.8390 | 206.2414 | 113.76751 |
261.61291 | 416.2848 | 518.5012 | 533.2761 | 475.8101 | 406.1173 | 349.1187 | 262.7002 | 173.7578 | 70.17312 |
174.31396 | 304.4604 | 433.1721 | 561.7284 | 656.6293 | 688.9423 | 656.3423 | 555.5505 | 366.2306 | 156.37477 |
365.70480 | 581.6513 | 741.5203 | 808.6900 | 769.6827 | 679.7417 | 555.8782 | 407.4649 | 265.5332 | 109.94280 |
365.05443 | 583.3926 | 761.2091 | 898.6913 | 966.7140 | 958.8854 | 855.4969 | 679.2687 | 450.2462 | 193.17814 |
ggparcoord(
data = centroids_plot,
columns=c(1:ncol(SCBH1.vector.filter.train.centroids)),
scale="globalminmax",
alphaLines=0.2,
groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
ylab("Solar Radiation Itensity (W/m2") + ggtitle("Solar Radiation Discretized Profiles from 2003 to 2013 of Schofield Barracks Station") + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))
#+ scale_fill_discrete(name="Discretized\ Profile", breaks=c("1", "2", "3","4","5"), labels=c("1 (Overcast)", "2","3","4","5"))# + scale_colour_brewer(palette="Spectral")
Results.groundtruth <- station.assign.daily.cluster(SCBH1.vector.filter.test,SCBH1.vector.filter.train.centroids)
slide_window <- function(vector,size){
df <- data.frame(rollapply(vector, width = size, by = 1, FUN = f1<-function(x){return (x)}, align = "left"))
return(df)
}
calc_daily_mean_error <- function(train.vector,test.vector,size){
train <- slide_window(train.vector,size)
colnames(train) <- c(paste0("Stm",seq(size-1,1)),"St")
train[] <- lapply(train, as.factor)
test <- slide_window(test.vector,size)
colnames(test) <- c(paste0("Stm",seq(size-1,1)),"St")
test[] <- lapply(test, as.factor)
m <- NaiveBayes(St ~ ., data = train)
forecast <- as.numeric(predict(m,data.frame(test[,1]))$class)
daily.err <- error.absolute.centroid(Results.groundtruth[size:nrow(Results.groundtruth),4:13],forecast,SCBH1.vector.filter.train.centroids)
mean.daily.err <- mean(daily.err)
return (mean.daily.err)
}
#Amount of Consecutive Days. 2 days <=> forecast using one day before.
err.means <- vector("list",7)
for(i in 2:8){
err.means[[i-1]] <- calc_daily_mean_error(SCBH1.vector.filter.train$clusterid,Results.groundtruth$clusterid,i)
}
df.err.means <- as.data.frame(err.means)
colnames(df.err.means) <- c("B1","B2","B3","B4","B5","B6","B7")
labels <- c("B1","B2","B3","B4","B5","B6","B7")
qplot(labels,as.numeric(df.err.means), geom="bar",stat="identity",xlab="Model",ylab="Daily Mean Absolute Error",main="Naive Bayes") + science_theme
kable(df.err.means)
B1 | B2 | B3 | B4 | B5 | B6 | B7 |
---|---|---|---|---|---|---|
1612.885 | 1633.898 | 1640.278 | 1694.219 | 1696.3 | 1749.063 | 1675.328 |