#Load Required Libraries
library(solarf)
library(knitr) #For pretty tables on the report using kable
library(plyr)
library(ggplot2)
library(GGally)
library(caret)
library(extrafont)
#Theme for Plot
science_theme = theme(
panel.background=element_blank(),
#panel.grid.major = element_line(size = 0.5, color = "grey"),
#axis.line = element_line(size = 0.7, color = "black"),
#legend.position = c(0.9, 0.7),
#text = element_text(size = 14),
axis.line=element_blank(),
text = element_text(size=15, family="Interstate"),
axis.text.x = element_text(hjust = 0, size=15,color="black"),
axis.text.y = element_text(hjust = 0, size=15,color="black"),
axis.title.y=element_text(size=15),
axis.title.x=element_text(size=15),
axis.ticks = element_line(colour=NA),
panel.grid.major = element_line(colour = 'gray', linetype = 'dashed'),
panel.grid.minor = element_line(colour = NA),
panel.background = element_rect(colour = 'white'))
Load Station C0875 (Southeast Oahu Island)
C0875 <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/C0875_2011_2014.csv")
station.vectorize.hour <- function(station.dataframe,variable){
library(reshape2)
use.only.first.occurrence <- function(minute.obs){
#Keep a single value per (hour, minute) cell: the first observation recorded
return (minute.obs[1])
}
station.dataframe.vectorized <- dcast(data=station.dataframe, formula=YEAR + MON + DAY + HR ~ MIN,value.var = variable,fun.aggregate=use.only.first.occurrence)
return (station.dataframe.vectorized)
}
Vectorize Step
Vectorize C0875 over the training and test partitions.
#Moves from minute level to hourly level
C0875.vector.h <- station.vectorize.hour(C0875,"SOLR")
C0875.h <- C0875.vector.h[,c("YEAR","MON","DAY","HR")]
C0875.h$MIN <- 0
C0875.h$HR <- C0875.h$HR + 1
C0875.h$SOLR <- rowMeans(C0875.vector.h[,5:ncol(C0875.vector.h)],na.rm=TRUE) #Hourly mean over minutes 0-59, so each hourly value is effectively centered on the half hour
C0875.h <- readRDS("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/SCBH1.rds") #Note: this overwrites the aggregation above with the pre-computed hourly dataset for station SCBH1; the C0875 variable names are kept below
#C0875.h <- C0875.h[C0875.h$YEAR > 2011,]
C0875.vector <- station.vectorize(C0875.h,"SOLR")
C0875.vector <- C0875.vector[,c(1:3,12:21)] #Keep the date columns plus the daytime SOLR columns (12:21 for SCBH1); SOLR starts on Feb 15, 2012
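station.vectorize comes from the solarf package and is not defined in this report. As an illustration only, a minimal sketch of its assumed behavior, mirroring station.vectorize.hour above but pivoting the hourly rows into one row per day:
#Hypothetical sketch of the assumed behavior of solarf::station.vectorize: pivot the
#hourly observations into one row per day, with one column per hour of the day.
station.vectorize.sketch <- function(station.dataframe, variable) {
  library(reshape2)
  dcast(data = station.dataframe,
        formula = YEAR + MON + DAY ~ HR,
        value.var = variable,
        fun.aggregate = function(hour.obs) hour.obs[1])
}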
Data Point Filtering
#C0875.vector.filter <- station.vectorize(C0875[!is.na(C0875$SOLR),],"SOLR")
C0875.vector.filter <- C0875.vector[complete.cases(C0875.vector),]
Train and Test Split
#Separate the year of 2014 for prediction testing
C0875.vector.filter.test <- C0875.vector.filter[C0875.vector.filter$YEAR==2014,]
C0875.vector.filter.train <- C0875.vector.filter[C0875.vector.filter$YEAR!=2014,]
#C0875.vector.filter.train <- C0875.vector.filter[C0875.vector.filter$MON==11 |
# C0875.vector.filter$MON==12 |
# C0875.vector.filter$MON==1 |
# C0875.vector.filter$MON==2 |
# C0875.vector.filter$MON==3 |
# C0875.vector.filter$MON==4
# ,]
Cluster Step
Cluster Train Dataset
Cluster the training dataset with k-means (k = nclusters) and assign each daily profile to a cluster.
nclusters <- 5
set.seed(1234)
C0875.filter.train.clusters <- kmeans(C0875.vector.filter.train[,4:ncol(C0875.vector.filter.train)],nclusters)
C0875.vector.filter.train$clusterid <- C0875.filter.train.clusters$cluster
C0875.vector.filter.train.centroids <- C0875.filter.train.clusters$centers
intensity.crescent.ordered.clusterid <- as.numeric(names(sort(rowSums(C0875.vector.filter.train.centroids))))
#Order the centroids in increasing order of intensity and reset the row names
C0875.vector.filter.train.centroids <- C0875.vector.filter.train.centroids[intensity.crescent.ordered.clusterid,]
rownames(C0875.vector.filter.train.centroids) <- seq(1,nclusters)
#Relabel the cluster ids: position i of intensity.crescent.ordered.clusterid holds the original id
#that becomes new id i, so match() maps each original id to its position in the new ordering
C0875.vector.filter.train$clusterid <- match(C0875.vector.filter.train$clusterid, intensity.crescent.ordered.clusterid)
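To make the relabeling concrete, a small self-contained example with illustrative values (not taken from this dataset):
#Toy example of the relabeling with match(). If the original ids sorted by increasing
#intensity are 3, 1, 5, 2, 4, then original cluster 3 becomes profile 1, cluster 1
#becomes profile 2, and so on.
old.ids.by.intensity <- c(3, 1, 5, 2, 4)
old.labels <- c(1, 3, 3, 5, 2)
match(old.labels, old.ids.by.intensity) #returns 2 1 1 3 4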
Centroids
centroids_plot <- data.frame(C0875.vector.filter.train.centroids)
centroids_plot$color <-rownames(C0875.vector.filter.train.centroids)
colnames(centroids_plot) <- c(colnames(C0875.vector.filter.train.centroids),"Profile")
kable(C0875.vector.filter.train.centroids)
|  | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 96.45584 | 145.7208 | 184.0028 | 208.9416 | 225.6738 | 235.2094 | 239.6068 | 238.3433 | 203.0014 | 111.5228 |
| 2 | 275.89212 | 431.7226 | 542.3784 | 549.3476 | 472.7911 | 394.2021 | 338.9863 | 254.0959 | 164.6301 | 67.0274 |
| 3 | 171.47927 | 302.4083 | 428.9764 | 542.8919 | 646.5695 | 679.3775 | 633.5432 | 530.5714 | 347.3910 | 149.9687 |
| 4 | 362.70761 | 583.1298 | 739.1073 | 815.1869 | 784.6263 | 689.3010 | 566.5087 | 413.2837 | 266.8270 | 109.6263 |
| 5 | 355.95481 | 572.0663 | 756.5471 | 896.4277 | 969.5854 | 957.8625 | 856.8865 | 690.5565 | 459.9856 | 198.4452 |
ggparcoord(
data = centroids_plot,
columns=c(1:ncol(C0875.vector.filter.train.centroids)),
scale="globalminmax",
alphaLines=0.2,
groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
ylab("Solar Irradiance (W/m^2)") + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))
#+ scale_fill_discrete(name="Discretized\ Profile", breaks=c("1", "2", "3","4","5"), labels=c("1 (Overcast)", "2","3","4","5"))# + scale_colour_brewer(palette="Spectral")
Cluster Test Dataset (Ground Truth)
Results.groundtruth <- station.assign.daily.cluster(C0875.vector.filter.test,C0875.vector.filter.train.centroids)
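station.assign.daily.cluster is provided by solarf; a minimal sketch of its assumed behavior, assigning each daily test profile to the closest training centroid:
#Hypothetical sketch of the assumed behavior of solarf::station.assign.daily.cluster:
#assign each daily profile to the nearest centroid (squared Euclidean distance) and
#append the result as a clusterid column.
assign.daily.cluster.sketch <- function(station.vector, centroids) {
  profiles <- as.matrix(station.vector[, 4:ncol(station.vector)])
  station.vector$clusterid <- apply(profiles, 1, function(day) {
    which.min(colSums((t(centroids) - day)^2)) #distance of this day to every centroid
  })
  station.vector
}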
Base Models Creation
Model 1 (J=2)
jointsize <- 2
Create Argmax Model
#Argmax model creation
C0875.vector.filter.train <- station.add.missing.profiles(C0875.vector.filter.train)
C0875.filter.train.joint <- station.joint.probability(C0875.vector.filter.train$clusterid,jointsize)
C0875.argmax.model <- model.argmax(C0875.filter.train.joint)
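station.joint.probability and model.argmax come from solarf; a minimal sketch of the assumed construction, counting windows of jointsize consecutive daily cluster ids and keeping, for each history of (jointsize - 1) ids, the most frequent next id together with its support:
#Hypothetical sketch of the assumed argmax model (illustration only).
argmax.model.sketch <- function(clusterid, jointsize) {
  n <- length(clusterid)
  windows <- t(sapply(1:(n - jointsize + 1), function(i) clusterid[i:(i + jointsize - 1)]))
  windows <- windows[complete.cases(windows), , drop = FALSE] #skip windows with missing days
  history <- apply(windows[, 1:(jointsize - 1), drop = FALSE], 1, paste, collapse = " ")
  counts <- table(history, nextid = windows[, jointsize]) #rows: histories, columns: next ids
  data.frame(Hist = rownames(counts),
             Daym0 = as.numeric(colnames(counts))[apply(counts, 1, which.max)],
             Freq = apply(counts, 1, max))
}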
Perform Prediction
#Prediction using the model
C0875.vector.filter.pred <- station.predict.daily.cluster(station.vector.clustered=Results.groundtruth,model=C0875.argmax.model)
All probabilities for each Prediction
#All probabilities using the model
C0875.filter.train.conditional <- station.conditional.probability(C0875.filter.train.joint)
C0875.vector.filter.All.Probs <- station.clusterid.probability(station.conditional.probability=C0875.filter.train.conditional,clusterid.vector = Results.groundtruth$clusterid)
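station.conditional.probability presumably normalizes the joint counts so that each history row sums to one, and station.clusterid.probability then looks up, for every test day, the probability row corresponding to the observed history. A minimal sketch under those assumptions:
#Minimal sketch (assumption): turn joint counts (rows = histories, columns = next cluster
#ids) into conditional probabilities P(next id | history); each row then sums to 1.
conditional.probability.sketch <- function(joint.counts.table) {
  prop.table(joint.counts.table, margin = 1)
}
#Minimal sketch (assumption) of the per-day lookup: paste the previous (jointsize - 1)
#cluster ids into a history key and return that row of probabilities as a string, or NA
#when the history was never observed in training (this produces the leading NA rows below).
clusterid.probability.sketch <- function(conditional, clusterid, jointsize) {
  sapply(seq_along(clusterid), function(i) {
    if (i < jointsize) return(NA)
    key <- paste(clusterid[(i - jointsize + 1):(i - 1)], collapse = " ")
    if (!key %in% rownames(conditional)) return(NA)
    paste(conditional[key, ], collapse = " ")
  })
}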
M1 Outcome Dataframe (Prediction|Support|AllProbs)
Results.m1<-data.frame(
Pred = C0875.vector.filter.pred$Daym0,
Support = C0875.vector.filter.pred$Freq,
All.Probs = C0875.vector.filter.All.Probs,
Hist = C0875.vector.filter.pred$Hist
)
kable(head(Results.m1))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| 1 | 482 | 0.462655601659751 0.0829875518672199 0.0186721991701245 0.282157676348548 0.153526970954357 | 1 |
| 1 | 482 | 0.462655601659751 0.0829875518672199 0.0186721991701245 0.282157676348548 0.153526970954357 | 1 |
| 1 | 482 | 0.462655601659751 0.0829875518672199 0.0186721991701245 0.282157676348548 0.153526970954357 | 1 |
| 1 | 482 | 0.462655601659751 0.0829875518672199 0.0186721991701245 0.282157676348548 0.153526970954357 | 1 |
| 1 | 482 | 0.462655601659751 0.0829875518672199 0.0186721991701245 0.282157676348548 0.153526970954357 | 1 |
Model 2 (J=3)
jointsize <- 3
Create Argmax Model
#Argmax model creation
C0875.filter.train.joint <- station.joint.probability(C0875.vector.filter.train$clusterid,jointsize)
C0875.argmax.model <- model.argmax(C0875.filter.train.joint)
Perform Prediction
#Prediction using the model
C0875.vector.filter.pred <- station.predict.daily.cluster(station.vector.clustered=Results.groundtruth,model=C0875.argmax.model)
All probabilities for each Prediction
#All probabilities using the model
C0875.filter.train.conditional <- station.conditional.probability(C0875.filter.train.joint)
C0875.vector.filter.All.Probs <- station.clusterid.probability(station.conditional.probability=C0875.filter.train.conditional,clusterid.vector = Results.groundtruth$clusterid)
M2 Outcome Dataframe (Prediction|Support|AllProbs)
Results.m2<-data.frame(
Pred = C0875.vector.filter.pred$Daym0,
Support = C0875.vector.filter.pred$Freq,
All.Probs = C0875.vector.filter.All.Probs,
Hist = C0875.vector.filter.pred$Hist
)
kable(head(Results.m2))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 1 | 207 | 0.507246376811594 0.072463768115942 0.00966183574879227 0.280193236714976 0.130434782608696 | 1 1 |
| 1 | 207 | 0.507246376811594 0.072463768115942 0.00966183574879227 0.280193236714976 0.130434782608696 | 1 1 |
| 1 | 207 | 0.507246376811594 0.072463768115942 0.00966183574879227 0.280193236714976 0.130434782608696 | 1 1 |
| 1 | 207 | 0.507246376811594 0.072463768115942 0.00966183574879227 0.280193236714976 0.130434782608696 | 1 1 |
Model 3 (J=4)
kable(head(Results.m3))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 1 | 101 | 0.554455445544554 0.0693069306930693 0.0099009900990099 0.267326732673267 0.099009900990099 | 1 1 1 |
| 1 | 101 | 0.554455445544554 0.0693069306930693 0.0099009900990099 0.267326732673267 0.099009900990099 | 1 1 1 |
| 1 | 101 | 0.554455445544554 0.0693069306930693 0.0099009900990099 0.267326732673267 0.099009900990099 | 1 1 1 |
Model 4 (J=5)
kable(head(Results.m4))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 1 | 53 | 0.566037735849057 0.0943396226415094 0.283018867924528 0.0566037735849057 | 1 1 1 1 |
| 1 | 53 | 0.566037735849057 0.0943396226415094 0.283018867924528 0.0566037735849057 | 1 1 1 1 |
Model 5 (J=6)
kable(head(Results.m5))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 1 | 29 | 0.655172413793103 0.137931034482759 0.172413793103448 0.0344827586206897 | 1 1 1 1 1 |
Model 6 (J=7)
kable(head(Results.m6))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
Model 7 (J=8)
kable(head(Results.m7))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
Entropy Arbitrer
Minimum-entropy selection criterion
Results.decision <- min.entropy.arbitrer(data.frame(
Results.m1$All.Probs,
Results.m2$All.Probs,
Results.m3$All.Probs,
Results.m4$All.Probs,
Results.m5$All.Probs,
Results.m6$All.Probs,
Results.m7$All.Probs
),
data.frame(
Results.m1$Support,
Results.m2$Support,
Results.m3$Support,
Results.m4$Support,
Results.m5$Support,
Results.m6$Support,
Results.m7$Support
)
)
Results.decision.no.support <- min.entropy.arbitrer(data.frame(
Results.m1$All.Probs,
Results.m2$All.Probs,
Results.m3$All.Probs,
Results.m4$All.Probs,
Results.m5$All.Probs,
Results.m6$All.Probs,
Results.m7$All.Probs
))
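min.entropy.arbitrer is provided by solarf; a minimal sketch of the assumed arbitration, computing the Shannon entropy of each model's probability vector per day and selecting the model with the lowest entropy. The treatment of the support counts is an assumption here: the sketch simply divides the entropy by the support so that better-supported models are preferred.
#Hypothetical sketch of the assumed minimum-entropy arbitration (illustration only).
shannon.entropy <- function(prob.string) {
  p <- suppressWarnings(as.numeric(strsplit(as.character(prob.string), " ")[[1]]))
  if (any(is.na(p))) return(Inf) #no history available for this model on this day
  p <- p[p > 0]
  -sum(p * log2(p))
}
min.entropy.arbitrer.sketch <- function(all.probs, support = NULL) {
  entropies <- apply(all.probs, c(1, 2), shannon.entropy)
  if (!is.null(support)) entropies <- entropies / as.matrix(support) #assumed support weighting
  entropies[is.na(entropies)] <- Inf #e.g. missing support counts
  data.frame(Index = apply(entropies, 1, which.min),
             Entropy = apply(entropies, 1, min))
}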
Error Calculation
Ensemble Prediction
Results <- data.frame(
groundtruth = Results.groundtruth,
m1 = Results.m1,
m2 = Results.m2,
m3 = Results.m3,
m4 = Results.m4,
m5 = Results.m5,
m6 = Results.m6,
m7 = Results.m7,
decision = Results.decision, #Returns Inf when no entropy is available because none of the base models has the required history.
decision.nosupport = Results.decision.no.support
)
Output the final predictions using the arbitrer WITH support
Ensemble.Prediction <- data.frame(
m1.pred = Results.m1$Pred,
m2.pred = Results.m2$Pred,
m3.pred = Results.m3$Pred,
m4.pred = Results.m4$Pred,
m5.pred = Results.m5$Pred,
m6.pred = Results.m6$Pred,
m7.pred = Results.m7$Pred,
decision = Results.decision$Index
)
Results$arbitrerPred <- Ensemble.Prediction[cbind(1:nrow(Ensemble.Prediction), Ensemble.Prediction$decision)]
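The cbind() index above selects, for each day (row), the prediction of the model column chosen by the arbitrer. A small self-contained illustration with toy values:
#Toy illustration of row/column pair indexing on a data frame (values are made up).
toy <- data.frame(m1.pred = c(1, 2, 3), m2.pred = c(4, 5, 6), decision = c(2, 1, 2))
toy[cbind(1:nrow(toy), toy$decision)] #returns 4 2 6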
Output the final predictions using the arbitrer WITHOUT support
Ensemble.Prediction.no.support <- data.frame(
m1.pred = Results.m1$Pred,
m2.pred = Results.m2$Pred,
m3.pred = Results.m3$Pred,
m4.pred = Results.m4$Pred,
m5.pred = Results.m5$Pred,
m6.pred = Results.m6$Pred,
m7.pred = Results.m7$Pred,
decision = Results.decision.no.support$Index
)
Results$arbitrerPredNoSupport <- Ensemble.Prediction.no.support[cbind(1:nrow(Ensemble.Prediction.no.support), Ensemble.Prediction.no.support$decision)]
#Fallback: whenever a model has no prediction, use the most frequent cluster in the training set
a <- summary(as.factor(C0875.vector.filter.train$clusterid[!is.na(C0875.vector.filter.train$clusterid)]))
base.centroid <- which.max(a)
Ensemble.Prediction$m1.pred[is.na(Ensemble.Prediction$m1.pred)] <- base.centroid
Ensemble.Prediction$m2.pred[is.na(Ensemble.Prediction$m2.pred)] <- base.centroid
Ensemble.Prediction$m3.pred[is.na(Ensemble.Prediction$m3.pred)] <- base.centroid
Ensemble.Prediction$m4.pred[is.na(Ensemble.Prediction$m4.pred)] <- base.centroid
Ensemble.Prediction$m5.pred[is.na(Ensemble.Prediction$m5.pred)] <- base.centroid
Ensemble.Prediction$m6.pred[is.na(Ensemble.Prediction$m6.pred)] <- base.centroid
Ensemble.Prediction$m7.pred[is.na(Ensemble.Prediction$m7.pred)] <- base.centroid
Results$arbitrerPred[is.na(Results$arbitrerPred)] <- base.centroid
Results$arbitrerPredNoSupport[is.na(Results$arbitrerPredNoSupport)] <- base.centroid
Individual Errors
Base Model 1 Error
m1.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m1.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m1.error)
mean(m1.error,na.rm=TRUE)
## [1] 1615.588
sum(m1.error,na.rm=TRUE)/(10*length(m1.error[!is.na(m1.error)]))
## [1] 161.5588
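error.absolute.centroid is provided by solarf. A minimal sketch of what it is assumed to compute: for each test day, the sum over the 10 daylight hours of the absolute difference between the observed irradiance and the predicted profile's centroid. Dividing the total by 10 times the number of days, as above, then gives the hourly mean absolute error in W/m^2 reported for each model.
#Hypothetical sketch of the assumed per-day absolute error against the predicted centroid.
error.absolute.centroid.sketch <- function(observed, pred.clusters, centroids) {
  sapply(seq_len(nrow(observed)), function(i) {
    if (is.na(pred.clusters[i])) return(NA)
    sum(abs(unlist(observed[i, ]) - centroids[pred.clusters[i], ]))
  })
}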
Base Model 2 Error
m2.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m2.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m2.error)
mean(m2.error,na.rm=TRUE)
## [1] 1749.771
sum(m2.error,na.rm=TRUE)/(10*length(m2.error[!is.na(m2.error)]))
## [1] 174.9771
Base Model 3 Error
m3.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m3.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m3.error)
mean(m3.error,na.rm=TRUE)
## [1] 1759.679
sum(m3.error,na.rm=TRUE)/(10*length(m3.error[!is.na(m3.error)]))
## [1] 175.9679
Base Model 4 Error
m4.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m4.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m4.error)
mean(m4.error,na.rm=TRUE)
## [1] 1812.23
sum(m4.error,na.rm=TRUE)/(10*length(m4.error[!is.na(m4.error)]))
## [1] 181.223
Base Model 5 Error
m5.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m5.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m5.error)
mean(m5.error,na.rm=TRUE)
## [1] 1788.589
sum(m5.error,na.rm=TRUE)/(10*length(m5.error[!is.na(m5.error)]))
## [1] 178.8589
Base Model 6 Error
m6.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m6.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m6.error)
mean(m6.error,na.rm=TRUE)
## [1] 1754.356
sum(m6.error,na.rm=TRUE)/(10*length(m6.error[!is.na(m6.error)]))
## [1] 175.4356
Base Model 7 Error
m7.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m7.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m7.error)
mean(m7.error,na.rm=TRUE)
## [1] 1728.86
sum(m7.error,na.rm=TRUE)/(10*length(m7.error[!is.na(m7.error)]))
## [1] 172.886
Error From Ensemble Prediction WITHOUT support
ensemble.error.no.support <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Results$arbitrerPredNoSupport,cluster.centroids=C0875.vector.filter.train.centroids)
hist(ensemble.error.no.support)
mean(ensemble.error.no.support,na.rm=TRUE)
## [1] 1887.043
sum(ensemble.error.no.support,na.rm=TRUE)/(10*length(ensemble.error.no.support[!is.na(ensemble.error.no.support)]))
## [1] 188.7043
Error From Ensemble Prediction WITH support
ensemble.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Results$arbitrerPred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(ensemble.error)
mean(ensemble.error,na.rm=TRUE)
## [1] 1918.836
sum(ensemble.error,na.rm=TRUE)/(10*length(ensemble.error[!is.na(ensemble.error)]))
## [1] 191.8836
How often does the ensemble select each base model, with and without the support weight?
qplot(decision, data=Ensemble.Prediction, geom="histogram",ylab="Frequency",xlab="w") + scale_x_discrete(limits=c("2","3","4","5","6","7","8")) + science_theme
qplot(decision, data=Ensemble.Prediction.no.support, geom="histogram",ylab="Frequency",xlab="w") + scale_x_discrete(limits=c("2","3","4","5","6","7","8")) + science_theme + theme(axis.text.x = element_text(hjust = 0.5, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20)) + theme( # remove the vertical grid lines
panel.grid.major.x = element_blank() ,
# explicitly set the horizontal lines (or they will disappear too)
panel.grid.major.y = element_line( size=.1, color="black" )
)
It is noticeable that model B5 is no longer chosen as often, and B1 is now chosen less than B2. B3 and B4 are now chosen much more for the prediction. This gives hope that the ensemble will do better, since it now chooses the model with the least amount of error (B3) more often.
mean_error <- data.frame(error=c(
as.integer(mean(m1.error,na.rm=TRUE)),
as.integer(mean(m2.error,na.rm=TRUE)),
as.integer(mean(m3.error,na.rm=TRUE)),
as.integer(mean(m4.error,na.rm=TRUE)),
as.integer(mean(m5.error,na.rm=TRUE)),
as.integer(mean(m6.error,na.rm=TRUE)),
as.integer(mean(m7.error,na.rm=TRUE)),
as.integer(mean(ensemble.error.no.support,na.rm=TRUE)),
as.integer(mean(ensemble.error,na.rm=TRUE))
))
labels <- c("B1","B2","B3","B4","B5","B6","B7","E","ESpt")
qplot(labels,mean_error$error, geom="bar",stat="identity",xlab="Model",ylab="Daily Mean Absolute Error") + science_theme
rownames(mean_error) <- labels
kable(mean_error)
|  | error |
|---|---|
| B1 | 1615 |
| B2 | 1749 |
| B3 | 1759 |
| B4 | 1812 |
| B5 | 1788 |
| B6 | 1754 |
| B7 | 1728 |
| E | 1887 |
| ESpt | 1918 |
mean_error <- data.frame(error=c(
as.integer(sum(m1.error,na.rm=TRUE)/(10*length(m1.error[!is.na(m1.error)]))),
as.integer(sum(m2.error,na.rm=TRUE)/(10*length(m2.error[!is.na(m2.error)]))),
as.integer(sum(m3.error,na.rm=TRUE)/(10*length(m3.error[!is.na(m3.error)]))),
as.integer(sum(m4.error,na.rm=TRUE)/(10*length(m4.error[!is.na(m4.error)]))),
as.integer(sum(m5.error,na.rm=TRUE)/(10*length(m5.error[!is.na(m5.error)]))),
as.integer(sum(m6.error,na.rm=TRUE)/(10*length(m6.error[!is.na(m6.error)]))),
as.integer(sum(m7.error,na.rm=TRUE)/(10*length(m7.error[!is.na(m7.error)]))),
as.integer(sum(ensemble.error.no.support,na.rm=TRUE)/(10*length(ensemble.error.no.support[!is.na(ensemble.error.no.support)]))),
as.integer(sum(ensemble.error,na.rm=TRUE)/(10*length(ensemble.error[!is.na(ensemble.error)])))
))
labels <- c("2","3","4","5","6","7","8","Ent","Spt")
qplot(labels,mean_error$error, geom="bar",stat="identity",xlab="w",ylab="Mean Absolute Error (W/m^2)") + science_theme + theme(axis.text.x = element_text(hjust = 0.5, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20)) + ylim(0, 400) + theme( # remove the vertical grid lines
panel.grid.major.x = element_blank() ,
# explicitly set the horizontal lines (or they will disappear too)
panel.grid.major.y = element_line( size=.1, color="black" )
)
rownames(mean_error) <- labels
kable(mean_error)
|  | error |
|---|---|
| 2 | 161 |
| 3 | 174 |
| 4 | 175 |
| 5 | 181 |
| 6 | 178 |
| 7 | 175 |
| 8 | 172 |
| Ent | 188 |
| Spt | 191 |
Comparing the last two rows, weighting by the support value does not reduce the error relative to using only the minimum entropy value to decide which base model to use for the prediction.
missing <- data.frame(missing=c( #share (%) of days without a prediction; 345 is presumably the number of daily profiles in the test set
round(length(Ensemble.Prediction$m1.pred[is.na(Ensemble.Prediction$m1.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m2.pred[is.na(Ensemble.Prediction$m2.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m3.pred[is.na(Ensemble.Prediction$m3.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m4.pred[is.na(Ensemble.Prediction$m4.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m5.pred[is.na(Ensemble.Prediction$m5.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m6.pred[is.na(Ensemble.Prediction$m6.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m7.pred[is.na(Ensemble.Prediction$m7.pred)])/345,3)*100,
round(length(Results$arbitrerPred[is.na(Results$arbitrerPred)])/345,3)*100,
round(length(Results$arbitrerPredNoSupport[is.na(Results$arbitrerPredNoSupport)])/345,3)*100
))
labels <- c("m1","m2","m3","m4","m5","m6","m7","Pm","PmSpt")
qplot(labels,missing$missing, geom="bar",stat="identity",xlab="Model",ylab=" % Daily Missing Predictions") + science_theme + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))