#Load Required Libraries
library(solarf)
library(knitr) #For pretty tables on the report using kable
library(plyr)
library(ggplot2)
library(GGally)
library(caret)
#Theme for Plot
science_theme = theme(panel.background=element_blank(), panel.grid.major = element_line(size = 0.5, color = "grey"),
axis.line = element_line(size = 0.7, color = "black"), legend.position = c(0.9,
0.7), text = element_text(size = 14))
Load Station C0875 (Southeast Oahu Island)
C0875 <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/C0875_2011_2014.csv")
station.vectorize.hour <- function(station.dataframe,variable){
library(reshape2)
use.only.last.ocurrence <- function(minute.obs){
return (minute.obs[1])
}
station.dataframe.vectorized <- dcast(data=station.dataframe, formula=YEAR + MON + DAY + HR ~ MIN,value.var = variable,fun.aggregate=use.only.last.ocurrence)
return (station.dataframe.vectorized)
}
Vectorize Step
Vectorize C0875 over the training and test partitions.
#Moves from minute level to hourly level
C0875.vector.h <- station.vectorize.hour(C0875,"SOLR")
C0875.h <- C0875.vector.h[,c("YEAR","MON","DAY","HR")]
C0875.h$SOLR <- rowMeans(C0875.vector.h[,5:ncol(C0875.vector.h)],na.rm=TRUE) #Calculate the mean for the hour from min 0 to min 60 ; baseline is hence hour and 30 mins.
C0875.h$HR <- C0875.h$HR + 1
C0875.vector <- station.vectorize(C0875.h,"SOLR")
C0875.vector <- C0875.vector[C0875.vector$YEAR>2011,c(1:3,11:20)] #SOLR starts on Feb 15, 2012.
Data Point Filtering
#C0875.vector.filter <- station.vectorize(C0875[!is.na(C0875$SOLR),],"SOLR")
C0875.vector.filter <- C0875.vector[complete.cases(C0875.vector),]
Train and Test Split
#Separate the year of 2014 for prediction testing
C0875.vector.filter.test <- C0875.vector.filter[C0875.vector.filter$YEAR==2014,]
C0875.vector.filter.train <- C0875.vector.filter[C0875.vector.filter$YEAR!=2014,]
#C0875.vector.filter.train <- C0875.vector.filter[C0875.vector.filter$MON==11 |
# C0875.vector.filter$MON==12 |
# C0875.vector.filter$MON==1 |
# C0875.vector.filter$MON==2 |
# C0875.vector.filter$MON==3 |
# C0875.vector.filter$MON==4
# ,]
Cluster Step
Cluster Train Dataset
Cluster with K-means = nclusters the training dataset and assign clusters to the daily profiles.
nclusters <- 5
set.seed(1234)
C0875.filter.train.clusters <- kmeans(C0875.vector.filter.train[,4:ncol(C0875.vector.filter)],nclusters)
C0875.vector.filter.train$clusterid <- C0875.filter.train.clusters[[1]]
C0875.vector.filter.train.centroids <- C0875.filter.train.clusters[[2]]
intensity.crescent.ordered.clusterid <- as.numeric(names(sort(rowSums(C0875.vector.filter.train.centroids))))
#Order the centroids in crescent order of ipntensity and reset rows
C0875.vector.filter.train.centroids <- C0875.vector.filter.train.centroids[intensity.crescent.ordered.clusterid,]
rownames(C0875.vector.filter.train.centroids) <- seq(1,nclusters)
#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
C0875.vector.filter.train$clusterid <- intensity.crescent.ordered.clusterid[C0875.vector.filter.train$clusterid]
Centroids
centroids_plot <- data.frame(C0875.vector.filter.train.centroids)
centroids_plot$color <-rownames(C0875.vector.filter.train.centroids)
colnames(centroids_plot) <- c(colnames(C0875.vector.filter.train.centroids),"Profile")
kable(C0875.vector.filter.train.centroids)
| 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
|---|---|---|---|---|---|---|---|---|---|
| 63.75098 | 169.7902 | 273.9124 | 292.8852 | 312.6332 | 334.7674 | 341.3711 | 292.0897 | 236.2965 | 144.7556 |
| 85.89587 | 248.3484 | 427.8448 | 571.5542 | 629.9225 | 636.4579 | 573.0717 | 473.9575 | 343.4200 | 200.5349 |
| 95.86391 | 251.6029 | 441.4603 | 599.4685 | 742.0340 | 820.2648 | 836.0977 | 750.7302 | 556.1624 | 328.4749 |
| 196.82671 | 388.6677 | 599.1102 | 755.9045 | 814.2504 | 825.1088 | 758.6555 | 605.5889 | 455.8949 | 266.5185 |
| 207.56872 | 417.2371 | 629.2724 | 825.2460 | 954.5118 | 981.7954 | 940.3175 | 863.0599 | 680.0826 | 452.6717 |
ggparcoord(
data = centroids_plot,
columns=c(1:ncol(C0875.vector.filter.train.centroids)),
scale="globalminmax",
alphaLines=0.2,
groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
ylab("Solar Radiation Itensity (W/m2") + ggtitle("Solar Radiation Discretized Profiles from 2003 to 2013 of Schofield Barracks Station") + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))
#+ scale_fill_discrete(name="Discretized\ Profile", breaks=c("1", "2", "3","4","5"), labels=c("1 (Overcast)", "2","3","4","5"))# + scale_colour_brewer(palette="Spectral")
Cluster Test Dataset (Ground Truth)
Results.groundtruth <- station.assign.daily.cluster(C0875.vector.filter.test,C0875.vector.filter.train.centroids)
Base Models Creation
Model 1 (J=2)
jointsize <- 2
Create Argmax Model
#Argmax model creation
C0875.vector.filter.train <- station.add.missing.profiles(C0875.vector.filter.train)
## Warning: package 'zoo' was built under R version 3.1.3
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
C0875.filter.train.joint <- station.joint.probability(C0875.vector.filter.train$clusterid,jointsize)
C0875.argmax.model <- model.argmax(C0875.filter.train.joint)
## hash-2.2.6 provided by Decision Patterns
Perform Prediction
#Prediction using the model
C0875.vector.filter.pred <-station.predict.daily.cluster(station.vector.clustered=Results.groundtruth,model=C0875.argmax.model)
All probabilities for each Prediction
#All probabilities using the model
C0875.filter.train.conditional <- station.conditional.probability(C0875.filter.train.joint)
C0875.vector.filter.All.Probs <- station.clusterid.probability(station.conditional.probability=C0875.filter.train.conditional,clusterid.vector = Results.groundtruth$clusterid)
M1 Outcome Dataframe (Prediction|Support|AllProbs)
Results.m1<-data.frame(
Pred = C0875.vector.filter.pred$Daym0,
Support = C0875.vector.filter.pred$Freq,
All.Probs = C0875.vector.filter.All.Probs,
Hist = C0875.vector.filter.pred$Hist
)
kable(head(Results.m1))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| 2 | 56 | 0.125 0.5 0.160714285714286 0.125 0.0892857142857143 | 2 |
| 2 | 29 | 0.206896551724138 0.551724137931034 0.137931034482759 0.0344827586206897 0.0689655172413793 | 1 |
| 3 | 48 | 0.0416666666666667 0.291666666666667 0.333333333333333 0.166666666666667 0.166666666666667 | 3 |
| 3 | 48 | 0.0416666666666667 0.291666666666667 0.333333333333333 0.166666666666667 0.166666666666667 | 3 |
| 3 | 48 | 0.0416666666666667 0.291666666666667 0.333333333333333 0.166666666666667 0.166666666666667 | 3 |
Model 2 (J=3)
jointsize <- 3
Create Argmax Model
#Argmax model creation
C0875.filter.train.joint <- station.joint.probability(C0875.vector.filter.train$clusterid,jointsize)
C0875.argmax.model <- model.argmax(C0875.filter.train.joint)
Perform Prediction
#Prediction using the model
C0875.vector.filter.pred <-station.predict.daily.cluster(station.vector.clustered=Results.groundtruth,model=C0875.argmax.model)
All probabilities for each Prediction
#All probabilities using the model
C0875.filter.train.conditional <- station.conditional.probability(C0875.filter.train.joint)
C0875.vector.filter.All.Probs <- station.clusterid.probability(station.conditional.probability=C0875.filter.train.conditional,clusterid.vector = Results.groundtruth$clusterid)
M2 Outcome Dataframe (Prediction|Support|AllProbs)
Results.m2<-data.frame(
Pred = C0875.vector.filter.pred$Daym0,
Support = C0875.vector.filter.pred$Freq,
All.Probs = C0875.vector.filter.All.Probs,
Hist = C0875.vector.filter.pred$Hist
)
kable(head(Results.m2))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 2 | 7 | 0.142857142857143 0.571428571428571 0.142857142857143 0.142857142857143 | 2 1 |
| 2 | 4 | 0.75 0.25 | 1 3 |
| 3 | 14 | 0.357142857142857 0.5 0.0714285714285714 0.0714285714285714 | 3 3 |
| 3 | 14 | 0.357142857142857 0.5 0.0714285714285714 0.0714285714285714 | 3 3 |
Model 3 (J=4)
kable(head(Results.m3))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| 2 | 1 | 1 | 2 1 3 |
| NA | 0 | NA | 1 3 3 |
| 3 | 7 | 0.428571428571429 0.571428571428571 | 3 3 3 |
Model 4 (J=5)
kable(head(Results.m4))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | 0 | NA | 2 1 3 3 |
| NA | 0 | NA | 1 3 3 3 |
Model 5 (J=6)
kable(head(Results.m5))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | 0 | NA | 2 1 3 3 3 |
Model 6 (J=7)
kable(head(Results.m6))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
Model 7 (J=8)
kable(head(Results.m7))
| Pred | Support | All.Probs | Hist |
|---|---|---|---|
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
Entropy Arbtitrer
Minimum entropy selection criteria
Results.decision <- min.entropy.arbitrer(data.frame(
Results.m1$All.Probs,
Results.m2$All.Probs,
Results.m3$All.Probs,
Results.m4$All.Probs,
Results.m5$All.Probs,
Results.m6$All.Probs,
Results.m7$All.Probs
),
data.frame(
Results.m1$Support,
Results.m2$Support,
Results.m3$Support,
Results.m4$Support,
Results.m5$Support,
Results.m6$Support,
Results.m7$Support
)
)
Results.decision.no.support <- min.entropy.arbitrer(data.frame(
Results.m1$All.Probs,
Results.m2$All.Probs,
Results.m3$All.Probs,
Results.m4$All.Probs,
Results.m5$All.Probs,
Results.m6$All.Probs,
Results.m7$All.Probs
))
Error Calculation
Ensemble Prediction
Results <- data.frame(
groundtruth = Results.groundtruth,
m1 = Results.m1,
m2 = Results.m2,
m3 = Results.m3,
m4 = Results.m4,
m5 = Results.m5,
m6 = Results.m6,
m7 = Results.m7,
decision = Results.decision, #Returns Inf if not a single entropy is available due to lack of hist.
decision.nosupport = Results.decision.no.support
)
Output the final predictions using the arbitrer WITH support
Ensemble.Prediction <- data.frame(
m1.pred = Results.m1$Pred,
m2.pred = Results.m2$Pred,
m3.pred = Results.m3$Pred,
m4.pred = Results.m4$Pred,
m5.pred = Results.m5$Pred,
m6.pred = Results.m6$Pred,
m7.pred = Results.m7$Pred,
decision = Results.decision$Index
)
Results$arbitrerPred <- Ensemble.Prediction[cbind(1:nrow(Ensemble.Prediction), Ensemble.Prediction$decision)]
Output the final predictions using the arbitrer WITHOUT support
Ensemble.Prediction.no.support <- data.frame(
m1.pred = Results.m1$Pred,
m2.pred = Results.m2$Pred,
m3.pred = Results.m3$Pred,
m4.pred = Results.m4$Pred,
m5.pred = Results.m5$Pred,
m6.pred = Results.m6$Pred,
m7.pred = Results.m7$Pred,
decision = Results.decision.no.support$Index
)
Results$arbitrerPredNoSupport <- Ensemble.Prediction.no.support[cbind(1:nrow(Ensemble.Prediction.no.support), Ensemble.Prediction.no.support$decision)]
a <- summary(as.factor(C0875.vector.filter.train$clusterid[!is.na(C0875.vector.filter.train$clusterid)]))
base.centroid <- which(a == max(a))
Ensemble.Prediction$m1.pred[is.na(Ensemble.Prediction$m1.pred)] <- base.centroid
Ensemble.Prediction$m2.pred[is.na(Ensemble.Prediction$m2.pred)] <- base.centroid
Ensemble.Prediction$m3.pred[is.na(Ensemble.Prediction$m3.pred)] <- base.centroid
Ensemble.Prediction$m4.pred[is.na(Ensemble.Prediction$m4.pred)] <- base.centroid
Ensemble.Prediction$m5.pred[is.na(Ensemble.Prediction$m5.pred)] <- base.centroid
Ensemble.Prediction$m6.pred[is.na(Ensemble.Prediction$m6.pred)] <- base.centroid
## Warning in `[<-.factor`(`*tmp*`, is.na(Ensemble.Prediction$m6.pred), value
## = structure(c(NA, : invalid factor level, NA generated
Ensemble.Prediction$m7.pred[is.na(Ensemble.Prediction$m7.pred)] <- base.centroid
## Warning in `[<-.factor`(`*tmp*`, is.na(Ensemble.Prediction$m7.pred), value
## = structure(c(NA, : invalid factor level, NA generated
Results$arbitrerPred[is.na(Results$arbitrerPred)] <- base.centroid
Results$arbitrerPredNoSupport[is.na(Results$arbitrerPredNoSupport)] <- base.centroid
Individual Errors
Base Model 1 Error
m1.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m1.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m1.error)
mean(m1.error,na.rm=TRUE)
## [1] 1821.048
sum(m1.error,na.rm=TRUE)/(10*length(m1.error[!is.na(m1.error)]))
## [1] 182.1048
Base Model 2 Error
m2.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m2.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m2.error)
mean(m2.error,na.rm=TRUE)
## [1] 1749.29
sum(m2.error,na.rm=TRUE)/(10*length(m2.error[!is.na(m2.error)]))
## [1] 174.929
Base Model 3 Error
m3.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m3.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m3.error)
mean(m3.error,na.rm=TRUE)
## [1] 1523.576
sum(m3.error,na.rm=TRUE)/(10*length(m3.error[!is.na(m3.error)]))
## [1] 152.3576
Base Model 4 Error
m4.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m4.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m4.error)
mean(m4.error,na.rm=TRUE)
## [1] 1534.698
sum(m4.error,na.rm=TRUE)/(10*length(m4.error[!is.na(m4.error)]))
## [1] 153.4698
Base Model 5 Error
m5.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m5.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m5.error)
mean(m5.error,na.rm=TRUE)
## [1] 2642.861
sum(m5.error,na.rm=TRUE)/(10*length(m5.error[!is.na(m5.error)]))
## [1] 264.2861
Base Model 6 Error
m6.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m6.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m6.error)
mean(m6.error,na.rm=TRUE)
## [1] 2659.16
sum(m6.error,na.rm=TRUE)/(10*length(m6.error[!is.na(m6.error)]))
## [1] 265.916
Base Model 7 Error
m7.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Ensemble.Prediction$m7.pred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(m7.error)
mean(m7.error,na.rm=TRUE)
## [1] 3566.394
sum(m7.error,na.rm=TRUE)/(10*length(m7.error[!is.na(m7.error)]))
## [1] 356.6394
Error From Ensemble Prediction WITHOUT support
ensemble.error.no.support <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Results$arbitrerPredNoSupport,cluster.centroids=C0875.vector.filter.train.centroids)
hist(ensemble.error.no.support)
mean(ensemble.error.no.support,na.rm=TRUE)
## [1] 1520.587
sum(ensemble.error.no.support,na.rm=TRUE)/(10*length(ensemble.error.no.support[!is.na(ensemble.error.no.support)]))
## [1] 152.0587
Error From Ensemble Prediction WITH support
ensemble.error <- error.absolute.centroid(station.vectorized.pred=C0875.vector.filter.test[,4:ncol(C0875.vector.filter.test)],pred.clusters=Results$arbitrerPred,cluster.centroids=C0875.vector.filter.train.centroids)
hist(ensemble.error)
mean(ensemble.error,na.rm=TRUE)
## [1] 1546.307
sum(ensemble.error,na.rm=TRUE)/(10*length(ensemble.error[!is.na(ensemble.error)]))
## [1] 154.6307
How much from each model is the ensemble using WITH the support weight?
qplot(decision, data=Ensemble.Prediction, geom="histogram",ylab="Frequency",xlab="Model") + scale_x_discrete(limits=c("B1","B2","B3","B4","B5","B6","B7")) + science_theme
qplot(decision, data=Ensemble.Prediction.no.support, geom="histogram",ylab="Frequency",xlab="Model") + scale_x_discrete(limits=c("B1","B2","B3","B4","B5","B6","B7")) + science_theme
It is noticiable that model B5 is not chosen as much anymore, and B1 now is chosen less than B2. B3 and B4 are now chosen much more for the prediction. This effect gives hopes that now the ensemble will do better, given it is choosen the model with the least amount of error more (B3).
mean_error <- data.frame(error=c(
as.integer(mean(m1.error,na.rm=TRUE)),
as.integer(mean(m2.error,na.rm=TRUE)),
as.integer(mean(m3.error,na.rm=TRUE)),
as.integer(mean(m4.error,na.rm=TRUE)),
as.integer(mean(m5.error,na.rm=TRUE)),
as.integer(mean(m6.error,na.rm=TRUE)),
as.integer(mean(m7.error,na.rm=TRUE)),
as.integer(mean(ensemble.error.no.support,na.rm=TRUE)),
as.integer(mean(ensemble.error,na.rm=TRUE))
))
labels <- c("B1","B2","B3","B4","B5","B6","B7","E","ESpt")
qplot(labels,mean_error$error, geom="bar",stat="identity",xlab="Model",ylab="Daily Mean Absolute Error") + science_theme
rownames(mean_error) <- labels
kable(mean_error)
| error | |
|---|---|
| B1 | 1821 |
| B2 | 1749 |
| B3 | 1523 |
| B4 | 1534 |
| B5 | 2642 |
| B6 | 2659 |
| B7 | 3566 |
| E | 1520 |
| ESpt | 1546 |
mean_error <- data.frame(error=c(
as.integer(sum(m1.error,na.rm=TRUE)/(10*length(m1.error[!is.na(m1.error)]))),
as.integer(sum(m2.error,na.rm=TRUE)/(10*length(m2.error[!is.na(m2.error)]))),
as.integer(sum(m3.error,na.rm=TRUE)/(10*length(m3.error[!is.na(m3.error)]))),
as.integer(sum(m4.error,na.rm=TRUE)/(10*length(m4.error[!is.na(m4.error)]))),
as.integer(sum(m5.error,na.rm=TRUE)/(10*length(m5.error[!is.na(m5.error)]))),
as.integer(sum(m6.error,na.rm=TRUE)/(10*length(m6.error[!is.na(m6.error)]))),
as.integer(sum(m7.error,na.rm=TRUE)/(10*length(m7.error[!is.na(m7.error)]))),
as.integer(sum(ensemble.error.no.support,na.rm=TRUE)/(10*length(ensemble.error.no.support[!is.na(ensemble.error.no.support)]))),
as.integer(sum(ensemble.error,na.rm=TRUE)/(10*length(ensemble.error[!is.na(ensemble.error)])))
))
labels <- c("m1","m2","m3","m4","m5","m6","m7","M","MSpt")
qplot(labels,mean_error$error, geom="bar",stat="identity",xlab="Model",ylab="Hourly Mean Absolute Error") + science_theme + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))
rownames(mean_error) <- labels
kable(mean_error)
| error | |
|---|---|
| m1 | 182 |
| m2 | 174 |
| m3 | 152 |
| m4 | 153 |
| m5 | 264 |
| m6 | 265 |
| m7 | 356 |
| M | 152 |
| MSpt | 154 |
We can see that the use of the support value improves compared to just using the minimum entropy value in order to decise which base model to predict.
missing <- data.frame(missing=c(
round(length(Ensemble.Prediction$m1.pred[is.na(Ensemble.Prediction$m1.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m2.pred[is.na(Ensemble.Prediction$m2.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m3.pred[is.na(Ensemble.Prediction$m3.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m4.pred[is.na(Ensemble.Prediction$m4.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m5.pred[is.na(Ensemble.Prediction$m5.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m6.pred[is.na(Ensemble.Prediction$m6.pred)])/345,3)*100,
round(length(Ensemble.Prediction$m7.pred[is.na(Ensemble.Prediction$m7.pred)])/345,3)*100,
round(length(Results$arbitrerPred[is.na(Results$arbitrerPred)])/345,3)*100,
round(length(Results$arbitrerPredNoSupport[is.na(Results$arbitrerPredNoSupport)])/345,3)*100
))
labels <- c("m1","m2","m3","m4","m5","m6","m7","M","MSpt")
qplot(labels,missing$missing, geom="bar",stat="identity",xlab="Model",ylab=" % Daily Missing Predictions") + science_theme + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))