alt text

Discretization

Data and Parameters

#Formula
model <- list()
#Load Stations Raw Data
model[["data"]] <- list()
model[["data"]][["SCBH1"]] <- readRDS("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/SCBH1.rds")
model[["data"]][["KTAH1"]] <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/carlos/solarf/data/KTAH1_north.csv")
model[["data"]][["PLHH1"]] <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/carlos/solarf/data/PLHH1_south.csv")
model[["data"]][["WNVH1"]] <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/carlos/solarf/data/WNVH1_west.csv")
model[["data"]][["KKRH1"]] <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/carlos/solarf/data/KKRH1_vwest.csv")
model[["data"]][["OFRH1"]] <- read.csv("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/carlos/solarf/data/OFRH1_east.csv")
#Defines how to discretize each weather variable
model[["discretize"]] <- list()
#Precipitation
model[["discretize"]][["PREC"]][["method"]] <- "binning"
model[["discretize"]][["PREC"]][["ncentroids"]] <- 5
#Solar
model[["discretize"]][["SOLR"]][["method"]] <- "kmeans"
model[["discretize"]][["SOLR"]][["ncentroids"]] <- 5
#Humidity
model[["discretize"]][["RELH"]][["method"]] <- "kmeans"
model[["discretize"]][["RELH"]][["ncentroids"]] <- 5
#Dew Point
model[["discretize"]][["DWPF"]][["method"]] <- "kmeans"
model[["discretize"]][["DWPF"]][["ncentroids"]] <- 5
#Wind Speed
model[["discretize"]][["SKNT"]][["method"]] <- "kmeans"
model[["discretize"]][["SKNT"]][["ncentroids"]] <- 5
#Temperature
model[["discretize"]][["TMPF"]][["method"]] <- "kmeans"
model[["discretize"]][["TMPF"]][["ncentroids"]] <- 5

Models

formulas <- c(
                "SCBH1.SOLR_Daym0|SCBH1.SOLR_Daym1",
                "KTAH1.SOLR_Daym0|KTAH1.SOLR_Daym1",
                "OFRH1.SOLR_Daym0|OFRH1.SOLR_Daym1",
                "PLHH1.SOLR_Daym0|PLHH1.SOLR_Daym1",
                "KKRH1.SOLR_Daym0|KKRH1.SOLR_Daym1"
            )    
set.seed(1234)
errors <- array(,length(formulas))
sds <-  array(,length(formulas))
pmodels <- list()
for(i in 1:length(formulas)){
    formula <- formulas[i]
    model[["formula"]] <- get.pred.obs.fts(formula)
    pmodel <- joint_probability(
        data=model[["data"]],
        formula=model[["formula"]],
        discretize=model[["discretize"]],
        time.length=2012:2013,
        test.length=2014)
    pmodels[[formula]] <- pmodel
    errors[i] <- mean(as.vector(t(pmodel[["errors"]])),na.rm=TRUE)
    sds[i] <- sd(as.vector(t(pmodel[["errors"]])),na.rm=TRUE)
}
#saveRDS(errors,"~/Desktop/SCBH1_errors_single")

Q0 From HECO Meeting adjusted for Randomness

Discretization Errors

Centroid Errors Across 10 Runs

(Total of 100 since K-means runs 10 each)

SCBH1

KTAH1

qplot(as.factor(1:10),pmodels[["KTAH1.SOLR_Daym0|KTAH1.SOLR_Daym1"]][["discrete_profile"]][["KTAH1.SOLR"]][["kmeans_error"]],geom="bar",stat="identity") + science_theme + ggtitle("KTAH1 (North) 2012 and 2013 Discretization Error") + ylab("Hourly Mean Absolute Error of Solar Irradiance (W/m^2)") + xlab("Iteration") + ylim(0,300)

OFRH1

qplot(as.factor(1:10),pmodels[["OFRH1.SOLR_Daym0|OFRH1.SOLR_Daym1"]][["discrete_profile"]][["OFRH1.SOLR"]][["kmeans_error"]],geom="bar",stat="identity") + science_theme + ggtitle("OFRH1 (East) 2012 and 2013 Discretization Error") + ylab("Hourly Mean Absolute Error of Solar Irradiance (W/m^2)") + xlab("Iteration") + ylim(0,300)

PLHH1

qplot(as.factor(1:10),pmodels[["PLHH1.SOLR_Daym0|PLHH1.SOLR_Daym1"]][["discrete_profile"]][["PLHH1.SOLR"]][["kmeans_error"]],geom="bar",stat="identity") + science_theme + ggtitle("PLHH1 (South) 2012 and 2013 Discretization Error") + ylab("Hourly Mean Absolute Error of Solar Irradiance (W/m^2)") + xlab("Iteration") + ylim(0,300)

KKRH1

qplot(as.factor(1:10),pmodels[["KKRH1.SOLR_Daym0|KKRH1.SOLR_Daym1"]][["discrete_profile"]][["KKRH1.SOLR"]][["kmeans_error"]],geom="bar",stat="identity") + science_theme + ggtitle("KKRH1 (West) 2012 and 2013 Discretization Error") + ylab("Hourly Mean Absolute Error of Solar Irradiance (W/m^2)") + xlab("Iteration") + ylim(0,300)

Centroid Errors PER Hour

SCBH1 (Center)

## Using  as id variables

KTAH1 (North)

ggplot(melt(pmodels[["KTAH1.SOLR_Daym0|KTAH1.SOLR_Daym1"]][["discrete_profile"]][["KTAH1.SOLR"]][["error_per_hour"]]), 
    aes(x=variable, y=value)) +
    geom_point(colour="lightblue", alpha=0.5, position="jitter") +
    geom_boxplot(outlier.size=0, alpha=0.2) + coord_flip() + science_theme + 
    xlab("Hour of the day") + 
    ylab(expression(paste("Solar MAE",(w/m^2)))) + 
    ggtitle("KTAH1 (Center) 2012 and 2013 Discretization Error") + ylim(0,650)           

OFRH1 (East)

ggplot(melt(pmodels[["OFRH1.SOLR_Daym0|OFRH1.SOLR_Daym1"]][["discrete_profile"]][["OFRH1.SOLR"]][["error_per_hour"]]), 
    aes(x=variable, y=value)) +
    geom_point(colour="lightblue", alpha=0.5, position="jitter") +
    geom_boxplot(outlier.size=0, alpha=0.2) + coord_flip() + science_theme + 
    xlab("Hour of the day") + 
    ylab(expression(paste("Solar MAE",(w/m^2)))) + 
    ggtitle("OFRH1 (Center) 2012 and 2013 Discretization Error") + ylim(0,650)            

PLHH1 (South)

ggplot(melt(pmodels[["PLHH1.SOLR_Daym0|PLHH1.SOLR_Daym1"]][["discrete_profile"]][["PLHH1.SOLR"]][["error_per_hour"]]), 
    aes(x=variable, y=value)) +
    geom_point(colour="lightblue", alpha=0.5, position="jitter") +
    geom_boxplot(outlier.size=0, alpha=0.2) + coord_flip() + science_theme + 
    xlab("Hour of the day") + 
    ylab(expression(paste("Solar MAE",(w/m^2)))) + 
    ggtitle("PLHH1 (Center) 2012 and 2013 Discretization Error")  + ylim(0,650)           

KKRH1 (West)

ggplot(melt(pmodels[["KKRH1.SOLR_Daym0|KKRH1.SOLR_Daym1"]][["discrete_profile"]][["KKRH1.SOLR"]][["error_per_hour"]]), 
    aes(x=variable, y=value)) +
    geom_point(colour="lightblue", alpha=0.5, position="jitter") +
    geom_boxplot(outlier.size=0, alpha=0.2) + coord_flip() + science_theme + 
    xlab("Hour of the day") + 
    ylab(expression(paste("Solar MAE",(w/m^2)))) + 
    ggtitle("KKRH1 (Center) 2012 and 2013 Discretization Error")   + ylim(0,650)         

Solar Variance

Train Dataset - 2012,2013

filter_hours <- function(station){
    station <- station[station$YEAR >= 2011 & station$YEAR <= 2013,]
    station <- station[station$HR >= 8 & station$HR <= 17 ,]
    station <- station[,"SOLR"]
    return (station)
}
means <- array(,length(formulas))
variances <- array(,length(formulas))



means[1] <- mean(filter_hours(model[["data"]][["SCBH1"]]),na.rm=TRUE)
means[2] <- mean(filter_hours(model[["data"]][["KTAH1"]]),na.rm=TRUE)
means[3] <- mean(filter_hours(model[["data"]][["OFRH1"]]),na.rm=TRUE)
means[4] <- mean(filter_hours(model[["data"]][["PLHH1"]]),na.rm=TRUE)
means[5] <- mean(filter_hours(model[["data"]][["KKRH1"]]),na.rm=TRUE)



variances[1] <- sd(filter_hours(model[["data"]][["SCBH1"]]),na.rm=TRUE)
variances[2] <- sd(filter_hours(model[["data"]][["KTAH1"]]),na.rm=TRUE)
variances[3] <- sd(filter_hours(model[["data"]][["OFRH1"]]),na.rm=TRUE)
variances[4] <- sd(filter_hours(model[["data"]][["PLHH1"]]),na.rm=TRUE)
variances[5] <- sd(filter_hours(model[["data"]][["KKRH1"]]),na.rm=TRUE)

stations <- stations[order(means)]
variances <- variances[order(means)]
means <- means[(order(means))]

qplot(factor(stations,levels=stations),means,stat="identity")  +geom_errorbar(aes(x=stations, ymin=means-variances, ymax=means+variances), width=0.25) + geom_point(size=5) + ylab(expression(paste("Solar  (",W/m^2,")"))) + xlab("Station (Elevation)") + ggtitle(expression(paste("Solar Std. Dev. [2013,2012]"))) + coord_flip() + science_theme + ylim(-20,1000)

Test Dataset - 2014

filter_hours <- function(station){
    station <- station[station$YEAR == 2014,]
    station <- station[station$HR >= 8 & station$HR <= 17 ,]
    station <- station[,"SOLR"]
    return (station)
}
variances <- array(,length(formulas))
means <- array(,length(formulas))


means[1] <- mean(filter_hours(model[["data"]][["SCBH1"]]),na.rm=TRUE)
means[2] <- mean(filter_hours(model[["data"]][["KTAH1"]]),na.rm=TRUE)
means[3] <- mean(filter_hours(model[["data"]][["OFRH1"]]),na.rm=TRUE)
means[4] <- mean(filter_hours(model[["data"]][["PLHH1"]]),na.rm=TRUE)
means[5] <- mean(filter_hours(model[["data"]][["KKRH1"]]),na.rm=TRUE)

variances[1] <- sd(filter_hours(model[["data"]][["SCBH1"]]),na.rm=TRUE)
variances[2] <- sd(filter_hours(model[["data"]][["KTAH1"]]),na.rm=TRUE)
variances[3] <- sd(filter_hours(model[["data"]][["OFRH1"]]),na.rm=TRUE)
variances[4] <- sd(filter_hours(model[["data"]][["PLHH1"]]),na.rm=TRUE)
variances[5] <- sd(filter_hours(model[["data"]][["KKRH1"]]),na.rm=TRUE)

stations <- stations[order(means)]
variances <- variances[order(means)]
means <- means[(order(means))]

qplot(factor(stations,levels=stations),means,stat="identity")  +geom_errorbar(aes(x=stations, ymin=means-variances, ymax=means+variances), width=0.25) + geom_point(size=5) + ylab(expression(paste("Solar  (",W/m^2,")"))) + xlab("Station (Elevation)") + ggtitle(expression(paste("Solar Std. Dev. [2014]"))) + coord_flip() + science_theme + ylim(-20,1000)