Main.R

## main executable script, last ran with this R version info. in the Windows OS:
# platform       x86_64-w64-mingw32          
# arch           x86_64                      
# os             mingw32                     
# system         x86_64, mingw32             
# status                                     
# major          3                           
# minor          2.3                         
# year           2015                        
# month          12                          
# day            10                          
# svn rev        69752                       
# language       R                           
# version.string R version 3.2.3 (2015-12-10)
# nickname       Wooden Christmas-Tree 

# And this RStudio version 0.99.892 - © 2009-2016 RStudio, Inc


#####################################################################
# file and path dependencies stated below, get those files first
#####################################################################
setwd("~/Code/R/Runs")
source('Tools-For-Parsing-FriggsStFergus-Pipesim-Sum-Files.R')
source('MakeListOfFilesOfTypeXInSubdirectories.R')
source("ComputeErrorsAndVariancesFromPIPESIMResultsDataFfame.R")
source("Utility-Functions-For-Point-Model-Classification-And-Analysis.R")

#######################################################################
######################  M A I N   #####################################
#######################################################################

library(ggplot2) # for making nice plots using a grammar of graphics
library(reshape2) # to shape data frames from long to short form and vice versa 
library(NbClust) # to determine recommended number of clusters
 
ps.df <- CreatePipesimResultsDataFrame()
df <- ComputeErrorsAndVariancesFromPIPESIMResultsDataFfame( ps.df )

df2P <- RemoveThreePhaseModels( df )


# Create the a priori categories:
# first create a lookup mapping from flow point model to empirical category
lookup<-unlist(tools.mappingPIPESIMMethodCategories)
# apply it to the column (variable) of point models from the results matrix (data frame)
# this is an example of character matching used for subsetting (a very powerful R idiom)
Type <- lookup[df2P$FModel]
# now put it into the data frame as factors:
df2P$Type <- as.factor(unname(Type))


#######################################################################
#################### Clustering analysis ####################
#######################################################################

# clustering analysis is performed to identify cohesive subgroups in a dataset 

index_df <- ComputeIndexesDataFrame(df2P)

##### Hierarchical clustering methods #####
# eliminate the non-data column of model names
index_df$FModels<-NULL
# compute distance matrix:
d <- dist(index_df)
# compute the hierarchical clusters
fit.averageLink <- hclust(d,method = "average")
fit.singleLink <- hclust(d,method = "single")
fit.completeLink <- hclust(d,method = "complete")
fit.centroid <- hclust(d,method="centroid")
fit.wardD <- hclust(d, method="ward.D")
fit.wardD2 <- hclust(d, method="ward.D2")
fit.mcquitty <- hclust(d, method = "mcquitty")
fit.median <- hclust(d, method= 'median')

plot(fit.averageLink, hang=-1, cex=.8, main = "Average Link")

plot(fit.singleLink, hang=-1, cex=.8, main = "Single Link")

plot(fit.completeLink, hang=-1, cex=.8, main = "Complete Link")

plot(fit.centroid, hang=-1, cex=.8, main = "Centroid")

plot(fit.wardD, hang=-1, cex=.8, main = "Ward D")

plot(fit.wardD2, hang=-1, cex=.8, main = "Ward D2")

plot(fit.mcquitty, hang=-1, cex=.8, main = "WPGMA")

plot(fit.median, hang=-1, cex=.8, main = "Median")

# filter best result from grade analysis:
dfPDownDet<-df2P[df2P$Pset=="PDown" & df2P$ElevProf=="Det",]
index_PDownDet <- ComputeIndexesDataFrame(dfPDownDet)
index_PDownDet$FModels <-NULL

d2 <- dist(index_PDownDet)

# things you can see with pure "Euclidean" distances
# the OLGAS implementations arevary slightly among themselves with the 72 farther from the 50 series than the 627
olgas_distances_matrix<-round(as.matrix(d2)[21:25,21:25],3)
# > olgas_distances_matrix
#             olga_2p_50 olga_2p_53 olga_2p_532 olga_2p_627 olga_2p_72
# olga_2p_50       0.000      0.000       0.000       0.057      0.046
# olga_2p_53       0.000      0.000       0.000       0.057      0.046
# olga_2p_532      0.000      0.000       0.000       0.057      0.046
# olga_2p_627      0.057      0.057       0.057       0.000      0.012
# olga_2p_72       0.046      0.046       0.046       0.012      0.000

# the BB family is all separated when applied to this data set
BB_distance_matrix <- round(as.matrix(d2)[0:8,0:8],3)
# > BB_distance_matrix 
#          BB1   BB2   BBO BBOTD   BBR BBRev1 BBRev2 BBRTD
# BB1    0.000 0.957 0.165 0.089 0.430  0.812  1.015 0.491
# BB2    0.957 0.000 0.879 0.939 0.803  1.274  0.856 0.844
# BBO    0.165 0.879 0.000 0.086 0.305  0.945  0.934 0.364
# BBOTD  0.089 0.939 0.086 0.000 0.368  0.889  0.982 0.425
# BBR    0.430 0.803 0.305 0.368 0.000  1.081  0.661 0.122
# BBRev1 0.812 1.274 0.945 0.889 1.081  0.000  1.321 1.143
# BBRev2 1.015 0.856 0.934 0.982 0.661  1.321  0.000 0.646
# BBRTD  0.491 0.844 0.364 0.425 0.122  1.143  0.646 0.000

# round(as.matrix(d2)[14:25,14:25],3)
#             EatonOli1 HughDuk1 LEDA2P1 LOCKMAR LOCKMARTD    MB NOSLIP olga_2p_50 olga_2p_53 olga_2p_532
# EatonOli1       0.000    0.985   0.139   0.204     0.204 0.379  0.204      0.111      0.111       0.111
# HughDuk1        0.985    0.000   0.937   0.870     0.870 0.999  0.870      0.956      0.956       0.956
# LEDA2P1         0.139    0.937   0.000   0.098     0.098 0.433  0.098      
# LOCKMAR         0.204    0.870   0.098   0.000     0.000 0.487  0.000      0.136      0.136       0.136
# LOCKMARTD       0.204    0.870   0.098   0.000     0.000 0.487  0.000      0.136      0.136       0.136
# MB              0.379    0.999   0.433   0.487     0.487 0.000  0.487      0.395      0.395       0.395
# NOSLIP          0.204    0.870   0.098   0.000     0.000 0.487  0.000      0.136      0.136       0.136
# olga_2p_50      0.111    0.956   0.051   0.136     0.136 0.395  0.136      0.000      0.000       0.000
# olga_2p_53      0.111    0.956   0.051   0.136     0.136 0.395  0.136      0.000      0.000       0.000
# olga_2p_532     0.111    0.956   0.051   0.136     0.136 0.395  0.136      0.000      0.000       0.000
# olga_2p_627     0.101    0.976   0.107   0.183     0.183 0.358  0.183      0.057      0.057       0.057
# olga_2p_72      0.101    0.972   0.096   0.173     0.173 0.364  0.173      0.046      0.046       0.046
# olga_2p_627 olga_2p_72
# EatonOli1         0.101      0.101
# HughDuk1          0.976      0.972
# LEDA2P1           0.107      0.096
# LOCKMAR           0.183      0.173
# LOCKMARTD         0.183      0.173
# MB                0.358      0.364
# NOSLIP            0.183      0.173
# olga_2p_50        0.057      0.046
# olga_2p_53        0.057      0.046
# olga_2p_532       0.057      0.046
# olga_2p_627       0.000      0.012
# olga_2p_72        0.012      0.000


# compute the hierarchical clusters
fit.averageLink2 <- hclust(d2,method = "average")
fit.singleLink2 <- hclust(d2,method = "single")
fit.completeLink2 <- hclust(d2,method = "complete")
fit.centroid2 <- hclust(d2,method="centroid")
fit.wardD2 <- hclust(d2, method="ward.D")
fit.wardD22 <- hclust(d2, method="ward.D2")
fit.mcquitty2 <- hclust(d2, method = "mcquitty")
fit.median2 <- hclust(d2, method= 'median')

# plot dendograms, read from bottom up
# first two closest clusters are joined
# height dimension indicates the criteron value for distance between 
# two clusters dependin on the "method" selected
# Note: good for similarity analysis, not so good for actual
# (hopefully meaningful) group creation
plot(fit.averageLink2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Average Link")

plot(fit.singleLink2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Single Link")

plot(fit.completeLink2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Complete Link")

plot(fit.centroid2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Centroid")

plot(fit.wardD2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Ward D")

plot(fit.wardD22, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Ward D2")

plot(fit.mcquitty2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, WPGMA")

plot(fit.median2, hang=-1, cex=.8, main = "Pressure set downstream, detailed elevation, Median")

png("dendo_PDownDet_centroid.png")
plot(fit.centroid2, hang=-1, cex=.8,
     main = "Pressure set downstream, detailed elevation, Centroid",
     xlab="", ylab="Height")
# the following cuts the three at k=3 clusters and draws 3 rectangles around those clusters
rect.hclust(fit.centroid2, k=3) 
dev.off()

## png 
##   2

pdf("dendo_PDownDet_avglink.pdf")
plot(fit.averageLink2, hang=-1, cex=.8,
     main = "Pressure set downstream, detailed elevation, Average Link",
     xlab="", ylab="Height")
rect.hclust(fit.averageLink2, k=3)
dev.off()

## png 
##   2

#######################################################
## Find out the composition of the clusters
#######################################################
# get a character vector with the names of the methods in the clusters
FM <- names(cutree(fit.averageLink2, k=3))
# make a data frame
dfFM <- data.frame(FModels=FM, cluster=unname(cutree(fit.averageLink2,k=3)),stringsAsFactors=FALSE)
# make a vector of types for each method
Type <- lookup[dfFM$FModels]
# add it to the data frame
dfFM$Type <- unname(Type)
# Contingency table to see category of models by cluster
table(dfFM$cluster,dfFM$Type)

##    
##     a b c d e f g h
##   1 8 0 0 1 0 0 0 0
##   2 1 0 3 0 0 0 0 0
##   3 1 2 2 1 3 3 3 7

# produces this output
#   a b c d e f g h
# 1 8 0 0 1 0 0 0 0
# 2 1 0 3 0 0 0 0 0
# 3 1 2 2 1 3 3 3 7

########################################################
## Get the grade of the clusters
########################################################
# get the relative performace by subsetting the models in each cluster
RP_cluster1 <- rowSums(index_PDownDet[dfFM$FModels[dfFM$cluster==1],])
RP_cluster2 <- rowSums(index_PDownDet[dfFM$FModels[dfFM$cluster==2],])
RP_cluster3 <- rowSums(index_PDownDet[dfFM$FModels[dfFM$cluster==3],])

# Function 'ComputeIndexesDataFrame' uses weighted indices which give Max G9=7.3
G9_cluster1 <- mean(1-RP_cluster1/7.3)*100
G9_cluster2 <- mean(1-RP_cluster2/7.3)*100
G9_cluster3 <- mean(1-RP_cluster3/7.3)*100
# Create atomic vector with grades
c(G9_cluster1,G9_cluster2,G9_cluster3)

## [1] 43.61459 30.61936 82.79229

# To determine the number of clusters to use the NbCluster package comes in handy:
set.seed(932637)
devAskNewPage(ask =FALSE) # activates prompt to see next plot
nc_centroid <- NbClust(index_PDownDet, distance = 'euclidean', min.nc = 2,max.nc = 5, method = "centroid")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 5 proposed 2 as the best number of clusters 
## * 5 proposed 3 as the best number of clusters 
## * 5 proposed 4 as the best number of clusters 
## * 8 proposed 5 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  5 
##  
##  
## *******************************************************************

# produces the plots and text output:
# ** : The Hubert index is a graphical method of determining the number of clusters.
# In the plot of Hubert index, we seek a significant knee that corresponds to a 
# significant increase of the value of the measure i.e the significant peak in Hubert
# index second differences plot. 
# 
# Hit <Return> to see next plot: 
#   *** : The D index is a graphical method of determining the number of clusters. 
# In the plot of D index, we seek a significant knee (the significant peak in Dindex
#                                                     second differences plot) that corresponds to a significant increase of the value of
# the measure. 
# 
# ******************************************************************* 
#   * Among all indices:                                                
#   * 5 proposed 2 as the best number of clusters 
# * 7 proposed 3 as the best number of clusters 
# * 4 proposed 4 as the best number of clusters 
# * 7 proposed 5 as the best number of clusters 
# 
# ***** Conclusion *****                            
#   
#   * According to the majority rule, the best number of clusters is  3 
# 
# 
# *******************************************************************
devAskNewPage(ask =FALSE) # get rid of prompt asking to display next plot

table(nc_centroid$Best.nc[1,])

## 
## 0 1 2 3 4 5 
## 2 1 5 5 5 8

# produces
# 0 1 2 3 4 5 
# 2 1 5 7 4 7 

# which can be read as 1 criteria favors 1 cluster, 5 criteria favors 2 clusters,
# 7 criteria favors 3 clusters (top row), so on

set.seed(932637)
nc_kmeans <- NbClust(index_PDownDet, distance = 'euclidean', min.nc = 2,max.nc = 5, method = "kmeans")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 8 proposed 2 as the best number of clusters 
## * 6 proposed 3 as the best number of clusters 
## * 6 proposed 4 as the best number of clusters 
## * 3 proposed 5 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

table(nc_kmeans$Best.nc[1,])

## 
## 0 1 2 3 4 5 
## 2 1 8 6 6 3

set.seed(932637)
fit.km <- kmeans(index_PDownDet, 3, nstart=25)
fit.km$size

## [1]  4 22  9

colSums(fit.km$centers)

##         e        re        ae       are        ve       vre       vae 
## 1.5610906 1.6221988 1.7596759 1.5847271 0.7928268 0.9910265 0.9335123 
##      vare       nsc 
## 0.9417739 0.2502525

# e        re        ae       are        ve       vre       vae      vare       nsc 
# 1.5610906 1.6221988 1.7596759 1.5847271 0.7928268 0.9910265 0.9335123 0.9417739 0.2502525 
rowSums(fit.km$centers)

##        1        2        3 
## 5.064787 1.256163 4.116135

# 1        2        3 
# 5.064787 1.256163 4.116135 
RP<-rowSums(fit.km$centers)
1-RP/9

##         1         2         3 
## 0.4372459 0.8604264 0.5426517

# 1         2         3 
# 0.4372459 0.8604264 0.5426517 
(1-RP/9)*100

##        1        2        3 
## 43.72459 86.04264 54.26517

# 1        2        3 
# 43.72459 86.04264 54.26517 


#################################################################
# USe Partiion around medoids PAM
# less sensitive to outliers and a nice cluster plotting routine
#################################################################
library(cluster)
set.seed(932637)
fit.pam <- pam(index_PDownDet,k = 3, metric = "euclidean",stand = TRUE)
fit.pam$medoids

##                     e        re        ae        are        ve
## BBO         0.8898828 0.8154579 0.8898773 0.79809696 0.2814604
## olga_2p_532 0.2234456 0.1643997 0.2379044 0.03212678 0.1455420
## DKAGAD      0.6130130 0.7687468 0.7228848 0.81334396 0.4473411
##                      vre       vae       vare nsc
## BBO         0.0004916084 0.5919906 0.01143944 0.1
## olga_2p_532 0.0506385942 0.4176158 0.06025481 0.1
## DKAGAD      0.8895965698 0.1205496 0.88590697 0.1

jpeg("pam_bivariate_clusters.jpg")
clusplot(fit.pam,main="Bivariate cluster analysys from PAM")
dev.off()

## png 
##   2

RP_pam<-rowSums(fit.pam$medoids)
G9_medoids <- (1-RP_pam/9)*100

# BBO         olga_2p_532 DKAGAD 
# 51.34781    84.08969    40.42908 



#######################################################################
#################### Grade Performance analysis ####################
#######################################################################

# compute grades for all G-L point models:
g9_all<- ComputeGrade9(df)
g9_2P<-ComputeGrade9(df2P)

## separate the data by pressure set point
dfPSetDown<-df2P[df2P$Pset=="PDown",]
dfPSetUp<-df2P[df2P$Pset=="Pup",]

## separate the data by elevation profile type:
dfCoarse<-df2P[df2P$ElevProf=="Coarse",]
dfDetail<-df2P[df2P$ElevProf=="Det",]

## Separate by Downstream and Elevation profile type:
dfPDownCoarse<-df2P[df2P$Pset=="PDown" & df2P$ElevProf=="Coarse",]
dfPDownDetail<-df2P[df2P$Pset=="PDown" & df2P$ElevProf=="Det",]
dfUpCoarse<-df2P[df2P$Pset=="Pup"& df2P$ElevProf=="Coarse",]
dfUpDetail<-df2P[df2P$Pset=="Pup"& df2P$ElevProf=="Det",]


## To do holdup comparisons the standar bar plots are not good because there is insufficient 
## data points. Instead the values reported by selected models was plotted and compared 
## with the measured value.
## Filter Group 1 Runs 6 and 7 for holdup comparisons: R6 ->360 to 475 m3; R7 -> 630 m3
dfG1R6R7<-df2P[df2P$Group=="FriggsStFergus" & df2P$Run%in% c(6,7),]

G1R6ExpLiqHoldup <- rep(as.double(475),times=length( (dfG1R6R7[dfG1R6R7$Run==6,])[[1]] ) )
G1R7ExpLiqHoldup <- rep(as.double(630),times=length( (dfG1R6R7[dfG1R6R7$Run==7,])[[1]] ) )

dfG1R6R7$ExpLiqHoldup<-c(G1R6ExpLiqHoldup,G1R7ExpLiqHoldup)

dfG1R6R7 <- ComputeLiqHoldupErrorsAndVariancesFromPIPESIMResultsDataFfame( dfG1R6R7 ) 

## filter out selected mechanistic models and some correlations for ilustration: 
dfG1R6R7_SelectedFModels <- dfG1R6R7[as.character(dfG1R6R7$FModel) %in%c("XIAO",
                                                                         "Xiao1",
                                                                         "Xiao2",
                                                                         "olga_2p_72",
                                                                         "olga_2p_627",
                                                                         "LEDA2P1",
                                                                         "BP1",
                                                                         "BP2",
                                                                         "BJA",
                                                                         "Oliemanist1",
                                                                         "NOSLIP",
                                                                         "TUFFPU2P",
                                                                         "TMB",
                                                                         "BB1",
                                                                         "BBR",
                                                                         "MB",
                                                                         "EatonOli1"), ] 


df6<-dfG1R6R7_SelectedFModels[dfG1R6R7_SelectedFModels$Run==6 & dfG1R6R7_SelectedFModels$Pset=="PDown" & dfG1R6R7_SelectedFModels$ElevProf=="Det",]
df7<-dfG1R6R7_SelectedFModels[dfG1R6R7_SelectedFModels$Run==7 & dfG1R6R7_SelectedFModels$Pset=="PDown" & dfG1R6R7_SelectedFModels$ElevProf=="Det",]

df6_o <- transform(df6,FModel=reorder(FModel, LiqHoldup))
df7_o <- transform(df7,FModel=reorder(FModel, LiqHoldup))


LiqHold6_plot <- ( ggplot(data=df6_o, aes(x=FModel, y=LiqHoldup))
                   + labs(title="Calculated holdup (m3) vs. measured at [360,475] m3 (FriggStFergus06)")
                   + geom_bar(position="stack", stat="identity",alpha=0.75)
                   + scale_y_continuous( limit=c(0,4500),expand=c(0,0),breaks = c(0,500,1000,2000,3000,4000) ) 
                   + ylab("") + xlab("")
                   + geom_hline(aes(yintercept=360), size=1,colour="black", linetype="solid")
                   + geom_hline(aes(yintercept=475), size=1,colour="black", linetype="solid")        
                   + coord_flip()
                   + theme(title=element_text(face = "bold", color = "black", size = 14),
                           axis.text.y=element_text(color="black",size=rel(2)),
                           axis.text.x=element_text(color="black",size=rel(2),angle=90),
                           panel.grid.major.x= element_line(colour="gray",linetype = 2),
                           panel.grid.minor.x= element_line(colour="gray",linetype = "dashed") ) )

SaveGGplotObjToPNG(LiqHold6_plot)

## Saving 7 x 5 in image

LiqHold7_plot <- ( ggplot(data=df7_o, aes(x=FModel, y=LiqHoldup) )
                   + labs(title="Calculated holdup (m3) vs. measured at 630m3 (FriggStFergus07)")
                   + geom_bar(position="stack", stat="identity", alpha=0.75)
                   + scale_y_continuous( limit=c(0,4500),expand=c(0,0),breaks = c(0,500,1000,2000,3000,4000) ) 
                   + ylab("Liquid inventory, m3") + xlab("")
                   + geom_hline(aes(yintercept=630), size=1,colour="black", linetype="solid")
                   + coord_flip()
                   + theme(title=element_text(face = "bold", color = "black", size = 14),
                           axis.text.y=element_text(color="black",size=rel(2)),
                           axis.text.x=element_text(color="black",size=rel(2),angle=90),
                           panel.grid.major.x= element_line(colour="gray",linetype = 2),
                           panel.grid.minor.x= element_line(colour="gray",linetype = "dashed") ) )

SaveGGplotObjToPNG(LiqHold7_plot)

## Saving 7 x 5 in image

print(LiqHold6_plot)

print(LiqHold7_plot)

g9_LiqInv <- ComputeGrade9LiqInv( dfG1R6R7 )

## Prepare the G9s:
g9_PDown <- ComputeGrade9(dfPSetDown)
g9_PUp <- ComputeGrade9(dfPSetUp)

g9_Coarse <- ComputeGrade9(dfCoarse)
g9_Detail <- ComputeGrade9(dfDetail)

g9_DC <- ComputeGrade9(dfPDownCoarse)
g9_DD <- ComputeGrade9(dfPDownDetail)
g9_UC <- ComputeGrade9(dfUpCoarse)
g9_UD <- ComputeGrade9(dfUpDetail)


## Plot G9 for all observations:

## we don't need G9 anymore because it is computed from the individual g_i:
g9_plot <- CreateBarPlotfromG9DataFrame(g9_2P, "G9 all models")

print(g9_plot)

SaveGGplotObjToPNG(g9_plot)

## Saving 7 x 5 in image

# Create PNGs for use in publications
g9_PDown_plot <- CreateBarPlotfromG9DataFrame(g9_PDown, "G9 pressure set downstream")
SaveGGplotObjToPNG(g9_PDown_plot)

## Saving 7 x 5 in image

g9_PUp_plot <- CreateBarPlotfromG9DataFrame(g9_PUp, "G9 pressure set upstream")
SaveGGplotObjToPNG(g9_PUp_plot)

## Saving 7 x 5 in image

## plot G9 for PDown and PUp:
print(g9_PUp_plot)

print(g9_PDown_plot)

## plot G9 results for coarse and detailed elevation profile
g9_Coarse_plot <- CreateBarPlotfromG9DataFrame( g9_Coarse, "G9 coarse elevation profile")
SaveGGplotObjToPNG(g9_Coarse_plot)

## Saving 7 x 5 in image

g9_Detail_plot <- CreateBarPlotfromG9DataFrame( g9_Detail, "G9 detailed elevation profile")
SaveGGplotObjToPNG(g9_Detail_plot)

## Saving 7 x 5 in image

print(g9_Coarse_plot)

print(g9_Detail_plot)

## plot G9 results for combinations of variables profile(Detailed/Coarse) and PSetLocation(PDown/PUp)  
g9_DC_Plot <- CreateBarPlotfromG9DataFrame(g9_DC,"G9 downstream P set point and coarse elevation")
SaveGGplotObjToPNG(g9_DC_Plot)

## Saving 7 x 5 in image

g9_DD_Plot <- CreateBarPlotfromG9DataFrame(g9_DD,"G9 downstream P set point and detailed elevation")
SaveGGplotObjToPNG(g9_DD_Plot)

## Saving 7 x 5 in image

g9_UC_Plot <- CreateBarPlotfromG9DataFrame(g9_UC,"G9 upstream P set point and coarse elevation")
SaveGGplotObjToPNG(g9_UC_Plot)

## Saving 7 x 5 in image

g9_UD_Plot <- CreateBarPlotfromG9DataFrame(g9_UD,"G9 upstream P set point and detailed elevation")
SaveGGplotObjToPNG(g9_UD_Plot)

## Saving 7 x 5 in image

print(g9_DC_Plot)

print(g9_DD_Plot)

print(g9_UC_Plot)

print(g9_UD_Plot)

## Comparing all results to get top 10 PM by Grade:
g9_Top10_2P_All <- ReturnTop10(g9_2P, "all")
g9_Top10_Coarse <- ReturnTop10(g9_Coarse, "coarse")
g9_Top10_Detail <- ReturnTop10(g9_Detail,"detail")
g9_Top10_PSetUp <- ReturnTop10(g9_PUp,"upstream")
g9_Top10_PSDown <- ReturnTop10(g9_PDown, "downstream")
g9_Top10_UpCors <- ReturnTop10(g9_UC,"up-coarse")
g9_Top10_UpDetl <- ReturnTop10(g9_UD,"up-detail")
g9_Top10_DwnCrs <- ReturnTop10(g9_DC,"down-coarse")
g9_Top10_DwnDet <- ReturnTop10(g9_DD,"down-detail")

mdf<-rbind.data.frame(g9_Top10_2P_All,
                      g9_Top10_Coarse,
                      g9_Top10_Detail,
                      g9_Top10_PSetUp,
                      g9_Top10_PSDown,
                      g9_Top10_UpCors,
                      g9_Top10_UpDetl,
                      g9_Top10_DwnCrs,
                      g9_Top10_DwnDet)
# > mdf
#          G9     FModels cnt      filter
# 1  88.38232 Oliemanist1   1         all
# 2  87.94749    TUFFPU2P   1         all
# 3  86.52804         TMB   1         all
# 4  86.08756          MB   1         all
# 5  84.66321   EatonOli1   1         all
top10Plot<-GenerateTop10ClassificationBargg2Plot(mdf)
print(top10Plot)

SaveGGplotObjToPNG(top10Plot)

## Saving 7 x 5 in image

mdf_m <- mdf
 # head(mdf_m)
#         G9     FModels cnt
# 1 90.57677 Oliemanist1   1
# 2 90.22408    TUFFPU2P   1
# 3 89.07274         TMB   1
# 4 88.71546          MB   1
# 5 87.56016   EatonOli1   1
# 6 86.69338     LEDA2P1   1

mdf_m$G9<-NULL
mdf_m$filter<-NULL

mc<-melt(mdf_m,id.vars = "FModels")
dc<-dcast(mc,formula = FModels ~ .,sum)
names(dc)<-c("FModels","Freq")

# dc
# FModels Freq
# 1  Oliemanist1    9
# 2     TUFFPU2P    9
# 3          TMB    9
# 4           MB    9
# 5    EatonOli1    4
# 6      LEDA2P1    4
# 7         XIAO    9
# 8        Xiao1    8
# 9        Xiao2    8
# 10         BJA    9
# 11         BP1    5
# 12         BP2    5
# 13 olga_2p_627    1
# 14  olga_2p_72    1


mdf_m2 <- mdf
mdf_m2$cnt<-NULL
mc<-melt(mdf_m2,id.vars = "FModels")

## Warning: attributes are not identical across measure variables; they will
## be dropped

# > head(mc)
# FModels variable    value
# 1 Oliemanist1       G9 90.57677
# 2    TUFFPU2P       G9 90.22408
# 3         TMB       G9 89.07274
# 4          MB       G9 88.71546
# 5   EatonOli1       G9 87.56016
# 6     LEDA2P1       G9 86.69338
dc2<-dcast(mc,formula = FModels ~ .,mean)

## Warning in mean.default(.value[0], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

## Warning in mean.default(.value[i], ...): argument is not numeric or
## logical: returning NA

names(dc2)<-c("FModels","Avg G9")
# > dc2
# FModels   Avg G9
# 1  Oliemanist1 90.90524
# 2     TUFFPU2P 91.06437
# 3          TMB 89.01124
# 4           MB 89.68280
# 5    EatonOli1 89.09737
# 6      LEDA2P1 89.03794
# 7         XIAO 87.18363
# 8        Xiao1 87.23361
# 9        Xiao2 86.99661
# 10         BJA 87.03658
# 11         BP1 88.11589
# 12         BP2 88.11589
# 13 olga_2p_627 89.14203
# 14  olga_2p_72 88.82127


#Plot all Top10s:
df_Top10_G9s <- data.frame(FullMatrix=g9_Top10_2P_All$G9,
                           Coarse=g9_Top10_Coarse$G9,
                           Detail=g9_Top10_Detail$G9,
                           PUp=g9_Top10_PSetUp$G9,
                           PDown=g9_Top10_PSDown$G9,
                           PDownCoarse=g9_Top10_DwnCrs$G9,
                           PDownDetail=g9_Top10_DwnDet$G9,
                           PUpCoarse=g9_Top10_UpCors$G9,
                           PUpDetail=g9_Top10_UpDetl$G9)

# boxplot(df_Top10_G9s, main="Top 10 point models for various filters",
#         xlab="Filter", ylab="Grade 9")
# boxplot(df_Top10_G9s, las = 2)
# 

mdf<-melt(df_Top10_G9s)

## No id variables; using all as measure variables

names(mdf)<-c("Filter","G9")


black.bold.16.text <- element_text(face = "bold", color = "black", size = 16)
black.normal.15.text <- element_text(color = "black", size = 15)
G9Top10ByFilter <- (qplot(Filter, G9,data=mdf)
                    + labs(title="Quantiles for grades of top 10 point models by filter")
                    + ylab("G9") + xlab("Filter")
                    + geom_boxplot(outlier.colour = "red", outlier.size = 4, outlier.shape = 13)
                    + geom_jitter()
                    + coord_flip()
                    + theme( text=element_text(size=16), panel.grid.minor = element_line(colour="white"),
                             title=black.bold.16.text,
                             axis.text = black.normal.15.text,
                             axis.text.x= element_text(angle = 90, vjust = 0.5, hjust=1), #http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
                             panel.grid.major.y= element_line(colour="gray"),
                             panel.grid.minor.y= element_line(colour="gray"),
                             complete=FALSE )
                    + scale_x_discrete(limits=rev(levels(mdf$Filter)))
                    + scale_y_continuous( limit=c(80,95),expand=c(0,0),breaks = c(80,85,90,95) )
                    + scale_fill_grey())

SaveGGplotObjToPNG(G9Top10ByFilter)

## Saving 7 x 5 in image

## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## Warning: Removed 7 rows containing missing values (geom_point).

## Warning: Removed 7 rows containing missing values (geom_point).

print(G9Top10ByFilter)

## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## Warning: Removed 7 rows containing missing values (geom_point).

## Warning: Removed 7 rows containing missing values (geom_point).

# library(plyr)
# dc_o <- arrange(dc,desc(Freq),FModels)
#           FModels Freq
# 1          BJA    9
# 2           MB    9
# 3  Oliemanist1    9
# 4          TMB    9
# 5     TUFFPU2P    9
# 6         XIAO    9
# 7        Xiao1    8
# 8        Xiao2    8
# 9          BP1    5
# 10         BP2    5
# 11   EatonOli1    4
# 12     LEDA2P1    4
# 13 olga_2p_627    1
# 14  olga_2p_72    1

## plot liquid inventory g9s:
g9_LiqInv_plot <- CreateBarPlotfromG9DataFrame(g9_LiqInv, "G9 liquid inventory")

SaveGGplotObjToPNG(g9_LiqInv_plot)

## Saving 7 x 5 in image

print(g9_LiqInv_plot)

# Plotting errors in different ways:
# Note: some plots take a few seconds to display

# g<- ggplot(df2P, aes(ExpDP,e.i))
# g<-g+geom_point()+facet_wrap(FModel ~ Pset,nrow=9)+geom_smooth(method="loess")+theme_bw()
# print(g)

# require(graphics)
# pairs(df2P, 
#       panel = panel.smooth, 
#       main = "pipesim case results")

png("ErrorsCorrelated.png",
    width = 650,
    height = 650,
    units = "px",
    pointsize = 8,
    bg = "white",
    res = 160,
    family = "",
    restoreConsole = TRUE,
    type = c("cairo-png"),
    antialias="default")
dfErrors<- data.frame(error=df2P$e.i, absolute.error=df2P$abse.i,relative.error=df2P$rele.i,absrel.error=df2P$absrele.i)
pairs(dfErrors,
      panel = panel.smooth,
      main = "Friggs-St Fergus errors")
dev.off()

## png 
##   2

Main.R

padames

Fri Jun 03 15:26:13 2016