3.1 Acessing the optimal parameters files
#Clean R memory
rm(list=ls(all=TRUE))
#Loading packge to read .mat files
library(R.matlab)
## Registered S3 method overwritten by 'R.oo':
## method from
## throw.default R.methodsS3
## R.matlab v3.6.2 (2018-09-26) successfully loaded. See ?R.matlab for help.
##
## Attaching package: 'R.matlab'
## The following objects are masked from 'package:base':
##
## getOption, isOpen
#Loading clustering packages
library(cluster)
library(devtools)
## Loading required package: usethis
library("factoextra")
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
setwd("C:/Users/rampincg/Desktop/Clarkson_Pc/cassio_PC/AGU_2019/dds_camels671_gr4j/dds_camels671_gr4j/_gr4j_kge")
# Saving the path of the base directory
base.Directory<-getwd()
# Save the list of all the sub-folders names
folders<-dir('.')[file.info(dir('.',full.names=T))$isdir]
# Loop to save in the variable folder.name the name of each
# folder individualy.
folder.name<-0
for (i in 1:length(folders)){
folder.name[i]=folders[i]
}
# Create the matrix to save the optimum parameters for each
# watershed
optimum.Paramters.kge<-matrix(NA,nrow=length(folder.name),ncol=9)
mat.file=0
#Loop to access the folders and extract the optimum parameters
#set for each watershed and save them in a matrix.
for(i in 1:length(folder.name)){
#Set the working directory as function of the folder name
setwd(paste0(base.Directory,"/",folder.name[i]))
#List of all files in the directory
files<-list.files()
#Acess the .mat file that is the sixth element in the list
mat.file<-readMat(files[6])
#Record the first element of the mat.file that is a vector with
#the optimum paramters and third element that is the NS value
par.optimal<-c(mat.file[[1]],mat.file[[3]])
#Save the optimum parameters set of the watershed in a line
optimum.Paramters.kge[i,]<-par.optimal
#re-set the dummy variable
par.optimal=0
}
3.2 Parameters Densities
#################################################
#################################################
#Part II - Parameters Densities
#################################################
#################################################
#Remove the last column of the data(NSE).
data.Set.kge<-optimum.Paramters.kge[,-9]
#Rename the columns
colnames(data.Set.kge)<-c("X1","X2","X3","X4","Ts","CFMAX","CFR","CWH")
rownames(data.Set.kge)<-folder.name
#Presenting a summary of the parameters statistics
summary(data.Set.kge)
## X1 X2 X3
## Min. : 0.3847 Min. :-5.00000 Min. : 0.6222
## 1st Qu.: 118.5645 1st Qu.:-1.11608 1st Qu.: 24.2992
## Median : 223.4190 Median : 0.12177 Median : 46.7825
## Mean : 339.9840 Mean : 0.06688 Mean : 77.2774
## 3rd Qu.: 471.2545 3rd Qu.: 1.21409 3rd Qu.:109.2390
## Max. :1000.0000 Max. : 5.00000 Max. :300.0000
## X4 Ts CFMAX CFR
## Min. :0.5000 Min. :-3.0000 Min. : 0.0000 Min. :0.00000
## 1st Qu.:0.6271 1st Qu.:-1.1689 1st Qu.: 0.5828 1st Qu.:0.02911
## Median :1.0304 Median :-0.1301 Median : 1.8705 Median :0.23287
## Mean :1.3961 Mean : 0.1262 Mean : 3.2233 Mean :0.35276
## 3rd Qu.:1.3136 3rd Qu.: 1.4817 3rd Qu.: 3.4917 3rd Qu.:0.48330
## Max. :5.0000 Max. : 3.0000 Max. :20.0000 Max. :1.00000
## CWH
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.07933
## Mean :0.28348
## 3rd Qu.:0.70750
## Max. :0.80000
#Loading required packages
library(reshape2)
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
index=seq(from=1,to=nrow(data.Set.kge),by=1)
dataX1<-cbind(index,data.Set.kge[,1])
dataX1<-as.data.frame(dataX1)
cnames=c("index","X1")
colnames(dataX1)=cnames
dataX1<-reshape2::melt(dataX1, id.var='index')
dataX2<-cbind(index,data.Set.kge[,2])
dataX2<-as.data.frame(dataX2)
cnames=c("index","X2")
colnames(dataX2)=cnames
dataX2<-reshape2::melt(dataX2, id.var='index')
dataX3<-cbind(index,data.Set.kge[,3])
dataX3<-as.data.frame(dataX3)
cnames=c("index","X3")
colnames(dataX3)=cnames
dataX3<-reshape2::melt(dataX3, id.var='index')
dataX4<-cbind(index,data.Set.kge[,4])
dataX4<-as.data.frame(dataX4)
cnames=c("index","X4")
colnames(dataX4)=cnames
dataX4<-reshape2::melt(dataX4, id.var='index')
dataX5<-cbind(index,data.Set.kge[,5])
dataX5<-as.data.frame(dataX5)
cnames=c("index","Ts")
colnames(dataX5)=cnames
dataX5<-reshape2::melt(dataX5, id.var='index')
dataX6<-cbind(index,data.Set.kge[,6])
dataX6<-as.data.frame(dataX6)
cnames=c("index","CFMAX")
colnames(dataX6)=cnames
dataX6<-reshape2::melt(dataX6, id.var='index')
dataX7<-cbind(index,data.Set.kge[,7])
dataX7<-as.data.frame(dataX7)
cnames=c("index","CFR")
colnames(dataX7)=cnames
dataX7<-reshape2::melt(dataX7, id.var='index')
dataX8<-cbind(index,data.Set.kge[,8])
dataX8<-as.data.frame(dataX8)
cnames=c("index","CWH")
colnames(dataX8)=cnames
dataX8<-reshape2::melt(dataX8, id.var='index')
#Preparing the plots variables for ggplot2
plot1<-ggplot(dataX1) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("X1")+
xlim(0,1100) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot2<-ggplot(dataX2) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("X2")+
xlim(-5,5) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot3<-ggplot(dataX3) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("X3")+
xlim(0,300) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot4<-ggplot(dataX4) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("X4")+
xlim(0.4,5) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot5<-ggplot(dataX4) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("Ts")+
xlim(-3,3) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot6<-ggplot(dataX4) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("CFMAX")+
xlim(0,20) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot7<-ggplot(dataX4) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("CFR")+
xlim(0,1) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
plot8<-ggplot(dataX4) + stat_density(aes(x=value, y=..scaled..,color=variable,linetype=variable),lwd=1, geom="line",position = "identity")+
ylab("Density")+
xlab("CWH")+
xlim(0,0.8) +
theme_bw() +
theme(legend.position="top")+
theme(legend.title = element_blank())+
theme(axis.title.x=element_text(size=14,face="bold"),axis.title.y=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16))
#Function plot
plot_grid(plot1,plot2,plot3,plot4,plot5,plot6,plot7,plot8, labels = "AUTO")

3.3 Scaling Parameters -range (0-1)
#####################################################################
#Scaling the variables into the range 0-1
#####################################################################
df.scaled.kge<-as.data.frame(data.Set.kge)
#Variables used to save the scaled columns
col1=col2=col3=col4=col5=col6=col7=col8=0
col1<-(df.scaled.kge[,1]-min(df.scaled.kge[,1]))/(max(df.scaled.kge[,1])-min(df.scaled.kge[,1]))
col2<-(df.scaled.kge[,2]-min(df.scaled.kge[,2]))/(max(df.scaled.kge[,2])-min(df.scaled.kge[,2]))
col3<-(df.scaled.kge[,3]-min(df.scaled.kge[,3]))/(max(df.scaled.kge[,3])-min(df.scaled.kge[,3]))
col4<-(df.scaled.kge[,4]-min(df.scaled.kge[,4]))/(max(df.scaled.kge[,4])-min(df.scaled.kge[,4]))
col5<-(df.scaled.kge[,5]-min(df.scaled.kge[,5]))/(max(df.scaled.kge[,5])-min(df.scaled.kge[,3]))
col6<-(df.scaled.kge[,6]-min(df.scaled.kge[,6]))/(max(df.scaled.kge[,6])-min(df.scaled.kge[,6]))
col7<-(df.scaled.kge[,7]-min(df.scaled.kge[,7]))/(max(df.scaled.kge[,7])-min(df.scaled.kge[,7]))
col8<-(df.scaled.kge[,8]-min(df.scaled.kge[,8]))/(max(df.scaled.kge[,8])-min(df.scaled.kge[,8]))
df.scaled.kge[,1]=col1
df.scaled.kge[,2]=col2
df.scaled.kge[,3]=col3
df.scaled.kge[,4]=col4
df.scaled.kge[,5]=col5
df.scaled.kge[,6]=col6
df.scaled.kge[,7]=col7
df.scaled.kge[,8]=col8
3.4 Euclidean Distance Matrix
#Euclidian Distance
dist.eucl.kge<-dist(df.scaled.kge,method="euclidean")
#Visualizing the distance matrices
fviz_dist(dist.eucl.kge)

3.4.1 Number of Clusters
3.4.1.1 K-Means
Elbow Method
library(factoextra)
###########
#K-Means###
###########
#Elbow method
fviz_nbclust(df.scaled.kge,kmeans,method="wss")+geom_vline(xintercept=3,linetype=2)+labs(subtitle="Elbow method")

Silhouette Method
#Silhouette method
fviz_nbclust(df.scaled.kge,kmeans,method="silhouette")+labs(subtitle="Silhouette method")

Gap Statistic
#Gap Statistic
#nboot=50 to keep the function speedy.
#recommended value: nboot=500 for your analysis.
#Use verbose=FALSE to hide computing progression.
fviz_nbclust(df.scaled.kge,kmeans,method="gap_stat",nboot=50)+labs(subtitle="Gap statistic method")

Clusters Plots
#Compute k-means k=2
km.res<-kmeans(df.scaled.kge,2,nstart=15)
#Plotting Cluster
fviz_cluster(km.res,data=df.scaled.kge,palette="jco",repel=TRUE,ggtheme=theme_classic(),show.clust.cent = FALSE,stand=FALSE,geom="point")

#Compute k-means k=3
km.res<-kmeans(df.scaled.kge,3,nstart=15)
#Plotting Cluster
fviz_cluster(km.res,data=df.scaled.kge,palette="jco",repel=TRUE,ggtheme=theme_classic(),show.clust.cent = FALSE,stand=FALSE,geom="point")

3.4.2 Hierarchical Clustering
Ward’s linkage
#Ward’s linkage
res.hc.ward<-hclust(d=dist.eucl.kge,method="ward.D2")
fviz_dend(res.hc.ward,cex=0.2)

Average linkage
#Average linkage
res.hc.average<-hclust(d=dist.eucl.kge,method="average")
fviz_dend(res.hc.average,cex=0.2)

Ward’s X Average linkage
#Wards X Average Linkage
#Ward's linkage
#Computing cophentic distance
res.coph.ward<-cophenetic(res.hc.ward)
#Correlation between cophenetic distance and the original distance
cor(dist.eucl.kge,res.coph.ward)
## [1] 0.6673348
#Average Linkage
#Computing cophentic distance
res.coph.average<-cophenetic(res.hc.average)
#Correlation between cophenetic distance and the original distance
cor(dist.eucl.kge,res.coph.average)
## [1] 0.7389588