Evaluate Modflow run with pit-latrine function

### Run with Modflow
library(devtools)
## Loading required package: usethis
#devtools::install_github("dof1985/cwatmRutils")
library(cwatmRutils)
library(raster)
## Loading required package: sp
## The legacy packages maptools, rgdal, and rgeos, underpinning this package
## will retire shortly. Please refer to R-spatial evolution reports on
## https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## This package is now running under evolution status 0
library(ggplot2)
library(ncdf4)
library(mondate)
## 
## Attaching package: 'mondate'
## The following object is masked from 'package:base':
## 
##     as.difftime
#Define path root for run looking to analyse
pth <- "C:/Users/hinton/Documents/GitHub/MalawiLake/outputs/modflowrun5"

fn <- "discharge_monthavg.nc"
# 

start<- mondate("1971-01-01")
end<- mondate("2000-12-31")
###
#Discharge at chiromo station (Downstream of Blantyre)
## Chiromo data station -16.55, 35.13 (data from GRDC location)

pt<- rbind(as.numeric(c('35.125', '-16.375')))

pt<- as.data.frame(pt)
colnames(pt)<- c('x','y')

# extract data for the point(s)
point_extract <- ncdf2raster(pth = sprintf("%s/%s", pth, fn),
                 transpose = TRUE, time = c(1, 12*30 ), spatial = pt)

point_extract$date <- as.Date(point_extract$time, origin = "1901-01-01")


#+ #### Compare with data from CWatM output (both at point location and also running average CWatM basin wide discharge)

measured_discharge_output<- read.csv("H:/MyDocuments/Validation data/GRDC Discharge data/1992900/monthly measured discharge output.csv")
measured_discharge_output<- as.data.frame(measured_discharge_output[1:360,])
measured_discharge_output$group<- "measured chiromo station"
colnames(measured_discharge_output)<- c("Value", "group")

comparisondf<- rbind(measured_discharge_output)

####

comparisondf$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
comparisondf$Value<- as.numeric(comparisondf$Value)
## Warning: NAs introduced by coercion
####

point_data<- as.numeric(point_extract$value)
point_data<- as.data.frame(point_data)
point_data$group<- "CWatM chiromo station"
point_data$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
colnames(point_data)<- c("Value", "group", "date")

comparisondf<- rbind(point_data, comparisondf)
modflow_with_PL_chimono<- comparisondf

no_PL_comparisondf_chimono<- comparisondf
######### 
#Compare with discharge station upstream of Blantrye
#Discharge at Liwonde station

#pt_liwonde<- rbind(as.numeric(c('35.2', '-15.07')))
pt_liwonde<- rbind(as.numeric(c('35.29167', '-15.04167')))

pt_liwonde<- as.data.frame(pt_liwonde)
colnames(pt_liwonde)<- c('x','y')

# extract data for the point(s)
point_extract_liwonde <- ncdf2raster(pth = sprintf("%s/%s", pth, fn),
                 transpose = TRUE, time = c(1, 12*30 ), spatial = pt_liwonde)

point_extract_liwonde$date <- as.Date(point_extract_liwonde$time, origin = "1901-01-01")


#+ #############################
measured_discharge_output_liwonde<- read.csv("H:/MyDocuments/Validation data/GRDC Discharge data/1992700/Monthly discharge summary.csv")
measured_discharge_output_liwonde<- as.data.frame(measured_discharge_output_liwonde[1:360,2])
measured_discharge_output_liwonde$group<- "measured_liwonde"
colnames(measured_discharge_output_liwonde)<- c("Value", "group")
####

point_extract_liwonde<- as.numeric(point_extract_liwonde$value)
point_extract_liwonde<- as.data.frame(point_extract_liwonde)
point_extract_liwonde$group<- "cwatm_liwonde"
colnames(point_extract_liwonde)<- c("Value", "group")

comparisondf<- rbind(point_extract_liwonde, measured_discharge_output_liwonde)

start<- mondate("1971-01-01")
end<- mondate("1980-12-31")
comparisondf$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
colnames(comparisondf)<- c("Value", "group", "date")

modflow_with_PL_liwonde<- comparisondf

No Modflow or Pit-latrine run

#Define path root for run looking to analyse
pth <- "C:/Users/hinton/Documents/GitHub/MalawiLake/outputs/bigrun1"

fn <- "discharge_monthavg.nc"
# 
start<- mondate("1971-01-01")
end<- mondate("2000-12-31")

#Identified 2 point locations to compare discharge (upstream and downstream of Blantyre)

#Discharge at chiromo station (Downstream of Blantyre)
## Chiromo data station -16.55, 35.13 (data from GRDC location)

pt<- rbind(as.numeric(c('35.125', '-16.375')))

pt<- as.data.frame(pt)
colnames(pt)<- c('x','y')

# extract data for the point(s)
point_extract <- ncdf2raster(pth = sprintf("%s/%s", pth, fn),
                 transpose = TRUE, time = c(1, 12*30 ), spatial = pt)

point_extract$date <- as.Date(point_extract$time, origin = "1901-01-01")


measured_discharge_output<- read.csv("H:/MyDocuments/Validation data/GRDC Discharge data/1992900/monthly measured discharge output.csv")
measured_discharge_output<- as.data.frame(measured_discharge_output[1:360,])
measured_discharge_output$group<- "measured chiromo station"
colnames(measured_discharge_output)<- c("Value", "group")

comparisondf<- measured_discharge_output

####
comparisondf$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
comparisondf$Value<- as.numeric(comparisondf$Value)
## Warning: NAs introduced by coercion
####

point_data<- as.numeric(point_extract$value)
point_data<- as.data.frame(point_data)
point_data$group<- "CWatM chiromo station"
point_data$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
colnames(point_data)<- c("Value", "group", "date")

comparisondf<- rbind(point_data, comparisondf)
no_modflow_with_PL_chimono<- comparisondf
PL_comparisondf_chimono<- comparisondf
######### 
#Compare with discharge station upstream of Blantrye
#Discharge at Liwonde station

#pt_liwonde<- rbind(as.numeric(c('35.2', '-15.07')))
pt_liwonde<- rbind(as.numeric(c('35.29167', '-15.04167')))

pt_liwonde<- as.data.frame(pt_liwonde)
colnames(pt_liwonde)<- c('x','y')

# extract data for the point(s)
point_extract_liwonde <- ncdf2raster(pth = sprintf("%s/%s", pth, fn),
                 transpose = TRUE, time = c(1, 12*30 ), spatial = pt_liwonde)

point_extract_liwonde$date <- as.Date(point_extract_liwonde$time, origin = "1901-01-01")


#+ #############################
measured_discharge_output_liwonde<- read.csv("H:/MyDocuments/Validation data/GRDC Discharge data/1992700/Monthly discharge summary.csv")
measured_discharge_output_liwonde<- as.data.frame(measured_discharge_output_liwonde[1:360,2])
measured_discharge_output_liwonde$group<- "measured_liwonde"
colnames(measured_discharge_output_liwonde)<- c("Value", "group")
####

point_extract_liwonde<- as.numeric(point_extract_liwonde$value)
point_extract_liwonde<- as.data.frame(point_extract_liwonde)
point_extract_liwonde$group<- "cwatm_liwonde"
colnames(point_extract_liwonde)<- c("Value", "group")

comparisondf<- rbind(point_extract_liwonde, measured_discharge_output_liwonde)

start<- mondate("1971-01-01")
end<- mondate("2000-12-31")
comparisondf$date<- as.Date(mondate(start + seq(0, as.numeric(end - start), 1)))
colnames(comparisondf)<- c("Value", "group", "date")

no_modflow_with_PL_liwonde<- comparisondf
###### Metrics for comparison
KGE <- function(o,s) {
  nans <- is.na(o)
  o <- o[!nans]
  s <- s[!nans]
  B <- mean(s, na.rm = TRUE) / mean(o, na.rm = TRUE)
  y <- (sd(s, na.rm = TRUE) / mean(s, na.rm = TRUE)) / (sd(o, na.rm = TRUE) / mean(o, na.rm = TRUE))
  r <- cor(s, o, method = "pearson")
  
  kge <- 1 -((r - 1) ^ 2 + (B - 1) ^ 2 + (y - 1) ^ 2)^0.5
  
  return(kge)
  
}
NSE <- function(o,s, normalize = TRUE) {
  nse <- 1 - sum((o - s) ^ 2, na.rm = TRUE) / sum((o - mean(o, na.rm = TRUE)) ^ 2, na.rm = TRUE)
  if(normalize) {
    nse <- 1 / (2 - nse)
  }
  return(nse)
}
nRMSE = function(o, s){
  nrmse <- ((sum((o - s) ^ 2) / length(o)) ^ 0.5/ sd(o))
  return(nrmse)
}
nRMSE_f = function(o, s){
  nanrm <- !is.na(o) * ! is.na(s)
  o <- o[nanrm]
  s <- s[nanrm]
  DF_o <- o - mean(o, na.rm = TRUE)
  DF_s <- s - mean(s, na.rm = TRUE)
  
  nrmse <- (100 / sd(DF_o)) * (sum((DF_o - DF_s) ^ 2) / length(DF_o)) ^ 0.5
  return(nrmse)
}
ggplot(no_modflow_with_PL_chimono, aes(x= date, y= Value, colour=group)) + geom_line() + geom_smooth() +ggtitle("No Modflow or pit-latrine chiromo monitoring station (downstream of Blantyre)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 233 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 218 rows containing missing values (`geom_line()`).

ggplot(modflow_with_PL_chimono, aes(x= date, y= Value, colour=group)) + geom_line() + geom_smooth() +ggtitle("With Modflow and pit-latrines chiromo monitoring station (downstream of Blantyre)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 244 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 229 rows containing missing values (`geom_line()`).

#####
ggplot(no_modflow_with_PL_liwonde, aes(x= date, y= Value, colour=group)) + geom_line() + geom_smooth() +ggtitle("No Modflow or pit-latrine Liwonde monitoring station (upstream of Blantyre)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 192 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 192 rows containing missing values (`geom_line()`).

ggplot(modflow_with_PL_liwonde, aes(x= date, y= Value, colour=group)) + geom_line() + geom_smooth() +ggtitle("With Modflow and pit-latrines Liwonde monitoring station (upstream of Blantyre)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 203 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_line()`).

Comments: The addition of pit-latrines and modflow seems to have massively improved CWatM’s performance. Overall, CWatM seems to be lightly overestimating discharge at the upsteam (Chriromo) measuring station and underestimating discharge at the downstream (Liwonde) Station.

The biggest area of difference however, appears to be that CWatM has much greater range of discharge, particularly significant is a large seasonal overestimation of discharge. The biggest overestimation appears to be during late spring/ early summer, this would coincide with the end of the rainy season (typically November to April).

The significant range of CWatM outputs appears bigger in the models with modflow and pit-latrines (although big ranges are seen less frequently in the Liwonde CWatM, no modflow run).