#Detection and Attributon of hydrological data CODE FOR RECONSTRUCTION OF IOI WINTER (DJF) RAINFALL
##1. set working directory and load packages
graphics.off()
rm(list=ls())
setwd("C:/Dectection_Attributon/")
getwd()
## [1] "C:/Dectection_Attributon"
#install.packages("leaps")
#install.packages("forecast")
#install.packages("car")
#install.packages("broom")#only turn on if first time runnin
#install.packages("corrplot")
#install.packages("forecast")
library(leaps)
## Warning: package 'leaps' was built under R version 4.1.3
library(car)
## Warning: package 'car' was built under R version 4.1.3
## Loading required package: carData
library(broom)
## Warning: package 'broom' was built under R version 4.1.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.1.3
## corrplot 0.92 loaded
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
library(zoo)
## Warning: package 'zoo' was built under R version 4.1.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#-----------------------------------------------------------------------------------------------
recon<-read.csv("ioi_recon.csv", header =TRUE)
head(recon)
## Year ioi cet wi pl lslp eu2 ss
## 1 1767 206.4005 2.933333 40.03333 -0.1750 1013.868 0.5066667 23
## 2 1768 316.7771 2.966667 34.10000 -0.1480 1014.798 0.6000000 24
## 3 1769 238.6505 3.266667 32.00000 0.0182 1014.062 -0.5266667 22
## 4 1770 173.3981 4.366667 32.00000 0.3140 1019.670 0.9700000 40
## 5 1771 238.1155 2.600000 26.16667 0.2000 1013.718 0.1666667 41
## 6 1772 260.8398 2.900000 33.20000 -0.2490 1007.307 -0.4933333 32
tail(recon)
## Year ioi cet wi pl lslp eu2 ss
## 231 1997 221.8 4.033333 21.93333 -0.0156 1017.469 NA NA
## 232 1998 313.6 6.100000 33.43333 0.1880 1016.587 NA NA
## 233 1999 332.7 5.433333 49.40000 0.2260 1015.973 NA NA
## 234 2000 370.8 5.400000 53.56667 0.3910 1017.292 NA NA
## 235 2001 312.7 4.466667 24.33333 0.0444 1010.628 NA NA
## 236 2002 372.1 5.366667 41.96667 0.2510 1017.923 NA NA
cal<-recon[134:236,] #1900-2002
head(cal)
## Year ioi cet wi pl lslp eu2 ss
## 134 1900 408.1 3.066667 34.56667 -0.1210 1009.930 -1.4300000 31
## 135 1901 295.9 4.333333 42.96667 0.1480 1016.971 0.8966667 24
## 136 1902 272.9 3.200000 33.33333 -0.0414 1012.982 -0.4366667 24
## 137 1903 355.3 5.300000 53.70000 0.4080 1018.667 1.1433333 16
## 138 1904 357.4 3.600000 37.33333 0.0773 1008.808 -1.1666667 21
## 139 1905 212.5 4.166667 45.83333 0.1480 1021.030 1.1166667 18
tail(cal)
## Year ioi cet wi pl lslp eu2 ss
## 231 1997 221.8 4.033333 21.93333 -0.0156 1017.469 NA NA
## 232 1998 313.6 6.100000 33.43333 0.1880 1016.587 NA NA
## 233 1999 332.7 5.433333 49.40000 0.2260 1015.973 NA NA
## 234 2000 370.8 5.400000 53.56667 0.3910 1017.292 NA NA
## 235 2001 312.7 4.466667 24.33333 0.0444 1010.628 NA NA
## 236 2002 372.1 5.366667 41.96667 0.2510 1017.923 NA NA
### verification will be done for 1870-1919 recon[104:133,], this is done in loop later
### reconstruction will be period 1767-1869 recon[1:103,], defined in loop l
#Basic check of data assumptions - all checked on full series 1767-2002
### linearly related variables
scatterplotMatrix(cal[,2:7])
### derive correlation matrix to examine correlations between variables
M<-cor(cal[,2:7], use = "pairwise.complete.obs")
head(round(M,2))
## ioi cet wi pl lslp eu2
## ioi 1.00 0.31 0.40 0.47 -0.69 -0.68
## cet 0.31 1.00 0.62 0.84 0.07 -0.17
## wi 0.40 0.62 1.00 0.75 -0.17 -0.38
## pl 0.47 0.84 0.75 1.00 -0.06 -0.33
## lslp -0.69 0.07 -0.17 -0.06 1.00 0.81
## eu2 -0.68 -0.17 -0.38 -0.33 0.81 1.00
res1 <- cor.mtest(cal[,2:7], conf.level = .95)
corrplot(M, method="number", type="upper", p.mat = res1$p, sig.level = 0.05)
### Check predictors for presence of autocorrelation
acf(recon$cet, na.action=na.pass)
acf(recon$wi, na.action=na.pass)
acf(recon$pl, na.action=na.pass)
acf(recon$lslp, na.action=na.pass)
acf(recon$eu2, na.action=na.pass)
### Check that predictors are normally distributed
shapiro.test(recon$cet) #non normal this is not a valation .04
##
## Shapiro-Wilk normality test
##
## data: recon$cet
## W = 0.98805, p-value = 0.04676
shapiro.test(recon$wi)
##
## Shapiro-Wilk normality test
##
## data: recon$wi
## W = 0.99213, p-value = 0.2394
shapiro.test(recon$pl)
##
## Shapiro-Wilk normality test
##
## data: recon$pl
## W = 0.99011, p-value = 0.108
shapiro.test(recon$lslp)
##
## Shapiro-Wilk normality test
##
## data: recon$lslp
## W = 0.99551, p-value = 0.724
shapiro.test(recon$eu2)
##
## Shapiro-Wilk normality test
##
## data: recon$eu2
## W = 0.99502, p-value = 0.6614
### Check dependent variables for normality and acf
acf(recon$ioi)
durbinWatsonTest(recon$ioi)
## [1] 0.09548973
shapiro.test(recon$ioi) # non normal dirtrubution v autocor
##
## Shapiro-Wilk normality test
##
## data: recon$ioi
## W = 0.99119, p-value = 0.166
#-------------------------------------------------------------------------------------------------
#3. Select and test potential models using exhaustive search of predictors
library(leaps)
regsubsets.out <-
regsubsets(ioi ~ cet + wi + pl + lslp + eu2,
data = cal,
nbest = 2, # 5 best model for each number of predictors
nvmax = 3, # max limit on number of variables
force.in = NULL, force.out = NULL,
method = "exhaustive")
summary(regsubsets.out)
## Subset selection object
## Call: regsubsets.formula(ioi ~ cet + wi + pl + lslp + eu2, data = cal,
## nbest = 2, nvmax = 3, force.in = NULL, force.out = NULL,
## method = "exhaustive")
## 5 Variables (and intercept)
## Forced in Forced out
## cet FALSE FALSE
## wi FALSE FALSE
## pl FALSE FALSE
## lslp FALSE FALSE
## eu2 FALSE FALSE
## 2 subsets of each size up to 3
## Selection Algorithm: exhaustive
## cet wi pl lslp eu2
## 1 ( 1 ) " " " " " " "*" " "
## 1 ( 2 ) " " " " " " " " "*"
## 2 ( 1 ) " " " " "*" "*" " "
## 2 ( 2 ) "*" " " " " "*" " "
## 3 ( 1 ) " " "*" "*" "*" " "
## 3 ( 2 ) " " " " "*" "*" "*"
best.summary <- summary(regsubsets.out, scale="rsq")
as.data.frame(best.summary$outmat)
## cet wi pl lslp eu2
## 1 ( 1 ) *
## 1 ( 2 ) *
## 2 ( 1 ) * *
## 2 ( 2 ) * *
## 3 ( 1 ) * * *
## 3 ( 2 ) * * *
plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2")
### potential models >0.8 R2
mod1<-lm(ioi ~lslp+pl, data=cal)
mod2<-lm(ioi ~lslp+pl+cet, data=cal)
mod3<-lm(ioi ~lslp+pl+wi, data=cal)
#-----------------------------------------------------------------------------------------------
#4. Testing of selected ioi models plots done on common data 1920-1995
###mod1
summary(mod1) ###produce model summary
##
## Call:
## lm(formula = ioi ~ lslp + pl, data = cal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.598 -30.345 -0.774 26.095 90.269
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10531.3887 916.7640 11.488 < 2e-16 ***
## lslp -10.0674 0.9031 -11.147 < 2e-16 ***
## pl 142.9474 19.8851 7.189 1.2e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.03 on 100 degrees of freedom
## Multiple R-squared: 0.6515, Adjusted R-squared: 0.6445
## F-statistic: 93.47 on 2 and 100 DF, p-value: < 2.2e-16
#produce scatter plot of observed Vs modelled IOI
scatterplot(mod1$fitted.values[1:76], cal$ioi[1:76], smoother=FALSE, boxplots=FALSE)
## Warning in plot.window(...): "smoother" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "smoother" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "smoother" is not a
## graphical parameter
## Warning in box(...): "smoother" is not a graphical parameter
## Warning in title(...): "smoother" is not a graphical parameter
plot(cal$Year[1:76], mod1$residuals[1:76], type="l")
abline(h = 0, lty = 2)
###check if model residuals are autocorrelated
acf(mod1$residuals)
durbinWatsonTest(mod1)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1048693 1.753222 0.19
## Alternative hypothesis: rho != 0
###check if model residuals are normally distributed
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.9865, p-value = 0.3835
qqPlot(mod1, main="QQ Plot")
## 170 228
## 37 95
###check of predictors are independent (VIF should be less than 2)
vif(mod1)
## lslp pl
## 1.004099 1.004099
ncvTest(mod1)### Evaluate homoscedasticity using non-constant error variance test
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.03191154, Df = 1, p = 0.85822
###mod1 has ac but this is ok
###mod2
summary(mod2) #cet is not a significant to addition to model - reduces to mod1
##
## Call:
## lm(formula = ioi ~ lslp + pl + cet, data = cal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.581 -30.719 -0.481 26.176 90.325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10543.6046 940.0798 11.216 < 2e-16 ***
## lslp -10.0811 0.9313 -10.824 < 2e-16 ***
## pl 140.8937 37.2047 3.787 0.000262 ***
## cet 0.4162 6.3588 0.065 0.947952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.24 on 99 degrees of freedom
## Multiple R-squared: 0.6515, Adjusted R-squared: 0.641
## F-statistic: 61.7 on 3 and 99 DF, p-value: < 2.2e-16
###mod3
summary(mod3) #wi is not a significant addition to model - reduces to mod1
##
## Call:
## lm(formula = ioi ~ lslp + pl + wi, data = cal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.478 -29.971 0.916 27.914 94.586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10708.9116 938.7049 11.408 < 2e-16 ***
## lslp -10.2237 0.9206 -11.105 < 2e-16 ***
## pl 163.0611 29.9645 5.442 3.83e-07 ***
## wi -0.5106 0.5685 -0.898 0.371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.07 on 99 degrees of freedom
## Multiple R-squared: 0.6543, Adjusted R-squared: 0.6438
## F-statistic: 62.46 on 3 and 99 DF, p-value: < 2.2e-16
#-------------------------------------------------------------------------------------------
#5. Make predictions using the selected model and evaluate output
sim<-predict(mod1, recon, interval="confidence", level=0.95)
summary(sim)
## fit lwr upr
## Min. :178.1 Min. :154.3 Min. :201.9
## 1st Qu.:279.2 1st Qu.:265.2 1st Qu.:291.5
## Median :317.3 Median :306.2 Median :329.2
## Mean :317.4 Mean :303.6 Mean :331.1
## 3rd Qu.:354.2 3rd Qu.:342.0 3rd Qu.:366.2
## Max. :466.6 Max. :443.3 Max. :490.0
###Evaluate model performance for Verifictation period and reconstruction period
verification<-cor(recon$ioi[104:133],sim[104:133])
verification^2 ###correlation coefficient squared gives the R2 value - or proportion of the variance on the observations captured by the model
## [1] 0.7544849
reconstruction<-cor(recon$ioi[1:103],sim[1:103,1])
reconstruction^2
## [1] 0.4459166
###combine model results and observed IOI into dataframe
results<-cbind(recon$Year, recon$ioi, sim[,1], sim[,2], sim[,3])
head(results)
## [,1] [,2] [,3] [,4] [,5]
## 1 1767 206.4005 299.3551 287.0281 311.6822
## 2 1768 316.7771 293.8518 282.5274 305.1761
## 3 1769 238.6505 325.0202 316.5253 333.5151
## 4 1770 173.3981 310.8431 294.7943 326.8919
## 5 1771 238.1155 354.4698 344.1341 364.8055
## 6 1772 260.8398 354.8263 334.3998 375.2528
colnames(results) <- c("Year", "ioi", "fitted", "lower" , "upper")
head(results)
## Year ioi fitted lower upper
## 1 1767 206.4005 299.3551 287.0281 311.6822
## 2 1768 316.7771 293.8518 282.5274 305.1761
## 3 1769 238.6505 325.0202 316.5253 333.5151
## 4 1770 173.3981 310.8431 294.7943 326.8919
## 5 1771 238.1155 354.4698 344.1341 364.8055
## 6 1772 260.8398 354.8263 334.3998 375.2528
results<-as.data.frame(results)
head(results)
## Year ioi fitted lower upper
## 1 1767 206.4005 299.3551 287.0281 311.6822
## 2 1768 316.7771 293.8518 282.5274 305.1761
## 3 1769 238.6505 325.0202 316.5253 333.5151
## 4 1770 173.3981 310.8431 294.7943 326.8919
## 5 1771 238.1155 354.4698 344.1341 364.8055
## 6 1772 260.8398 354.8263 334.3998 375.2528
###derive decadal rolling mean for lower and upper bounds, med simulations and obs
results.dec<-rollapply(results, 10, mean)
results.dec<-as.data.frame(results.dec)
head(results.dec)
## Year ioi fitted lower upper
## 1 1771.5 248.3759 342.5797 328.9510 356.2083
## 2 1772.5 245.1969 342.4542 328.4754 356.4330
## 3 1773.5 230.6895 341.5910 326.7787 356.4034
## 4 1774.5 227.6335 335.8788 319.6868 352.0709
## 5 1775.5 236.4824 333.4444 317.3691 349.5197
## 6 1776.5 230.8273 319.9442 303.1816 336.7069
###DECADAL RESULTS PLOT main = "IOI Decadal Reconstruction",
plot(results.dec$Year, results.dec$ioi,
type = 'l', lwd=2, col = "red",
xlab = " ", ylab = "Rain (mm)",
ylim = c(140,350), cex=1) # set ylim to fit all data on plot
### Add other variables as 'lines' to same plot
lines(results.dec$Year, results.dec$fitted, lwd=2, col = "black")
lines(results.dec$Year, results.dec$lower, lwd=2, col = "grey")
lines(results.dec$Year, results.dec$upper, lwd=2, col = "grey")
### Add legend to plot
legend("bottomright",
legend=c("IOI Obs", "Median Reconstruction", "95% CI"),
col= c("red", "black", "grey"), cex = 1, lty=1,lwd=1, bty="n")
### Add vertical lines to indicate cal, val and recon periods
abline(v=1900,col="gray",lwd=1.5)
abline(v=1870, col="gray", lwd=1.5)
### Add results to plot at text
text(1910, 350, "Cal ->", cex = 1)
text(1885, 350, "Ver ->", cex= 1)
text(1850, 350, "<- Recon", cex= 1)
#------------------------------------------------------------------------------------------
### derive residuals for each model and median simulation
residuals<-(results$ioi-results$fitted)
residuals<-cbind(results$Year, residuals)
colnames(residuals) <- c("Year", "residuals")
### calculate rolling decadal residuals
resid.dec<-rollapply(residuals, 10, mean)
resid.dec<-as.data.frame(resid.dec)
head(resid.dec)
## Year residuals
## 1 1771.5 -94.20379
## 2 1772.5 -97.25727
## 3 1773.5 -110.90148
## 4 1774.5 -108.24537
## 5 1775.5 -96.96207
## 6 1776.5 -89.11688
###resid.decplot(resid.dec$Year, resid.dec$median)
plot(resid.dec$Year, resid.dec$residuals, type="l")
abline(h=0)
###create sleet and snow series
ss<-cbind(recon$Year, recon$ss)
colnames(ss) <- c("Year", "ss")
ss<-as.data.frame(ss)
head(ss)
## Year ss
## 1 1767 23
## 2 1768 24
## 3 1769 22
## 4 1770 40
## 5 1771 41
## 6 1772 32
### plot full ss series main="Manley Sleet and Snow Series"
plot(ss$Year, ss$ss, type="l", lwd=1.5, col="black",
xlab= "Year", ylab= "Annual No. Days",
ylim= c(0, 80))
lines(lowess(ss$ss~ss$Year, f = 0.05), lwd=2, col = "red") # add loess to plot
mean(ss$ss[232:281]) #add 1900-1949 mean to plot
## [1] NA
abline(h=22.38, lwd=3, col="grey")
legend("topleft",
legend=c("Manley Sleet and Snow Series", "Loess smooth (f=0.05)", "1900-1949 mean"),
col= c("black", "red", "grey"), cex = 0.8, lty=1,lwd=1, bty="n")
abline(v=1869, col="grey", lwd=2)
###Extract data for reconstruction period 1767-1869
ss.recon<-ss[1:103,]
head(ss.recon)
## Year ss
## 1 1767 23
## 2 1768 24
## 3 1769 22
## 4 1770 40
## 5 1771 41
## 6 1772 32
tail(ss.recon)
## Year ss
## 98 1864 26
## 99 1865 44
## 100 1866 18
## 101 1867 29
## 102 1868 28
## 103 1869 16
### extract ressiduals for same period
r<-residuals[1:103,]
head(r)
## Year residuals
## [1,] 1767 -92.95468
## [2,] 1768 22.92539
## [3,] 1769 -86.36970
## [4,] 1770 -137.44495
## [5,] 1771 -116.35432
## [6,] 1772 -93.98652
tail(r)
## Year residuals
## [98,] 1864 -24.58422
## [99,] 1865 -20.55997
## [100,] 1866 -16.76120
## [101,] 1867 -47.30609
## [102,] 1868 -46.43508
## [103,] 1869 20.78779
###combine into one data frame
ss.dat<-cbind(r, ss.recon$ss)
ss.dat<-as.data.frame(ss.dat)
colnames(ss.dat) <- c("Year","residuals", "ss")
head(ss.dat)
## Year residuals ss
## 1 1767 -92.95468 23
## 2 1768 22.92539 24
## 3 1769 -86.36970 22
## 4 1770 -137.44495 40
## 5 1771 -116.35432 41
## 6 1772 -93.98652 32
###standardise the data
st.ss.dat<-apply(ss.dat, 2, function(x) (x-mean(x))/sd(x))
###rolling decadal mean
st.ss.dec<-rollapply(st.ss.dat, 10, mean)# decadal rolling mean of standardised data
st.ss.dec<-as.data.frame(st.ss.dec)# as dataframe
q<-cbind(st.ss.dec, NA) # add extra col to calc inverse ss for ease of plotting
q[,4]<-q$ss*-1 #calculate -ss
colnames(q)<-c("Year","residuals", "ss", "inv.ss")
head(q)
## Year residuals ss inv.ss
## 1 -1.556354 -0.9603980 0.2417123 -0.2417123
## 2 -1.522884 -1.0224722 0.4411607 -0.4411607
## 3 -1.489414 -1.2998456 0.6579525 -0.6579525
## 4 -1.455944 -1.2458494 0.5625641 -0.5625641
## 5 -1.422474 -1.0164712 0.6145941 -0.6145941
## 6 -1.389004 -0.8569861 0.4758474 -0.4758474
tail(q)
## Year residuals ss inv.ss
## 89 1.389004 0.4271521 -0.11382620 0.11382620
## 90 1.422474 0.5666115 -0.08781119 0.08781119
## 91 1.455944 0.5695746 -0.12249787 0.12249787
## 92 1.489414 0.5938355 -0.21788625 0.21788625
## 93 1.522884 0.4904273 -0.17452790 0.17452790
## 94 1.556354 0.6035183 -0.12249787 0.12249787
q<-subset(q, select= -c(Year))
Year<-c(1767:1860)
q<-cbind(q, Year)
head(q)
## residuals ss inv.ss Year
## 1 -0.9603980 0.2417123 -0.2417123 1767
## 2 -1.0224722 0.4411607 -0.4411607 1768
## 3 -1.2998456 0.6579525 -0.6579525 1769
## 4 -1.2458494 0.5625641 -0.5625641 1770
## 5 -1.0164712 0.6145941 -0.6145941 1771
## 6 -0.8569861 0.4758474 -0.4758474 1772
### plot standardised residuals Vs inverse standardised SS series for recon period
plot(q$Year,q$residuals, type="l", ylim=c(-1.5, +1.5), lwd=2.5, col="black",
main="Standardised Decadal Residual Vs Standardised Decadal SS",
xlab="Year",
ylab="Standardised Variable")
lines(q$Year, q$inv.ss, lwd=2.5, col="red")
### Add legend to plot
legend("topleft",
legend=c("Model residual", "Inverse Sleet and Snow Series"),
col= c("black", "red"), cex = 0.8, lty=1,lwd=1, bty="n")
cor(q$inv.ss, q$residuals)
## [1] 0.7760537
### Add correlatons to plot at text
text(1830, -1.2, "Correlation of residuals with inverse SS series: 0.69 ", cex = 0.8)