This report provides an exploratory analysis and modeling of the Standardized Precipitation Index (SPI), Indian Ocean Dipole (IOD), NINO 3.4, and Sea Surface Temperature (SST) to understand their interactions and influence on drought conditions in South-West Australia. Key sections include:
# Load required libraries
# Mapping and spatial visualization
library(ggmap) # For working with maps and geographic data
library(rworldmap) # For visualizing world maps
library(sp) # For spatial data handling and analysis
library(raster) # For raster data manipulation and visualization
# Visualization
library(ggplot2) # For creating detailed and customizable visualizations
library(gridExtra) # For arranging multiple ggplot graphs
library(ggthemes) # Additional themes for ggplot2
library(RColorBrewer) # Provides color palettes for visualizations
library(patchwork) # To arrange and combine multiple ggplots
# Statistical modeling and analysis
library(fields) # For spatial statistics and field data analysis
library(glmnet) # For Lasso and ridge regression models
library(tseries) # For time series analysis, including stationarity tests
library(pracma) # For mathematical functions and Fourier analysis
library(zoo) # For working with time series data
library(mvtnorm) # For multivariate normal distribution functions
# Data wrangling and manipulation
library(dplyr) # Data manipulation (filtering, summarizing, etc.)
library(tidyverse) # Collection of R packages for data science
library(tidyr) # Simplifies reshaping and tidying data
library(readr) # For reading and writing rectangular data (e.g., CSV)
library(grid) # For managing graphical layout
# Correlation analysis
library(corrplot) # For creating correlation matrix plots
library(ggcorrplot) # For visualizing correlation matrices with ggplot2
# Table formatting
library(xtable) # For exporting data tables to LaTeX/HTML
# Load data
site <- read.csv("WA_JNU_CMI_Project_Sites.csv") # Site-specific data
R <- read.csv("WA_JNU_CMI_Project_spi_index_site_specific.csv") # SPI index data
load("site_description.RData") # Site descriptions
load("IOD_data_long.RData") # Long-term IOD data
load("Nino_data_long.RData") # Long-term NINO 3.4 data
load("data_new.RData") # Processed dataset
load("data_spi_sst_nino_iod.RData") # Combined SPI, SST, NINO, and IOD data
load("~/R/WA_JNU_CMI_Project/WestAustraliaMap.RData") # Geographical map of Western Australia
# Load custom functions
source("Generic_Fourier_Model_Def.R") # Functions for Fourier modeling
source("learn_long_period.R") # Functions for periodicity analysis
source("lasso_fit_forecast.R") # Functions for Lasso regression and forecasting
source("spatial_correction_with_GP.R") # Spatial correction functions using Gaussian Processes# Load the base map for Western Australia
AustraliaMap <- ggmap(west_australia_map)
# Add data points for study sites
AustraliaMap +
geom_point(
data = site_description, # Dataset containing site descriptions
aes(x = lon, y = lat), # Map longitude and latitude for plotting
col = "red", # Use red color for points
alpha = 1, # Full opacity for points
size = 3, # Set point size
show.legend = FALSE # Hide the legend for points
) +
# Add labels for axes
xlab('Longitude') +
ylab('Latitude') +
# Customize the map theme
theme(
axis.title.x = element_text(size = 20, face = "bold"), # Bold and large X-axis title
axis.title.y = element_text(size = 20, face = "bold"), # Bold and large Y-axis title
axis.text.x = element_text(size = 14, face = "bold"), # Bold and readable X-axis text
axis.text.y = element_text(size = 14, face = "bold"), # Bold and readable Y-axis text
axis.ticks.x = element_line(colour = "black", size = 1), # Define X-axis tick appearance
axis.ticks.y = element_line(colour = "black", size = 1), # Define Y-axis tick appearance
legend.text = element_text(size = 15, face = "bold"), # Bold and readable legend text
legend.title = element_blank(), # Remove legend title
axis.line = element_line(colour = "black", size = 1, linetype = "solid") # Bold solid axis lines
)# Verify station numbers and subset data
station.no <- unique(R$stn)
x1 <- subset(R, stn == station.no[1])
x1 <- na.omit(x1) # Remove NA values
# Check if spi_1m exists
if ("spi_1m" %in% colnames(x1)) {
y1 <- mean(x1$spi_1m)
# Plot
ggplot(data = x1) +
aes(x = year, y = spi_1m) +
geom_point(aes(color = spi_1m), alpha = .9, size=4) +
xlab('Year') +
ylab('SPI') +
ggtitle('1-month') +
theme_bw() +
ylim(-4, 4) +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9, linetype = "dashed"),
panel.background = element_rect(colour = "black", size = 4),
plot.title = element_text(hjust = 0.5, face = "bold", size = 25),
axis.title.x = element_text(size = 25, face = "bold", colour = "black"),
axis.title.y = element_text(size = 25, face = "bold", colour = "black"),
axis.text.x = element_text(size = 20, face = "bold", colour = "black", angle = 90),
axis.text.y = element_text(size = 20, face = "bold", colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size = 20, color = "black", face = "bold"),
legend.text = element_text(size = 20, face = "bold"),
legend.key = element_rect(fill = 'skyblue'),
legend.key.size = unit(1, "cm")) +
scale_x_continuous(limits = c(1960, 2020), breaks = seq(1960, 2020, 5)) +
geom_smooth(color = 'black', size = .8, method = "gam", se = TRUE, n = 36, span = 2, formula = y ~ s(x, bs = "cs")) +
labs(size = 'Average SPI') +
scale_color_gradientn(name = 'SPI', colors = (brewer.pal(10, 'Spectral')))
} else {
print("Column 'spi_1m' does not exist in the dataset.")
}# Second Plot: SPI_12m
x2 <- subset(R, stn == station.no[1])
x2 <- na.omit(x2)
y2 <- mean(x2$spi_12m)
#png("spi_12m_new.png", width = 12, height = 8,units = "in",res = 600)
ggplot(data = x2) +
aes(x = year, y = spi_12m) +
geom_point(aes(color = spi_1m), alpha = .9, size=4) +
xlab('Year') +
ylab('SPI') +
ggtitle('12-month') +
theme_classic() +
ylim(-4, 4) +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9, linetype = "dashed"),
panel.background = element_rect(colour = "black", size = 4),
plot.title = element_text(hjust = 0.5, face = "bold", size = 25),
axis.title.x = element_text(size = 25, face = "bold", colour = "black"),
axis.title.y = element_text(size = 25, face = "bold", colour = "black"),
axis.text.x = element_text(size = 20, face = "bold", colour = "black", angle = 90),
axis.text.y = element_text(size = 20, face = "bold", colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size = 20, color = "black", face = "bold"),
legend.text = element_text(size = 20, face = "bold"),
legend.key = element_rect(fill = 'skyblue'),
legend.key.size = unit(1, "cm")) +
scale_x_continuous(limits = c(1960, 2020), breaks = seq(1960, 2020, 5)) +
geom_smooth(color = 'black', size = .8, method = "gam", se = TRUE, n = 36, span = 2, formula = y ~ s(x, bs = "cs")) +
labs(size = 'Average SPI') +
scale_color_gradientn(name = 'SPI', colors = (brewer.pal(10, 'Spectral'))) +
scale_size(name = waiver(), limits = NULL, range = c(1, 8), trans = "identity", guide = "legend")#dev.off()
## Sea Surface temperature
#png("sst_new.png", width = 10, height = 8,units = "in",res = 600)
ggplot(data = data_new) + aes(x=Year,y =mean_sst) +
geom_point(aes(color=mean_sst),alpha=.9, size =3) +
xlab('') +
ylab('SST ')+
#theme(legend.position='top') #+
ggtitle('Sea Surface temperature in degree Celsius') +
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.5,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=20, color = "black", face="bold"),
legend.text = element_text(size=20, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1, "cm"))+
scale_x_continuous(limits = c(1982, 2020),breaks = seq(1982,2020,2))+
scale_y_continuous(limits = c(18,26) )+# ,breaks = c(-0.08,-0.06,-0.04,-0.02,0,0.02))+
geom_smooth(color='black',size=1,method = "gam", formula = y ~ s(x, bs = "cs"),se = TRUE,
n = 12,span = 2) +
#labs(size = 'Average SST') +
scale_color_gradientn(name='SST',
colors=rev(brewer.pal(10,'Spectral'))
,guide = "colourbar",limits=c(18,26))+#, breaks = c(-0.06,-0.04,-0.02))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(),
limits = NULL,range = c(1, 6),
trans = "identity",guide = "legend")#dev.off()
#png("IOD_plot1.png",width = 12, height = 8,units = "in",res = 500 )
IOD_data_long<-na.omit(IOD_data_long)
x<-subset(IOD_data_long,IOD_Value>-999&Year>=1982)
#png("iod_new.png", width = 10, height = 8,units = "in",res = 600)
ggplot(data = x) + aes(x=Year,y =IOD_Value) +
geom_point(aes(color=IOD_Value),
alpha=.9, size =4) +
xlab('') +
ylab('IOD')+
ggtitle("Indian Ocean Dipole")+
theme(legend.position='top') +
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.5,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=20, color = "black", face="bold"),
legend.text = element_text(size=20, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1, "cm"))+
scale_x_continuous(limits = c(1982, 2020),breaks = seq(1982,2020,2))+
scale_y_continuous(limits = c(-2,2) )+# ,breaks = c(-0.08,-0.06,-0.04,-0.02,0,0.02))+
geom_smooth(color='black',size=1,method = "gam", formula = y ~ s(x, bs = "cs"),se = TRUE,
n = 12,span = 2) +
#labs(size = 'Average IOD') +
scale_color_gradientn(name='IOD',
colors=(brewer.pal(10,'Spectral'))
,guide = "colourbar")+#, breaks = c(-0.06,-0.04,-0.02))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(),
limits = NULL,range = c(1, 10),
trans = "identity",guide = "legend")#dev.off()
####### Nino plot #############
#png("NINO_plot1.png",width = 12, height = 8,units = "in",res = 500 )
Nino_data_long<-na.omit(Nino_data_long)
y<-subset(Nino_data_long,Nino_Value>-99&Year>=1982)
#png("nino_new.png", width = 10, height = 8,units = "in",res = 600)
ggplot(data = y) + aes(x=Year,y =Nino_Value) +
geom_point(aes(color=Nino_Value),
alpha=.9, size =4) +
xlab('') +
ylab('NINO 3.4')+
ggtitle("El Nino Southern Oscillation Nino 3.4")+
theme(legend.position='top') +
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.5,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=20, color = "black", face="bold"),
legend.text = element_text(size=20, face="bold"),
#legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1, "cm"))+
scale_x_continuous(limits = c(1982, 2020),breaks = seq(1982,2020,2))+
scale_y_continuous(limits = c(22,32) ,breaks = c(22,24,26,28,30,32))+
geom_smooth(color='black',size=1,method = "gam", formula = y ~ s(x, bs = "cs"),se = TRUE,
n = 12,span = 1) +
labs(size = 'Average NINO') +
scale_color_gradientn(name='NINO',
colors=(brewer.pal(10,'Spectral'))
,guide = "colourbar")+#, breaks = c(-0.06,-0.04,-0.02))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(),
limits = NULL,range = c(1, 10),
trans = "identity",guide = "legend")#dev.off()
##############SPI monthwise average plot ##########
month_nm <- c('Jan','Feb','Mar','Apr','May','Jun','Jul'
,'Aug','Sep','Oct','Nov','Dec')
R<-na.omit(R)
#png("SPI_12m_avg.png",width = 8,height=6,units = "in",res = 300)
for(s in 1:194){
sub_R<-subset(R,stn==station.no[s])# create a subset for particular station from the whole dataset
# take average by month for 58 years of a all stations
avg_by_month <- tapply(sub_R[,"spi_1m"],sub_R[,"month"],mean)
#avg_by_month<-as.data.frame( avg_by_month)
#avg_by_month$months<-month.abb
names(avg_by_month) <- month_nm
plot(avg_by_month
,type="b",col="magenta", ylim=c(-0.2,1)
,xlab = "Month"
,ylab = "Mean",
font.lab=2, cex=1,lwd=2,
cex.lab=1.5, cex.axis=1,
xaxt='n',font.axis=2)
par(new=TRUE)
}
axis(1, at=1:12, labels=month_nm,font = 2,font.axis=2)
abline(h=seq(-0.2,1,0.2),v=1:12,col="grey80",lty=2)
legend("topright",
legend = c("Curves for all 194 Stations","Avg over all Stations"),
col = c("magenta","black"),pch=c(1,19), bty = "n", pt.cex = 2,
cex = 1.2,text.col = "black",text.font=2,lwd=2,horiz = F )
points(avg_by_month,type="o",lwd=2)###correlation matrix of 194 stations####
# Reshape the data to wide format, with stations as rows and SPI_1m as columns
data_wide <- R %>%
select(stn, year, month, spi_1m) %>%
spread(key = stn, value = spi_1m)
# Step 2: Calculate the correlation matrix
cor_matrix <- cor(data_wide[,-c(1, 2, 3)], use = "complete.obs") ## 6045 location omega= 0.02908882, 0.0416105, 0.1047198
##Time Period=2pi/omega =216, 151, 60
R_sub$tms<-(R_sub$year-year1[1])*12+R_sub$month
R_sub = na.omit(R_sub)
acf.fit<-acf(na.omit(R_sub[id_train,var_nm])
,lag.max=400, main="ACF")
acf.data<-data.frame(cbind(acf.fit$lag, acf.fit$acf))
colnames(acf.data)<-c("lag","acf")
s<-mad(acf.data$acf)
#png("acf_spi_6045.png", width = 8, height = 6,units = "in",res = 300)
data1 <-acf.data[acf.data$acf>0,]
data2<- acf.data[acf.data$acf<0,]
ggplot() +
geom_point(data = data2, aes(x = lag, y = acf)
,size=3, linetype="solid",color="blue") +
geom_point(data = data1, aes(x = lag, y = acf)
,size=3, linetype="solid",color="red") +
geom_hline(yintercept = c(-s,0,s),linetype="dashed",size=1)+
# geom_point(aes(size=0.004),color=acf, alpha=.75) +
xlab('') +
ylab('ACF')+
ggtitle('Latitude=-26.6969, Longitude=113.7158')+
theme(legend.position='top') +
theme_bw()+ ylim(-0.2,1) +
# theme(legend.position='top') +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=2))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 20))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=20, color = "black", face="bold"),
legend.text = element_text(size=20, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1, "cm"))+
scale_x_continuous(limits = c(0,350),breaks = seq(0,400,50))+
#scale_x_datetime()+
#scale_y_continuous(limits = c(-4,4) ,breaks = c(22,24,26,28,30,32))+
geom_smooth(color='black',size=.8,method = "gam",
se = TRUE,
n = 36,
span =2,
formula = y ~ s(x, bs = "cs")) +
labs(size = 'Average SPI') +
scale_color_gradientn(name='SPI',
colors=rev(brewer.pal(10,'Spectral'))) +
scale_size(name = waiver(), breaks = waiver(),labels = waiver(),
limits = NULL,range = c(1, 8),
trans = "identity",guide = "legend")df_grp = data_sub %>% group_by(year, month) %>%
summarise(sst = mean(sst_mean),
spi = mean(spi_12m),
nino = sum(Nino_Value),
iod = sum(IOD_Value),
.groups = 'drop')
data_1999_2008<- subset(df_grp, year<=2008 & year >= 1999)
data_2009_2018<- subset(df_grp, year>2008)
### CCF for 2009 - 2018:
### SPI vs SST
ccf_fit2<-ccf(x=data_2009_2018$spi,y=data_2009_2018$sst,lag.max = 120)
s<-mad(ccf_fit2$acf)
data3<-ccf_fit2$acf
data4<-ccf_fit2$lag
data5<-cbind(data3,data4)
data5<-as.data.frame(data5)
colnames(data5)<-c("acf","lag")
data1 <-data5[data5$acf>0,]
data2<- data5[data5$acf<0,]
#png("ccf_SPI_SST_2009_2018.png", width = 8, height = 6,units = "in",res = 300)
ggplot() +
geom_point(data = data2, aes(x = lag, y = acf)
,size=3, linetype="solid",color="blue") +
geom_point(data = data1, aes(x = lag, y = acf)
,size=3, linetype="solid",color="red")+
geom_line(data=data5,aes(x = lag, y = acf),size=1)+
xlab('') +
ylab('CCF')+
ggtitle('SPI vs SST')+
theme_bw()+# ylim(-0.2,1) +
# theme(legend.position='top') +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=2))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=20, color = "black", face="bold"),
legend.text = element_text(size=20, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1, "cm"))+
geom_vline(xintercept =0 ,linetype="dashed",col="black",size=2)+
scale_x_continuous( limits = c(-120,120) ,breaks = seq(-120,120,20))+
#scale_y_continuous(limits = c(-0.4,0.4))+
geom_hline(yintercept = c(-s,s), size=1)## Defining generic Fourier series model #####
## K : Number of Harmonics to be considered
## m : Number of periods to be considered
Generic_Fourier_Model_Def <- function(var_nm,K,m){
x_nm<-c()
for(j in 1:m){
xnam <- paste0(1:K,"*tms*omega")
xnam <- paste0(xnam,j)
xnam_s <- paste0("sin(",xnam)
xnam_s <- paste0(xnam_s,")")
xnam_c <- paste0("cos(",xnam)
xnam_c <- paste0(xnam_c,")")
x_nm<- c(x_nm,xnam_s,xnam_c)
}
modl <- as.formula(paste(var_nm,"~",paste(x_nm, collapse= "+")))
return(modl)
}
### Learning long period using ACF ####
learn_long_period<-function(dsn,id,var_nm,feature_nm,max_lag,
plot_title=TRUE,plot_TRUE=FALSE,type='acf'){
if(type=="acf"){
acf.fit<-acf(na.omit(dsn[id,var_nm])
,lag.max=max_lag,plot = plot_TRUE)
acf.data<-data.frame(cbind(acf.fit$lag, acf.fit$acf))
colnames(acf.data)<-c("lag","acf")
}else if(type=="ccf"){
#dsn <- na.omit(dsn[id,])
#print(dsn[id,c(var_nm,feature_nm)])
acf.fit<-ccf(y=dsn[id,var_nm],x=ts(dsn[id,feature_nm])
,lag.max=max_lag,plot = plot_TRUE)
acf.data<-data.frame(cbind(acf.fit$lag, acf.fit$acf))
colnames(acf.data)<-c("lag","acf")
acf.data <- subset(acf.data,lag>=0)
}
sd.acf<-mad(acf.fit$acf) # median absolute deviation
acf.data<-acf.data[12:max_lag,]
acf.data.pos<-acf.data[acf.data$acf>sd.acf,]
acf.data.pos$lag_dif<-NA
acf.data.pos$lag_dif[2:nrow(acf.data.pos)]<-diff(acf.data.pos$lag)
acf.data.pos<-na.omit(acf.data.pos)
j1<-1
for(j in 1:nrow(acf.data.pos)){
if(acf.data.pos$lag_dif[j]==1){
acf.data.pos$lag_dif[j]<-j1
}else{
j1<-j1+1
acf.data.pos$lag_dif[j]<-j1
}
}
per<-acf.max<-unique(acf.data.pos$lag_dif)
num_of_per <-length(per)
for(j2 in 1:num_of_per){
d_sub<-subset(acf.data.pos,lag_dif==per[j2])
per[j2]<-d_sub[which.max(d_sub[,"acf"]),"lag"]
acf.max[j2]<-d_sub[which.max(d_sub[,"acf"]),"acf"]
}
acf.max<-data.frame(cbind(per,acf.max))
acf.max<-acf.max[with(acf.max, order(-acf.max)), ]
omega<-2*pi/acf.max$per
omega_nm<-paste("omega",1:num_of_per,sep="")
omega_mat<-data.frame(matrix(omega,byrow=TRUE
,nrow=nrow(dsn)
,ncol = num_of_per))
colnames(omega_mat)<-omega_nm
dsn<-data.frame(cbind(dsn,omega_mat))
return(list(num_of_per,dsn))
}
#### Lasso fitting #####
lasso_fit_forecast<-function(dsn
,target_var
,model
,id_train
,id_test){
library(glmnet)
x_train <- model.matrix(model, data= dsn[id_train,])[,-1]
y_train <- dsn[id_train,target_var]
x_test <- model.matrix(model, data= dsn[id_test,])[,-1]
n_tst<-length(id_test)
x_test <- matrix(x_test,nrow=n_tst)
#find the best lambda from our list via cross-validation
cv.out <- cv.glmnet(x=x_train
,y=y_train
,alpha = 1
,nfolds = 10
#,lambda = lambda
,family = "gaussian")
bestlam <- cv.out$lambda.min
lasso.mod <- glmnet(x=x_train
,y=y_train
,alpha = 1
,lambda = bestlam
,family = "gaussian")
beta_lasso <- coef(lasso.mod,s=bestlam)
nms <- beta_lasso@Dimnames[[1]]
b_lasso <-rep(0,length(nms))
names(b_lasso)<-nms
b_lasso_indx <- beta_lasso@i+1
b_lasso[b_lasso_indx]<-beta_lasso@x
#beta_trend[s]<-b_lasso["tms"]
y_pred<- predict(lasso.mod
,s = bestlam
, newx = x_test)
return(c(y_pred,b_lasso["tms"]))
}
#### Spetial Correction #####
spatial_correction_with_GP<-function(theta,y,Earth.Dist){
#sigma2<-exp(theta[1])
#tau2<-exp(theta[2])
rho<-exp(theta[1])
mu_hat<-mean(y)
sigma2_hat<-var(y)
tau2_hat<-sigma2_hat*0.1
Sigma1<-sigma2_hat*exp(-rho*abs(Earth.Dist))
Sigma2<-tau2_hat*diag(nrow(Earth.Dist))
Sigma <- Sigma1 + Sigma2
#correction_term <-Sigma1%*%solve(Sigma)%*%(y-mu_hat)
#y_hat<-mu_hat+correction_term
#mse <-mean((y-y_hat)^2)
nll<--dmvnorm(y,mean=rep(mu_hat,length(y)),sigma=Sigma,log = TRUE)+sum(theta^2)
return(nll)
}##### Fourier Series Model analysis #####
yrs <- unique(R$year)
month<- unique(R$month)
R$yr_mnth<-NA
R$yr_mnth<-paste(R$year,R$month,sep="-")
R$tms<-(R$year-yrs[1])*12+R$month
n<-nrow(R)
R$lag_1m[2:n]<-R$spi_1m[1:(n-1)]
Nino_data_long<-na.omit(Nino_data_long)
Nino_data_long<-subset(Nino_data_long,Year<2019)
Nino_data_long<-subset(Nino_data_long,Year>=1982)
Nino_data_long$yr_mnth<-NA
Nino_data_long$yr_mnth<-paste(Nino_data_long$Year
,Nino_data_long$Month
,sep="-")
Nino_data_long<-Nino_data_long[-c(1,2)]
IOD_data_long<-na.omit(IOD_data_long)
IOD_data_long<-subset(IOD_data_long,Year<2019)
IOD_data_long<-subset(IOD_data_long,Year>=1982)
IOD_data_long$yr_mnth<-NA
IOD_data_long$yr_mnth<-paste(IOD_data_long$Year
,IOD_data_long$Month
,sep="-")
IOD_data_long<-IOD_data_long[-c(1,2)]
data_new<-subset(data_new,doy<=365)
data_new<-data_new[-c(1,2)]
data_new<-subset(data_new,Year<2019)
data_new<-subset(data_new,Year>=1982)
###### monthwise average of mean_SST
yr<-unique(data_new$Year)
mnth<-unique(data_new$month)
SST_data<-matrix(NA,nrow=(length(yr)*12),ncol=3)
colnames(SST_data)<-c("Year","Month","sst_mean")
i=1
for(y in 1:length(yr)){
for(m in 1:length(mnth))
{
data_sub<-subset(data_new,Year==yr[y])
data_sub<-subset(data_sub, month==mnth[m])
SST_mean<-mean(data_sub$mean_sst)
SST_data[i,1]<-yr[y]
SST_data[i,2]<-mnth[m]
SST_data[i,3]<-SST_mean
i=i+1
}
}
SST_data<-as.data.frame(na.omit(SST_data))
SST_data$yr_mnth<-NA
SST_data$yr_mnth<-paste(SST_data$Year
,SST_data$Month
,sep="-")
SST_data<-SST_data[-c(1,2)]
head(SST_data)
#
R_ext<-merge(R,IOD_data_long,by="yr_mnth",sort=FALSE)
R_ext<-merge(R_ext,Nino_data_long,by="yr_mnth",sort=FALSE)
R_ext<-merge(R_ext,SST_data,by="yr_mnth",sort=FALSE)
R_ext<-R_ext[,-c(1,6,7,8)]
n<-nrow(R_ext)
yrs <- unique(R_ext$year)
month<- unique(R_ext$month)
stan<-unique(R_ext$stn)
R_ext$tms<-(R_ext$year-yrs[1])*12+R_ext$month
R_ext$sst_mean<- ((R_ext$sst_mean)-mean(R_ext$sst_mean))/sd(R_ext$sst_mean)
R_ext$lag1_sst[2:n]<-R_ext$sst_mean[1:(n-1)]
R_ext$lag1_Elnino[2:n]<-R_ext$Nino_Value[1:(n-1)]
R_ext$lag1_IOD[2:n]<-R_ext$IOD_Value[1:(n-1)]
R_ext<-na.omit(R_ext)
Earth.Dist<-round(rdist.earth(site_description[,c("lon","lat")]),1)
d_max <- max(Earth.Dist)
rho_hat <- -log(0.1)/d_max
feature_nm_1<-"Nino_Value"
feature_nm_2<-"IOD_Value"
feature_nm_3<-"sst_mean"
var_nm<-"spi_1m"
no_of_harmonics_spi<-3
no_of_harmonics_iod<-3
no_of_harmonics_El_nino<-3
no_of_harmonics_sst<-3
#id_test<- 157
lag.max<-100
id_test_range<-438:443 #
RMSE_Eval_matrix<-matrix(NA,nrow=max(id_test_range),ncol=4)
colnames(RMSE_Eval_matrix)<-c("Full","Lasso","Full correct","Lasso correct")
rho_lasso<-rho_full<-rep(NA,length.out=max(id_test_range))
y<-y_pred_full<-y_pred_lasso<-adj.R2.full<-adj.R2.lasso<-rep(NA,length(stan))
names(y)<-names(y_pred_full)<-names(y_pred_lasso)<-names(adj.R2.full)<-names(adj.R2.lasso)<-stan
#beta_nino_all<-matrix(NA,nrow = 1,ncol=6)
#beta_sst_all<-matrix(NA,nrow = 1,ncol=6)
#colnames(beta_nino_all)<-c("site","lon","lat","year","month","beta_nino")
start<-Sys.time()
for(id_test in id_test_range){
id_train<-(id_test-lag.max-50):(id_test-1)
y<-y_pred_full<-y_pred_lasso<-adj.R2.full<-adj.R2.lasso<-rep(NA,length(stan))
names(y)<-names(y_pred_full)<-names(y_pred_lasso)<-names(adj.R2.full)<-names(adj.R2.lasso)<-stan
beta_nino <-rep(NA,length(stan))
## fit forecasting model in s locations
for(s in 1:length(stan)){
R_sub <- subset(R_ext,stn==stan[s])
## learning period for location s with ACF
fit_period<-learn_long_period(dsn=R_sub
,id=id_train
,var_nm=var_nm
,max_lag=lag.max
#,plot_title=stan[s]
,plot_TRUE=FALSE
,type = "acf")
num_of_per <-fit_period[[1]]
num_of_per
R_sub<-fit_period[[2]]
#omega1[s]<-R_sub$omega1
### Model fitting for location s
modl<-Generic_Fourier_Model_Def(var_nm=var_nm
,K=3
,m=num_of_per)
modl<- update(modl,.~.+lag_1m+lag1_IOD+lag1_Elnino+lag1_sst)
#fit<-step(lm(modl,data = R_sub[id_train,]),trace=0)
fit<-lm(modl,data = R_sub[id_train,])
sum <- summary(fit)
fit <- update(fit ,.~.+tms)
modl <- as.formula(fit$call)
adj.R2.full[s]<-sum$adj.r.squared
#beta_sst[s]<-fit$coefficients["lag1_sst"]
#beta_nino[s]<-fit$coefficients["lag1_Elnino"]
y_pred_full[s]<-predict(fit,newdata = R_sub[id_test,])
### lasso selection and forecast
lasso_fit_fore<-lasso_fit_forecast(dsn=R_sub
,target_var=var_nm
,model=modl
,id_train = id_train
,id_test =id_test)
y_pred_lasso[s]<-lasso_fit_fore[1]
#beta_trend[s]<-lasso_fit_fore[2]
### Actual value
y[s]<- R_sub[id_test,var_nm]
if(s%%20==0) cat("id_test = ",id_test,"s = ",s,"\n")
}
#beta_sst<-cbind(site_description,beta_sst)
#beta_nino<-cbind(site_description,beta_nino)
##yr<-R_sub[id_test,"year"]
#mo<-R_sub[id_test,"month"]
##beta_nino$year<-rep(yr,nrow(beta_nino))
#beta_nino$month<-rep(mo,nrow(beta_nino))
#beta_nino_all<-rbind(beta_nino_all,beta_nino)
#beta_sst_all<-rbind(beta_sst_all,beta_sst)
#Sys.time()-start
pred_actual<-merge(y_pred_full,y_pred_lasso,by=0)
rownames(pred_actual)<-pred_actual$Row.names
pred_actual<-pred_actual[,-1]
pred_actual<-merge(pred_actual,y,by=0)
colnames(pred_actual)<-c("site","fore_full","fore_lasso","actual")
pred_actual<-merge(site,pred_actual,by="site")
### correction for prediction from full model
opt_GP_corct_fore_full<-optimise(f=spatial_correction_with_GP
,interval = c(-6,-4)
,y=pred_actual$fore_full
,Earth.Dist=Earth.Dist)
rho_hat1<-exp(opt_GP_corct_fore_full$minimum)
rho_full[id_test]<-rho_hat1
mu_hat<-mean(pred_actual$fore_full)
sigma2_hat<-var(pred_actual$fore_full)
tau2_hat<-sigma2_hat*0.1
Sigma1<-sigma2_hat*exp(-rho_hat1*abs(Earth.Dist))
Sigma2<-tau2_hat*diag(nrow(Earth.Dist))
Sigma <- Sigma1 + Sigma2
pred_actual$fore_full_corected <- Sigma1%*%solve(Sigma)%*%pred_actual$fore_full
head(pred_actual)
tail(pred_actual)
### correction for prediction from lasso model
opt_GP_corct_fore_lasso<-optimise(f=spatial_correction_with_GP
,interval = c(-6,-4)
,y=pred_actual$fore_lasso
,Earth.Dist=Earth.Dist)
rho_hat2<-exp(opt_GP_corct_fore_lasso$minimum)
rho_lasso[id_test]<-rho_hat2
mu_hat<-mean(pred_actual$fore_lasso)
sigma2_hat<-var(pred_actual$fore_lasso)
tau2_hat<-sigma2_hat*0.1
Sigma1<-sigma2_hat*exp(-rho_hat2*abs(Earth.Dist))
Sigma2<-tau2_hat*diag(nrow(Earth.Dist))
Sigma <- Sigma1 + Sigma2
pred_actual$fore_lasso_corected <- Sigma1%*%solve(Sigma)%*%pred_actual$fore_lasso
#### RMSE Evaluation
RMSE_Eval_matrix[id_test,"Full"]<-sqrt(mean((pred_actual$fore_full-pred_actual$actual)^2))
RMSE_Eval_matrix[id_test,"Lasso"]<-sqrt(mean((pred_actual$fore_lasso-pred_actual$actual)^2))
RMSE_Eval_matrix[id_test,"Full correct"]<-sqrt(mean((pred_actual$fore_full_corected-pred_actual$actual)^2))
RMSE_Eval_matrix[id_test,"Lasso correct"]<-sqrt(mean((pred_actual$fore_lasso_corected-pred_actual$actual)^2))
}
Sys.time()-start
RMSE_Eval_matrix<-RMSE_Eval_matrix[id_test_range,]
rho_full[id_test_range]
rho_lasso[id_test_range]
RMSE_Eval_matrix<-na.omit(RMSE_Eval_matrix)
round(apply(RMSE_Eval_matrix,2,summary),3)
round(apply(RMSE_Eval_matrix,2,sd),3)### SST, NINO, IOD Delta Plots
iod_delta <- read.csv("data_beta_iod_all.csv")
iod_delta <- na.omit(iod_delta)
beta_vec_iod<- aggregate(beta_iod ~ year, data = iod_delta, FUN = mean)
#png("iod_beta_plot_new.png",width = 12, height = 8,units = "in",res = 300 )
ggplot(data = beta_vec_iod) + aes(x=year,y =beta_iod) +
geom_point(aes(color = beta_iod), alpha = .9, size =10) +
geom_line(aes(x = year, y = beta_iod),size=2)+
xlab('Year') +
ylab("Delta (𝛿)")+
ggtitle('Indian Ocean Dipole') +
#theme(legend.position='top') #+
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=25, color = "black", face="bold"),
legend.text = element_text(size=25, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1.5, "cm"))+
scale_x_continuous(limits = c(1995, 2020),breaks = seq(1994,2020,2))+
scale_y_continuous(limits = c(-0.2,0.2),breaks = c(-0.2,-0.1,0,0.1,0.2))+
geom_smooth(color='red',size=1.5,method = "gam", formula = y ~ x,se = TRUE,n = 36,span = 2) +
#labs(size = "Average(𝛿)") +
scale_color_gradientn(name="Delta(𝛿)",
colors=(brewer.pal(10,'Spectral'))
,guide = "colourbar",limits=c(-0.2,0.2),breaks=c(-0.1,0,0.1))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(), limits = NULL,range = c(1, 20),
trans = "identity",guide = "legend")+
theme(panel.spacing = unit(4, "lines"))#dev.off()
nino_delta <- read.csv("data_beta_nino_all.csv")
nino_delta <- na.omit(nino_delta)
beta_vec_nino<- aggregate(beta_nino ~ year, data = nino_delta, FUN = mean)
#png("nino_beta_plot_new.png",width = 12, height = 8,units = "in",res = 300 )
ggplot(data = beta_vec_nino) + aes(x=year,y =beta_nino) +
geom_point(aes(color=beta_nino),alpha=.9, size =10) +
geom_line(aes(x = year, y = beta_nino),size=2)+
xlab('Year') +
ggtitle('El Nino Southern Oscillation ') +
ylab("Delta (𝛿)")+
#theme(legend.position='top') #+
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=25, color = "black", face="bold"),
legend.text = element_text(size=25, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1.5, "cm"))+
scale_x_continuous(limits = c(1995, 2020),breaks = seq(1994,2020,2))+
scale_y_continuous(limits = c(-0.08,0.02),breaks = c(-0.08,-0.06,-0.04,-0.02,0,0.02))+
geom_smooth(color='red',size=2,method = "gam", formula = y ~ s(x, bs = "cs"),se = TRUE,
n = 36,span = 2) +
# labs(size = "Average(𝛿)") +
scale_color_gradientn(name="Delta(𝛿)",
colors=(brewer.pal(10,'Spectral'))
,guide = "colourbar",limits=c(-0.09,-0.003),breaks=c(-0.075,-0.050,-0.025,0))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(),
limits = NULL,range = c(1, 20),
trans = "identity",guide = "legend")#dev.off()
sst_delta <- read.csv("data_beta_sst_all.csv")
sst_delta<- na.omit(sst_delta)
### SST Delta Plot
beta_vec_sst<- aggregate(beta_sst ~ year, data = sst_delta, FUN = mean)
#plot(beta_vec_sst,type="l",col="purple",lwd=2,main="SST trend")
#png("sst_beta_plot_new.png",width = 12, height = 8,units = "in",res = 300 )
ggplot(data = beta_vec_sst) + aes(x=year,y =beta_sst) +
geom_point(aes(color=beta_sst),alpha=.9, size=10) +
geom_line(aes(x = year, y = beta_sst),size=2)+
xlab('Year') +
ylab("Delta (𝛿)")+
ggtitle('Sea Surface Temperature') +
#theme(legend.position='top') #+
theme_classic() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.9,linetype="dashed"),
# panel.grid.minor = element_line(colour = "grey", size = 0.5,linetype="dashed"),
panel.background = element_rect(colour = "black", size=4))+
theme(plot.title = element_text(hjust = 0.5,face = "bold",size = 25))+
theme(axis.title.x=element_text(size=25,face = "bold",colour = "black"), # X axis title
axis.title.y=element_text(size=25,face = "bold",colour = "black"), # Y axis title
axis.text.x=element_text(size=20,face = "bold",colour = "black",angle = 90), # X axis text
axis.text.y=element_text(size=20,face = "bold",colour = "black"),
axis.ticks.length = unit(0.4, "cm"),
axis.ticks.x = element_line(colour = "black", size = 2),
axis.ticks.y = element_line(colour = "black", size = 2),
legend.title = element_text(size=25, color = "black", face="bold"),
legend.text = element_text(size=25, face="bold"),
legend.key=element_rect(fill='skyblue'),
legend.key.size = unit(1.5, "cm"))+
scale_x_continuous(limits = c(1995, 2020),breaks = seq(1994,2020,2))+
scale_y_continuous(limits = c(-0.02,0.02),breaks = c(-0.02,-0.01,0,0.01,0.02))+
geom_smooth(color='red',size=1.5,method = "gam", formula = y ~ x,se = TRUE,n = 36,span = 2) +
#labs(size = "Average(𝛿)") +
scale_color_gradientn(name="Delta(𝛿)",
colors=(brewer.pal(10,'Spectral'))
,guide = "colourbar",limits=c(-0.02,0.02),breaks=c(-0.01,0,0.01))+
scale_size(name = waiver(), breaks = waiver(),labels = waiver(), limits = NULL,range = c(1, 20),
trans = "identity",guide = "legend")+
theme(panel.spacing = unit(4, "lines"))|
This study examines the complex dynamics between SPI and other climate variables, including Sea Surface Temperature (SST), El Niño Southern Oscillation (ENSO) (aka, NINO 3.4), and Indian Ocean Dipole (IOD), using a machine learning approach. The findings reveal:
Using a statistical machine learning approach, an inferential network of these climate variables was constructed. The network revealed:
Interestingly, there was a structural change in the dynamics of the four climate variables (NINO 3.4, IOD, SST, and SPI) around 2008. Although a simple 12-month moving average of SPI over 58 years (1961–2018) shows a negative trend toward drought, the complex dynamics of SPI with other climate variables suggest a wet season for south-west Australia.
By addressing these challenges, the study can more effectively understand and predict the complex dynamics of climate variables influencing drought conditions in South-West Australia.
Thank You