Interpretative seasonal prediction of tropical storm frequency in the western North Pacific

Namyoung Kang and James Elsner

Abstract

Despite the improving techniques of the seasonal prediction on the tropical storm frequency, attentions seem focused mostly on the accuracy rather than the interpretation of the results. This study aims to show how the prediction results from a hybrid model, i.e. statistical/dynamical model can be interpreted by probability distributions. The tropical storm frequency in the western North Pacific is modeled by the environmental predictors through multiple linear regression. For a demonstration of the probabilistic structure of the prediction result, the forty-two ensemble predictions of the Glosea5 model for June-July-August in 2017 are utilized as the dynamical input. Rather than dealing with the expectation of the frequency, this study introduces the predictive probability for a single value of the frequency. From as many probability distributions, a marginal probability distribution is obtained as the final predictive probability distribution of the model. Then the probability distribution is compared to the climatological reference by the terciles. The predictive probability distributions made on the separate predictors, also provide helpful information on how the predictors contribute to the final prediction. The procedure of the probabilistic interpretation shown in the study is expected to be effectively used for any improving hybrid approaches.

Read data

rm(list=ls())
####################################################
# Read data
####################################################
#forecast target 2017 JJA
#set the data period as 1986~2015 (30 years)

# Tropical storms from JTWC (FRQo)
#----------------------------------------------------------------------------
AB=read.table('Data/ABtrack2018.dat',T)
AB2=AB[(AB$bst=='JT' & AB$yr>=1986 & AB$yr<=2015 & AB$mo>=6 & AB$mo<=8),]  #1986-2015 
FRQo=as.numeric(table(AB2$yr))

# observed environmental variables (GMSSTo, NSOIo)
#----------------------------------------------------------------------------
imsi=read.csv('Data/Obs_NSOI_81to17mon.csv',header=FALSE)[,1]
imsi2=NULL; for(i in 1:37) imsi2=c(imsi2,mean(imsi[(i-1)*12+(6:8)])) #for JJA
NSOI81to17=imsi2
NSOIo=NSOI81to17[6:35] #1986~2015 (30 years)

imsi=read.csv('Data/Obs_GMSST_81to17mon.csv',header=FALSE)[,1]
imsi2=NULL; for(i in 1:37) imsi2=c(imsi2,mean(imsi[(i-1)*12+(6:8)])) #for JJA
GMSST81to17=imsi2
GMSSTo=GMSST81to17[6:35] #1986~2015 (30 years)

# Glosea5 hindcast (GMSSTh, NSOIh)
#----------------------------------------------------------------------------
imsi=read.csv('Data/Hindcast_NSOI_91to10jja.csv',T)$nsoi  #1991~2010                 # read hindcast data 
imsi2=NULL; for(i in 1:20) imsi2=c(imsi2,mean(imsi[(i-1)*3+(1:3)])) 
NSOIh=imsi2

imsi=read.csv('Data/Hindcast_GMSST_91to10jja.csv',T)$gmsst  #1991~2010                # read hindcast data 
imsi2=NULL; for(i in 1:20) imsi2=c(imsi2,mean(imsi[(i-1)*3+(1:3)])) 
GMSSTh=imsi2

NSOIclim=NSOI81to17[11:30] ; GMSSTclim=GMSST81to17[11:30]        #matching clim 

# scale Glosea5 forecast ensemble pairs of GMSST and NSOI
#----------------------------------------------------------------------------
imsi = read.csv('Data/Forecast_NSOI_17jja.csv',header=TRUE)[,1]        #42 ensemble members of NSOI 
model=lm(NSOIclim~NSOIh) 
NSOIf=predict(model, data.frame(NSOIh=imsi))     #scaled

imsi=read.csv('Data/Forecast_GMSST_17jja.csv',header=TRUE)[,1]         #42 ensemble members of GMSST 
model=lm(GMSSTclim~GMSSTh) 
GMSSTf=predict(model, data.frame(GMSSTh=imsi)) #scaled

Figure 1

####################################################
# Fig. 1. Environental connection to tropical storm frequency
####################################################
par(mfrow=c(1,1))

signif = 0.05   # 95% significance level
theta = seq(1, 180, by=1)

#Compute the correlation between environmental factors and TS FRQ
cFRQ = NULL ; pFRQ = NULL
for (k in theta){
  C1 = cos(k * pi/180)
  C2 = sin(k * pi/180)
  ENVclim = C1 * scale(NSOIo) + C2 * scale(GMSSTo)
  ctest = cor.test(ENVclim, FRQo)
  cFRQ = c(cFRQ, as.numeric(ctest$estimate))
  pFRQ = c(pFRQ, as.numeric(ctest$p.value))
  }

plot(-10, -10, xlim=c(-1, 1.3), ylim=c(-1, 1.3), axes=FALSE, xlab='', ylab='', main='')

i = seq(0, 360, .5)
Outl = cbind(cos(i * pi/180), sin(i * pi/180))
Innl = cbind(.5 * cos(i * pi/180), .5 * sin(i * pi/180))

polygon(Outl[, 1], Outl[, 2], border=colors()[229], col='white', lwd=2)
polygon(Innl[, 1], Innl[, 2], border=colors()[229], col=NULL, lwd=2)

Line.xcord = c(-1, 1, NA, 0, 0, NA, -cos(pi/4), cos(pi/4), NA, -cos(pi/4), cos(pi/4))
Line.ycord = c(0, 0, NA, -1, 1, NA, sin(pi/4), -sin(pi/4), NA, -sin(pi/4), sin(pi/4))
lines(Line.xcord, Line.ycord, col=colors()[229], lwd=1)

text(par('usr')[2] - 0.29, 0.0, srt=0, adj = 0, labels = 'NSOI', xpd = TRUE, cex=2) 
text(par('usr')[2] - 0.6, 0.81, srt=0, adj = 0, labels = 'PC1', xpd = TRUE, cex=2)
text(par('usr')[2] - 1.52, 1.17, srt=0, adj = 0, labels = 'GMSST', xpd = TRUE, cex=2)
text(par('usr')[2] - 0.6, -0.81, srt=0, adj = 0, labels = 'PC2', xpd = TRUE, cex=2)

text(par('usr')[2] - 2.85, 0.0, srt=0, adj = 0, labels = '-NSOI', xpd = TRUE, cex=2, col=colors()[229]) 
text(par('usr')[2] - 2.55, -0.81, srt=0, adj = 0, labels = '-PC1', xpd = TRUE, cex=2, col=colors()[229])
text(par('usr')[2] - 1.6, -1.17, srt=0, adj = 0, labels = '-GMSST', xpd = TRUE, cex=2, col=colors()[229])
text(par('usr')[2] - 2.55, 0.81, srt=0, adj = 0, labels = '-PC2', xpd = TRUE, cex=2, col=colors()[229])
text(0,0.55, '0.5', cex=1.4, col=colors()[229])
text(0,1.05, '1.0', cex=1.4, col=colors()[229])

dg = theta
polygon(cFRQ * cos(dg * pi/180), cFRQ * sin(dg * pi/180), border="#ff9900", lwd=7, col=NULL)
cFRQ2 = c(cFRQ[which.max(pFRQ):length(pFRQ)], cFRQ[1:(which.max(pFRQ) - 1)])
pFRQ2 = c(pFRQ[which.max(pFRQ):length(pFRQ)], pFRQ[1:(which.max(pFRQ) - 1)])
dg2 = c(dg[which.max(pFRQ):length(pFRQ)], dg[1:(which.max(pFRQ) - 1)])
lines(cFRQ2[pFRQ2 <= signif] * cos(dg2[pFRQ2 <= signif] * pi/180),
      cFRQ2[pFRQ2 <= signif] * sin(dg2[pFRQ2 <= signif] * pi/180), col="#cc3300", lwd=7) 

cFRQ3 = cFRQ[which.max(abs(cFRQ))]
dg3 = dg[which.max(abs(cFRQ))]
points(cFRQ3 * cos(dg3 * pi/180), 
       cFRQ3 * sin(dg3 * pi/180), col="#cc3300", pch=16, cex=3)
text((cFRQ3 * cos(dg3 * pi/180) + 0.1),
      (cFRQ3 * sin(dg3 * pi/180) - 0.14), abs(round(cFRQ3, 2)), cex=1.6, col="#cc3300")

####################################################
#Ref. Check correlations among the variables
####################################################
#Check some correlations
#(Result) Correlations are confirmed as expeceted,
#----------------------------------------------------------------------------
cor.test(NSOIo,GMSSTo)                                         #near orthogonal

## 
##  Pearson's product-moment correlation
## 
## data:  NSOIo and GMSSTo
## t = 0.28114, df = 28, p-value = 0.7807
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3131997  0.4055731
## sample estimates:
##        cor 
## 0.05305619

cor.test(FRQo,NSOIo)

## 
##  Pearson's product-moment correlation
## 
## data:  FRQo and NSOIo
## t = 2.5565, df = 28, p-value = 0.01628
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08864496 0.68753652
## sample estimates:
##       cor 
## 0.4350213

cor.test(FRQo,GMSSTo)

## 
##  Pearson's product-moment correlation
## 
## data:  FRQo and GMSSTo
## t = -2.5789, df = 28, p-value = 0.01546
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.68954306 -0.09242902
## sample estimates:
##        cor 
## -0.4381095

model=lm(FRQo~NSOIo+GMSSTo)
cor.test(FRQo,predict(model))                                  #strong correlation

## 
##  Pearson's product-moment correlation
## 
## data:  FRQo and predict(model)
## t = 4.3434, df = 28, p-value = 0.0001663
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3554357 0.8096604
## sample estimates:
##       cor 
## 0.6344606

cor.test(NSOIclim,NSOIh) ; cor.test(GMSSTclim,GMSSTh)  #well matching the climatology

## 
##  Pearson's product-moment correlation
## 
## data:  NSOIclim and NSOIh
## t = 6.852, df = 18, p-value = 2.066e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6535993 0.9393247
## sample estimates:
##       cor 
## 0.8502121

## 
##  Pearson's product-moment correlation
## 
## data:  GMSSTclim and GMSSTh
## t = 8.033, df = 18, p-value = 2.311e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7256883 0.9536195
## sample estimates:
##       cor 
## 0.8842485

####################################################
#Ref. Check normality of the variables
####################################################

X1=scale(NSOIo) ;  X2=scale(GMSSTo) ; Y=scale(FRQo)

#1. Check normality of the variables for linear assumption------------------------
#Normality test
shapiro.test(Y)     #p-value = 0.6682

## 
##  Shapiro-Wilk normality test
## 
## data:  Y
## W = 0.97451, p-value = 0.6682

shapiro.test(X1)    #p-value = 0.3793

## 
##  Shapiro-Wilk normality test
## 
## data:  X1
## W = 0.97781, p-value = 0.7649

shapiro.test(X2)    #p-value = 0.7649

## 
##  Shapiro-Wilk normality test
## 
## data:  X2
## W = 0.9635, p-value = 0.3793

Fig. 1. Correlation screen of the tropical storm frequency in the western North Pacific (modified from Kang and Elsner (2016)). \(\rm{PC1}\) and \(\rm{PC2}\) represent the principal components by the two primary environmental variables of \(\rm{NSOI}\) and \(\rm{GMSST}\). The inner and outer grey circles indicate 0.5 and 1.0 correlation coefficients, respectively. Significant correlation coefficients (\(\alpha\) \(\le\) 0.05) are shown in red line with a dot for the highest value.

Figure 2

####################################################
# Fig. 2. Predictive probability for all available future environment
####################################################
par(mfrow=c(1,2),mar=c(4,5,3,3))

#a. Two dimensional density of the environmental variables
#------------------------------------------------------------------------------
library(mvtnorm)
X1.new = seq(-3,3,0.1)  #length=61
X2.new = X1.new
length=length(X1.new)
Z = matrix(0,nrow=length,ncol=length)  #Z: density
sigma = matrix(c(1,cor(X1,X2),cor(X1,X2),1),nrow=2)
for (i in 1:length){for (j in 1:length) {Z[i,j] = dmvnorm(c(X1.new[i],X2.new[j]),mean=c(0,0),sigma=sigma)}}
contour(X1.new,X2.new,Z, col='grey25',xlim=c(-3,3), ylim=c(-3,3), xlab='',ylab='',axes=FALSE) 
abline(h=0,v=0,lty=2,col='dark grey')
points(X1,X2,cex=1.5,pch=16,col='grey76')
points(mean(X1),mean(X2),cex=2,pch=16,col='green')

axis(1,at=seq(-3,3,1), labels=seq(-3,3,1),cex.axis=1.2)
axis(2,at=seq(-3,3,1), labels=seq(-3,3,1),cex.axis=1.2,las=1)
axis(3,at=seq(-3,3,1), labels=rep('',7))
axis(4,at=seq(-3,3,1), labels=rep('',7))
mtext('NSOI (s.d.)',side=1,line=2.7,cex=1.3) 
mtext('GMSST (s.d.)',side=2,line=3,cex=1.3)
mtext('(a)',side=3,line=0.8,cex=1.5,adj=-0.2)

box()

#b. Predictive probability for all available future environment
#------------------------------------------------------------------------------
#(reference: https://youtu.be/V-sReSM887I)

X1=as.numeric(scale(NSOIo)) ;  X2=as.numeric(scale(GMSSTo)) ; Y=as.numeric(scale(FRQo))
model=lm(Y~X1+X2)

plot(-999,-999,type='l',col='green',xlim=c(0,20),ylim=c(0,0.1)
 , xlab='',ylab='',axes=FALSE)

df=27 #N-k-1 #N:30  #The degrees of freedom in a multiple regression equals N-k-1, where k is the number of variables.  
FRQbin=seq(-7,25,0.1) ; Ybin=(FRQbin-mean(FRQo))/sd(FRQo)

#probability of E[Y] on the means (0,0)
new=data.frame(X1=0,X2=0) #for the means (0,0) 
pred=predict(model,new,interval='confidence',level=0.95)
fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
t=(Ybin-fit)/SE 
prob=dt(t,df)
lines(FRQbin,prob/sum(prob),col='green',lwd=1)

#probability of Y on the means (0,0)
new=data.frame(X1=0,X2=0) 
pred=predict(model,new,interval='prediction',level=0.95)
fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
t=(Ybin-fit)/SE 
prob=dt(t,df)
lines(FRQbin,prob/sum(prob),col='green',lwd=4)

#marginal probability of Y 
marg=0
for(i in 1:length)
{for(j in 1:length)
 {new=data.frame(X1=X1.new[i],X2=X2.new[j]) #for all available (X1,X2)  
   pred=predict(model,new,interval='prediction',level=0.95)
   fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
   SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
   t=(Ybin-fit)/SE 
   prob=Z[i,j]*dt(t,df)
   marg=marg+prob
}} 
marg=marg/sum(marg) ;marg_clim=marg
lines(FRQbin,marg_clim,col='grey25',lwd=4)  

axis(1,at=seq(0,20,5), labels=seq(0,20,5),cex.axis=1.2)
axis(2,at=seq(0,0.1,0.02), labels=c('0.00',seq(0.02,0.08,0.02),'0.10'),cex.axis=1.2,las=1)
mtext('FRQ',side=1,line=2.7,cex=1.3) 
mtext('Probability density',side=2,line=3.8,cex=1.3)
mtext('(b)',side=3,line=0.8,cex=1.5,adj=-0.27)
box()

Fig. 2. Probability distribution of the tropical storm frequency on the climatological predictors. a) Bivariate normal distribution of the predictors, and b) probability distributions of the prediction results. For a), circles represent the lines with each equal density in a bivariate normal distribution of the standardized \(\rm{NSOI}\) and \(\rm{GMSST}\). Grey dots indicate the pairs of observed \(\rm{NSOI}\) and \(\rm{GMSST}\) for the 30-year period (1986–2015), while their mean is shown by a green dot. The two green lines in b) show the probability distribution of the prediction result on the climatological mean (green dot in Fig. 2a). The thin and thick green lines represent the probabilities for \(\rm{\hat{FRQ}}\) and \(\rm{FRQ}\), respectively. The marginal probability distribution of \(\rm{FRQ}\) on the bivariate normal predictors is shown by the thick black line.

Figure 3

####################################################
# Fig. 3. Predictive probability for the dynamical ensemble members
####################################################
par(mfrow=c(1,2),mar=c(4,5,3,3))

#a. Distribution of the dynamical ensemble members
#------------------------------------------------------------------------------
X1=as.numeric(scale(NSOIo)) ;  X2=as.numeric(scale(GMSSTo)) 
X1.new=(NSOIf-mean(NSOIo))/sd(NSOIo) ; X2.new=(GMSSTf-mean(GMSSTo))/sd(GMSSTo)

plot(-999,-999,xlim=c(1986,2017),ylim=c(-3,4),xlab='',ylab='',axes=F)
lines(1986:2015,X1,col='royalblue1',lwd=3)
lines(1986:2015,X2,col='orange',lwd=3)
points(rep(2017,42),X1.new,col='royalblue1');points(2017,mean(X1.new),pch=16,cex=2,col='green')
points(rep(2017,42),X2.new,col='orange');points(2017,mean(X2.new),pch=16,cex=2,col='green')
abline(h=0,lty=2,col='dark grey')

axis(1,at=seq(1985,2020,5), labels=seq(1985,2020,5),cex.axis=1.2)
axis(2,at=seq(-4,4,1), labels=seq(-4,4,1),cex.axis=1.2,las=1)
axis(3,at=seq(1985,2020,5), labels=rep('',8))
axis(4,at=seq(-4,4,1), labels=rep('',9))
mtext('Year',side=1,line=2.7,cex=1.3) 
mtext('Standardized value (s.d.)',side=2,line=3,cex=1.3)
mtext('(a)',side=3,line=0.8,cex=1.5,adj=-0.2)

box()

#b. Marginal predictive probability given the dynamical ensemble members
#------------------------------------------------------------------------------
#(reference: https://youtu.be/V-sReSM887I)

X1=as.numeric(scale(NSOIo)) ;  X2=as.numeric(scale(GMSSTo)) ; Y=as.numeric(scale(FRQo))
model=lm(Y~X1+X2)

plot(-999,-999,type='l',col='green',xlim=c(-7,20),ylim=c(0,0.015),xlab='',ylab='', axes=F)

df=27 #N-k-1 #N:30 #The degrees of freedom in a multiple regression equals N-k-1, where k is the number of variables. 
FRQbin=seq(-7,25,0.1) ; Ybin=(FRQbin-mean(FRQo))/sd(FRQo)

#Ensembe members and their marginal density
marg=0 
for(i in 1:42) #number of ensemble members
{new=data.frame(X1=X1.new[i],X2=X2.new[i]) #for all available (X1,X2)  
  pred=predict(model,new,interval='prediction',level=0.95)
  fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
  SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
  t=(Ybin-fit)/SE 
  prob=dt(t,df) ; prob=prob/sum(prob)
  lines(FRQbin,prob,col='grey76',lwd=1)
  marg=marg+prob
} 
marg=marg/sum(marg)
marg_ens=marg    

#probability of Y on the ensemble means
new=data.frame(X1=mean(X1.new),X2=mean(X2.new)) 
pred=predict(model,new,interval='prediction',level=0.95)
fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
t=(Ybin-fit)/SE 
prob=dt(t,df)
lines(FRQbin,prob/sum(prob),col='green',lwd=4)

#comparison with marginal _ens
lines(FRQbin,marg_ens,col='red3',lwd=4)  
#lines(FRQbin,marg_clim,col='black',lwd=4)  

axis(1,at=seq(-10,20,5), labels=seq(-10,20,5),cex.axis=1.2)
axis(2,at=seq(0,0.02,0.002), labels=c('0.000',seq(0.002,0.008,0.002),'0.010',seq(0.012,0.018,0.002),'0.02'),cex.axis=1.2,las=1)
mtext('FRQ',side=1,line=2.7,cex=1.3) 
mtext('Probability density',side=2,line=3.8,cex=1.3)
mtext('(b)',side=3,line=0.8,cex=1.5,adj=-0.28)
box()

####################################################
#Ref. Check correlations among the variables
####################################################
#Check some correlations
#(Result) Correlations are confirmed as expeceted,
#----------------------------------------------------------------------------
time=1986:2015
cor.test(time,NSOIo)

## 
##  Pearson's product-moment correlation
## 
## data:  time and NSOIo
## t = -0.35349, df = 28, p-value = 0.7264
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4169128  0.3008383
## sample estimates:
##        cor 
## -0.0666552

cor.test(time,GMSSTo)

## 
##  Pearson's product-moment correlation
## 
## data:  time and GMSSTo
## t = 8.7656, df = 28, p-value = 1.622e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7169681 0.9296449
## sample estimates:
##       cor 
## 0.8561043

Fig. 3. Probability distribution of the tropical storm frequency on the dynamical predictors. a) Forty-two pairs of \(\rm{NSOI}\) and \(\rm{GMSST}\) for the dynamical input, and b) probability distributions of the prediction results. For a), the time series of the standardized \(\rm{NSOI}\) and \(\rm{GMSST}\) for the 30-year period (1986–2015) are compared to the scaled dynamical input. Each mean is indicated by a green dot. In b), the probability distributions of \(\rm{FRQ}\) on the forty-two dynamical ensemble predictors are shown in thin grey lines, while that on the mean predictors (green dot) in a thick green line. The thick red line represents the predictive distribution of \(\rm{FRQ}\), which is the marginal distribution by the forty-two pairs of the predictors.

Figure 4

####################################################
# Fig. 4. Interpretation of the predictive probability
####################################################
# FRQbin=seq(-7,25,0.1) (from Fig2 & Fig3) 
# marg_clim & marg_ens (from Fig2)

#a. Probability and terciles as a climatological reference 
#-------------------------------------------------------------------------------
par(mfrow=c(2,1),mar=c(4,6,3,3))

#a. Histogram and theoretical climatology
#------------------------------------------------------------------------------
H=hist(FRQo,breaks=seq(0.5,20.5,1),plot=F)
x=H$mids ; y= H$counts/350
par(lend=2) #line shape  
plot(x[y>0],y[y>0],xlim=c(-7,25),ylim=c(0,0.015),type='h',col='grey85',lwd=12, xlab='',ylab='',axes=F)

#check the splits for the terciles 
den=H$density/sum(H$density)
cbind(x,cumsum(den))     #normal range: 9~11 based on the empirical terciles

##        x           
##  [1,]  1 0.00000000
##  [2,]  2 0.00000000
##  [3,]  3 0.00000000
##  [4,]  4 0.00000000
##  [5,]  5 0.03333333
##  [6,]  6 0.10000000
##  [7,]  7 0.13333333
##  [8,]  8 0.23333333
##  [9,]  9 0.36666667
## [10,] 10 0.53333333
## [11,] 11 0.60000000
## [12,] 12 0.70000000
## [13,] 13 0.83333333
## [14,] 14 0.90000000
## [15,] 15 0.93333333
## [16,] 16 1.00000000
## [17,] 17 1.00000000
## [18,] 18 1.00000000
## [19,] 19 1.00000000
## [20,] 20 1.00000000

par(new=TRUE) ; plot(x[x>=9 & x<=11],y[x>=9 & x<=11],xlim=c(-7,25),ylim=c(0,0.015),type='h',col='ivory4',lwd=12,xlab='', ylab='',axes=F)
#abline(v=mean(FRQo),col='green',lwd=2)  #mean FRQ: 10.633

#marginal probability of Y  
lines(FRQbin,marg_clim,col='grey25',lwd=4)

#normal range by the terciles (inclusive)
cdf=cbind(FRQbin,round(cumsum(marg_clim),3))
cdf[c(164,190),]  #normal range: 9.3~11.9 (line num 164~190) based on terciles

##      FRQbin      
## [1,]    9.3 0.340
## [2,]   11.9 0.664

abline(v=FRQbin[c(164,190)],col='grey25',lwd=2) 
FRQbin[c(164,190)]

## [1]  9.3 11.9

axis(1,at=seq(-10,20,5), labels=seq(-10,20,5),cex.axis=1.2)
axis(2,at=seq(0,0.02,0.002), labels=c('0.000',seq(0.002,0.008,0.002),'0.010',seq(0.012,0.018,0.002),'0.02'),cex.axis=1.2,las=1)
mtext('FRQ',side=1,line=2.7,cex=1.3) 
mtext('Probability density',side=2,line=4,cex=1.3)
mtext('(a)',side=3,line=0.8,cex=1.7,adj=-0.2)

box()

#b. Comparison of the predictive probability with theoretical climatology as a reference
#------------------------------------------------------------------------------
#---------partial contributions
#Dynamic ens NSOI and normal GMSST 
X1=as.numeric(scale(NSOIo)) ;  X2=as.numeric(scale(GMSSTo)) ; Y=as.numeric(scale(FRQo))
model=lm(Y~X1+X2)
df=27 #N-k-1 #N:30 #The degrees of freedom in a multiple regression equals N-k-1, where k is the number of variables.
FRQbin=seq(-7,25,0.1) ; Ybin=(FRQbin-mean(FRQo))/sd(FRQo)

X1.new=(NSOIf-mean(NSOIo))/sd(NSOIo) ; X2.new=seq(-3,3,0.1) ; Z=dnorm(X2.new) # length:61
marg=0
for(i in 1:length(X1.new)) #loop for NSOIf
{for(j in 1:length(X2.new)) #loop for normal density of GMSST  
 {new=data.frame(X1=X1.new[i],X2=X2.new[j]) #for all available (X1,X2)  
   pred=predict(model,new,interval='prediction',level=0.95)
   fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
   SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
   t=(Ybin-fit)/SE 
   prob=Z[j]*dt(t,df)
   marg=marg+prob
}} 
marg=marg/sum(marg) ; marg_nsoi=marg

#Dynamic ens GMSST and normal NSOI
X1=as.numeric(scale(NSOIo)) ;  X2=as.numeric(scale(GMSSTo)) ; Y=as.numeric(scale(FRQo))
model=lm(Y~X1+X2)
df=27 #N-k-1 #N:30 #The degrees of freedom in a multiple regression equals N-k-1, where k is the number of variables.
FRQbin=seq(-7,25,0.1) ; Ybin=(FRQbin-mean(FRQo))/sd(FRQo)

X1.new=seq(-3,3,0.1) ; X2.new=(GMSSTf-mean(GMSSTo))/sd(GMSSTo) ;Z=dnorm(X2.new) # length:61
marg=0
for(i in 1:length(X1.new)) #loop for normal density of NSOI
{for(j in 1:length(X2.new)) #loop for GMSSTf  
 {new=data.frame(X1=X1.new[i],X2=X2.new[j]) #for all available (X1,X2)  
   pred=predict(model,new,interval='prediction',level=0.95)
   fit=pred[1] ; lwr=pred[2] ; upr=pred[3]
   SE=-(lwr-fit)/qt(0.975,df)  #SE=(upr-fit)/qt(0.975,df) #for 95% CI
   t=(Ybin-fit)/SE 
   prob=Z[j]*dt(t,df)
   marg=marg+prob
}} 
marg=marg/sum(marg) ; marg_gmsst=marg

#----------plot
#NSOI and GMSST
 plot(-999,-999,xlim=c(-7,25),ylim=c(0,0.015),xlab='', ylab='',axes=F)
 polygon(cbind(FRQbin,marg_nsoi),col='#3366ff66',border=NA)  
 polygon(cbind(FRQbin,marg_gmsst),col='#ffcc0066',border=NA)  

#marginal probability distributions
#lines(FRQbin,marg_clim,col='grey25',lwd=4)
lines(FRQbin,marg_ens,col='red3',lwd=4)

#theoretical terciles
abline(v=FRQbin[c(164,190)],col='grey25',lwd=2)

axis(1,at=seq(-10,20,5), labels=seq(-10,20,5),cex.axis=1.2)
axis(2,at=seq(0,0.02,0.002), labels=c('0.000',seq(0.002,0.008,0.002),'0.010',seq(0.012,0.018,0.002),'0.02'),cex.axis=1.2,las=1)
mtext('FRQ',side=1,line=2.7,cex=1.3) 
mtext('Probability density',side=2,line=4,cex=1.3)
mtext('(b)',side=3,line=0.8,cex=1.7,adj=-0.2)
box()

####################################################
#Ref. Check the probability portions
#(find normal range where 0.33333~0.66666 is included)
####################################################
# FRQbin=seq(-7,25,0.1) (from Fig2 & Fig3) 

#check the observed mean 
#------------------------------------------------
   mean(FRQo)

## [1] 10.63333

#check "normal" range by the terciles (inclusive) 
#------------------------------------------------
  cdf=cbind(FRQbin,round(cumsum(marg_clim),3))
  cdf[c(164,190),]  #normal range: 9.3~11.9 (line num 164~190) based on terciles

##      FRQbin      
## [1,]    9.3 0.340
## [2,]   11.9 0.664

#Final predictive probability distribution
#------------------------------------------------
  bn= sum(marg_ens[1:163])       #below-normal: 78.1%
  n  = sum(marg_ens[164:190])   #normal: 17.3%
  an = sum(marg_ens[191:321])  #above-normal: 4.6%
  bn ;  n ; an

## [1] 0.7809288

## [1] 0.1728924

## [1] 0.04617879

  bn+n+an #check if the sum is 1.0

## [1] 1

#NSOI 
#------------------------------------------------
  bn= sum(marg_nsoi[1:163])       #below-normal: 27.2%
  n  = sum(marg_nsoi[164:190])   #normal: 35.2%
  an = sum(marg_nsoi[191:321])  #above-normal: 37.5%
  bn ;  n ; an

## [1] 0.2723683

## [1] 0.3524373

## [1] 0.3751944

  bn+n+an #check if the sum is 1.0

## [1] 1

#GMSST
#------------------------------------------------
  bn= sum(marg_gmsst[1:163])       #below-normal: 74.5%
  n  = sum(marg_gmsst[164:190])   #normal: 17.1%
  an = sum(marg_gmsst[191:321])  #above-normal: 8.3%
  bn ;  n ; an

## [1] 0.745332

## [1] 0.1714388

## [1] 0.08322916

  bn+n+an #check if the sum is 1.0

## [1] 1

Fig. 4. Portioning the predictive densities of the tropical storm frequency by the climatological terciles. a) Comparison of the terciles in the theoretical climatology and the histogram, and b) division of the predictive densities divided by the theoretical terciles. Terciles are 0.33 and 0.66 quantiles. For a), histogram bars in dark grey represents the empirical “normal”. In b), the predictive densities of \(\rm{FRQ}\) are compared with the ranges defined by the theoretical terciles. In this example case, 78.1 %, 17.3 %, and 4.6 % are assigned to “below-normal”, “normal”, and “above-normal”, respectively. Areas shaded in blue and yellow shows the predictive probability distributions where respective \(\rm{NSOI}\) and \(\rm{GMSST}\) ensemble predictors are exclusively applied, providing helpful information on their role in the result.