The accuracy of track forecasts for tropical cyclones (TCs) is well studied, but less attention has been paid to the representation of track-forecast uncertainty. Here Bayesian updating is employed on the radius of the 70 % probability circle using 72-h operational forecasts with comparisons made to the classical approach based on the empirical cumulative density (ECD). Despite an intuitive and efficient way of treating track errors, the ECD approach is statistically less informative than Bayesian updating. Built on a solid statistical foundation, Bayesian updating is shown to be a useful technique that can serve as a substitute for the classical approach in representing operational TC track-forecast uncertainty
rm(list=ls())
data=read.table('Data/ErrDist72.txt',T)
err=data$err
nlist=c(1,(1+as.numeric(cumsum(table(data$year))))) #Year of annually the first event
par(mar=c(5,6,2,2))
plot(-999,-999,xlim=c(0,rev(nlist)[1]),ylim=c(-50,1200),axes=FALSE, xlab='', ylab='')
lines(1:length(err),err,col=1,lwd=2)
points(err,pch=16,col = "black",cex = 1)
abline(h=mean(err)) ; mean(err)
## [1] 316.2465
model=lm(err ~ c(1:length(err)))
abline(model,lty=2)
s.model=summary(model) ; s.model
##
## Call:
## lm(formula = err ~ c(1:length(err)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -296.10 -106.39 -16.63 69.29 610.11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 475.791 28.964 16.427 < 2e-16 ***
## c(1:length(err)) -2.799 0.441 -6.347 4.94e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 152.9 on 111 degrees of freedom
## Multiple R-squared: 0.2663, Adjusted R-squared: 0.2596
## F-statistic: 40.28 on 1 and 111 DF, p-value: 4.937e-09
Slope=round(s.model$coefficients[2,1],1) ; StdErr=round(s.model$coefficients[2,2],2)
lab1 =as.expression(bquote(Trend ~~~.(Slope) %+-% ~.(StdErr) ~ (s.e.) ~ km/cyclone))
#lab1 =as.expression('-2.8' %+-% ~ '0.44' ~ (s.e.) ~ km/cyclone)
text(30,-5,lab1,cex=1.3,pos=4)
axis(1,at=nlist,label=2008:2015,cex.axis=1.3)
axis(2,at=seq(0,1200,100),label=seq(0,1200,100),cex.axis=1.3,las=2)
axis(4,at=seq(0,1200,100),label=rep('',13))
mtext('Year of annually the first event',side=1,line=3, cex=1.4)
mtext('Error distance (km)',side=2,line=3.8, cex=1.4)
legend('topright',c('Error Distance', 'Average','Trend'),pch=c(16,NA,NA),bty='n'
,lty=c('solid','solid','dashed'),col=c(1,1,1),lwd=c(2,1,1),cex=1.2)
box()
Fig. 1 Time series of the track error distances for 72-h forecasts over seven years (2008–2014). Each value represents the average per cyclone. One-hundred and thirteen values are plotted sequentially. Solid horizontal line indicates the average over the whole period. Linear regression on the values shows a decreasing trend (dashed line).
require(png)
## Loading required package: png
## Warning: package 'png' was built under R version 3.2.3
require(grid)
## Loading required package: grid
img <- readPNG('Fig2.png')
grid.raster(img)
Fig. 2 Schematic flowchart of the Bayesian updating method. Given an observation x, the posterior is used as a new prior and the whole process is repeated. Predictive density is calculated for a future values x’ based on the posterior.
rm(list=ls())
library(MASS) #for fitdistr function
data=read.table('Data/ErrDist72.txt',T) ; err=data$err
nlist=c(1,(1+as.numeric(cumsum(table(data$year))))) #Year of annually the first event
p=0.7
# 1. Prob circle radius from up-to-date ECD
#------------------------------------------------------------------------------------
ECD=c()
for (i in 31:length(err))
{ECD=c(ECD,quantile(err[1:i],prob=p)) #quantile at p level
}
# 2. Prob cirle radius from gamma fit
#-------------------------------------------------------------------------------------
set.seed(1)
GammaFit=c()
for (i in 31:length(err))
{f.gamma=fitdistr(err[1:i],'gamma')$estimate
a=f.gamma[1] ; b=f.gamma[2]
GammaFit=c(GammaFit,qgamma(p,a,b))
}
# 3. Prob cirle radius through Bayesian updating
#------------------------------------------------------------------------------------
# set data
num=30
base=err[1:num] # errors used for prior dist
obs=err[(num+1):length(err)]
# step1. parametric bootstrapping(n=1000)
#--------------------------------------------------------
f.gamma=fitdistr(base,'gamma')$estimate
a=f.gamma[1] ; b=f.gamma[2]
set.seed(1)
M_samp=NULL ; V_samp=NULL #using method of moments
for (i in 1:1000)
{samp=rgamma(length(base), a, b)
M_samp=c(M_samp,mean(samp)) ; V_samp=c(V_samp,var(samp))
}
#acf(M_samp)$acf[2] ; acf(V_samp)$acf[2] # check dependency by autocorrelation
# step2. joint prob distribution at grids (g1, g2, g3, ....., g151*151)
#--------------------------------------------------------
AA=density(M_samp,n=151,from=330,to=630) # 151 grids, 2km interval
Mx=rep(AA$x,each=151) # array : M set per each V
My=rep(AA$y,each=151) # array : M set per each V
BB=density(V_samp,n=151,from=11000,to=162000) # 151 grids, 1000km^2 interval
Vx=rep(BB$x,151) # array : V set per each M
Vy=rep(BB$y,151) # array : V set per each M
prior= (My*Vy)/sum(My*Vy)
# joint density between Md*Vd (array : V set per each M)
# discrete prior density at 151*151 grids
# step3. compute posterior (of the params) and predictive (of the variable) distribution
#--------------------------------------------------------
Bayes=NULL # Probability circle radius
for (loop in 1:length(obs))
{ lik=dgamma(obs[loop], (Mx^2)/Vx, Mx/Vx) # likelihood=conditional densities at grids
# Moments are translated into the params
# shape=(M^2)/V , rate(=inverse scale)=V/M
joint=lik*prior # joint density at bins
post=joint/sum(joint) # posterior at bins # posterior = likelihood*prior
# predictive density
pred=NULL
for (errd in 1:1000) # error distance
{pred=c(pred,sum(post*dgamma(errd, (Mx^2)/Vx, Mx/Vx))) # marginal density distribution
}
pred=pred/sum(pred)
# Radius of probability circle
k=which.min(abs(cumsum(pred)-p))
Bayes=c(Bayes, k)
prior=post
} # end of Bayesian loop
# 4. Prob cirle radius through Bayesian updating2 (Non-informative prior)
#------------------------------------------------------------------------------
# set data
obs=err
# step1. parametric bootstrapping(n=1000)
#--------------------------------------------------------
NA
## [1] NA
# step2. joint prob distribution at grids (g1, g2, g3, ....., g151*151)
#--------------------------------------------------------
prior= rep(1,151*151)/(151*151)
# joint density between Md*Vd (array : V set per each M)
# discrete prior density at 151*151 grids
# Non-informative
# step3. compute posterior (of the params) and predictive (of the variable) distribution
#--------------------------------------------------------
Bayes2=NULL # Probability circle radius
for (loop in 1:length(obs))
{ lik=dgamma(obs[loop], (Mx^2)/Vx, Mx/Vx) # likelihood=conditional densities at grids
# Moments are translated into the params
# shape=(M^2)/V , rate(=inverse scale)=V/M
joint=lik*prior # joint density at bins
post=joint/sum(joint) # posterior at bins # posterior = likelihood*prior
# predictive density
pred=NULL
for (errd in 1:1000) # error distance
{pred=c(pred,sum(post*dgamma(errd, (Mx^2)/Vx, Mx/Vx))) # marginal density distribution
}
pred=pred/sum(pred)
# Radius of probability circle
k=which.min(abs(cumsum(pred)-p))
Bayes2=c(Bayes2, k)
prior=post
} # end of Bayesian loop
# plot
#----------------------------------------------------------------------------------
par(mar=c(5,6,2,3))
plot(-999,-999,xlim=c(1,rev(nlist)[1]),ylim=c(300,800),axes=FALSE, xlab='', ylab='')
abline(v=-50:200,col='gray',lty=2)
lines(31:length(err),ECD,col=1,lwd=3)
lines(31:length(err),GammaFit,col=3,lwd=3)
lines(1:length(err),Bayes2,col=4,lwd=3)
lines(31:length(err),Bayes,col=2,lwd=3)
axis(1,at=nlist,label=2008:2015,cex.axis=1.2)
axis(2,at=seq(200,700,100),label=seq(200,700,100),cex.axis=1.5,las=2)
axis(4,at=seq(200,700,100),label=rep('',6))
legend('topright',c('ECD','Gamma fit','Bayesian (informative prior)','Bayesian (non-informative prior)' ,'Sequence'),col=c(1,3,2,4,'gray')
,bg='white',lwd=c(3,3,3,3,1),lty=c(1,1,1,1,2),cex=1.3)
mtext('Year of annually the first event',side=1,line=3.2, cex=1.7)
mtext('Radius of 70 % probability circle (km)',side=2,line=4.4, cex=1.7)
box()
Fig. 3 Time series of 70th percentile probability circle radius. Black and green lines show the values from up-to-date ECD and theoretical gamma distribution, respectively. Red line represents the Bayesian result from an informative initial prior which is the joint probability distribution of the gamma parameters based on the first 30 error distances. For comparison, an alternative experiment starting from non-informative prior is shown in blue line.
rm(list=ls())
library(MASS) #for fitdistr function
data=read.table('Data/ErrDist72.txt',T)
err=data$err
par(mfrow=c(1,2),mar=c(5,7,2,3))
#Fig4a
#-----------------------------------------------------
plot(-999,-999, xlim = c(0,700),ylim=c(0,1),axes=FALSE,xlab='',ylab='',main='')
gamma.fit=fitdistr(err,'gamma')$estimate #Gamma fit
a=gamma.fit[1] #shape
b=gamma.fit[2] #rate(inverse scale)
dg=dgamma(0:1000,a,b)
lines(cumsum(dg),col=3,lwd=4)
Fn=ecdf(err)
points(err,Fn(err),pch=16,col=1,cex=.8)
axis(1,at=seq(0,700,100),label=seq(0,700,100),cex.axis=1.3)
axis(2,at=seq(0,1,0.2),label=seq(0,1,0.2),cex.axis=1.3,las=2)
mtext('Error distance (km)',side=1,line=3, cex=1.4)
mtext('Cumulative density',side=2,line=4, cex=1.4)
mtext('(a)',side=3,line=0.22,cex=2,adj=-0.23)
box()
ks.test(err, 'pgamma', a, b, exact = TRUE)
##
## One-sample Kolmogorov-Smirnov test
##
## data: err
## D = 0.10053, p-value = 0.1903
## alternative hypothesis: two-sided
#Fig4b
#-----------------------------------------------------
#parametric bootstrapping (for example; 113 TC cases)
set.seed(1)
nset=NULL; Dset=NULL ; M=NULL
for(n in seq(60,3000,140)) # modifying the number of samples
{# resampling 100 times
D=NULL ; for(j in 1:100) D=c(D,stat=ks.test(rgamma(n,a,b),'pgamma',a,b)$statistic)
nset=c(nset,rep(n,100))
Dset=c(Dset,D)
M=c(M,median(D))
}
boxplot(Dset~nset,axes=FALSE,xlab='',ylab='',main='')
lines(M,lwd=2,col=4)
ECDpos=1+(113-60)/140 # ECD position
abline(v=ECDpos,col=2)
points(ECDpos,ks.test(err,'pgamma',a,b)$statistic,pch=16,cex=1.5,col=2)
axis(1,at=1:22,label=seq(60,3000,140),cex.axis=1.3)
axis(2,at=seq(0,0.3,0.05),label=seq(0,0.3,0.05),cex.axis=1.3,las=2)
mtext('Sample size',side=1,line=3, cex=1.4)
mtext('Kolmogorov-Smirnov test statistic (D)',side=2,line=4.2, cex=1.4)
mtext('(b)',side=3,line=0.22,cex=2,adj=-0.25)
box()
Fig. 4 Comparison of cumulative density distributions. (a) Cumulative density distributions of 113 error distances (black dot) and their fitted gamma distribution (green line), and (b) boxplot of Kolmogorov-Smirnov test statistic by comparing the gamma distribution and its bootstrapped samples. For (b), test statistic (D) is calculated 100 times for each different sample size. Each boxplot shows the median (black thick solid line), interquartile range (box range) and outliers (circle). Median values of the D samples are connected by the blue curve. Red dot represents the D calculated from the observed errors and the gamma distribution at 113 error samples (red line).