Features:
Calculate SPEI value based on spei function in SPEI package
log-Logistic function with Kolmogorov - Smirnov test
Comparison between empirical and theoretical cumulative graph for each month
Package requirements are SPEI, ggplot2, and plyr. They can be downloaded by using install.packages command or Tools/Install Packages…
require(SPEI)
## Loading required package: SPEI
## Loading required package: lmomco
## Loading required package: parallel
## # Package SPEI (1.6) loaded [try SPEINews()].
#wichita is an available data set inside the SPEI package
data(wichita)
#calculate and display of average, maximum, minimum for above dataset
summary(wichita)
## YEAR MONTH PRCP TMAX
## Min. :1980 Min. : 1.000 Min. : 0.00 Min. :-4.45
## 1st Qu.:1987 1st Qu.: 3.250 1st Qu.: 24.00 1st Qu.:11.09
## Median :1995 Median : 6.000 Median : 51.10 Median :20.70
## Mean :1995 Mean : 6.474 Mean : 67.74 Mean :20.10
## 3rd Qu.:2003 3rd Qu.: 9.000 3rd Qu.: 97.03 3rd Qu.:29.12
## Max. :2011 Max. :12.000 Max. :333.80 Max. :40.48
##
## TMIN TMED AWND TSUN
## Min. :-12.9600 Min. :-8.71 Min. : 3.490 Min. : 3.210
## 1st Qu.: -0.9325 1st Qu.: 5.09 1st Qu.: 4.650 1st Qu.: 6.475
## Median : 7.7950 Median :14.52 Median : 5.170 Median : 8.340
## Mean : 7.8089 Mean :13.96 Mean : 5.218 Mean : 8.404
## 3rd Qu.: 16.7600 3rd Qu.:23.16 3rd Qu.: 5.660 3rd Qu.:10.200
## Max. : 24.5400 Max. :32.46 Max. :18.220 Max. :21.140
## NA's :51 NA's :36
## ACSH
## Min. :23.23
## 1st Qu.:45.70
## Median :55.84
## Mean :54.54
## 3rd Qu.:62.59
## Max. :80.97
## NA's :228
For SPEI calculation purpose, We need precipitation and potential evapotranspiration data. In this case, we consider the scarce data region which only need temperature to calculate potential evapotranspiration. The rainfall column name is PRCP, while the average temperature column name is TMED
#calculate potential evapotranspiration based on thornthwaite method
PET = thornthwaite(wichita$TMED,37.6475) #37.6475 is latitude in degree of wichita.
#calculate 3-month SPEI with log-Logistic fitting function
SPEI3 = spi(wichita$PRCP - PET, scale = 3, distribution = 'log-Logistic')#log-Logistic is also default option
plot(SPEI3)
#calculate 6-month SPI with Gamma fitting function
SPEI6 = spi(wichita$PRCP - PET, scale = 6, distribution = 'log-Logistic')
plot(SPEI6)
#Define 3-month timeseries with properties ts()
plot(spei(ts(wichita$PRCP - PET,freq=12,start=c(1980,1)),scale = 3))
Below is a goodness of fit test for 3-month SPEI using Kolmogorov - Smirnov test
# extract paramaters. There are 12 paramater groups corresponding to 12 months
PARA = as.data.frame(SPEI3$coefficients)
PARA
## PET_tho.1 PET_tho.2 PET_tho.3 PET_tho.4 PET_tho.5
## xi 65.46296802 67.3011857 87.41153410 78.65311392 68.94447982
## alpha 26.29263542 25.1680144 35.77723904 41.95280626 52.36717372
## kappa -0.06523326 -0.1643513 -0.08184319 -0.09105031 -0.09324284
## PET_tho.6 PET_tho.7 PET_tho.8 PET_tho.9 PET_tho.10
## xi 5.16380502 -91.5198310 -173.02727430 -188.63554487 -81.45094194
## alpha 74.28368582 86.1344666 76.55909004 76.29564234 67.41733745
## kappa -0.08326852 0.0124413 0.02234176 -0.02318632 -0.07473596
## PET_tho.11 PET_tho.12
## xi -0.2422885 62.39152267
## alpha 56.4896879 42.71139672
## kappa -0.1553510 -0.07168786
The log-Logistic has three parameters in data frame PARA. In order to use Kolmogorov - Smirnov test, empirical cumulative function and theory cumulative function should be calculated
# Different between Rainfall and Potential Evapotranspiration
D = wichita$PRCP - PET
# Accumulated Rainfall and Accumulated Potential Evapotranspiration for 3-month SPEI
D3 = rowSums(embed(D,3),na.rm=FALSE)
# Adding year and month to data frame D3
D3 = data.frame(wichita$YEAR[3:nrow(wichita)],wichita$MONTH[3:nrow(wichita)], D3)
# Change the colunm names
colnames(D3) = c("year", "month", "accumulatedPreminusETp")
# Calculate p value for each month
KSTest = function(X, Para, MON){
Ecdf = ecdf(X)
Fempirical = Ecdf(X)
#pglo is an available program inside SPEI package
Ftheory = pglo(X,list(type="glo", para=Para, source="user"))
# Kolmogorov - Sminov Test, jitter function used to avoid
# a possible warning: "In ks.test(Ftheory, Fempirical) : cannot compute exact p-value with ties
KSTest = ks.test(jitter(Ftheory), jitter(Fempirical))
pval = KSTest$p.value
print(paste("---p-value for ", MON," is: ", pval,sep = ""))
# create matrix for furthur plot
Fall = data.frame(X,Fempirical,Ftheory)
return(Fall)
}
#Determine total length of timeseries X for all months
t = 0
for (iM in 1:12){
X = subset(D3, D3$month == iM)[,3]
t = t+length(X)
}
FMain = mat.or.vec(t,3)
MONLabel = mat.or.vec(t,1)
monthnames = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul","Aug","Sept","Oct", "Nov", "Dec")
t1 = 0; t2 = 0; #set index to save acummulated timeseries X, empirical cumulative function, theoretical cumulative function
for (iM in 1:12){
X = subset(D3, D3$month == iM)[,3]
t1 = t2 +1 # set lower bound index
t2 = t2 + length(X) #set upper bound index
Para = PARA[,iM] #extract parameter according to iM month
MON = monthnames[iM] #extract the month's name
FMain[t1:t2,]= as.matrix(KSTest(X, Para, MON))
MONLabel[t1:t2] = rep(MON,length(X))
if (iM == 12){
print("Finish Kolmogorov Smirnov test")
print("Null hypothesis: both distributions follow the same distribution")
print("A p-value is less than 0.05 means a rejection of this null hypothesis")
}
}
## [1] "---p-value for Jan is: 0.963429714888691"
## [1] "---p-value for Feb is: 0.999115261755522"
## [1] "---p-value for Mar is: 0.968289837864919"
## [1] "---p-value for Apr is: 0.968289837864919"
## [1] "---p-value for May is: 0.999325913718493"
## [1] "---p-value for Jun is: 0.999325913718493"
## [1] "---p-value for Jul is: 0.999325913718493"
## [1] "---p-value for Aug is: 0.999325913718493"
## [1] "---p-value for Sept is: 0.999325913718493"
## [1] "---p-value for Oct is: 0.968289837864919"
## [1] "---p-value for Nov is: 0.999115261755522"
## [1] "---p-value for Dec is: 0.999115261755522"
## [1] "Finish Kolmogorov Smirnov test"
## [1] "Null hypothesis: both distributions follow the same distribution"
## [1] "A p-value is less than 0.05 means a rejection of this null hypothesis"
FMain = data.frame(FMain, MONLabel)
# change file names for FMain
colnames(FMain) =c("X","Fempirical", "Ftheory","MONLabel")
# change order to display in ggplot2
order = monthnames
library("plyr")
dat <- arrange(transform(FMain,MONLabel=factor(MONLabel,levels=order)),MONLabel)
# plot empirical and theoretical cumulative for all months in a same figure
textSize = 8 # set text size for axis title and axis label
require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
ggplot()+
geom_line(data = dat,aes(x = X, y = Ftheory),size = 0.6, colour = "blue")+
geom_point(data = dat, aes(x = X, y = Fempirical),size =0.6,shape = 1, colour = "red")+
facet_wrap(~MONLabel, scales = "free")+
ylab("F(x)")+
xlab("Time Series X")+
theme_bw()+
theme( axis.text.x = element_text(size=textSize),
axis.title.x = element_text(face = "bold",size = textSize),
axis.text.y = element_text(size=textSize),
axis.title.y = element_text(face = "bold",size = textSize),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = textSize, face = "bold"))
Note: blue line is theoretical cumulative curve while red point is empirical cumulative value.