Features:
calculate SPI value based on spi function in SPEI package,
Gamma fitting function with Kolmogorov - Smirnov test
comparison between empirical and theoretical cumulative graph for each month
Package requirements are SPEI, ggplot2, and plyr. They can be downloaded by using install.packages command or Tools/Install Packages…
require(SPEI)
## Loading required package: SPEI
## Loading required package: lmomco
## Loading required package: parallel
## # Package SPEI (1.6) loaded [try SPEINews()].
#wichita is an available data set inside the SPEI package
data(wichita)
#calculate and display of average, maximum, minimum for above dataset
summary(wichita)
## YEAR MONTH PRCP TMAX
## Min. :1980 Min. : 1.000 Min. : 0.00 Min. :-4.45
## 1st Qu.:1987 1st Qu.: 3.250 1st Qu.: 24.00 1st Qu.:11.09
## Median :1995 Median : 6.000 Median : 51.10 Median :20.70
## Mean :1995 Mean : 6.474 Mean : 67.74 Mean :20.10
## 3rd Qu.:2003 3rd Qu.: 9.000 3rd Qu.: 97.03 3rd Qu.:29.12
## Max. :2011 Max. :12.000 Max. :333.80 Max. :40.48
##
## TMIN TMED AWND TSUN
## Min. :-12.9600 Min. :-8.71 Min. : 3.490 Min. : 3.210
## 1st Qu.: -0.9325 1st Qu.: 5.09 1st Qu.: 4.650 1st Qu.: 6.475
## Median : 7.7950 Median :14.52 Median : 5.170 Median : 8.340
## Mean : 7.8089 Mean :13.96 Mean : 5.218 Mean : 8.404
## 3rd Qu.: 16.7600 3rd Qu.:23.16 3rd Qu.: 5.660 3rd Qu.:10.200
## Max. : 24.5400 Max. :32.46 Max. :18.220 Max. :21.140
## NA's :51 NA's :36
## ACSH
## Min. :23.23
## 1st Qu.:45.70
## Median :55.84
## Mean :54.54
## 3rd Qu.:62.59
## Max. :80.97
## NA's :228
For SPI calculation purpose, we only need precipitation data whose name is PRCP
#calculate 3-month SPI with Gamma fitting function
SPI3 = spi(wichita$PRCP, scale = 3, distribution = 'Gamma')
plot(SPI3)
#calculate 6-month SPI with Gamma fitting function
SPI6 = spi(wichita$PRCP, scale = 6, distribution = 'Gamma')
plot(SPI6)
#Define 3-month timeseries with properties ts()
plot(spi(ts(wichita$PRCP,freq=12,start=c(1980,1)),scale = 3))
Below is a goodness of fit test for 3-month SPI using Kolmogorov - Smirnov test
# extract paramaters. There are 12 paramater groups corresponding to 12 months
PARA = data.frame(SPI3$coefficients)
PARA
## Series.1.1 Series.1.2 Series.1.3 Series.1.4 Series.1.5 Series.1.6
## alpha 3.344195 2.977991 3.515247 5.412788 8.121795 6.61030
## beta 26.187893 27.610086 33.961871 29.845407 30.110724 46.16895
## Series.1.7 Series.1.8 Series.1.9 Series.1.10 Series.1.11 Series.1.12
## alpha 5.78867 7.484525 4.843661 4.843779 3.059231 3.153384
## beta 55.62267 40.523197 51.763530 49.315190 59.678715 43.191693
In this example, we select Gamma distribution, so there are two parameters in data frame PARA In order to use Kolmogorov - Smirnov test, empirical cumulative function and theory cumulative function should be calculated
# Accumulated Rainfall for 3-month SPI
PRCP3 = rowSums(embed(wichita$PRCP,3),na.rm=FALSE)
# Adding year and month to data fram PRCP3
PRCP3 = data.frame(wichita$YEAR[3:nrow(wichita)],wichita$MONTH[3:nrow(wichita)], PRCP3)
# Change the colunm names
colnames(PRCP3) = c("year", "month", "accumulatedPre")
# Calculate p value for each month
KSTest = function(X, Para, MON){
Ecdf = ecdf(X)
Fempirical = Ecdf(X)
## Extract Gamma parameter
a = Para[1]
b = Para[2]
Ftheory = mat.or.vec(length(X),1)
for (i in 1:length(X)){
x = X[i]
xf <- function(t,A,B) { t^(A-1)*exp(-t/B) }
# compute the integral
Q <- integrate(xf,0,x,A=a,B=b)
Ftheory[i] <- Q$val*b^(-a)/gamma(a)
}
# Kolmogorov - Sminov Test, jitter function used to avoid
# a possible warning: "In ks.test(Ftheory, Fempirical) : cannot compute exact p-value with ties
KSTest = ks.test(jitter(Ftheory), jitter(Fempirical))
pval = KSTest$p.value
print(paste("---p-value for ", MON," is: ", pval,sep = ""))
# create matrix for furthur plot
Fall = data.frame(X,Fempirical,Ftheory)
return(Fall)
}
#Determine total length of timeseries X for all months
t = 0
for (iM in 1:12){
X = subset(PRCP3, PRCP3$month == iM)[,3]
t = t+length(X)
}
FMain = mat.or.vec(t,3)
MONLabel = mat.or.vec(t,1)
monthnames = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul","Aug","Sept","Oct", "Nov", "Dec")
t1 = 0; t2 = 0; #set index to save acummulated timeseries X, empirical cumulative function, theoretical cumulative function
for (iM in 1:12){
X = subset(PRCP3, PRCP3$month == iM)[,3]
t1 = t2 +1 # set lower bound index
t2 = t2 + length(X) #set upper bound index
Para = PARA[,iM] #extract parameter according to iM month
MON = monthnames[iM] #extract the month's name
FMain[t1:t2,]= as.matrix(KSTest(X, Para, MON))
MONLabel[t1:t2] = rep(MON,length(X))
if (iM == 12){
print("Finish Kolmogorov Smirnov test")
print("Null hypothesis: both distributions follow the same distribution")
print("A p-value is less than 0.05 means a rejection of this null hypothesis")
}
}
## [1] "---p-value for Jan is: 0.999115261755522"
## [1] "---p-value for Feb is: 0.999115261755522"
## [1] "---p-value for Mar is: 0.999325913718493"
## [1] "---p-value for Apr is: 0.968289837864919"
## [1] "---p-value for May is: 0.999325913718493"
## [1] "---p-value for Jun is: 0.999999997656384"
## [1] "---p-value for Jul is: 0.999325913718493"
## [1] "---p-value for Aug is: 0.837777479258822"
## [1] "---p-value for Sept is: 0.999325913718493"
## [1] "---p-value for Oct is: 0.999325913718493"
## [1] "---p-value for Nov is: 0.999115261755522"
## [1] "---p-value for Dec is: 0.963429714888691"
## [1] "Finish Kolmogorov Smirnov test"
## [1] "Null hypothesis: both distributions follow the same distribution"
## [1] "A p-value is less than 0.05 means a rejection of this null hypothesis"
FMain = data.frame(FMain, MONLabel)
# change file names for FMain
colnames(FMain) =c("X","Fempirical", "Ftheory","MONLabel")
# change order to display in ggplot2
order = monthnames
library("plyr")
dat <- arrange(transform(FMain,MONLabel=factor(MONLabel,levels=order)),MONLabel)
# plot empirical and theoretical cumulative for all months in a same figure
textSize = 8 # set text size for axis title and axis label
require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
ggplot()+
geom_line(data = dat,aes(x = X, y = Ftheory),size = 0.6, colour = "blue")+
geom_point(data = dat, aes(x = X, y = Fempirical),size =0.6,shape = 1, colour = "red")+
facet_wrap(~MONLabel, scales = "free")+
ylab("G(x)")+
xlab("Time Series X")+
theme_bw()+
theme( axis.text.x = element_text(size=textSize),
axis.title.x = element_text(face = "bold",size = textSize),
axis.text.y = element_text(size=textSize),
axis.title.y = element_text(face = "bold",size = textSize),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = textSize, face = "bold"))
Note: blue line is theoretical cumulative curve while red point is empirical cumulative value.