Standardized Precipitation Index (SPI) calculation and its goodness of fit testing with Gamma distribution

Features:

calculate SPI value based on spi function in SPEI package,
Gamma fitting function with Kolmogorov - Smirnov test
comparison between empirical and theoretical cumulative graph for each month

Package requirements are SPEI, ggplot2, and plyr. They can be downloaded by using install.packages command or Tools/Install Packages…

require(SPEI)

## Loading required package: SPEI

## Loading required package: lmomco

## Loading required package: parallel

## # Package SPEI (1.6) loaded [try SPEINews()].

#wichita is an available data set inside the SPEI package
data(wichita) 
#calculate and display of average, maximum, minimum for above dataset
summary(wichita)

##       YEAR          MONTH             PRCP             TMAX      
##  Min.   :1980   Min.   : 1.000   Min.   :  0.00   Min.   :-4.45  
##  1st Qu.:1987   1st Qu.: 3.250   1st Qu.: 24.00   1st Qu.:11.09  
##  Median :1995   Median : 6.000   Median : 51.10   Median :20.70  
##  Mean   :1995   Mean   : 6.474   Mean   : 67.74   Mean   :20.10  
##  3rd Qu.:2003   3rd Qu.: 9.000   3rd Qu.: 97.03   3rd Qu.:29.12  
##  Max.   :2011   Max.   :12.000   Max.   :333.80   Max.   :40.48  
##                                                                  
##       TMIN               TMED            AWND             TSUN       
##  Min.   :-12.9600   Min.   :-8.71   Min.   : 3.490   Min.   : 3.210  
##  1st Qu.: -0.9325   1st Qu.: 5.09   1st Qu.: 4.650   1st Qu.: 6.475  
##  Median :  7.7950   Median :14.52   Median : 5.170   Median : 8.340  
##  Mean   :  7.8089   Mean   :13.96   Mean   : 5.218   Mean   : 8.404  
##  3rd Qu.: 16.7600   3rd Qu.:23.16   3rd Qu.: 5.660   3rd Qu.:10.200  
##  Max.   : 24.5400   Max.   :32.46   Max.   :18.220   Max.   :21.140  
##                                     NA's   :51       NA's   :36      
##       ACSH      
##  Min.   :23.23  
##  1st Qu.:45.70  
##  Median :55.84  
##  Mean   :54.54  
##  3rd Qu.:62.59  
##  Max.   :80.97  
##  NA's   :228

For SPI calculation purpose, we only need precipitation data whose name is PRCP

#calculate 3-month SPI with Gamma fitting function
SPI3 = spi(wichita$PRCP, scale = 3, distribution = 'Gamma')
plot(SPI3)

#calculate 6-month SPI with Gamma fitting function
SPI6 = spi(wichita$PRCP, scale = 6, distribution = 'Gamma')
plot(SPI6)

#Define 3-month timeseries with properties ts()
plot(spi(ts(wichita$PRCP,freq=12,start=c(1980,1)),scale = 3))

Below is a goodness of fit test for 3-month SPI using Kolmogorov - Smirnov test

# extract  paramaters. There are 12 paramater groups corresponding to 12 months 
PARA = data.frame(SPI3$coefficients)
PARA

##       Series.1.1 Series.1.2 Series.1.3 Series.1.4 Series.1.5 Series.1.6
## alpha   3.344195   2.977991   3.515247   5.412788   8.121795    6.61030
## beta   26.187893  27.610086  33.961871  29.845407  30.110724   46.16895
##       Series.1.7 Series.1.8 Series.1.9 Series.1.10 Series.1.11 Series.1.12
## alpha    5.78867   7.484525   4.843661    4.843779    3.059231    3.153384
## beta    55.62267  40.523197  51.763530   49.315190   59.678715   43.191693

In this example, we select Gamma distribution, so there are two parameters in data frame PARA In order to use Kolmogorov - Smirnov test, empirical cumulative function and theory cumulative function should be calculated

# Accumulated Rainfall for 3-month SPI
PRCP3 = rowSums(embed(wichita$PRCP,3),na.rm=FALSE)
# Adding year and month to data fram PRCP3
PRCP3 = data.frame(wichita$YEAR[3:nrow(wichita)],wichita$MONTH[3:nrow(wichita)], PRCP3)
# Change the colunm names
colnames(PRCP3) = c("year", "month", "accumulatedPre")
# Calculate p value for each month
KSTest = function(X, Para, MON){
  Ecdf = ecdf(X)
  Fempirical = Ecdf(X)
  ## Extract Gamma parameter
  a = Para[1]
  b = Para[2]
  Ftheory = mat.or.vec(length(X),1)
  for (i in 1:length(X)){
    x = X[i]
    xf <- function(t,A,B) { t^(A-1)*exp(-t/B) }
    # compute the integral
    Q  <- integrate(xf,0,x,A=a,B=b)
    Ftheory[i] <- Q$val*b^(-a)/gamma(a)
  }
  # Kolmogorov - Sminov Test, jitter function used to avoid 
  # a possible warning: "In ks.test(Ftheory, Fempirical) : cannot compute exact p-value with ties
  KSTest = ks.test(jitter(Ftheory), jitter(Fempirical))
  pval = KSTest$p.value
  print(paste("---p-value for ", MON," is: ", pval,sep = ""))
  # create matrix for furthur plot 
  Fall = data.frame(X,Fempirical,Ftheory)
  return(Fall)
}
#Determine total length of timeseries X for all months
t = 0
for (iM in 1:12){
    X = subset(PRCP3, PRCP3$month == iM)[,3]
    t = t+length(X)
}
FMain = mat.or.vec(t,3)
MONLabel = mat.or.vec(t,1)
monthnames = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul","Aug","Sept","Oct", "Nov", "Dec")
t1 = 0; t2 = 0; #set index to save acummulated timeseries X, empirical cumulative function, theoretical cumulative function
for (iM in 1:12){
  X = subset(PRCP3, PRCP3$month == iM)[,3]
  t1 = t2 +1  # set lower bound index
  t2 = t2 + length(X) #set upper bound index
  Para = PARA[,iM] #extract parameter according to iM month
  MON = monthnames[iM] #extract the month's name 
  FMain[t1:t2,]= as.matrix(KSTest(X, Para, MON))
  MONLabel[t1:t2] = rep(MON,length(X))
  if (iM == 12){
    print("Finish Kolmogorov Smirnov test")
    print("Null hypothesis: both distributions follow the same distribution")
    print("A p-value is less than 0.05 means a rejection of this null hypothesis")
  }
}

## [1] "---p-value for Jan is: 0.999115261755522"
## [1] "---p-value for Feb is: 0.999115261755522"
## [1] "---p-value for Mar is: 0.999325913718493"
## [1] "---p-value for Apr is: 0.968289837864919"
## [1] "---p-value for May is: 0.999325913718493"
## [1] "---p-value for Jun is: 0.999999997656384"
## [1] "---p-value for Jul is: 0.999325913718493"
## [1] "---p-value for Aug is: 0.837777479258822"
## [1] "---p-value for Sept is: 0.999325913718493"
## [1] "---p-value for Oct is: 0.999325913718493"
## [1] "---p-value for Nov is: 0.999115261755522"
## [1] "---p-value for Dec is: 0.963429714888691"
## [1] "Finish Kolmogorov Smirnov test"
## [1] "Null hypothesis: both distributions follow the same distribution"
## [1] "A p-value is less than 0.05 means a rejection of this null hypothesis"

FMain = data.frame(FMain, MONLabel)
# change file names for FMain
colnames(FMain)  =c("X","Fempirical", "Ftheory","MONLabel")
# change order to display in ggplot2
order = monthnames
library("plyr")
dat <- arrange(transform(FMain,MONLabel=factor(MONLabel,levels=order)),MONLabel)
# plot empirical and theoretical  cumulative for all months in a same figure
    textSize = 8 # set text size for axis title and axis label
    require(ggplot2)

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.2.5

    ggplot()+
    geom_line(data = dat,aes(x = X, y = Ftheory),size = 0.6, colour = "blue")+
    geom_point(data = dat, aes(x = X, y = Fempirical),size =0.6,shape = 1, colour = "red")+
    facet_wrap(~MONLabel, scales = "free")+
    ylab("G(x)")+
    xlab("Time Series X")+
    theme_bw()+
    theme(  axis.text.x = element_text(size=textSize),
            axis.title.x = element_text(face = "bold",size = textSize),
            axis.text.y = element_text(size=textSize),
            axis.title.y = element_text(face = "bold",size = textSize),
            legend.position = "none",
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            strip.background = element_blank(),
            strip.text = element_text(size = textSize, face = "bold"))

Note: blue line is theoretical cumulative curve while red point is empirical cumulative value.

Standardized Precipitation Index (SPI) calculation and its goodness of fit testing with Gamma distribution

Manh Hung Le

August 17, 2016