Dataset #2

Snow Cover Extent in the Northern Hemisphere (1976-2016)

These data are from Tamino’s Climate Data Service (https://tamino.wordpress.com/climate-data-links/).

snow <- readRDS("snow.NHem.rds")

Summary of Data

summary(snow)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.241   6.504  26.820  25.000  42.270  51.320
tsp(snow) # start time, end time, and frequency of the data
## [1] 1976.000 2016.917   12.000

Trend in time

Fit a linear model to see if there is a trend in the data as a function of time.

snowtime <- time(snow)
snowlm <- lm(snow ~ snowtime)
summary(snowlm)
## 
## Call:
## lm(formula = snow ~ snowtime)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.929 -18.436   1.441  17.023  25.753 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  86.49762  126.69933   0.683    0.495
## snowtime     -0.03080    0.06346  -0.485    0.628
## 
## Residual standard error: 16.66 on 490 degrees of freedom
## Multiple R-squared:  0.0004806,  Adjusted R-squared:  -0.001559 
## F-statistic: 0.2356 on 1 and 490 DF,  p-value: 0.6276

Plot with trendline

plot(snow, ylab= "Snow Cover (million Km ^ 2)")  # calling plot.ts(seaice) will be the same

#Add trendline from the linear model
abline(snowlm, col="black", lwd=2)

Seasonality and Decomposition of Snow Cover Data

Calculate a moving average to remove the seasonal component of the data. This enables us to see the parts of the trend that aren’t purely linear like we get from fitting a linear model to the data.

n <- length(snow) # length of the record
m.hat <- rep(NA,n) # an empty vector to store results
for(i in 7:(n-6)){
  m.hat[i] <- (0.5*snow[i-6] + snow[i-5] + snow[i-4] + snow[i-3] + snow[i-2] + 
    snow[i-1] + snow[i] + snow[i+1] + snow[i+2] + snow[i+3] + snow[i+4] + snow[i+5] + 
    0.5*snow[i+6])/12   
}

m.hat <- ts(m.hat) # make it a time series
# give it the right start time and frequency using tsp()
tsp(m.hat) <- tsp(snow) 
plot(m.hat,ylab=expression('Snow Cover Extent (million Km ^ 2)')) # here is the trend.

Now use the filter function to calculate a moving average. This is a simpler method than writing a loop function.

Step 1. Calculate the estimate of the trend (m.hat).

f <- frequency(snow)
filt <- c(0.5, rep(1, times=f-1), 0.5)/f
m.hat <- filter(x=snow, filter=filt)

Step 2. Calculate an estimate of the seasonal trend (s.hat) and plot.

s.hat <- snow - m.hat
par(mfcol=c(2,1))
plot(s.hat, ylab="Snow Cover Extent (million Km ^ 2)")
boxplot(s.hat ~ cycle(s.hat), xlab="Month", ylab="Snow Cover Extent (million Km ^ 2)")

By subtraction we got an estimate of the seasonal component and by making boxplots by month we see the Snow Cover extent shrink during the Northern Hemisphere summer and increase during the winter.

Step 3. Calculate the residual (we assume the seasonal cycle is the same every year) by calculating the mean of the seasonal trend (s.hat) by month and treat deviations from the mean as the noise term “z.hat”.

s.hat <- aggregate(s.hat ~ cycle(s.hat), FUN=mean) 
s.hat <- rep(s.hat$s.hat, length(snow)/12)  # repeat this component for each year
s.hat <- ts(s.hat) # make it a ts object
tsp(s.hat) <- tsp(snow)

# Now subtract to get noise
z.hat <- snow - m.hat - s.hat

Step 4. Combine these parts into a single object

snow.hat <- cbind(snow, m.hat, s.hat, z.hat)
plot(snow.hat) # units are still million Km^2 of Snow extent

Decomposition Function

snow.d <- decompose(snow)
plot(snow.d)

Histogram plot of Snow Cover Data

library(moments)
kt <- kurtosis(snow)
sk <- skewness(snow)
my.xlim <- range(snow)

h <- hist(snow, breaks=20, col="lightblue", xlab="Snow Cover Extent (million Km^2)",
          main="", xlim=my.xlim)

xfit <- seq(min(snow), max(snow), length=50)
yfit <- dnorm(xfit, mean=mean(snow), sd=sd(snow))
yfit <- yfit*diff(h$mids[1:2])*length(snow)

lines(xfit, yfit, col="darkblue", lwd=2)

boxplot(snow, horizontal=T, outline=T, axes=F, 
        ylim=my.xlim, col= "lightgreen", add=TRUE, boxwex=3)

text(x= 40, y= 60, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x= 40, y= 55, labels = paste("Skewness=",round(sk,2)),pos = 4)

Stem and leaf plot

stem(snow)
## 
##   The decimal point is at the |
## 
##    2 | 233333444455555566666666667777899990000000113333344444555566667899
##    4 | 0001222333333455689999901122222344444446777889999
##    6 | 0000134556789912249
##    8 | 144577011225578
##   10 | 133469038
##   12 | 22834469
##   14 | 270246
##   16 | 12336788880002334444555568
##   18 | 01222555667889999011133457
##   20 | 0111345670004
##   22 | 15558902
##   24 | 7
##   26 | 9
##   28 | 1333466680344556789
##   30 | 011112245699003466689
##   32 | 01566789124666679
##   34 | 01112445668801266
##   36 | 256991246999
##   38 | 24556670445677899
##   40 | 14577011133477999
##   42 | 03335677890003355556667777889
##   44 | 0012233444456789000012233334445556677779999
##   46 | 001122234555568888999003445666789
##   48 | 00112355588001247
##   50 | 233

Q-Q Plot of Snow Cover Data

qqnorm(snow)
qqline(snow)

Very simple filters practice

Plot with a simple filter to add a moving average.

# Moving average of 12 months or 1 year (ma01)
ma01 <- filter(x=snow, filter=rep(x=1/12, times=12), sides=2)

# Moving average of 6 months (ma6)
ma6 <- filter(x=snow, filter=rep(x=1/6, times=6), sides=2)

plot(snow, col="grey", ylab= "Snow Cover Extent (million Km^2)")
lines(ma6, col="pink", lwd=2)
lines(ma01, col="blue", lwd=2)


# Add trend line from the linear model
abline(snowlm, col="black", lwd=2, lty="dashed")

head(cbind(snow, ma6, ma01), n=10)
##          snow      ma6     ma01
##  [1,] 45.6755       NA       NA
##  [2,] 45.2344       NA       NA
##  [3,] 41.4450 33.13027       NA
##  [4,] 30.0579 26.46927       NA
##  [5,] 22.4751 19.48807       NA
##  [6,] 13.8937 13.44390 26.30391
##  [7,]  5.7095 12.72167 26.58074
##  [8,]  3.3472 14.74852 26.51049
##  [9,]  5.1800 19.47755 26.39463
## [10,] 25.7245 26.69222 26.46812