Dataset #1

Sea Ice Extent in the Northern Hemisphere (1979-2016)

These data are from Tamino’s Climate Data Service (https://tamino.wordpress.com/climate-data-links/). There are two missing “NA” values in this dataset that were approximated.

seaice <- readRDS("ice.nx.rds")

Summary of data

summary(seaice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.630   9.218  12.380  11.750  14.540  16.520
tsp(seaice) # start time, end time, and frequency of the data
## [1] 1979.000 2016.917   12.000

Trend in time

Fit a linear model to see if there is a trend in the data as a function of time.

icetime <- time(seaice)
icelm <- lm(seaice ~ icetime)
summary(icelm)
## 
## Call:
## lm(formula = seaice ~ icetime)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2569 -2.5551  0.6599  2.8627  4.3937 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 129.20647   26.54119   4.868 1.56e-06 ***
## icetime      -0.05879    0.01328  -4.425 1.21e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.112 on 454 degrees of freedom
## Multiple R-squared:  0.04135,    Adjusted R-squared:  0.03924 
## F-statistic: 19.58 on 1 and 454 DF,  p-value: 1.207e-05

Plot with Trendline

plot(seaice, ylab="Sea Ice Extent (million Km ^ 2)")  # calling plot.ts(seaice) will be the same
# Add trend line from the linear model
abline(icelm, col="black", lwd=2)

Seasonality and Decompostion of Sea Ice Data

Calculate a moving average to remove the seasonal component of the data. This enables us to see the parts of the trend that aren’t purely linear like we get from fitting a linear model to the data.

n <- length(seaice) # length of the record
m.hat <- rep(NA,n) # an empty vector to store results
for(i in 7:(n-6)){
  m.hat[i] <- (0.5*seaice[i-6] + seaice[i-5] + seaice[i-4] + seaice[i-3] + seaice[i-2] + 
    seaice[i-1] + seaice[i] + seaice[i+1] + seaice[i+2] + seaice[i+3] + seaice[i+4] + seaice[i+5] + 
    0.5*seaice[i+6])/12   
}

m.hat <- ts(m.hat) # make it a time series
# give it the right start time and frequency using tsp()
tsp(m.hat) <- tsp(seaice) 
plot(m.hat,ylab=expression('Sea Ice Extent (million Km ^ 2)')) # here is the trend.

Now use the filter function to calculate a moving average. This is a simpler method than writing a loop function.

Step 1. Calculate the estimate of the trend (m.hat).

f <- frequency(seaice)
filt <- c(0.5, rep(1, times=f-1), 0.5)/f
m.hat <- filter(x=seaice, filter=filt)

Step 2. Calculate an estimate of the seasonal trend (s.hat) and plot.

s.hat <- seaice - m.hat
par(mfcol=c(2,1))
plot(s.hat, ylab="Sea Ice (million Km ^ 2)")
boxplot(s.hat ~ cycle(s.hat), xlab="Month", ylab="Sea Ice (million Km ^ 2)")

By subtraction we got an estimate of the seasonal component and by making boxplots by month we see the sea ice extent shrink during the Northern Hemisphere summer and increase during the winter.

Step 3. Calculate the residual (we assume the seasonal cycle is the same every year) by calculating the mean of the seasonal trend (s.hat) by month and treat deviations from the mean as the noise term “z.hat”.

s.hat <- aggregate(s.hat ~ cycle(s.hat), FUN=mean) 
s.hat <- rep(s.hat$s.hat, length(seaice)/12)  # repeat this component for each year
s.hat <- ts(s.hat) # make it a ts object
tsp(s.hat) <- tsp(seaice)

# Now subtract to get noise
z.hat <- seaice - m.hat - s.hat

Step 4. Combine these parts into a single object

seaice.hat <- cbind(seaice, m.hat, s.hat, z.hat)
plot(seaice.hat) # units are still million Km^2 of sea ice extent

Decomposition Function

This function is a much easier method of the steps performed above.

seaice.d <- decompose(seaice)
plot(seaice.d)

Histogram plot of Sea Ice Data

library(moments)
kt <- kurtosis(seaice)
sk <- skewness(seaice)
my.xlim <- range(seaice)

h <- hist(seaice, breaks=10, col="lightblue", xlab="Sea Ice Extent (million million Km^2)",
          main="", xlim=my.xlim)

xfit <- seq(min(seaice), max(seaice), length=100)
yfit <- dnorm(xfit, mean=mean(seaice), sd=sd(seaice))
yfit <- yfit*diff(h$mids[1:2])*length(seaice)

lines(xfit, yfit, col="darkblue", lwd=2)

boxplot(seaice, horizontal=T, outline=T, axes=F, 
        ylim=my.xlim, col= "lightgreen", add=TRUE, boxwex=3)

text(x= 4, y= 70, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x= 4, y= 65, labels = paste("Skewness=",round(sk,2)),pos = 4)

Stem and leaf plot

stem(seaice)
## 
##   The decimal point is at the |
## 
##    3 | 6
##    4 | 3677779
##    5 | 34446667
##    6 | 0001112223333455666688889999
##    7 | 1112223344455555556666777899999
##    8 | 00000111122333444455567788889999
##    9 | 0001122233344455556667777777889999
##   10 | 00111223333445555566667778888999
##   11 | 000011122233444445666667777888888999
##   12 | 001111112222333334444555666666677788899999
##   13 | 000111111222222223333444445555666666667778888888889999999999
##   14 | 001111122222333444445555555555556666667777777778888888999999
##   15 | 00000111111222222222223333333334444444444555566666666667777777778889
##   16 | 0000111122222245

Q-Q Plot of Sea Ice Data

qqnorm(seaice)
qqline(seaice)

Very simple filters practice

Plot with a simple filter to add a moving average.

# Moving average of 12 months or 1 year (ma01)
ma01 <- filter(x=seaice, filter=rep(x=1/12, times=12), sides=2)

# Moving average of 6 months (ma6)
ma6 <- filter(x=seaice, filter=rep(x=1/6, times=6), sides=2)

plot(seaice, col="grey", ylab= "Sea Ice Extent (million Km^2)")
lines(ma6, col="pink", lwd=2)
lines(ma01, col="blue", lwd=2)


# Add trend line from the linear model
abline(icelm, col="black", lwd=2, lty="dashed")

head(cbind(seaice, ma6, ma01), n=10)
##       seaice       ma6     ma01
##  [1,]  15.60        NA       NA
##  [2,]  16.38        NA       NA
##  [3,]  16.52 15.131667       NA
##  [4,]  15.56 14.285000       NA
##  [5,]  14.08 12.918333       NA
##  [6,]  12.65 11.368333 12.57333
##  [7,]  10.52 10.345000 12.52750
##  [8,]   8.18  9.861667 12.50167
##  [9,]   7.22 10.015000 12.47583
## [10,]   9.42 10.770000 12.47500