Data

Loading data & exploration

setwd("C:/Users/weiyi/Desktop/R/Assignment 9")
timeseries<- read.csv("TSdata.csv", 
                     header=TRUE, stringsAsFactors=FALSE)
names(timeseries)

## [1] "Series1" "Series2" "Series3" "Series4"

str(timeseries)

## 'data.frame':    50 obs. of  4 variables:
##  $ Series1: num  0.54 -0.416 -0.99 -0.654 0.284 ...
##  $ Series2: num  0.3054 -0.1642 -0.7635 -0.1915 0.0171 ...
##  $ Series3: num  0.54 -0.416 -0.99 -0.654 0.284 ...
##  $ Series4: num  0.579 -0.347 -0.851 -0.494 0.51 ...

head(timeseries)

##      Series1     Series2    Series3    Series4
## 1  0.5403023  0.30537097  0.5403023  0.5794590
## 2 -0.4161468 -0.16424975 -0.4161468 -0.3471221
## 3 -0.9899925 -0.76348326 -0.9899925 -0.8508374
## 4 -0.6536436 -0.19151577 -0.6536436 -0.4937529
## 5  0.2836622  0.01713637  0.2836622  0.5099753
## 6  0.9601703  0.21359588  0.9601703  1.0392648

summary(timeseries)

##     Series1             Series2             Series3        
##  Min.   :-0.999961   Min.   :-0.781726   Min.   :-0.99234  
##  1st Qu.:-0.663614   1st Qu.:-0.177254   1st Qu.:-0.59265  
##  Median :-0.004425   Median :-0.005987   Median :-0.00122  
##  Mean   :-0.005153   Mean   : 0.008045   Mean   : 0.00574  
##  3rd Qu.: 0.656967   3rd Qu.: 0.210850   3rd Qu.: 0.53656  
##  Max.   : 0.999843   Max.   : 0.746110   Max.   : 0.99984  
##     Series4       
##  Min.   :-0.8508  
##  1st Qu.: 0.2367  
##  Median : 0.5786  
##  Mean   : 0.6259  
##  3rd Qu.: 1.0264  
##  Max.   : 2.6390

Time Series Objects

Next step is to store the data in a time series object in R. To store the data in a time series object, we use the ts() function in R. The function ts() is used to create time-series objects.

ts.all<-ts(timeseries)
ts.all

## Time Series:
## Start = 1 
## End = 50 
## Frequency = 1 
##         Series1      Series2      Series3    Series4
##  1  0.540302306  0.305370972  0.540302306  0.5794590
##  2 -0.416146837 -0.164249746 -0.416146837 -0.3471221
##  3 -0.989992497 -0.763483258 -0.989992497 -0.8508374
##  4 -0.653643621 -0.191515774 -0.653643621 -0.4937529
##  5  0.283662185  0.017136368  0.283662185  0.5099753
##  6  0.960170287  0.213595882  0.960170287  1.0392648
##  7  0.753902254  0.056218318  0.753902254  0.8787469
##  8 -0.145500034 -0.013961374 -0.145500034  0.2251907
##  9 -0.911130262 -0.052530378 -0.911130262 -0.4883454
## 10 -0.839071529 -0.180999339 -0.839071529 -0.8089237
## 11  0.004425698  0.000806093  0.004425698  0.1420512
## 12  0.843853959  0.491995416  0.843853959  0.9229642
## 13  0.907446781  0.202611153  0.907446781  1.3989204
## 14  0.136737218  0.094986272  0.136737218  0.5671922
## 15 -0.759687913 -0.734454867 -0.759687913 -0.2328096
## 16 -0.957659480 -0.412453849 -0.957659480 -0.3986393
## 17 -0.275163338 -0.071194010 -0.275163338  0.4471943
## 18  0.660316708  0.186297890  0.660316708  0.7631822
## 19  0.988704618  0.746109639  0.988704618  1.4406493
## 20  0.408082062  0.290738335  0.093258110  0.7176704
## 21 -0.547729260 -0.161679448 -0.008109384  0.4816611
## 22 -0.999960826 -0.781726376 -0.644937522  0.3929543
## 23 -0.532833020 -0.069813105 -0.526418547  0.2710561
## 24  0.424179007  0.163598866  0.130585072  0.3970542
## 25  0.991202812  0.333127086  0.873374004  1.0974741
## 26  0.646919322  0.350431830  0.280406373  0.9879815
## 27 -0.292138809 -0.023262324 -0.065535224  0.3653770
## 28 -0.962605866 -0.234421884 -0.614721124 -0.5486344
## 29 -0.748057530 -0.473454406 -0.022441143  0.2933174
## 30  0.154251450  0.121953578  0.055233033  0.9216628
## 31  0.914742358  0.559519632  0.370119972  0.8839571
## 32  0.834223361  0.614417533  0.069717505  0.5336537
## 33 -0.013276747 -0.012779319 -0.006866183  1.3308444
## 34 -0.848570275 -0.081900496 -0.091106161  0.9370304
## 35 -0.903692205 -0.216684114 -0.823168422 -0.1396801
## 36 -0.127963690 -0.106485921 -0.127963690  0.9317473
## 37  0.765414052  0.546518416  0.765414052  2.3999923
## 38  0.955073644  0.342769849  0.955073644  2.4247028
## 39  0.266642932  0.076821507  0.266642932  0.6644109
## 40 -0.666938062 -0.413350646 -0.666938062 -0.5300830
## 41 -0.987339278 -0.032366137 -0.987339278  0.5194263
## 42 -0.399985315 -0.217988913 -0.399985315  1.2144959
## 43  0.555113302  0.013845846  0.555113302  1.4565905
## 44  0.999843309  0.352481335  0.999843309  1.2551803
## 45  0.525321989  0.118663790  0.525321989  1.1759749
## 46 -0.432177945 -0.166016706 -0.432177945  0.5776615
## 47 -0.992335469 -0.441632519 -0.992335469  0.6596366
## 48 -0.640144339 -0.566110962 -0.640144339  0.1865261
## 49  0.300592544  0.141014066  0.300592544  2.6389833
## 50  0.964966028  0.645738664  0.964966028  1.5012492

ts.1<-ts(timeseries$Series1)    # to select one series only
ts.1

## Time Series:
## Start = 1 
## End = 50 
## Frequency = 1 
##  [1]  0.540302306 -0.416146837 -0.989992497 -0.653643621  0.283662185
##  [6]  0.960170287  0.753902254 -0.145500034 -0.911130262 -0.839071529
## [11]  0.004425698  0.843853959  0.907446781  0.136737218 -0.759687913
## [16] -0.957659480 -0.275163338  0.660316708  0.988704618  0.408082062
## [21] -0.547729260 -0.999960826 -0.532833020  0.424179007  0.991202812
## [26]  0.646919322 -0.292138809 -0.962605866 -0.748057530  0.154251450
## [31]  0.914742358  0.834223361 -0.013276747 -0.848570275 -0.903692205
## [36] -0.127963690  0.765414052  0.955073644  0.266642932 -0.666938062
## [41] -0.987339278 -0.399985315  0.555113302  0.999843309  0.525321989
## [46] -0.432177945 -0.992335469 -0.640144339  0.300592544  0.964966028

Sometime temporal data has frequency. This means that data is collected at regular intervals. Example: yearly, monthly, quarterly, etc

ts.quart<-ts(timeseries$Series1, frequency = 4) 
ts.quart

##            Qtr1         Qtr2         Qtr3         Qtr4
## 1   0.540302306 -0.416146837 -0.989992497 -0.653643621
## 2   0.283662185  0.960170287  0.753902254 -0.145500034
## 3  -0.911130262 -0.839071529  0.004425698  0.843853959
## 4   0.907446781  0.136737218 -0.759687913 -0.957659480
## 5  -0.275163338  0.660316708  0.988704618  0.408082062
## 6  -0.547729260 -0.999960826 -0.532833020  0.424179007
## 7   0.991202812  0.646919322 -0.292138809 -0.962605866
## 8  -0.748057530  0.154251450  0.914742358  0.834223361
## 9  -0.013276747 -0.848570275 -0.903692205 -0.127963690
## 10  0.765414052  0.955073644  0.266642932 -0.666938062
## 11 -0.987339278 -0.399985315  0.555113302  0.999843309
## 12  0.525321989 -0.432177945 -0.992335469 -0.640144339
## 13  0.300592544  0.964966028

ts.month<-ts(timeseries$Series1, frequency = 12) 
ts.month

##            Jan          Feb          Mar          Apr          May
## 1  0.540302306 -0.416146837 -0.989992497 -0.653643621  0.283662185
## 2  0.907446781  0.136737218 -0.759687913 -0.957659480 -0.275163338
## 3  0.991202812  0.646919322 -0.292138809 -0.962605866 -0.748057530
## 4  0.765414052  0.955073644  0.266642932 -0.666938062 -0.987339278
## 5  0.300592544  0.964966028                                       
##            Jun          Jul          Aug          Sep          Oct
## 1  0.960170287  0.753902254 -0.145500034 -0.911130262 -0.839071529
## 2  0.660316708  0.988704618  0.408082062 -0.547729260 -0.999960826
## 3  0.154251450  0.914742358  0.834223361 -0.013276747 -0.848570275
## 4 -0.399985315  0.555113302  0.999843309  0.525321989 -0.432177945
## 5                                                                 
##            Nov          Dec
## 1  0.004425698  0.843853959
## 2 -0.532833020  0.424179007
## 3 -0.903692205 -0.127963690
## 4 -0.992335469 -0.640144339
## 5

Plotting Time Series

To plot a time series data use the plot.ts() function

plot.ts(ts.all)

Decomposing Time Series

Decomposing a time series means separating the time series into these three components: a trend component, a seasonal component, and an irregular component.
To estimate these we use the decompose() function in R. This function estimates the trend, seasonal, and irregular components of a time series that can be described using an additive model. decompose() returns a list object as its result, where the estimates of the seasonal component, trend component and irregular component are stored in named elements of that list objects, called “seasonal”, “trend”, and “random” respectively.

We proceed to decompose series4 as monthly data using frequency = 12

ts.4<-ts(timeseries$Series4, frequency = 12)
plot.ts(ts.4)

ts.4.d <- decompose(ts.4)
plot(ts.4.d)

The plot above shows the original time series (top), the estimated trend component (second from top), the estimated seasonal component (third from top), and the estimated irregular component (bottom). We see that the estimated trend component showsa steady increase over time.

Try this with series 1,2, and 3 and tell us if there is a trend and seasonality!

Forecasting

If you have a time series that can be described using an additive model with constant level and no seasonality, you can use simple exponential smoothing to make short-term forecasts. Smoothing reduces the variation in a time series!

The simple exponential smoothing method provides a way of estimating the level at the current time point. Smoothing is controlled by the parameter alpha; for the estimate of the level at the current time point. The value of alpha; lies between 0 and 1. Values of alpha that are close to 0 mean that little weight is placed on the most recent observations when making forecasts of future values.

For example, the file http://robjhyndman.com/tsdldata/hurst/precip1.dat contains total annual rainfall in inches. The time series looks as follows:

rain <- scan("http://robjhyndman.com/tsdldata/hurst/precip1.dat",skip=1)
head(rain)

## [1] 23.56 26.07 21.86 31.24 23.65 23.88

str(rain)

##  num [1:100] 23.6 26.1 21.9 31.2 23.6 ...

rainseries <- ts(rain,start=c(1900), frequency=2)
plot.ts(rainseries)

rainseries.d <- decompose(rainseries)
plot(rainseries.d)

From above we can see how there is no trend (stochastic behavior of weather) but strong seasonality.

Holt’s exponential smoothing

Holt’s exponential smoothing estimates the level and slope at the current time point. Smoothing is controlled by two parameters, alpha, for the estimate of the level at the current time point, and beta for the estimate of the slope b of the trend component at the current time point. As with simple exponential smoothing, the paramters alpha and beta have values between 0 and 1, and values that are close to 0 mean that little weight is placed on the most recent observations when making forecasts of future values. With rain data, we can do the following smoothing:

rain.holt <- HoltWinters(rainseries, gamma=FALSE)
rain.holt # to inspect the smoothing results

## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = rainseries, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.1973985
##  beta : 0.3469287
##  gamma: FALSE
## 
## Coefficients:
##         [,1]
## a 26.2701189
## b  0.2851515

We can plot the original time series as a black line, with the forecasted values as a red line on top of that, by typing:

plot(rain.holt)

We can see from the picture that the in-sample forecasts agree pretty well with the observed values, although they tend to lag behind the observed values a little bit..

As for simple exponential smoothing, we can make forecasts for future times not covered by the original time series by using the forecast.HoltWinters() function in the forecast package. For example, our time series data for rain ranges from 1900-1950, so we can make predictions for 1950 to 1970 (20 more data points), and plot them, by typing:

library(forecast)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: timeDate

## This is forecast 7.3

rain.forecasts <- forecast.HoltWinters(rain.holt, h=20)
plot.forecast(rain.forecasts)

The forecasts are shown as a blue line, with the 80% prediction intervals as an blue shaded area, and the 95% prediction intervals as a gray shaded area.

Now, lets try it with the time series data series4 assuming it is monthly. Notices how the frequency = 12 affects the time series

ts.s4<-ts(timeseries$Series4, start = c(2000), frequency = 12)
plot(ts.s4)

ts.s4.holt <- HoltWinters(ts.s4, gamma=FALSE)
plot(ts.s4.holt)

ts.s4.forecasts <- forecast.HoltWinters(ts.s4.holt, h=12)  # forecast 1 year (12 months)
plot.forecast(ts.s4.forecasts)

Now lets try the gamma = TRUE paramaters. The gamma parameter is used for the seasonal component. If set to FALSE, an non-seasonal model is fitted.

ts.s4.holt <- HoltWinters(ts.s4, gamma=TRUE)
plot(ts.s4.holt)

ts.s4.forecasts <- forecast.HoltWinters(ts.s4.holt, h=12)  # forecast 1 year (12 months)
plot.forecast(ts.s4.forecasts)

sp500<- read.csv("sp500_5yrs.csv", header=TRUE, stringsAsFactors=FALSE)
names(sp500)

## [1] "Date"      "Open"      "High"      "Low"       "Close"     "Volume"   
## [7] "Adj.Close"

sp500.ts<-ts(sp500$Open,start=c(2010),frequency = 12)
plot(sp500.ts)

sp500.ts.d<-decompose(sp500.ts)
plot(sp500.ts.d)

Any thing wrong here????

BINGO

sp500.sorted<-sp500[order(as.Date(sp500$Date, format="%d/%m/%Y")),]

sp500.sorted.ts<-ts(sp500.sorted$Open,start=c(2010),frequency = 12)
plot(sp500.sorted.ts)

sp500.sorted.ts.d<-decompose(sp500.sorted.ts)
plot(sp500.sorted.ts.d)

sp500.holt.T <- HoltWinters(sp500.sorted.ts, gamma=TRUE)
sp500.holt.F <- HoltWinters(sp500.sorted.ts, gamma=FALSE)
sp500.forecasts <- forecast.HoltWinters(sp500.holt.T, h=12)  # forecast 1 year (12 months)
plot.forecast(sp500.forecasts)

sp500.forecasts.F <- forecast.HoltWinters(sp500.holt.F, h=12)  # forecast 1 year (12 months)
plot.forecast(sp500.forecasts.F)

Forecasting Using an ARIMA Model

In time series analysis, an autoregressive integrated moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA) model. You can estimate the parameters of an ARIMA(p,d,q) model using the arima() function from the package library(forecast)

sp500.arima<-arima(sp500.sorted.ts, c(0,0,0))    # this models is equivalent to ARMA(0,0,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)

sp500.arima<-arima(sp500.sorted.ts, c(0,1,0))    # this models is equivalent to ARMA(0,1,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)

sp500.arima<-arima(sp500.sorted.ts, c(1,1,0))   # this models is equivalent to ARMA(1,1,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)

If your time series is stationary, or if you have transformed it to a stationary time series by differencing d times, the next step is to select the appropriate ARIMA model, which means finding the values of most appropriate values of p and q for an ARIMA(p,d,q) model. To do this, you usually need to examine the correlogram and partial correlogram of the stationary time series.

For more details visit this site to learn more abour how to select an ARIMA model https://people.duke.edu/~rnau/arimrule.htm

Source: http://a-little-book-of-r-for-time-series.readthedocs.io/en/latest/src/timeseries.html#time-series-analysis.

BA - TimeSeries