Loading data & exploration
setwd("C:/Users/weiyi/Desktop/R/Assignment 9")
timeseries<- read.csv("TSdata.csv",
header=TRUE, stringsAsFactors=FALSE)
names(timeseries)
## [1] "Series1" "Series2" "Series3" "Series4"
str(timeseries)
## 'data.frame': 50 obs. of 4 variables:
## $ Series1: num 0.54 -0.416 -0.99 -0.654 0.284 ...
## $ Series2: num 0.3054 -0.1642 -0.7635 -0.1915 0.0171 ...
## $ Series3: num 0.54 -0.416 -0.99 -0.654 0.284 ...
## $ Series4: num 0.579 -0.347 -0.851 -0.494 0.51 ...
head(timeseries)
## Series1 Series2 Series3 Series4
## 1 0.5403023 0.30537097 0.5403023 0.5794590
## 2 -0.4161468 -0.16424975 -0.4161468 -0.3471221
## 3 -0.9899925 -0.76348326 -0.9899925 -0.8508374
## 4 -0.6536436 -0.19151577 -0.6536436 -0.4937529
## 5 0.2836622 0.01713637 0.2836622 0.5099753
## 6 0.9601703 0.21359588 0.9601703 1.0392648
summary(timeseries)
## Series1 Series2 Series3
## Min. :-0.999961 Min. :-0.781726 Min. :-0.99234
## 1st Qu.:-0.663614 1st Qu.:-0.177254 1st Qu.:-0.59265
## Median :-0.004425 Median :-0.005987 Median :-0.00122
## Mean :-0.005153 Mean : 0.008045 Mean : 0.00574
## 3rd Qu.: 0.656967 3rd Qu.: 0.210850 3rd Qu.: 0.53656
## Max. : 0.999843 Max. : 0.746110 Max. : 0.99984
## Series4
## Min. :-0.8508
## 1st Qu.: 0.2367
## Median : 0.5786
## Mean : 0.6259
## 3rd Qu.: 1.0264
## Max. : 2.6390
Next step is to store the data in a time series object in R. To store the data in a time series object, we use the ts() function in R. The function ts() is used to create time-series objects.
ts.all<-ts(timeseries)
ts.all
## Time Series:
## Start = 1
## End = 50
## Frequency = 1
## Series1 Series2 Series3 Series4
## 1 0.540302306 0.305370972 0.540302306 0.5794590
## 2 -0.416146837 -0.164249746 -0.416146837 -0.3471221
## 3 -0.989992497 -0.763483258 -0.989992497 -0.8508374
## 4 -0.653643621 -0.191515774 -0.653643621 -0.4937529
## 5 0.283662185 0.017136368 0.283662185 0.5099753
## 6 0.960170287 0.213595882 0.960170287 1.0392648
## 7 0.753902254 0.056218318 0.753902254 0.8787469
## 8 -0.145500034 -0.013961374 -0.145500034 0.2251907
## 9 -0.911130262 -0.052530378 -0.911130262 -0.4883454
## 10 -0.839071529 -0.180999339 -0.839071529 -0.8089237
## 11 0.004425698 0.000806093 0.004425698 0.1420512
## 12 0.843853959 0.491995416 0.843853959 0.9229642
## 13 0.907446781 0.202611153 0.907446781 1.3989204
## 14 0.136737218 0.094986272 0.136737218 0.5671922
## 15 -0.759687913 -0.734454867 -0.759687913 -0.2328096
## 16 -0.957659480 -0.412453849 -0.957659480 -0.3986393
## 17 -0.275163338 -0.071194010 -0.275163338 0.4471943
## 18 0.660316708 0.186297890 0.660316708 0.7631822
## 19 0.988704618 0.746109639 0.988704618 1.4406493
## 20 0.408082062 0.290738335 0.093258110 0.7176704
## 21 -0.547729260 -0.161679448 -0.008109384 0.4816611
## 22 -0.999960826 -0.781726376 -0.644937522 0.3929543
## 23 -0.532833020 -0.069813105 -0.526418547 0.2710561
## 24 0.424179007 0.163598866 0.130585072 0.3970542
## 25 0.991202812 0.333127086 0.873374004 1.0974741
## 26 0.646919322 0.350431830 0.280406373 0.9879815
## 27 -0.292138809 -0.023262324 -0.065535224 0.3653770
## 28 -0.962605866 -0.234421884 -0.614721124 -0.5486344
## 29 -0.748057530 -0.473454406 -0.022441143 0.2933174
## 30 0.154251450 0.121953578 0.055233033 0.9216628
## 31 0.914742358 0.559519632 0.370119972 0.8839571
## 32 0.834223361 0.614417533 0.069717505 0.5336537
## 33 -0.013276747 -0.012779319 -0.006866183 1.3308444
## 34 -0.848570275 -0.081900496 -0.091106161 0.9370304
## 35 -0.903692205 -0.216684114 -0.823168422 -0.1396801
## 36 -0.127963690 -0.106485921 -0.127963690 0.9317473
## 37 0.765414052 0.546518416 0.765414052 2.3999923
## 38 0.955073644 0.342769849 0.955073644 2.4247028
## 39 0.266642932 0.076821507 0.266642932 0.6644109
## 40 -0.666938062 -0.413350646 -0.666938062 -0.5300830
## 41 -0.987339278 -0.032366137 -0.987339278 0.5194263
## 42 -0.399985315 -0.217988913 -0.399985315 1.2144959
## 43 0.555113302 0.013845846 0.555113302 1.4565905
## 44 0.999843309 0.352481335 0.999843309 1.2551803
## 45 0.525321989 0.118663790 0.525321989 1.1759749
## 46 -0.432177945 -0.166016706 -0.432177945 0.5776615
## 47 -0.992335469 -0.441632519 -0.992335469 0.6596366
## 48 -0.640144339 -0.566110962 -0.640144339 0.1865261
## 49 0.300592544 0.141014066 0.300592544 2.6389833
## 50 0.964966028 0.645738664 0.964966028 1.5012492
ts.1<-ts(timeseries$Series1) # to select one series only
ts.1
## Time Series:
## Start = 1
## End = 50
## Frequency = 1
## [1] 0.540302306 -0.416146837 -0.989992497 -0.653643621 0.283662185
## [6] 0.960170287 0.753902254 -0.145500034 -0.911130262 -0.839071529
## [11] 0.004425698 0.843853959 0.907446781 0.136737218 -0.759687913
## [16] -0.957659480 -0.275163338 0.660316708 0.988704618 0.408082062
## [21] -0.547729260 -0.999960826 -0.532833020 0.424179007 0.991202812
## [26] 0.646919322 -0.292138809 -0.962605866 -0.748057530 0.154251450
## [31] 0.914742358 0.834223361 -0.013276747 -0.848570275 -0.903692205
## [36] -0.127963690 0.765414052 0.955073644 0.266642932 -0.666938062
## [41] -0.987339278 -0.399985315 0.555113302 0.999843309 0.525321989
## [46] -0.432177945 -0.992335469 -0.640144339 0.300592544 0.964966028
Sometime temporal data has frequency. This means that data is collected at regular intervals. Example: yearly, monthly, quarterly, etc
ts.quart<-ts(timeseries$Series1, frequency = 4)
ts.quart
## Qtr1 Qtr2 Qtr3 Qtr4
## 1 0.540302306 -0.416146837 -0.989992497 -0.653643621
## 2 0.283662185 0.960170287 0.753902254 -0.145500034
## 3 -0.911130262 -0.839071529 0.004425698 0.843853959
## 4 0.907446781 0.136737218 -0.759687913 -0.957659480
## 5 -0.275163338 0.660316708 0.988704618 0.408082062
## 6 -0.547729260 -0.999960826 -0.532833020 0.424179007
## 7 0.991202812 0.646919322 -0.292138809 -0.962605866
## 8 -0.748057530 0.154251450 0.914742358 0.834223361
## 9 -0.013276747 -0.848570275 -0.903692205 -0.127963690
## 10 0.765414052 0.955073644 0.266642932 -0.666938062
## 11 -0.987339278 -0.399985315 0.555113302 0.999843309
## 12 0.525321989 -0.432177945 -0.992335469 -0.640144339
## 13 0.300592544 0.964966028
ts.month<-ts(timeseries$Series1, frequency = 12)
ts.month
## Jan Feb Mar Apr May
## 1 0.540302306 -0.416146837 -0.989992497 -0.653643621 0.283662185
## 2 0.907446781 0.136737218 -0.759687913 -0.957659480 -0.275163338
## 3 0.991202812 0.646919322 -0.292138809 -0.962605866 -0.748057530
## 4 0.765414052 0.955073644 0.266642932 -0.666938062 -0.987339278
## 5 0.300592544 0.964966028
## Jun Jul Aug Sep Oct
## 1 0.960170287 0.753902254 -0.145500034 -0.911130262 -0.839071529
## 2 0.660316708 0.988704618 0.408082062 -0.547729260 -0.999960826
## 3 0.154251450 0.914742358 0.834223361 -0.013276747 -0.848570275
## 4 -0.399985315 0.555113302 0.999843309 0.525321989 -0.432177945
## 5
## Nov Dec
## 1 0.004425698 0.843853959
## 2 -0.532833020 0.424179007
## 3 -0.903692205 -0.127963690
## 4 -0.992335469 -0.640144339
## 5
To plot a time series data use the plot.ts() function
plot.ts(ts.all)
Decomposing a time series means separating the time series into these three components: a trend component, a seasonal component, and an irregular component.
To estimate these we use the decompose() function in R. This function estimates the trend, seasonal, and irregular components of a time series that can be described using an additive model. decompose() returns a list object as its result, where the estimates of the seasonal component, trend component and irregular component are stored in named elements of that list objects, called “seasonal”, “trend”, and “random” respectively.
We proceed to decompose series4 as monthly data using frequency = 12
ts.4<-ts(timeseries$Series4, frequency = 12)
plot.ts(ts.4)
ts.4.d <- decompose(ts.4)
plot(ts.4.d)
The plot above shows the original time series (top), the estimated trend component (second from top), the estimated seasonal component (third from top), and the estimated irregular component (bottom). We see that the estimated trend component showsa steady increase over time.
Try this with series 1,2, and 3 and tell us if there is a trend and seasonality!
If you have a time series that can be described using an additive model with constant level and no seasonality, you can use simple exponential smoothing to make short-term forecasts. Smoothing reduces the variation in a time series!
The simple exponential smoothing method provides a way of estimating the level at the current time point. Smoothing is controlled by the parameter alpha; for the estimate of the level at the current time point. The value of alpha; lies between 0 and 1. Values of alpha that are close to 0 mean that little weight is placed on the most recent observations when making forecasts of future values.
For example, the file http://robjhyndman.com/tsdldata/hurst/precip1.dat contains total annual rainfall in inches. The time series looks as follows:
rain <- scan("http://robjhyndman.com/tsdldata/hurst/precip1.dat",skip=1)
head(rain)
## [1] 23.56 26.07 21.86 31.24 23.65 23.88
str(rain)
## num [1:100] 23.6 26.1 21.9 31.2 23.6 ...
rainseries <- ts(rain,start=c(1900), frequency=2)
plot.ts(rainseries)
rainseries.d <- decompose(rainseries)
plot(rainseries.d)
From above we can see how there is no trend (stochastic behavior of weather) but strong seasonality.
Holt’s exponential smoothing estimates the level and slope at the current time point. Smoothing is controlled by two parameters, alpha, for the estimate of the level at the current time point, and beta for the estimate of the slope b of the trend component at the current time point. As with simple exponential smoothing, the paramters alpha and beta have values between 0 and 1, and values that are close to 0 mean that little weight is placed on the most recent observations when making forecasts of future values. With rain data, we can do the following smoothing:
rain.holt <- HoltWinters(rainseries, gamma=FALSE)
rain.holt # to inspect the smoothing results
## Holt-Winters exponential smoothing with trend and without seasonal component.
##
## Call:
## HoltWinters(x = rainseries, gamma = FALSE)
##
## Smoothing parameters:
## alpha: 0.1973985
## beta : 0.3469287
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 26.2701189
## b 0.2851515
We can plot the original time series as a black line, with the forecasted values as a red line on top of that, by typing:
plot(rain.holt)
We can see from the picture that the in-sample forecasts agree pretty well with the observed values, although they tend to lag behind the observed values a little bit..
As for simple exponential smoothing, we can make forecasts for future times not covered by the original time series by using the forecast.HoltWinters() function in the forecast package. For example, our time series data for rain ranges from 1900-1950, so we can make predictions for 1950 to 1970 (20 more data points), and plot them, by typing:
library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
rain.forecasts <- forecast.HoltWinters(rain.holt, h=20)
plot.forecast(rain.forecasts)
The forecasts are shown as a blue line, with the 80% prediction intervals as an blue shaded area, and the 95% prediction intervals as a gray shaded area.
Now, lets try it with the time series data series4 assuming it is monthly. Notices how the frequency = 12 affects the time series
ts.s4<-ts(timeseries$Series4, start = c(2000), frequency = 12)
plot(ts.s4)
ts.s4.holt <- HoltWinters(ts.s4, gamma=FALSE)
plot(ts.s4.holt)
ts.s4.forecasts <- forecast.HoltWinters(ts.s4.holt, h=12) # forecast 1 year (12 months)
plot.forecast(ts.s4.forecasts)
Now lets try the
gamma = TRUE paramaters. The gamma parameter is used for the seasonal component. If set to FALSE, an non-seasonal model is fitted.
ts.s4.holt <- HoltWinters(ts.s4, gamma=TRUE)
plot(ts.s4.holt)
ts.s4.forecasts <- forecast.HoltWinters(ts.s4.holt, h=12) # forecast 1 year (12 months)
plot.forecast(ts.s4.forecasts)
sp500<- read.csv("sp500_5yrs.csv", header=TRUE, stringsAsFactors=FALSE)
names(sp500)
## [1] "Date" "Open" "High" "Low" "Close" "Volume"
## [7] "Adj.Close"
sp500.ts<-ts(sp500$Open,start=c(2010),frequency = 12)
plot(sp500.ts)
sp500.ts.d<-decompose(sp500.ts)
plot(sp500.ts.d)
Any thing wrong here????
BINGO
sp500.sorted<-sp500[order(as.Date(sp500$Date, format="%d/%m/%Y")),]
sp500.sorted.ts<-ts(sp500.sorted$Open,start=c(2010),frequency = 12)
plot(sp500.sorted.ts)
sp500.sorted.ts.d<-decompose(sp500.sorted.ts)
plot(sp500.sorted.ts.d)
sp500.holt.T <- HoltWinters(sp500.sorted.ts, gamma=TRUE)
sp500.holt.F <- HoltWinters(sp500.sorted.ts, gamma=FALSE)
sp500.forecasts <- forecast.HoltWinters(sp500.holt.T, h=12) # forecast 1 year (12 months)
plot.forecast(sp500.forecasts)
sp500.forecasts.F <- forecast.HoltWinters(sp500.holt.F, h=12) # forecast 1 year (12 months)
plot.forecast(sp500.forecasts.F)
In time series analysis, an autoregressive integrated moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA) model. You can estimate the parameters of an ARIMA(p,d,q) model using the arima() function from the package library(forecast)
sp500.arima<-arima(sp500.sorted.ts, c(0,0,0)) # this models is equivalent to ARMA(0,0,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)
sp500.arima<-arima(sp500.sorted.ts, c(0,1,0)) # this models is equivalent to ARMA(0,1,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)
sp500.arima<-arima(sp500.sorted.ts, c(1,1,0)) # this models is equivalent to ARMA(1,1,0)
sp500.arima.forecasts <- forecast.Arima(sp500.arima, h=12)
plot(sp500.arima.forecasts)
If your time series is stationary, or if you have transformed it to a stationary time series by differencing d times, the next step is to select the appropriate ARIMA model, which means finding the values of most appropriate values of p and q for an ARIMA(p,d,q) model. To do this, you usually need to examine the correlogram and partial correlogram of the stationary time series.
For more details visit this site to learn more abour how to select an ARIMA model https://people.duke.edu/~rnau/arimrule.htm