Please submit exercises 2.1, 2.2, 2.3 and 2.6 from the Hyndman online Forecasting book

Question 2.1 Use the help function to explore what the series gold, woolyrnq and gas represent. Use autoplot() to plot each of these in separate plots. What is the frequency of each series? Hint: apply the frequency() function. Use which.max() to spot the outlier in the gold series. Which observation was it?

library(fpp3) 
## Warning: package 'fpp3' was built under R version 3.6.2
## ── Attaching packages ───────────────────────────────────────────────────────────────── fpp3 0.3 ──
## ✔ tibble      3.0.3     ✔ tsibble     0.9.2
## ✔ dplyr       1.0.2     ✔ tsibbledata 0.2.0
## ✔ tidyr       1.1.2     ✔ feasts      0.1.5
## ✔ lubridate   1.7.9     ✔ fable       0.2.1
## ✔ ggplot2     3.2.1
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'lubridate' was built under R version 3.6.2
## Warning: package 'tsibble' was built under R version 3.6.2
## Warning: package 'tsibbledata' was built under R version 3.6.2
## Warning: package 'feasts' was built under R version 3.6.2
## Warning: package 'fabletools' was built under R version 3.6.2
## Warning: package 'fable' was built under R version 3.6.2
## ── Conflicts ──────────────────────────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()   masks base::date()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
library(fpp2)
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.6.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
library(forecast)
# Open the help page for the `gold` series, then draw it as a time plot.
help("gold")
autoplot(gold)

Daily morning gold prices in US dollars. 1 January 1985 – 31 March 1989.

# Number of observations per unit of time recorded for `gold`.
frequency(gold)
## [1] 1
# Position of the largest value in `gold`; used here to locate the outlier.
which.max(gold)
## [1] 770

Frequency is 1 and the outlier is at the 770th observation.

# Open the help page for the `woolyrnq` series, then draw it as a time plot.
help("woolyrnq")
autoplot(woolyrnq)

Quarterly production of woollen yarn in Australia: tonnes. Mar 1965 – Sep 1994.

# Number of observations per unit of time recorded for `woolyrnq`.
frequency(woolyrnq)
## [1] 4

Frequency is 4

# Open the help page for the `gas` series, then draw it as a time plot.
help("gas")
autoplot(gas)

Australian monthly gas production: 1956–1995.

# Number of observations per unit of time recorded for `gas`.
frequency(gas)
## [1] 12

Frequency is 12

Question 2.2

# Read the tutorial data set from CSV (first row holds the column names).
# NOTE(review): this is a machine-specific absolute path; point it at your
# own copy of tute1.csv before re-running.
tute1 <- read.csv("/Users/christinakasman/Desktop/tute1.csv", header = TRUE)

# Drop the first column (presumably the date/quarter label -- TODO confirm)
# and treat the remaining columns as quarterly series starting in 1981.
mytimeseries <- ts(tute1[, -1], start = 1981, frequency = 4)

# ggfortify is deliberately NOT attached here. Loading it after forecast
# overwrites forecast's autoplot.ts (and related S3 methods), and its
# autoplot.ts defaults to facets = TRUE, which would hide the difference
# between the two plots below and alter every later autoplot() call.

# One panel per series.
autoplot(mytimeseries, facets = TRUE)

# All series overlaid on a single panel (forecast's default, facets = FALSE).
autoplot(mytimeseries)

Question 2.3

# Attach the Excel reader and import the retail workbook, skipping the first
# row of the sheet so that the following row supplies the column names.
library(readxl)
retaildata <- read_excel(
  path = "/Users/christinakasman/Desktop/retail.xlsx",
  skip = 1
)
# Preview the first six rows of the imported table.
head(retaildata, n = 6)
## # A tibble: 6 x 190
##   `Series ID` A3349335T A3349627V A3349338X A3349398A A3349468W A3349336V
##         <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1       30042      303.      41.7      63.9      409.      65.8      91.8
## 2       30072      298.      43.1      64        405.      65.8     103. 
## 3       30103      298       40.3      62.7      401       62.3     105  
## 4       30133      308.      40.9      65.6      414.      68.2     106  
## 5       30164      299.      42.1      62.6      404.      66        96.9
## 6       30195      305.      42        64.4      412.      62.3      97.5
## # … with 183 more variables: A3349337W <dbl>, A3349397X <dbl>,
## #   A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>, A3349790V <dbl>,
## #   A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>, A3349873A <dbl>,
## #   A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>, A3349789K <dbl>,
## #   A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>, A3349799R <dbl>,
## #   A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>, A3349416V <dbl>,
## #   A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>, A3349727C <dbl>,
## #   A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>, A3349349F <dbl>,
## #   A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>, A3349566A <dbl>,
## #   A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>, A3349561R <dbl>,
## #   A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>, A3349637X <dbl>,
## #   A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>, A3349719C <dbl>,
## #   A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>, A3349480L <dbl>,
## #   A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>, A3349481R <dbl>,
## #   A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>, A3349654A <dbl>,
## #   A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>, A3349656F <dbl>,
## #   A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>, A3349360V <dbl>,
## #   A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>, A3349575C <dbl>,
## #   A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>, A3349433W <dbl>,
## #   A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>, A3349815C <dbl>,
## #   A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>, A3349742A <dbl>,
## #   A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>, A3349824F <dbl>,
## #   A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>, A3349434X <dbl>,
## #   A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>, A3349908R <dbl>,
## #   A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>, A3349365F <dbl>,
## #   A3349746K <dbl>, A3349370X <dbl>, …
# Build a monthly series (12 observations per year) from a single retail
# column, with the first observation placed in period 4 of 1982, then
# explore it with the standard set of time-series graphics.
myts <- ts(
  data = retaildata[, "A3349413L"],
  start = c(1982, 4),
  frequency = 12
)
# Time plot of the whole series.
autoplot(myts)

# Seasonal plot: one line per year against the month.
ggseasonplot(myts)

# Seasonal subseries plot: one mini time plot per month.
ggsubseriesplot(myts)

# Scatterplots of the series against its own lagged values.
gglagplot(myts)

# Autocorrelation function (correlogram).
ggAcf(myts)

The above plots show that there is a general increasing trend. There is also seasonality at the beginning and the end of the year. In addition, the lag plots show that there is a strong positive correlation at most lags.

Question 2.6 Use the following graphics functions: autoplot(), ggseasonplot(), ggsubseriesplot(), gglagplot(), ggAcf() and explore features from the following time series: hsales, usdeaths, bricksq, sunspotarea, gasoline.

Can you spot any seasonality, cyclicity and trend? What do you learn about the series?

# Explore `hsales` with the five graphics named in the exercise:
# time plot, seasonal plot, seasonal subseries plot, lag plot and ACF.
autoplot(hsales)

ggseasonplot(hsales)

ggsubseriesplot(hsales)

gglagplot(hsales)

ggAcf(hsales)

The hsales data appears to have some cyclical pattern and seasonality. The greatest sales appear in March, April, and May. Sales are the lowest during the winter months.

# Explore `usdeaths` with the five graphics named in the exercise:
# time plot, seasonal plot, seasonal subseries plot, lag plot and ACF.
autoplot(usdeaths)

ggseasonplot(usdeaths)

ggsubseriesplot(usdeaths)

gglagplot(usdeaths)

ggAcf(usdeaths)

The usdeaths data has seasonality, with deaths highest from May to August.

# Explore `bricksq` with the five graphics named in the exercise:
# time plot, seasonal plot, seasonal subseries plot, lag plot and ACF.
autoplot(bricksq)

ggseasonplot(bricksq)

ggsubseriesplot(bricksq)

gglagplot(bricksq)

ggAcf(bricksq)

The bricksq data has an overall increasing trend with some cyclicity. There is not much seasonality.

# Explore `sunspotarea` with the graphics that can be produced for it:
# time plot, lag plot and ACF.
autoplot(sunspotarea)

# The two seasonal plots are left disabled: per the author's write-up they
# stop with an error reporting that the data is not seasonal.
#ggseasonplot(sunspotarea)
#ggsubseriesplot(sunspotarea)
gglagplot(sunspotarea)

ggAcf(sunspotarea)

The seasonal plots for the sunspotarea data (ggseasonplot and ggsubseriesplot) would not run, and the error advised that it is not seasonal data. However, the charts that did load do not show much pattern in the data.

# Explore `gasoline` with the exercise's graphics: time plot, seasonal plot,
# lag plot and ACF.
autoplot(gasoline)

ggseasonplot(gasoline)

# NOTE(review): the subseries plot was disabled by the author and no reason
# is recorded; presumably it fails for this series -- TODO confirm.
#ggsubseriesplot(gasoline)
gglagplot(gasoline)

ggAcf(gasoline)

The gasoline dataset has an upward trend with little seasonality. There is a strong positive correlation in the lags.