library(fpp2)
library(readxl)

Time Series

2.1

Use the help function to explore what the series gold, woolyrnq and gas represent.

a. Use autoplot() to plot each of these in separate plots.

# gold: daily morning gold prices in US dollars, 1 January 1985 – 31 March 1989.
autoplot(gold) +
  ggtitle("Daily Morning Gold Prices", subtitle = "1/1985 - 3/1989") +
  xlab("Day") +
  ylab("Price (USD)")

# woolyrnq: quarterly production of woollen yarn in Australia, in tonnes,
# Mar 1965 – Sep 1994.
autoplot(woolyrnq) +
  ggtitle("Quarterly production of woollen yarn in Australia",
          subtitle = "Q1 1965- Q3 1994") +
  xlab("Quarter") +
  ylab("Tonnes")

# gas: Australian monthly gas production, 1956–1995.
autoplot(gas) +
  ggtitle("Australian monthly gas production", subtitle = "1956 - 1995") +
  xlab("Month") +
  ylab("Units")

b. What is the frequency of each series? Hint: apply the frequency() function.

# frequency() reports the number of observations per seasonal period stored
# on each ts object; the "## [1] n" lines are the recorded results.
frequency(gold) # 1: no seasonal period is set (the series is daily, not annual)
## [1] 1
frequency(woolyrnq) # 4: quarterly
## [1] 4
frequency(gas) # 12: monthly
## [1] 12

c. Use which.max() to spot the outlier in the gold series. Which observation was it?

# The outlier of gold is on day 770 for the amount of $593.7.
# which.max() returns the position of the (first) largest value in the
# series; indexing gold at that position gives the price itself.
outlier <- which.max(gold)
print(outlier)
## [1] 770
gold[outlier]
## [1] 593.7

2.2

a. Read the data into R

# Pull the tute1 table straight from the book's website; the first CSV row
# supplies the column names.
tute1 <- read.csv(
  file = "http://otexts.com/fpp2/extrafiles/tute1.csv",
  header = TRUE
)
head(tute1)
##        X  Sales AdBudget   GDP
## 1 Mar-81 1020.2    659.2 251.8
## 2 Jun-81  889.2    589.0 290.9
## 3 Sep-81  795.0    512.5 290.8
## 4 Dec-81 1003.9    614.1 292.4
## 5 Mar-82 1057.7    647.2 279.1
## 6 Jun-82  944.4    602.0 254.0

b. Convert the data to time series

# Drop the first column (the date labels) and treat the remaining columns as
# quarterly series beginning in the first quarter of 1981.
mytimeseries <- ts(tute1[, -1], frequency = 4, start = c(1981, 1))
head(mytimeseries)
##          Sales AdBudget   GDP
## 1981 Q1 1020.2    659.2 251.8
## 1981 Q2  889.2    589.0 290.9
## 1981 Q3  795.0    512.5 290.8
## 1981 Q4 1003.9    614.1 292.4
## 1982 Q1 1057.7    647.2 279.1
## 1982 Q2  944.4    602.0 254.0

c. Construct time series plots of each of the three series

# One panel per series (facets = TRUE) so each series gets its own y scale;
# the y label is blanked because the panels show different quantities.
autoplot(mytimeseries, facets = TRUE) +
  ggtitle("Quarterly mytimeseries data", subtitle = "Q1 1981 - Q4 2005") +
  xlab("Year") +
  ylab("")

2.3

a. Read the data into R

# readxl::read_excel() needs a local path (pointing it at the book's URL fails
# with "`path` does not exist"), so download the workbook into the working
# directory once and reuse the local copy on later runs.
retail_path <- "retail.xlsx"
if (!file.exists(retail_path)) {
  download.file(
    "https://otexts.com/fpp2/extrafiles/retail.xlsx",
    destfile = retail_path,
    mode = "wb" # binary mode keeps the .xlsx intact on Windows
  )
}
# skip = 1 drops the first spreadsheet row so that the "Series ID" row is used
# for the column names.
retaildata <- readxl::read_excel(retail_path, skip = 1)
head(retaildata)
## # A tibble: 6 x 190
##   `Series ID`         A3349335T A3349627V A3349338X A3349398A A3349468W
##   <dttm>                  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1 1982-04-01 00:00:00      303.      41.7      63.9      409.      65.8
## 2 1982-05-01 00:00:00      298.      43.1      64        405.      65.8
## 3 1982-06-01 00:00:00      298       40.3      62.7      401       62.3
## 4 1982-07-01 00:00:00      308.      40.9      65.6      414.      68.2
## 5 1982-08-01 00:00:00      299.      42.1      62.6      404.      66  
## 6 1982-09-01 00:00:00      305.      42        64.4      412.      62.3
## # … with 184 more variables: A3349336V <dbl>, A3349337W <dbl>, A3349397X <dbl>,
## #   A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>, A3349790V <dbl>,
## #   A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>, A3349873A <dbl>,
## #   A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>, A3349789K <dbl>,
## #   A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>, A3349799R <dbl>,
## #   A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>, A3349416V <dbl>,
## #   A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>, A3349727C <dbl>,
## #   A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>, A3349349F <dbl>,
## #   A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>, A3349566A <dbl>,
## #   A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>, A3349561R <dbl>,
## #   A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>, A3349637X <dbl>,
## #   A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>, A3349719C <dbl>,
## #   A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>, A3349480L <dbl>,
## #   A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>, A3349481R <dbl>,
## #   A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>, A3349654A <dbl>,
## #   A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>, A3349656F <dbl>,
## #   A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>, A3349360V <dbl>,
## #   A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>, A3349575C <dbl>,
## #   A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>, A3349433W <dbl>,
## #   A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>, A3349815C <dbl>,
## #   A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>, A3349742A <dbl>,
## #   A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>, A3349824F <dbl>,
## #   A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>, A3349434X <dbl>,
## #   A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>, A3349908R <dbl>,
## #   A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>, A3349365F <dbl>,
## #   A3349746K <dbl>, …
# NOTE(review): this repeats the tute1 download from exercise 2.2, and nothing
# in exercise 2.3 appears to use tute1 -- looks like a stray copy; confirm and
# consider removing.
tute1 <- read.csv("http://otexts.com/fpp2/extrafiles/tute1.csv", header=TRUE)
head(tute1)
##        X  Sales AdBudget   GDP
## 1 Mar-81 1020.2    659.2 251.8
## 2 Jun-81  889.2    589.0 290.9
## 3 Sep-81  795.0    512.5 290.8
## 4 Dec-81 1003.9    614.1 292.4
## 5 Mar-82 1057.7    647.2 279.1
## 6 Jun-82  944.4    602.0 254.0

b. Select a time series

# Take the sixth column of the retail table and turn it into a monthly
# series whose first observation is April 1982.
colID <- colnames(retaildata)[6]
myts <- ts(
  retaildata[, colID],
  start = c(1982, 4),
  frequency = 12
)

c. Explore time series

I can spot seasonality (a pattern tied to factors such as the time of the year or the day of the week), as retail sales increase from October until the end of the year. There is also a trend (a long-term increase or decrease in the data): retail sales increase over time. The data also appear to be cyclical, as there are rises and falls that are not of a fixed frequency (these cycles are longer than the seasonal pattern, and their magnitudes are more variable than those of the seasonal pattern).

# Time plot of the selected retail series.
autoplot(myts) +
  ggtitle("Turnover - NSW", subtitle = "04/1982 - 12/2013") +
  xlab("Month") +
  ylab("")

# Seasonal plot: the same data drawn against the month of the year.
ggseasonplot(myts) +
  ggtitle("Turnover - NSW", subtitle = "04/1982 - 12/2013") +
  xlab("Month") +
  ylab("")

# Subseries plot: the observations for each month collected together.
ggsubseriesplot(myts) +
  ggtitle("Turnover - NSW", subtitle = "04/1982 - 12/2013") +
  xlab("Month") +
  ylab("")

# Lag plot: each panel plots the series against itself k months earlier, so
# the axes are turnover values rather than calendar months.
gglagplot(myts) +
  labs(title = "Turnover - NSW",
       subtitle = "04/1982 - 12/2013",
       x = "Lagged turnover", y = "Turnover")

# Correlogram: the horizontal axis is the lag and the vertical axis is the
# autocorrelation, so label them as such rather than "Month" and a blank.
ggAcf(myts) +
  labs(title = "Turnover - NSW",
       subtitle = "04/1982 - 12/2013",
       x = "Lag (months)", y = "ACF")

2.6

hsales

Seasonality is present and is based on a 12-month period. The peak activity occurs in the spring and summer months. Cyclicality is present, with cycles lasting roughly 7-10 years. There doesn’t appear to be a long-term increase or decrease in the data, although there are short-term increases and decreases.

# Exploratory plots for hsales, in the order used for every series in 2.6.
# Time plot of the whole series.
autoplot(hsales)

# Seasonal plot: the data drawn against the season (month) for each year.
ggseasonplot(hsales)

# Subseries plot: the observations for each season collected together.
ggsubseriesplot(hsales)

# Lag plots: the series against lagged copies of itself.
gglagplot(hsales)

# Correlogram: autocorrelation at each lag.
ggAcf(hsales)

usdeaths

Seasonality is present and is based on a 12-month period, with peak deaths occurring in the summer and winter months. Cyclicality and trend do not seem to be present.

# Exploratory plots for usdeaths.
# Time plot of the whole series.
autoplot(usdeaths)

# Seasonal plot: the data drawn against the season (month) for each year.
ggseasonplot(usdeaths)

# Subseries plot: the observations for each season collected together.
ggsubseriesplot(usdeaths) 

# Lag plots: the series against lagged copies of itself.
gglagplot(usdeaths) 

# Correlogram: autocorrelation at each lag.
ggAcf(usdeaths)

bricksq

Seasonality is present and is based on a 4-quarter period. The peak activity occurs in the third quarter. Cyclicality is present, with cycles lasting roughly 7-10 years. There appears to be a long-term increase in the data.

# Exploratory plots for bricksq.
# Time plot of the whole series.
autoplot(bricksq)

# Seasonal plot: the data drawn against the season (quarter) for each year.
ggseasonplot(bricksq)

# Subseries plot: the observations for each season collected together.
ggsubseriesplot(bricksq) 

# Lag plots: the series against lagged copies of itself.
gglagplot(bricksq)

# Correlogram: autocorrelation at each lag.
ggAcf(bricksq)

sunspotarea

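Seasonality is not present: the series has no seasonal period (ggseasonplot() and ggsubseriesplot() both report that the data are not seasonal), so its rises and falls do not follow a fixed, known calendar frequency. Cyclicality is present, with cycles lasting roughly 10-11 years. There doesn’t appear to be a long-term increase in the data.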

# Exploratory plots for sunspotarea.
# Time plot of the whole series.
autoplot(sunspotarea) 

# The seasonal and subseries plots both stop with "Data are not seasonal"
# (recorded below); the calls are kept to document that result.
ggseasonplot(sunspotarea) 
## Error in ggseasonplot(sunspotarea): Data are not seasonal
ggsubseriesplot(sunspotarea) 
## Error in ggsubseriesplot(sunspotarea): Data are not seasonal
# Lag plots: the series against lagged copies of itself.
gglagplot(sunspotarea) 

# Correlogram: autocorrelation at each lag.
ggAcf(sunspotarea) 

gasoline

Seasonality is present and is based on a 52-week period. The peak activity occurs in the summer and winter months. Cyclicality is present, with cycles lasting roughly 7-10 years. There appears to be a long-term increase in the data.

# Exploratory plots for gasoline.
# Time plot of the whole series.
autoplot(gasoline) 

# Seasonal plot: the data drawn against the season for each year.
ggseasonplot(gasoline) 

# The subseries plot fails for this series (error recorded below); the
# message points at a non-integer frequency as the likely cause.
ggsubseriesplot(gasoline) 
## Error in ggsubseriesplot(gasoline): Each season requires at least 2 observations. This may be caused from specifying a time-series with non-integer frequency.
# Lag plots: the series against lagged copies of itself.
gglagplot(gasoline) 

# Correlogram: autocorrelation at each lag.
ggAcf(gasoline)