#install.packages('fpp2')
library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
  1. Use the help function to explore what the series gold, woolyrnq and gas represent.
  1. Use autoplot() to plot each of these in separate plots.
  2. What is the frequency of each series? Hint: apply the frequency() function.
  3. Use which.max() to spot the outlier in the gold series. Which observation was it?
# Exercise 2.1: open the documentation page for each of the three series.
help("gold")
## starting httpd help server ... done
help("woolyrnq")
help("gas")

# gold: time plot, then the number of observations per unit of time.
autoplot(object = gold)

frequency(x = gold)
## [1] 1

# woolyrnq: time plot and frequency.
autoplot(object = woolyrnq)

frequency(x = woolyrnq)
## [1] 4

# gas: time plot and frequency.
autoplot(object = gas)

frequency(x = gas)
## [1] 12

Outlier

# Index of the largest observation in gold (the outlier candidate).
which.max(x = gold)
## [1] 770
  1. Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.
  1. You can read the data into R with the following script:
# Read the local copy of tute1.csv into a data frame.
# NOTE(review): the path is absolute and machine-specific.
tute1 <- read.csv(
  file = 'C:\\Users\\charls.joseph\\Documents\\Cuny\\Data624\\week1\\tute1.csv'
)

# Show the first six rows to check the columns that were read.
head(x = tute1, n = 6L)
##        X  Sales AdBudget   GDP
## 1 Mar-81 1020.2    659.2 251.8
## 2 Jun-81  889.2    589.0 290.9
## 3 Sep-81  795.0    512.5 290.8
## 4 Dec-81 1003.9    614.1 292.4
## 5 Mar-82 1057.7    647.2 279.1
## 6 Jun-82  944.4    602.0 254.0
  1. Convert the data to time series
# Drop the first column (the quarter labels) and build a quarterly
# multivariate time series that starts in 1981.
mytimeseries <- ts(
  data = tute1[, -1],
  start = 1981,
  frequency = 4
)
  1. Construct time series plots of each of the three series
# Plot the series with faceting enabled rather than on one shared panel.
autoplot(object = mytimeseries, facets = TRUE)

3. Download some monthly Australian retail data from the book website. These represent retail sales in various categories for different Australian states, and are stored in a MS-Excel file.

You can read the data into R with the following script:

# install.packages('readxl')
# Read the retail workbook, skipping the first row of the sheet.
# NOTE(review): the path is absolute and machine-specific.
retaildata <- readxl::read_excel(
  path = 'C:\\Users\\charls.joseph\\Documents\\Cuny\\Data624\\week1\\retail.xlsx',
  skip = 1
)

Select one of the time series as follows (but replace the column name with your own chosen column):

# Turn the chosen retail column into a monthly series whose first
# observation is April 1982.
myts <- ts(retaildata[, "A3349397X"],
  frequency = 12, start = c(1982, 4))

# Time plot of the selected series.
autoplot(myts) +
  xlab('Year') +
  ylab("Turn Over")

# Scatterplots of the series against its own lags 1 to 12.
gglagplot(myts, lags = 12)

# Autocorrelation up to lag 64. The argument is spelled out as `lag.max`;
# the previous `lag = 64` only worked through partial argument matching.
ggAcf(myts, lag.max = 64)

##2.6 Use the following graphics functions: autoplot(), ggseasonplot(), ggsubseriesplot(), gglagplot(), ggAcf() and explore features from the following time series: hsales, usdeaths, bricksq, sunspotarea, gasoline.

Let's start with hsales.

# Time plot of hsales.
autoplot(hsales) +
  ggtitle('Time series plot for house sales in USA since 1973 ') +
  xlab('Year') + ylab('Sales')

# Seasonal plot: one line per year drawn across the months.
ggseasonplot(hsales) +
  ggtitle('Season plot for house sales in USA since 1973 ') +
  xlab('month') + ylab('Sales')

# Sub-series plot: one mini time plot per month.
ggsubseriesplot(hsales) +
  ggtitle('Time Sub series plot for house sales in USA since 1973 ') +
  xlab('month') + ylab('Sales')

# Lag plot for lags 1 to 20. The title now states the number of lags that
# is actually plotted (it previously said 12), and the x-axis is labelled
# as lagged sales because a lag plot shows series values, not calendar time.
gglagplot(hsales, lags = 20) +
  ggtitle('Lag plot for house sales upto 20 month lags ') +
  xlab('Lagged sales') + ylab('Sales')

# Autocorrelation up to lag 64; `lag.max` is spelled out instead of relying
# on partial matching of `lag`.
ggAcf(hsales, lag.max = 64) +
  ggtitle('Auto correlation plot for house sales upto 64 month lags ')

### Observations

  1. The time-series plot clearly shows cyclic behavior.
  2. From the seasonal and sub-series plots, we can see a slightly higher rate of sales in the months of March, April and May.
  3. The autocorrelation plot shows no evidence of trend, but some seasonality is visible: the scallops point upward at the early lags and downward at the later lags of the ACF plot.

Let's analyze the usdeaths dataset.

# Documentation and sampling frequency of usdeaths.
help(usdeaths)

frequency(usdeaths)
## [1] 12

# Time plot.
autoplot(usdeaths) +
  ggtitle('Time series plot for Accidental death in USA  ') +
  xlab('Year') + ylab('deaths')

# Seasonal plot: one line per year drawn across the months.
ggseasonplot(usdeaths) +
  ggtitle('Season plot for Accidental death in USA ') +
  xlab('month') + ylab('deaths')

# Sub-series plot: one mini time plot per month.
ggsubseriesplot(usdeaths) +
  ggtitle('Time Sub series plot for Accidental death in USA ') +
  xlab('month') + ylab('deaths')

# Lag plot for lags 1 to 12. Both axes show death counts (lagged vs.
# current), so the copy-pasted 'Year' / 'Sales' labels are replaced.
gglagplot(usdeaths, lags = 12) +
  ggtitle('Lag plot for US death  ') +
  xlab('Lagged deaths') + ylab('deaths')

# Autocorrelation up to lag 64; `lag.max` is spelled out instead of relying
# on partial matching of `lag`.
ggAcf(usdeaths, lag.max = 64) +
  ggtitle('Auto correlation plot for US death upto 64 month lags ')

Observations

  1. From the above plots, we see cyclic, trend and seasonal patterns.
  2. From the seasonal and sub-series plots, we can see a higher death rate in the months of June, July and August.
  3. The autocorrelation plot shows evidence of a seasonal pattern, with strong positive correlations recurring every 12 months and strong negative correlations recurring every 12 months as well. It also shows evidence of trend, as the autocorrelation decreases as the lag increases.
# Documentation and sampling frequency of bricksq.
help(bricksq)
frequency(bricksq)
## [1] 4

# Time plot. The series is quarterly and the time axis runs in years, so
# the x-axis is labelled 'Year' (it was 'Months').
autoplot(bricksq) +
  ggtitle('Time series plot for Brick production in Australia  ') +
  xlab('Year') + ylab('Brick production')

# Seasonal plot: one line per year drawn across the quarters.
ggseasonplot(bricksq) +
  ggtitle('Season plot for Brick production in Australia ') +
  xlab('Quarters') + ylab('Brick production')

# Sub-series plot: one mini time plot per quarter.
ggsubseriesplot(bricksq) +
  ggtitle('Time Sub series plot for Brick production in Australia ') +
  xlab('Quarters') + ylab('Brick production')

# Lag plot for lags 1 to 12. Both axes show brick production (lagged vs.
# current), so the copy-pasted 'Year' / 'Sales' labels are replaced.
gglagplot(bricksq, lags = 12) +
  ggtitle('Lag plot for Brick production  ') +
  xlab('Lagged brick production') + ylab('Brick production')

# Autocorrelation up to lag 64. Lags are counted in quarters for this
# series, and `lag.max` is spelled out instead of the partially matched `lag`.
ggAcf(bricksq, lag.max = 64) +
  ggtitle('Auto correlation plot for Brick production upto 64 quarter lags ')

Observation

From the above plots, there is evidence of trend and a cyclic pattern, but there is no seasonality.

Let's look at the sunspotarea time series data.

# Documentation and sampling frequency of sunspotarea.
help(sunspotarea)
frequency(sunspotarea)
## [1] 1

# Time plot.
autoplot(sunspotarea) +
  ggtitle('Time series plot for Annual average sunspot area  ') +
  xlab('Years') + ylab('Annual average sunspot')

# Lag plot for lags 1 to 12. Both axes show sunspot area (lagged vs.
# current), so the copy-pasted 'Year' / 'Sales' labels are replaced.
gglagplot(sunspotarea, lags = 12) +
  ggtitle('Lag plot for Annual average sunspot area  ') +
  xlab('Lagged annual average sunspot') + ylab('Annual average sunspot')

# Autocorrelation up to lag 64; `lag.max` is spelled out instead of relying
# on partial matching of `lag`.
ggAcf(sunspotarea, lag.max = 64) +
  ggtitle('Auto correlation plot for Annual average sunspot area ')

Observation

From the above plots, there is evidence of cyclic and seasonal patterns. Every 5 years, there is strong positive and negative autocorrelation, which indicates seasonal behavior. The autocorrelation does not decrease significantly as the lag increases; hence we don't observe a significant trend.

# Documentation and sampling frequency of gasoline.
help("gasoline")
frequency(gasoline)
## [1] 52.17857

# Time plot.
autoplot(gasoline) +
  ggtitle('Time series plot for finished gasoline supply  ') +
  xlab('Years') + ylab('million Barrels')

# Lag plot for lags 1 to 12. The x-axis shows lagged supply values, not
# calendar time, so it is no longer labelled 'Year'.
gglagplot(gasoline, lags = 12) +
  ggtitle('Lag plot for finished gasoline supply  ') +
  xlab('Lagged million Barrels') + ylab('million Barrels')

# Autocorrelation up to lag 64; `lag.max` is spelled out instead of relying
# on partial matching of `lag`.
ggAcf(gasoline, lag.max = 64) +
  ggtitle('Auto correlation plot for finished gasoline supply ')

Observations: From the above plots, we see a cyclic pattern, but no significant evidence of a seasonal pattern. We do observe an upward trend in the ACF and autoplot graphs.