#Import needed libraries

library(tsibbledata) #to use the time series data in it for the exercises.
library(tsibble) # to use function as_tsibble
library(tibble) # to use view function
library(ggplot2)
library(feasts) # to use the functions for graphics like autoplot()

library(readr) # to uses read_csv function
library(dplyr) # to use Filter, mutate function etc
library(tidyr) # to use pivot_longer function

library(USgas) # to use us_total data

library(readxl)  # to use the readxl function to read excel files
library(httr) # Import httr for downloading the file

library(fpp3)  # to use us_gasoline dataset

Exercises:

2.1 Explore the following four time series

Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series.

What is the time interval of each series?

Use autoplot() to produce a time plot of each series.

For the last plot, modify the axis labels and title.

Answers:

aus_production has a quarterly time interval. Bricks from aus_production is plotted below

aus_production
## # A tsibble: 218 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1956 Q1   284    5225    189    465        3923     5
##  2 1956 Q2   213    5178    204    532        4436     6
##  3 1956 Q3   227    5297    208    561        4806     7
##  4 1956 Q4   308    5681    197    570        4418     6
##  5 1957 Q1   262    5577    187    529        4339     5
##  6 1957 Q2   228    5651    214    604        4811     7
##  7 1957 Q3   236    5317    227    603        5259     7
##  8 1957 Q4   320    6152    222    582        4735     6
##  9 1958 Q1   272    5758    199    554        4608     5
## 10 1958 Q2   233    5641    229    620        5196     7
## # ℹ 208 more rows
autoplot(aus_production, Bricks)

pelt has a annual/yearly time interval. Lynx from pelt is plotted below.

pelt
## # A tsibble: 91 x 3 [1Y]
##     Year  Hare  Lynx
##    <dbl> <dbl> <dbl>
##  1  1845 19580 30090
##  2  1846 19600 45150
##  3  1847 19610 49150
##  4  1848 11990 39520
##  5  1849 28040 21230
##  6  1850 58000  8420
##  7  1851 74600  5560
##  8  1852 75090  5080
##  9  1853 88480 10170
## 10  1854 61280 19600
## # ℹ 81 more rows
autoplot(pelt, Lynx)

gafa_stock has a daily time interval.Close from gafa_stock is plotted below.

gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key:       Symbol [4]
##    Symbol Date        Open  High   Low Close Adj_Close    Volume
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
##  2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
##  3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
##  4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
##  5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
##  6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
##  7 AAPL   2014-01-10  77.1  77.3  75.9  76.1      64.5  76244000
##  8 AAPL   2014-01-13  75.7  77.5  75.7  76.5      64.9  94623200
##  9 AAPL   2014-01-14  76.9  78.1  76.8  78.1      66.1  83140400
## 10 AAPL   2014-01-15  79.1  80.0  78.8  79.6      67.5  97909700
## # ℹ 5,022 more rows
autoplot(gafa_stock, Close)

vice_elec has a half hourly time interval.Demand from vic_elec is plotted below.

vic_elec
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
##    Time                Demand Temperature Date       Holiday
##    <dttm>               <dbl>       <dbl> <date>     <lgl>  
##  1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
##  2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
##  3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
##  4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
##  5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
##  6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE   
##  7 2012-01-01 03:00:00  3694.        20.1 2012-01-01 TRUE   
##  8 2012-01-01 03:30:00  3562.        19.6 2012-01-01 TRUE   
##  9 2012-01-01 04:00:00  3433.        19.1 2012-01-01 TRUE   
## 10 2012-01-01 04:30:00  3359.        19.0 2012-01-01 TRUE   
## # ℹ 52,598 more rows
autoplot(vic_elec, Demand)+
  ggtitle("Half-hourly electricity demand for Victoria, Australia") +
  xlab("Date ") +
  ylab("Demand") 

2.2 Use filter()

to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

Answers:

AAPL is 2018-10-03 ; AMZN is 2018-09-04 ; FB is 2018-07-25 ; GOOG is 2018-07-26

# grouping by stock symbol, then filtering to the max value of Close, selecting the required columns and renaming the column to Peak_close for clarity

gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  select(Symbol, Date, Close) |>
  rename( Peak_Close = Close)
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Peak_Close
##   <chr>  <date>          <dbl>
## 1 AAPL   2018-10-03       232.
## 2 AMZN   2018-09-04      2040.
## 3 FB     2018-07-25       218.
## 4 GOOG   2018-07-26      1268.

2.3: Download the tute1.csv and analyze its contents

Read the data

Convert the data into a time series

Construct time series plots of each of the three series

Answers:

Facet grid places the y axis scaling for each facet in more detail making it easy to understand the trend. Removing this function will bring the three time series to a common y axis scale and difficult to comprehend.

# read the data into R file by uploading teh file into github account
tute1 <- read_csv("https://raw.githubusercontent.com/datanerddhanya/DATA-624/refs/heads/main/tute1.csv",show_col_types = FALSE)

#Convert the data to time series
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

# Construct time series plots of each of the three series with facet grid
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")+
  labs(title = "Sales vs GDP vs Budget over time( with facet grid)")

# Construct time series plots of each of the three series without facet grid
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  labs(title = "Sales vs GDP vs Budget over time( without facet grid)")

2.4 The USgas package .

Install the USgas package.

Create a tsibble from us_total with year as the index and state as the key.

Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# Create a tsibble from us_total with year as the index and state as the key.
ustotal_series <- as_tsibble(us_total,key = state,index = year)

# view ustotal_series
ustotal_series  
## # A tsibble: 1,266 x 3 [1Y]
## # Key:       state [53]
##     year state        y
##    <int> <chr>    <int>
##  1  1997 Alabama 324158
##  2  1998 Alabama 329134
##  3  1999 Alabama 337270
##  4  2000 Alabama 353614
##  5  2001 Alabama 332693
##  6  2002 Alabama 379343
##  7  2003 Alabama 350345
##  8  2004 Alabama 382367
##  9  2005 Alabama 353156
## 10  2006 Alabama 391093
## # ℹ 1,256 more rows
# Plot the annual natural gas consumption by state for the New England area (comprising the states of ### Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

ustotal_series  |>
  filter( state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut","Rhode Island")) |>
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line() +
  geom_point() +
  labs(title = "US Annual Total Natural Gas Consumption by state for the New England area",
       x = "Year",
       y = "Total natural gas consumption in a million cubic feet") +
  theme_minimal() +
  scale_x_continuous(breaks = seq(min(ustotal_series$year), max(ustotal_series$year), by = 3)) +
  scale_y_continuous(labels = scales::comma)

2.5 Use tourism dataset

Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

Create a tsibble which is identical to the tourism tsibble from the tsibble package.

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

Answers:

Sydney region with a purpose of visiting in 2017 Q4 had the maximum number of overnight trips on average.

# read the data into R file by uploading the file into github account
# Download the Excel file from the URL
GET("https://raw.githubusercontent.com/datanerddhanya/DATA-624/main/tourism.xlsx", 
    write_disk(tf <- tempfile(fileext = ".xlsx")))
## Response [https://raw.githubusercontent.com/datanerddhanya/DATA-624/main/tourism.xlsx]
##   Date: 2025-02-08 03:19
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 679 kB
## <ON DISK>  C:\Users\ajay2\AppData\Local\Temp\RtmpEPOFbB\file66e435e719d0.xlsx
# Read the downloaded Excel file
tourism_file<- read_excel(tf, col_names = TRUE) 

# Converting data type from "Quarter" to Date and "Trips" to numeric
# Creating a tsibble identical to the tourism one
tourism <- tourism_file |>
  mutate(Quarter = yearquarter(Quarter),Trips = as.numeric(Trips)) 

# Creating a tsibble identical to the tourism one
tourismtstibble <-  as_tsibble(tourism, key = c(Region, State, Purpose), index = Quarter)

# combination of Region and Purpose had the maximum number of overnight trips on average.
 tourism_avg_trips<- tourismtstibble |>
   group_by(Region,Purpose) |>
   summarize(average_trips = mean(Trips, na.rm = TRUE)) |>
   arrange(desc(average_trips))
 
# display the top five combination
 head(tourism_avg_trips)
## # A tsibble: 6 x 4 [1Q]
## # Key:       Region, Purpose [4]
## # Groups:    Region [3]
##   Region      Purpose  Quarter average_trips
##   <chr>       <chr>      <qtr>         <dbl>
## 1 Melbourne   Visiting 2017 Q4          985.
## 2 Sydney      Business 2001 Q4          948.
## 3 Sydney      Visiting 2016 Q4          921.
## 4 Sydney      Visiting 2017 Q4          920.
## 5 Sydney      Visiting 2017 Q1          916.
## 6 South Coast Holiday  1998 Q1          915.
# Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
 tourism_total_trips <- tourismtstibble %>%
  group_by(State) %>%
  summarise(total_trips = sum(Trips)) %>%
  arrange(desc(total_trips))

view (tourism_total_trips)

2.8 Graphic functions

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Can you spot any seasonality, cyclicity and trend?

What do you learn about the series?

What can you say about the seasonal patterns?

Can you identify any unusual years?

Answers:

“Total Private” Employed from us_employment

In this dataset, I can make the following observations:

1.The data shows a strong upward trend through the years.The employment is going up over the years.Exception is from 2008 to 2010 as there is a sudden drop due to the financial bubble. we can see no strong seasonality or cyclic behavior in general.

2. Through the seasonal plot we can observe that there is low seasonality behavior across the months jan to june there is a increase and then it is stable for the year.

3. In the sub season plot, there is a strong positive trend for each month through the years. The mean remains the same in each month.

4. As evident in the lag plot, there is strong positive correlation in lag 1 to lag 9 sub plots. it proves that there is seasonality.

5. The ACF shows a trend as the autocorrelations for small lags tend to be large and positive because observations nearby in time are also nearby in value. So the ACF of a trended time series tends to have positive values that slowly decrease as the lags increase.

us_employment |> filter(Title=='Total Private') |> 
autoplot(Employed)+
  ggtitle("US monthly employment for “Total Private” Employed using autoplot()") +
  xlab( "Month") +
  ylab("Employed count") +
  scale_y_continuous(labels = scales::comma)

us_employment |> filter(Title=='Total Private') |> 
gg_season(Employed)+
  ggtitle("US monthly employment for “Total Private” Employed using gg_season()") +
  xlab( "Month") +
  ylab("Employed count") +
  scale_y_continuous(labels = scales::comma)

us_employment |> filter(Title=='Total Private') |> 
gg_subseries(Employed)+
  ggtitle("US monthly employment for “Total Private” Employed using gg_subseries()") +
  xlab( "Month") +
  ylab("Employed count") +
  scale_y_continuous(labels = scales::comma)

us_employment |> filter(Title=='Total Private') |> 
gg_lag(Employed)+
  ggtitle("US monthly employment for “Total Private” Employed using gg_lag()") +
  xlab( "Month") +
  ylab("Employed count") +
  scale_y_continuous(labels = scales::comma)

us_employment |> filter(Title=='Total Private') |>  
ACF(Employed)|>
  autoplot()

Bricks from aus_production dataset

In the aus_production dataset, I can make the following observations:

1. The Brick production shows a upward trend from 1956 to 1980, thereafter there is a downward trend. So we can refer this trend as “changing direction”, as it goes from an increasing trend to a decreasing trend.There is seasonality as it seems to drop every alternate year in Q1.There is no cyclic behavior as there is no rise or fall pattern over a long term.

2. Using the seasonal sub series plot we can observe that there is a seasonality behavior as the production is the least in Q1 across all years . It increases in Q2 and Q3 and then decreases in Q4 across the years.

3. As evident in the lag plot, there is positive and highest correlation for Lag1 followed by lag4, thus reflecting the strong seasonality in the data.

4. ACF shows a trended time series as it has positive values that slowly decrease as the lags increase, while the “scalloped” shape is due to the seasonality.

The highest drop in brick production was in 1986 probably due to Australia economy.

autoplot(aus_production, Bricks)+
  ggtitle("Quarterly production of Brick commodity in Australia using autoplot()") +
  xlab("Quarter ") +
  ylab("Brick production in millions of bricks") 

gg_season(aus_production, Bricks)+
  ggtitle("Quarterly production of Brick commodity in Australia using gg_season()") +
  xlab("Quarter ") +
  ylab("Brick production in millions of bricks") 

gg_subseries(aus_production, Bricks)+
  ggtitle("Quarterly production of Brick commodity in Australia using gg_subseries()") +
  xlab("Quarter ") +
  ylab("Brick production in millions of bricks") 

gg_lag(aus_production, Bricks, geom ="point")+
  ggtitle("Quarterly production of Brick commodity in Australia using gg_lag()") +
  xlab("Quarter ") +
  ylab("Brick production in millions of bricks") 

ACF(aus_production, Bricks)|>
  autoplot()

Hare from pelt

In the pelt dataset, I can make the following observations:

1.The Hare pelt trading record shows no clear trend. However, we can see strong cyclic behavior. We can see sharp increases and decreases of hare pelt traded in a repeating pattern..

2. As evident in the lag plot, there is no correlation in any lags. it proves that there is not seasonality. The ACF shows that the cyclic pattern is repeated every 5 years.

autoplot(pelt, Hare)+
  ggtitle("Hudson Bay Company: Annual Snowshoe Hare pelts traded using autoplot()") +
  xlab( "Year ") +
  ylab("Snowshoe Hare pelts traded") 

gg_lag(pelt, Hare, geom="point")+
  ggtitle("Hudson Bay Company: Annual Snowshoe Hare pelts traded using gg_lag()") +
  xlab( "Year ") +
  ylab("Snowshoe Hare pelts traded") 

ACF(pelt,Hare)|>
  autoplot()

“H02” Cost from PBS

In the PBS dataset, I can make the following observations:

1.The data shows a upward trend in general for ATC2:H02 concessional types. There is no trend observed for ATC2:H02 General types. we can see strong cyclic behavior as there is sharp increases and decreases in a repeating pattern..

2. Through the seasonal plot we can observe that there is a seasonality behavior. For safety net type, It is least from Feb to May and then increases till Jan. This is correct as the Safety net subsidies are provided to individuals only after their script expenditure exceeds the threshold cost. On the contrary the concessional co-payment types are high from Feb to May, and thereafter it reduces as Co-payments are made until an individual’s script expenditure hits a threshold . There is no seasonliaty observed in General co-payments.

3.As evident in the lag plot, there is positive correlation in lag 1. it proves that there is seasonality. The ACF shows that a full 12 months is the most highly correlated of a lags.

PBS |> filter(ATC2=='H02') |> 
autoplot(Cost)+
  ggtitle("Medicare Australia prescription cost for ATC2 type:H02 using autoplot()") +
  xlab( "Month") +
  ylab("Cost of the scripts in $AUD") +
  scale_y_continuous(labels = scales::comma)

PBS |> filter(ATC2=='H02') |> 
gg_season(Cost)+
  ggtitle("Medicare Australia prescription cost for ATC2 type:H02 using gg_season()") +
  xlab( "Month") +
  ylab("Cost of the scripts in $AUD") +
  scale_y_continuous(labels = scales::comma)

PBS |> filter(ATC2=='H02') |> 
gg_subseries(Cost)+
  ggtitle("Medicare Australia prescription cost for ATC2 type:H02 using gg_subseries()") +
  xlab( "Month") +
  ylab("Cost of the scripts in $AUD") +
  scale_y_continuous(labels = scales::comma)

PBS |> filter(ATC2=='H02'& Concession=='Concessional' & Type=='Co-payments') |> 
gg_lag(Cost, geom= "point")+
  ggtitle("Medicare Australia prescription cost for ATC2 type:H02 using gg_lag()") +
  xlab( "Month") +
  ylab("Cost of the scripts in $AUD") +
  scale_y_continuous(labels = scales::comma)

PBS |> filter(ATC2=='H02'& Concession=='Concessional' & Type=='Co-payments') |> 
ACF(Cost)|>
  autoplot()

Barrels from us_gasoline

In the us_gasoline dataset, I can make the following observations:

1. The Barrel supply shows a upward trend from 1990 to 2004, thereafter there is a downward trend, then increase and decrease. So we can refer this trend as “changing direction”, as it goes from an increasing trend to a decreasing trend. No seasonality or cyclic behavior can be observed from the time plot.

2. In the seasonal plots, it is not clear if there is any seasonality.

3. In the lag plot, there is a positive correlation for all the lags 1-8 sub plots. This shows that there is seasonality.

4. the ACF plot, indicates a higher lagged correlation, that of 26 weeks. ACF of a trended time series tends to have positive values that slowly decrease as the lags increase.

autoplot(us_gasoline,Barrels) + 
ggtitle("US gasoline supply in Barrels using autoplot()") +
  xlab( "Week") +
  ylab("Barrels") +
  scale_y_continuous(labels = scales::comma)

gg_season(us_gasoline,Barrels) + 
ggtitle("US gasoline supply in Barrels using gg_season()") +
  xlab( "Week") +
  ylab("Barrels") +
  scale_y_continuous(labels = scales::comma)

gg_subseries(us_gasoline,Barrels) + 
ggtitle("US gasoline supply in Barrels using gg_subseries()") +
  xlab( "Week") +
  ylab("Barrels") +
  scale_y_continuous(labels = scales::comma)

gg_lag(us_gasoline,Barrels, geom ="point") + 
ggtitle("US gasoline supply in Barrels using autoplot()") +
  xlab( "Week") +
  ylab("Barrels") +
  scale_y_continuous(labels = scales::comma)

us_gasoline |> 
ACF(Barrels,lag_max=110)|>
  autoplot()