Data_624_lab01

Author

Henock Montcho

Published

March 10, 2025

library(fpp3)
library(tsibble)
library(dplyr)
  1. Explore four time series:
# Quarterly clay brick production; the Quarter index is carried along
# automatically because the source is a tsibble.
bricks <- select(aus_production, "Bricks")
print(bricks)
# A tsibble: 218 x 2 [1Q]
   Bricks Quarter
    <dbl>   <qtr>
 1    189 1956 Q1
 2    204 1956 Q2
 3    208 1956 Q3
 4    197 1956 Q4
 5    187 1957 Q1
 6    214 1957 Q2
 7    227 1957 Q3
 8    222 1957 Q4
 9    199 1958 Q1
10    229 1958 Q2
# ℹ 208 more rows
# Annual lynx pelt counts; the Year index is carried along automatically.
lynx <- select(pelt, "Lynx")
print(lynx)
# A tsibble: 91 x 2 [1Y]
    Lynx  Year
   <dbl> <dbl>
 1 30090  1845
 2 45150  1846
 3 49150  1847
 4 39520  1848
 5 21230  1849
 6  8420  1850
 7  5560  1851
 8  5080  1852
 9 10170  1853
10 19600  1854
# ℹ 81 more rows
# Closing prices; the Date index and the Symbol key are kept by the tsibble.
close <- select(gafa_stock, "Close")
print(close)
# A tsibble: 5,032 x 3 [!]
# Key:       Symbol [4]
   Close Date       Symbol
   <dbl> <date>     <chr> 
 1  79.0 2014-01-02 AAPL  
 2  77.3 2014-01-03 AAPL  
 3  77.7 2014-01-06 AAPL  
 4  77.1 2014-01-07 AAPL  
 5  77.6 2014-01-08 AAPL  
 6  76.6 2014-01-09 AAPL  
 7  76.1 2014-01-10 AAPL  
 8  76.5 2014-01-13 AAPL  
 9  78.1 2014-01-14 AAPL  
10  79.6 2014-01-15 AAPL  
# ℹ 5,022 more rows
# Electricity demand; the Time index is kept by the tsibble.
demand <- select(vic_elec, "Demand")
print(demand)
# A tsibble: 52,608 x 2 [30m] <Australia/Melbourne>
   Demand Time               
    <dbl> <dttm>             
 1  4383. 2012-01-01 00:00:00
 2  4263. 2012-01-01 00:30:00
 3  4049. 2012-01-01 01:00:00
 4  3878. 2012-01-01 01:30:00
 5  4036. 2012-01-01 02:00:00
 6  3866. 2012-01-01 02:30:00
 7  3694. 2012-01-01 03:00:00
 8  3562. 2012-01-01 03:30:00
 9  3433. 2012-01-01 04:00:00
10  3359. 2012-01-01 04:30:00
# ℹ 52,598 more rows
# Open the help page of each data set to confirm its time interval.
help("aus_production") # The time interval is Quarter.
help("pelt")           # The time interval is Year.
help("gafa_stock")     # The time interval is Irregular Days.
help("vic_elec")       # The time interval is Half Hour (every 30 minutes).

# autoplot: draw each series as a line and overlay the individual observations

autoplot(bricks) +
  geom_point()

autoplot(lynx) +
  geom_point()

autoplot(close) +
  geom_point()

# Modify the axis labels and title
autoplot(demand) +
  geom_point() +
  ggtitle("Half-hourly electricity demand for Victoria, Australia") +
  xlab("Frequency of demand (Every 30 Minutes)") +
  ylab("Demand of electricity")

  1. Use filter for peak closing price day
# For each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum closing price.
peak_close <- filter(
  group_by(gafa_stock, Symbol),
  Close == max(Close)
)
print(peak_close)
# A tsibble: 4 x 8 [!]
# Key:       Symbol [4]
# Groups:    Symbol [4]
  Symbol Date        Open  High   Low Close Adj_Close   Volume
  <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600
  1. tute1.csv
#a- Load data
getwd()
[1] "C:/Users/month/OneDrive/Documents/Cuny MSDS/Data 624_Predictive Analytics"
# Read tute1.csv through an explicit path rather than setwd(): changing the
# working directory is a global side effect that silently alters how every
# later relative path in the script is resolved.
tute1 <- readr::read_csv(file.path("C:/Users/month/Downloads", "tute1.csv"))  |>
print()
# A tibble: 100 × 4
   Quarter    Sales AdBudget   GDP
   <date>     <dbl>    <dbl> <dbl>
 1 1981-03-01 1020.     659.  252.
 2 1981-06-01  889.     589   291.
 3 1981-09-01  795      512.  291.
 4 1981-12-01 1004.     614.  292.
 5 1982-03-01 1058.     647.  279.
 6 1982-06-01  944.     602   254 
 7 1982-09-01  778.     531.  296.
 8 1982-12-01  932.     608.  272.
 9 1983-03-01  996.     638.  260.
10 1983-06-01  908.     582.  280.
# ℹ 90 more rows
# b- Convert data to time series
# Quarter arrives as a Date; turn it into a yearquarter so the tsibble is
# recognised as quarterly, then index the table by it.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
print(mytimeseries)
# A tsibble: 100 x 4 [1Q]
   Quarter Sales AdBudget   GDP
     <qtr> <dbl>    <dbl> <dbl>
 1 1981 Q1 1020.     659.  252.
 2 1981 Q2  889.     589   291.
 3 1981 Q3  795      512.  291.
 4 1981 Q4 1004.     614.  292.
 5 1982 Q1 1058.     647.  279.
 6 1982 Q2  944.     602   254 
 7 1982 Q3  778.     531.  296.
 8 1982 Q4  932.     608.  272.
 9 1983 Q1  996.     638.  260.
10 1983 Q2  908.     582.  280.
# ℹ 90 more rows
# Construct time series plots
# Reshape to long form (one row per Quarter/series) so each series gets its
# own panel with an independent y-axis.
ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

Comment: When facet_grid() is not included, all the time series are plotted on the same graph and share a single y-axis. Because the series are on different scales, this can make the plot hard to interpret.

  1. “USgas” exploration
#a install.packages("USgas")
library(USgas)

# b Create a tsibble from us_total
# One annual series per state: year is the index, state is the key.
us_total_tsibble <- as_tsibble(us_total, key = state, index = year)
print(us_total_tsibble)
# A tsibble: 1,266 x 3 [1Y]
# Key:       state [53]
    year state        y
   <int> <chr>    <int>
 1  1997 Alabama 324158
 2  1998 Alabama 329134
 3  1999 Alabama 337270
 4  2000 Alabama 353614
 5  2001 Alabama 332693
 6  2002 Alabama 379343
 7  2003 Alabama 350345
 8  2004 Alabama 382367
 9  2005 Alabama 353156
10  2006 Alabama 391093
# ℹ 1,256 more rows
# c Plot the annual gas consumption
# Keep only the six New England states.
us_total_NE <- filter(
  us_total_tsibble,
  state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
               "Connecticut", "Rhode Island")
)

# One panel per state with its own y-axis, since consumption levels differ
# between states.
autoplot(us_total_NE, y) +
  facet_grid(rows = vars(state), scales = "free_y") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Annual natural gas consumption by state for the New England area",
    y = "Gas consumption in a million cubic feet"
  ) +
  theme_minimal() +
  theme(strip.text = element_text(size = 7.5, angle = 45, hjust = 1))

  1. “tourism.xlsx” exploration
#a Load tourism.xlsx
getwd()
[1] "C:/Users/month/OneDrive/Documents/Cuny MSDS/Data 624_Predictive Analytics"
# Read tourism.xlsx through an explicit path rather than setwd(): changing the
# working directory is a global side effect that silently alters how every
# later relative path in the script is resolved.
tourism <- readxl::read_excel(
  file.path("C:/Users/month/Downloads", "tourism.xlsx")
)  |>
print()
# A tibble: 24,320 × 5
   Quarter    Region   State           Purpose  Trips
   <chr>      <chr>    <chr>           <chr>    <dbl>
 1 1998-01-01 Adelaide South Australia Business  135.
 2 1998-04-01 Adelaide South Australia Business  110.
 3 1998-07-01 Adelaide South Australia Business  166.
 4 1998-10-01 Adelaide South Australia Business  127.
 5 1999-01-01 Adelaide South Australia Business  137.
 6 1999-04-01 Adelaide South Australia Business  200.
 7 1999-07-01 Adelaide South Australia Business  169.
 8 1999-10-01 Adelaide South Australia Business  134.
 9 2000-01-01 Adelaide South Australia Business  154.
10 2000-04-01 Adelaide South Australia Business  169.
# ℹ 24,310 more rows
# b Create a tsibble identical to 'tourism'
# Quarter is read as text; convert it to yearquarter, then declare the index
# and the three key columns that identify each series.
tourism_tsibble <- as_tsibble(
  mutate(tourism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
print(tourism_tsibble)
# A tsibble: 24,320 x 5 [1Q]
# Key:       Region, State, Purpose [304]
   Quarter Region   State           Purpose  Trips
     <qtr> <chr>    <chr>           <chr>    <dbl>
 1 1998 Q1 Adelaide South Australia Business  135.
 2 1998 Q2 Adelaide South Australia Business  110.
 3 1998 Q3 Adelaide South Australia Business  166.
 4 1998 Q4 Adelaide South Australia Business  127.
 5 1999 Q1 Adelaide South Australia Business  137.
 6 1999 Q2 Adelaide South Australia Business  200.
 7 1999 Q3 Adelaide South Australia Business  169.
 8 1999 Q4 Adelaide South Australia Business  134.
 9 2000 Q1 Adelaide South Australia Business  154.
10 2000 Q2 Adelaide South Australia Business  169.
# ℹ 24,310 more rows
# c Combination of 'Region' and 'Purpose' with the maximum number of overnight
# trips on average
#
# The tsibble class is dropped first: summarise() on a tsibble always keeps
# the Quarter index, so the mean would be computed per quarter rather than
# over the whole period. The grouping is dropped after summarising so the
# maximum is taken across every Region/Purpose pair, not within each Region.
# NOTE: the printed output shown in this document must be re-rendered after
# this change.
average_trips <- tourism_tsibble  |>
  as_tibble()  |>
  group_by(Region, Purpose)  |>
  summarise(Average = mean(Trips, na.rm = TRUE), .groups = "drop")  |>
  filter(Average == max(Average))  |>
print()
# A tsibble: 76 x 4 [1Q]
# Key:       Region, Purpose [76]
# Groups:    Region [76]
   Region                     Purpose  Quarter Average
   <chr>                      <chr>      <qtr>   <dbl>
 1 Adelaide                   Visiting 2017 Q1   270. 
 2 Adelaide Hills             Visiting 2002 Q4    81.1
 3 Alice Springs              Holiday  1998 Q3    76.5
 4 Australia's Coral Coast    Holiday  2014 Q3   198. 
 5 Australia's Golden Outback Business 2017 Q3   174. 
 6 Australia's North West     Business 2016 Q3   297. 
 7 Australia's South West     Holiday  2016 Q1   612. 
 8 Ballarat                   Visiting 2004 Q1   103. 
 9 Barkly                     Holiday  1998 Q3    37.9
10 Barossa                    Holiday  2006 Q1    51.0
# ℹ 66 more rows
# d Create new tsibble which combines the Purposes and the Regions, and just
# has total trips by State
#
# Grouping a tsibble by State and summarising already yields a tsibble keyed
# by State and indexed by Quarter, so no further conversion is needed. The
# previous as_tibble(index = State) call dropped the tsibble class.
# NOTE: the printed output shown in this document must be re-rendered after
# this change.
trips_by_state <- tourism_tsibble  |>
  group_by(State)  |>
  summarise(Total_Trips = sum(Trips))  |>
print()
# A tibble: 640 × 3
   State Quarter Total_Trips
   <chr>   <qtr>       <dbl>
 1 ACT   1998 Q1        551.
 2 ACT   1998 Q2        416.
 3 ACT   1998 Q3        436.
 4 ACT   1998 Q4        450.
 5 ACT   1999 Q1        379.
 6 ACT   1999 Q2        558.
 7 ACT   1999 Q3        449.
 8 ACT   1999 Q4        595.
 9 ACT   2000 Q1        600.
10 ACT   2000 Q2        557.
# ℹ 630 more rows
  1. Use of graphics functions
# us_employment and PBS each hold many keyed series. Restrict them to the
# single series named in the plot titles ("Total Private" and "H02");
# otherwise every series in the data set is drawn on the same plot.
total_private <- us_employment |>
  filter(Title == "Total Private")
h02_cost <- PBS |>
  filter(ATC2 == "H02")

# Time plots
autoplot(total_private, Employed) + ggtitle("Total Private Employed")

autoplot(aus_production, Bricks) + ggtitle("Bricks Production")

autoplot(pelt, Hare) + ggtitle("Hare Pelts")

autoplot(h02_cost, Cost) + ggtitle("H02 Cost")

#autoplot(us_gasoline, Barrels) + ggtitle("Gasoline Barrels")


# Seasonal plots
gg_season(total_private, Employed) + ggtitle("Seasonality in Total Private Employed")

gg_season(aus_production, Bricks) + ggtitle("Seasonality in Bricks Production")

# pelt is annual, so there is no within-year season to plot.
#gg_season(pelt, Hare) + ggtitle("Seasonality in Hare Pelts")
#gg_season(h02_cost, Cost) + ggtitle("Seasonality in H02 Cost")
gg_season(us_gasoline, Barrels) + ggtitle("Seasonality in Gasoline Barrels")

# Subseries plots
#gg_subseries(total_private, Employed) + ggtitle("Subseries Plot for Total Private Employed")
gg_subseries(aus_production, Bricks) + ggtitle("Subseries Plot for Bricks Production")

gg_subseries(pelt, Hare) + ggtitle("Subseries Plot for Hare Pelts")

#gg_subseries(h02_cost, Cost) + ggtitle("Subseries Plot for H02 Cost")
#gg_subseries(us_gasoline, Barrels) + ggtitle("Subseries Plot for Gasoline Barrels")


# Lag plots
#gg_lag(total_private, Employed) + ggtitle("Lag Plot for Total Private Employed")
gg_lag(aus_production, Bricks) + ggtitle("Lag Plot for Bricks Production")

gg_lag(pelt, Hare) + ggtitle("Lag Plot for Hare Pelts")

# gg_lag() draws a single series, so a multi-key selection such as h02_cost
# would have to be aggregated or filtered further first -- TODO confirm.
#gg_lag(h02_cost, Cost) + ggtitle("Lag Plot for H02 Cost")
gg_lag(us_gasoline, Barrels) + ggtitle("Lag Plot for Gasoline Barrels")

# Autocorrelation plots: ACF() returns a table of autocorrelations, so it has
# to be passed to autoplot() before a title can be added.
#ACF(total_private, Employed) |> autoplot() + ggtitle("ACF for Total Private Employed")
#ACF(aus_production, Bricks) |> autoplot() + ggtitle("ACF for Bricks Production")
#ACF(pelt, Hare) |> autoplot() + ggtitle("ACF for Hare Pelts")
#ACF(h02_cost, Cost) |> autoplot() + ggtitle("ACF for H02 Cost")
#ACF(us_gasoline, Barrels) |> autoplot() + ggtitle("ACF for Gasoline Barrels")

#Note: the ACF plots do not give useful information to exploit.

Interpretation:

-Seasonality:

The pelt trading records are annual, so they cannot show seasonality (a pattern that repeats within each year). Instead, the autoplot shows a strong cyclic behaviour, with peaks recurring roughly every 10 years.

-Cyclicity:

Overall, there are two big cycles in the quarterly production of Bricks (autoplot): the period before 1980 where the production of the bricks has an increasing trend and the period post-1980 where there is a declining trend in the production of bricks.

-Trend:

There is an increasing trend in the US finished motor gasoline product supplied (us_gasoline) over the years (autoplot).