Load Libraries

library(tsibble)
library(fpp3)
library(readxl)

Assignment:

Excersise 2.1

Q1:

Use ? (or help()) to find out about the data in each series.

help(aus_production)
## starting httpd help server ... done
data("aus_production")

help("pelt")
data("pelt")

help("gafa_stock")
data("gafa_stock")

help("vic_elec")
data("vic_elec")

What is the time interval of each series?

aus_production

min(aus_production$Quarter)
## <yearquarter[1]>
## [1] "1956 Q1"
## # Year starts on: January
max(aus_production$Quarter)
## <yearquarter[1]>
## [1] "2010 Q2"
## # Year starts on: January

pelt

min(pelt$Year)
## [1] 1845
max(pelt$Year)
## [1] 1935

gafa_stock

min(gafa_stock$Date)
## [1] "2014-01-02"
max(gafa_stock$Date)
## [1] "2018-12-31"

Use autoplot() to produce a time plot of each series.

aus_production %>% 
    autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

pelt %>% 
    autoplot(Lynx)

gafa_stock %>% 
    autoplot(Close)

For the last plot, modify the axis labels and title.

vic_elec %>% 
    autoplot(Demand)+
    labs(x = "vic_ elect Date", y = "Demand")+
    ggtitle("Bi-hourly Electricity Demand")

Excersise 2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

gafa_stock %>% 
    group_by(Symbol) %>% 
    filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

From the filter AAPL has a max close at 232.07 on 2018-10-03, AMZN at 2013.00 on 2018-09-04 FB at 217.50 on 2018-07-25 and finally GOOG 1268.33 on 2018-07-26.

Excersise 2.3

2.3 a.

tute1 <- read.csv("https://raw.githubusercontent.com/jonburns2454/DATA-624/main/tute1.csv")
View(tute1)

2.3 b.

mytimeseries <- tute1 %>% 
  mutate(Quarter = yearquarter(Quarter)) %>% 
  as_tsibble(index = Quarter)

2.3 c.

mytimeseries %>% 
  pivot_longer(-Quarter) %>% 
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

no facet_grid

mytimeseries %>% 
  pivot_longer(-Quarter) %>% 
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

With no facet grid, everything is forced to one graph with only colors and a legend differentiating the three line graphs.

Excersise 2.4

a.

Install the USgas package.

library(USgas)

b.

Create a tsibble from us_total with year as the index and state as the key.
data("us_total", package = "USgas")
us_total_tib<- as_tsibble(us_total, index = year, key = state)
us_total_tib <- us_total_tib %>% 
    rename(nat_gas_cons = y)

c.

Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
new_england_list <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
ne_data <- us_total_tib %>% filter(state %in% new_england_list)
ne_data %>% autoplot(ne_data$nat_gas_cons)
## Warning: Use of `ne_data$nat_gas_cons` is discouraged.
## ℹ Use `nat_gas_cons` instead.

Excersise 2.5

a.

Download tourism.xlsx from the book website and read it into R using readxl::read_excel()
tourism_1 <- read_excel("C:\\Users\\jashb\\OneDrive\\Documents\\Masters Data Science\\Fall 2024\\DATA 624\\tourism.xlsx")

b.

Create a tsibble which is identical to the tourism tsibble from the tsibble package.
tourism_identical <- tourism_1 %>% 
  mutate(Quarter = as.Date(Quarter))
tourism_identical <- as_tibble(tourism_identical, key = c(Region, State, Purpose), index = Quarter)

c.

Find what combination of Region and Purpose had the maximum number of overnight trips on average.
avg_trips_overnight <- tourism_identical %>% 
    group_by(Region, Purpose) %>% 
    summarise(overnight_avg = mean(Trips, na.rm = T))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
highest_trips <- avg_trips_overnight %>% 
    filter(overnight_avg == max(overnight_avg)) %>% 
    arrange(desc(overnight_avg))
head(highest_trips)
## # A tibble: 6 × 3
## # Groups:   Region [6]
##   Region          Purpose  overnight_avg
##   <chr>           <chr>            <dbl>
## 1 Sydney          Visiting          747.
## 2 Melbourne       Visiting          619.
## 3 North Coast NSW Holiday           588.
## 4 Gold Coast      Holiday           528.
## 5 South Coast     Holiday           495.
## 6 Brisbane        Visiting          493.

d.

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
tourism_total_by_state <- tourism_identical %>%
  group_by(State) %>%
  select(3, 5) %>% 
  summarise(Total_Trips = sum(Trips, na.rm = TRUE)) %>%
  arrange(desc(Total_Trips))

head(tourism_total_by_state)
## # A tibble: 6 × 2
##   State             Total_Trips
##   <chr>                   <dbl>
## 1 New South Wales       557367.
## 2 Victoria              390463.
## 3 Queensland            386643.
## 4 Western Australia     147820.
## 5 South Australia       118151.
## 6 Tasmania               54137.

New South Wales has by far the highest amount of total trips, likely being for visiting purposes according the the table made for part c.

Excersise 2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
data(PBS)
data("us_gasoline")
data("us_employment")
data("aus_production")
data(pelt)

“Total Private” Employed

Autoplot

us_employment_private <- us_employment %>% filter(Title == "Total Private")
us_employment_private %>% 
    autoplot(Employed)

gg_season

us_employment_private %>% 
    gg_season(Employed)

gg_subseries

us_employment_private %>% 
    gg_subseries(Employed)

gg_lag

us_employment_private %>% 
    gg_lag(Employed)

ACF()

us_employment_private %>% 
    ACF(Employed) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

The US employment dataset shows trends in employment that match (mostly) historical economic downturns and booms, including the several major recessions during the 80s, 90s and 2000s. if it had a few extra months into 2020 it would also reflect the massive drop sometime around March 2020. As for seasonality, especially recently, there seems to be growth in the first 7-ish months of the year and then a sort of drop off following that. And the business cycle is also reflected and shows the peaks and valleys during recessions and recoveries.

What do you learn about the series?

Economic recoveries have been progressively getting longer, ie it takes the US longer to get employment back to where it was pre-recession.

What can you say about the seasonal patterns?

Since it is total us employment the second stage of growth in the tail end of the year could reflect major consumer holidays (Black Friday + Christmas), while the clear peak in July could reflect the large majority of private companies who’s fiscal year ends during that period.

Can you identify any unusual years?

The span and raw growth following the 2008 crisis looks to the fastest and longest since the 40’s.

Bricks:

Autoplot

aus_production%>% 
    autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_season

aus_production %>% 
    gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_subseries

aus_production %>% 
    gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_lag

aus_production %>% 
    gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).

ACF()

aus_production %>% 
    ACF(Bricks) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

There is a clear peak in the Q3 spanning the entirety of the date range for the dataset. Additionally,. brick production peaked in the 80’s and has fallen handedly through the 2000’s.

What do you learn about the series?

This could mean two things, either brick has fallen out of style with Australian home buyers or it has become too expensive to produce (tightening resource constraints).

What can you say about the seasonal patterns?

Maybe Q3 marks the end of the building season in AUS?

Can you identify any unusual years?

The massive declines in the 70’s and then again in the 80’s stands out

Hare:

Autoplot

pelt%>% 
    autoplot(Hare)

gg_subseries

pelt %>% 
    gg_subseries(Hare)

gg_lag

pelt %>% 
    gg_lag(Hare)

ACF()

pelt %>% 
    ACF(Hare) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

The Hare plot does not seem to indicate any seasonality or trends, and the lag plot helps to confirm the randomness

Ho2 Cost:

PBS1 <- PBS %>% 
    filter(ATC2 == "H02")

Autoplot

PBS1%>% 
    autoplot(Cost)

gg_season

PBS1 %>% 
    gg_season(Cost)

gg_subseries

PBS1 %>% 
    gg_subseries(Cost)

ACF()

PBS1 %>% 
    ACF(Cost) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

Looking at the seasonality graph there seems to be a distinct price hike between the copayment cohort and the saftey net cohort. I am unsure how the australian medical system works. In addition to this there is distinct difference in cost changes between the two groups. The copayment group has fairly stable cost throughout the year, while the saftey net group drops quickly in Feb and then rises slowly throughout the year.

Barrels:

Autoplot

us_gasoline%>% 
    autoplot(Barrels)

gg_season

us_gasoline %>% 
    gg_season(Barrels)

gg_subseries

us_gasoline %>% 
    gg_subseries(Barrels)

gg_lag

us_gasoline %>% 
    gg_lag(Barrels)

ACF()

us_gasoline %>% 
    ACF(Barrels) %>% 
    autoplot()

The price of barrels has generally showed an upward trend during the time period and exhibits a sharp decrease somewhere before 2009 and then cuts up again following a correction. The cost of barrels also shows some seasonality, that at first seems wild, but upon further review, every years seasonality takes a very similar shape (despite price increases). Showing that, for the most part, oil has and will always have seasonality that peaks in June/ July and falls as people drive less during colder months.