2.1 Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

Use ? (or help()) to find out about the data in each series. What is the time interval of each series? I used the Help pane (bottom-right corner of RStudio). The intervals are: Bricks is quarterly, Lynx is annual, Close is daily (trading days only, so the index is irregular), and Demand is half-hourly.

Use autoplot() to produce a time plot of each series. For the last plot, modify the axis labels and title.

library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(ggplot2)
library(dplyr)
data("aus_production")
data("pelt")
data("gafa_stock")
data("vic_elec")
autoplot(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# add the variable to plot as the second argument
autoplot(pelt, Lynx)

autoplot(gafa_stock, Close)

# modify the axis labels and title 
autoplot(vic_elec, Demand) +
  labs(title = "Electricity Demand for Victoria, Australia",
       y = "Demand, MWh",
       x = "Time, half-hourly")

2.2 Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

p_closing <- gafa_stock |>       # |> pipe; %>% also works
  group_by(Symbol) |>            # each stock is identified by its Symbol
  mutate(MaxClose = max(Close)) |> # new column with the maximum closing price
  filter(Close == MaxClose) |>   # keep the rows where Close equals that maximum
  select(Symbol, Date, Close)
p_closing
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.
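
An equivalent, more compact sketch (it should return the same four rows) filters within each group directly instead of creating a helper column:

gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  ungroup() |>
  select(Symbol, Date, Close)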

2.3 Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

getwd()
## [1] "/Users/marjetevucinaj/data624"
# Read the CSV file
tute1 <- readr::read_csv("tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1) 
# Convert the data to time series

mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

# Construct time series plots of each of the three series

mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Check what happens when you don’t include facet_grid(): without it, all three series are drawn in a single panel with a common y-axis, so each series’ own variation is harder to see than in the faceted version.

mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

2.4 The USgas package contains data on the demand for natural gas in the US. Install the USgas package. Create a tsibble from us_total with year as the index and state as the key. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

library(USgas)
data(us_total) #variables = year, state and 'y'= natural gas consumption
  
my_data <- us_total %>% as_tsibble(
  index = year,
  key = state
)
#"tsibble objects extend tidy data frames (tibble objects) by introducing temporal structure. We have set the time series index to be the Year column, which associates the measurements (Observation) with the time of recording (Year)" Hyndman.

new_england <- my_data %>%
  filter(state %in% c("Maine", "Vermont", "New Hampshire",
                      "Massachusetts", "Connecticut", "Rhode Island"))
autoplot(new_england)
## Plot variable not specified, automatically selected `.vars = y`
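
To avoid the "variable not specified" message and to label the plot, the measured variable can be named explicitly. A sketch (the title and axis label are my own wording):

new_england |>
  autoplot(y) +
  labs(title = "Annual natural gas consumption, New England states",
       y = "Consumption")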

2.5 Download tourism.xlsx from the book website and read it into R using readxl::read_excel(). Create a tsibble which is identical to the tourism tsibble from the tsibble package. Find what combination of Region and Purpose had the maximum number of overnight trips on average.

# Read the file
tourism <- readxl::read_excel("tourism.xlsx")

# Convert the data to time series
ttimeseries <- tourism |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = c(Region, Purpose)) 
# keys are the distinct groups; Trips is the observed variable
max_trips <- ttimeseries %>%
  group_by(Region, Purpose) %>%   # each Region/Purpose combination
  summarize(Average_Trips = mean(Trips, na.rm = TRUE), .groups = 'drop') %>%
  slice_max(order_by = Average_Trips, n = 1)  # top result

print(max_trips)
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter Average_Trips
##   <chr>     <chr>      <qtr>         <dbl>
## 1 Melbourne Visiting 2017 Q4          985.
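
Because summarise() on a tsibble keeps the Quarter index, the table above reports the single quarter with the highest Trips for any Region/Purpose combination. To average over all quarters for each combination (what the question asks for), the data can be dropped to a regular tibble first. A sketch:

ttimeseries |>
  as_tibble() |>                   # drop the Quarter index so the mean is over all quarters
  group_by(Region, Purpose) |>
  summarise(Average_Trips = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  slice_max(Average_Trips, n = 1)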

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

total_trips <- ttimeseries %>%
  group_by(State) %>% 
  summarize(Total_Trips = sum(Trips, na.rm = TRUE), .groups = 'drop')  

# Create the new tsibble with total trips by State (Purpose and Region are now dropped);
# the grouped summarise above already returns a Quarter-indexed tsibble, so this just makes the key explicit
total_trips_tsibble <- total_trips %>%
  as_tsibble(index = Quarter, key = State)

print(total_trips_tsibble)
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Total_Trips
##    <chr>   <qtr>       <dbl>
##  1 ACT   1998 Q1        551.
##  2 ACT   1998 Q2        416.
##  3 ACT   1998 Q3        436.
##  4 ACT   1998 Q4        450.
##  5 ACT   1999 Q1        379.
##  6 ACT   1999 Q2        558.
##  7 ACT   1999 Q3        449.
##  8 ACT   1999 Q4        595.
##  9 ACT   2000 Q1        600.
## 10 ACT   2000 Q2        557.
## # ℹ 630 more rows
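
A quick visual check of the state totals (not required by the exercise):

autoplot(total_trips_tsibble, Total_Trips)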

2.8 Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

Can you spot any seasonality, cyclicity and trend?

What do you learn about the series?

Can you identify any unusual years?

- gg_subseries() is particularly helpful for spotting these patterns.

us_employment: the autoplot shows an increasing trend in Total Private employment, with a clear downturn in 2008–2010 due to the recession. Any seasonal pattern is slight; the monthly means in the subseries plot suggest fewer people working in January of most years, with a slight increase each month through September. gg_season supports this, and in some years the increase continues through the remaining months. The gg_lag plot is broken down by month, so it is a bit difficult to read (I am unsure how to make it quarterly with a monthly index, which might be more helpful).

aus_production: the autoplot shows an inconsistent pattern of brick production. gg_season shows seasonality in Q2 and Q3 of most years, reflecting an increase in production. The subseries plot confirms this, with Q3 having the highest mean, followed by Q2 and Q4, while Q1 is significantly lower in comparison. The lag plot also shows Q1 performing lowest across lags, and lag 1 shows a positive relationship. The ACF (autocorrelation) confirms that lag 1 is higher than the other lags, reflecting the strong trend and seasonal pattern in the data.

pelt shows an inconsistent pattern in Hare pelt trading, with some years very high (e.g., around 1865) and some very low (1882, 1888, etc.). The subseries plot shows that the mean is generally the same across the series. Based on gg_lag, this series appears cyclical rather than driven by any seasonal explanation. I had difficulty producing the seasonal plot; gg_season() does not apply here because the data are annual, so there is no seasonal period within a year, although one might assume pelt trading would be higher in preparation for the cold months. The ACF shows some lags consistent with white noise and others that are not.

PBS: the H02 cost is highest for concessional co-payments, while the concessional safety net cost swings up and down over a wide range of values. Over time, costs have risen for both of these categories, a pattern not found for the "general" categories. gg_season shows an inverse relationship between concessional co-payments and the concessional safety net: from around February to mid-year, costs increase for concessional co-payments and decrease for the concessional safety net. The general categories follow a similar but weaker pattern. The subseries plot also supports this in the way the means change, and the ACF shows a bit of white noise, but not for the majority of lags.

data("us_employment")
total_private <- us_employment %>%
  filter(Title == "Total Private") 
total_private_ts <- total_private %>%
  select(Month, Employed) %>%
  as_tsibble(index = Month)   # not used below; the plots work on total_private directly

autoplot(total_private, Employed)

gg_subseries(total_private, Employed)

gg_season(total_private, Employed, period = "year") # period = "year" shows the monthly pattern within each year

gg_lag(total_private, Employed, geom = "point")

ACF(total_private, Employed)
## # A tsibble: 29 x 3 [1M]
## # Key:       Series_ID [1]
##    Series_ID          lag   acf
##    <chr>         <cf_lag> <dbl>
##  1 CEU0500000001       1M 0.997
##  2 CEU0500000001       2M 0.993
##  3 CEU0500000001       3M 0.990
##  4 CEU0500000001       4M 0.986
##  5 CEU0500000001       5M 0.983
##  6 CEU0500000001       6M 0.980
##  7 CEU0500000001       7M 0.977
##  8 CEU0500000001       8M 0.974
##  9 CEU0500000001       9M 0.971
## 10 CEU0500000001      10M 0.968
## # ℹ 19 more rows
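
As with the other series below, the autocorrelations can be plotted rather than printed by piping to autoplot():

ACF(total_private, Employed) %>%
  autoplot()
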
a_prod_ts <- aus_production %>%
  select(Quarter, Bricks) %>%
  as_tsibble(index = Quarter)

autoplot(a_prod_ts, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_season(a_prod_ts, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_subseries(a_prod_ts, Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

gg_lag(a_prod_ts, Bricks) +
    ggtitle("Lag Plot")
## Warning: Removed 20 rows containing missing values (gg_lag).

ACF(a_prod_ts, Bricks)  %>%
  autoplot()

data(pelt)
autoplot(pelt, Hare)

gg_subseries(pelt, Hare)

gg_lag(pelt, Hare)

ACF(pelt, Hare) %>%
  autoplot()

data(PBS)
h02 <- PBS %>%
  filter(ATC2 == "H02")

pbs_tsibble <- h02 %>%
  as_tsibble(index = Month)  # PBS is already a monthly tsibble keyed by Concession, Type, ATC1 and ATC2

autoplot(pbs_tsibble, Cost) 

gg_season(pbs_tsibble, Cost, period = "year")

gg_subseries(pbs_tsibble, Cost)

ACF(pbs_tsibble, Cost) %>%
  autoplot()
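
gg_lag() was not run for PBS because it needs a single time series, and the H02 subset still contains several keys (Concession, Type, ATC1, ATC2). A sketch that first filters down to one series (the Concessional / Co-payments combination is my choice here):

pbs_tsibble %>%
  filter(Concession == "Concessional", Type == "Co-payments") %>%
  gg_lag(Cost, geom = "point")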

data("us_gasoline")
autoplot(us_gasoline, Barrels)

gg_season(us_gasoline, Barrels, period = "year")

gg_subseries(us_gasoline, Barrels)

gg_lag(us_gasoline, Barrels)

ACF(us_gasoline, Barrels)
## # A tsibble: 31 x 2 [1W]
##         lag   acf
##    <cf_lag> <dbl>
##  1       1W 0.893
##  2       2W 0.882
##  3       3W 0.873
##  4       4W 0.866
##  5       5W 0.847
##  6       6W 0.844
##  7       7W 0.832
##  8       8W 0.831
##  9       9W 0.822
## 10      10W 0.808
## # ℹ 21 more rows