# Print the aus_production tsibble to inspect its columns.
aus_production
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ℹ 208 more rows
# Keep only the Quarter index and the Bricks column, then draw a time plot.
bricks <- select(aus_production, Quarter, Bricks)
autoplot(bricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Keep only the Year index and the Lynx column of pelt, then draw a time plot.
lynx <- select(pelt, Year, Lynx)
autoplot(lynx)
## Plot variable not specified, automatically selected `.vars = Lynx`
# Print the gafa_stock tsibble to inspect its key and columns.
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # ℹ 5,022 more rows
# Closing price per stock symbol, drawn as one line per symbol.
close <- select(gafa_stock, Date, Symbol, Close)
autoplot(close) +
  labs(
    title = "GAFA stock closing price",
    y = "Closing Price per share ($)"
  )
## Plot variable not specified, automatically selected `.vars = Close`
# Within each Symbol, keep the row(s) whose Close equals that symbol's maximum,
# then retain only the symbol and the date on which the peak occurred.
# The result stays grouped by Symbol.
peak_close <- select(
  filter(group_by(close, Symbol), Close == max(Close)),
  Symbol, Date
)
peak_close
## # A tsibble: 4 x 2 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date
## <chr> <date>
## 1 AAPL 2018-10-03
## 2 AMZN 2018-09-04
## 3 FB 2018-07-25
## 4 GOOG 2018-07-26
# Load the tute1 data from the CSV file in the working directory.
tute1 <- readr::read_csv(file = "tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert Quarter to a yearquarter value and use it as the tsibble index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)

# Reshape to long form and draw one facet per series, each with its own y scale.
ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
# Show the first rows of the us_total data set from the USgas package.
head(USgas::us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
# Build a tsibble from us_total: one series per state, indexed by year.
us_df <- as_tsibble(USgas::us_total, index = year, key = state)
us_df
## # A tsibble: 1,266 x 3 [1Y]
## # Key: state [53]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
## 7 2003 Alabama 350345
## 8 2004 Alabama 382367
## 9 2005 Alabama 353156
## 10 2006 Alabama 391093
## # ℹ 1,256 more rows
# Restrict to the six New England states and plot each in its own facet
# with an independent y scale.
us_df |>
  filter(
    state %in% c(
      "Maine", "Vermont", "New Hampshire",
      "Massachusetts", "Connecticut", "Rhode Island"
    )
  ) |>
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line() +
  facet_grid(state ~ ., scales = "free_y") +
  labs(
    title = "Annual natural gas consumption in New England",
    y = "Consumption"
  )
# Read the tourism spreadsheet, convert Quarter to a yearquarter value and
# build a tsibble keyed by Region, State and Purpose.
trsm_df <- as_tsibble(
  mutate(readxl::read_excel("tourism.xlsx"), Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
# Region/Purpose combination(s) with the highest average number of trips.
# Collapse to one row per combination with summarise() rather than copying the
# group mean onto every quarterly row and de-duplicating afterwards.
# as_tibble() drops the time index so summarise() aggregates over all quarters.
reg_purp <- trsm_df |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(Avg_Trips = mean(Trips), .groups = "drop") |>
  filter(Avg_Trips == max(Avg_Trips)) |>
  select(Region, Purpose)
# Total trips per State for each quarter (summed over Region and Purpose).
# Assigned with `<-` at the head of the pipeline instead of a trailing `->`,
# so the name being defined is visible up front.
tourism_by_state <- trsm_df |>
  group_by(State) |>
  summarise(Trips = sum(Trips)) |>
  ungroup()
tourism_by_state |> autoplot()
## Plot variable not specified, automatically selected `.vars = Trips`
# Time plot of aus_arrivals.
autoplot(aus_arrivals)
## Plot variable not specified, automatically selected `.vars = Arrivals`
# Seasonal plot.
gg_season(aus_arrivals)
## Plot variable not specified, automatically selected `y = Arrivals`
# Seasonal subseries plot.
gg_subseries(aus_arrivals)
## Plot variable not specified, automatically selected `y = Arrivals`
### Australian retail
# Fix the seed so the randomly chosen retail series is reproducible.
set.seed(12345678)
# Keep the single series whose `Series ID` matches one ID drawn at random.
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail$`Series ID`, 1)
)
# Time plot.
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
# Seasonal plot.
gg_season(myseries)
## Plot variable not specified, automatically selected `y = Turnover`
# Seasonal subseries plot.
gg_subseries(myseries)
## Plot variable not specified, automatically selected `y = Turnover`
# Lag plot.
gg_lag(myseries)
## Plot variable not specified, automatically selected `y = Turnover`
# Autocorrelation function of Turnover.
autoplot(ACF(myseries, Turnover))
The seasonal plots show that turnover increases each year around Christmas and the summer months. Overall, the series has an upward trend.
There is some indication of decreased spending around the late 1990s and late 2000s, likely due to recessions.
# Keep only the "Total Private" employment series and print it.
us_private <- filter(us_employment, Title == "Total Private")
us_private
## # A tsibble: 969 x 4 [1M]
## # Key: Series_ID [1]
## Month Series_ID Title Employed
## <mth> <chr> <chr> <dbl>
## 1 1939 Jan CEU0500000001 Total Private 25338
## 2 1939 Feb CEU0500000001 Total Private 25447
## 3 1939 Mar CEU0500000001 Total Private 25833
## 4 1939 Apr CEU0500000001 Total Private 25801
## 5 1939 May CEU0500000001 Total Private 26113
## 6 1939 Jun CEU0500000001 Total Private 26485
## 7 1939 Jul CEU0500000001 Total Private 26481
## 8 1939 Aug CEU0500000001 Total Private 26848
## 9 1939 Sep CEU0500000001 Total Private 27468
## 10 1939 Oct CEU0500000001 Total Private 27830
## # ℹ 959 more rows
# Time plot of Employed.
autoplot(us_private, Employed)
# Seasonal plot.
gg_season(us_private)
## Plot variable not specified, automatically selected `y = Employed`
# Seasonal subseries plot.
gg_subseries(us_private)
## Plot variable not specified, automatically selected `y = Employed`
# Lag plot.
gg_lag(us_private)
## Plot variable not specified, automatically selected `y = Employed`
# Autocorrelation function of Employed.
autoplot(ACF(us_private, Employed))
There is a clear upward trend in employment, with regular cycles where employment decreases due to economic recession. Seasonality is hard to see because of the structure of the data, but splitting each year into quarters would provide greater insight.
# Time plot of Bricks.
autoplot(bricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal plot.
gg_season(bricks)
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal subseries plot.
gg_subseries(bricks)
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Lag plot.
gg_lag(bricks)
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 20 rows containing missing values (gg_lag).
# Autocorrelation function of Bricks.
autoplot(ACF(bricks, Bricks))
There is clear seasonality: brick production peaks around Q3, with high production also in Q2 and Q4, before dropping off as production slows due to the winter months.
# Keep only the Hare column of pelt (the tsibble index is retained).
hare <- select(pelt, Hare)
# Time plot.
autoplot(hare)
## Plot variable not specified, automatically selected `.vars = Hare`
# NOTE(review): seasonal plot left disabled, presumably because the data are
# yearly and have no within-year season -- confirm.
# gg_season(hare, Hare)
# Subseries plot.
gg_subseries(hare)
## Plot variable not specified, automatically selected `y = Hare`
# Lag plot.
gg_lag(hare)
## Plot variable not specified, automatically selected `y = Hare`
# Autocorrelation function of Hare.
autoplot(ACF(hare, Hare))
This tsibble shows a peak in hare pelts around the 1860s; this could imply that more pelts were traded due to the need for fur during the American Civil War. Further, there appears to be a general increase in pelt trading every 5 years, followed by a drop. This could be due to population recovery or to the hunting of predatory animals.
# Restrict PBS to ATC2 code "H02" and keep the Month and Cost columns.
cost <- select(filter(PBS, ATC2 == "H02"), Month, Cost)
# Time plot.
autoplot(cost)
## Plot variable not specified, automatically selected `.vars = Cost`
# Seasonal plot.
gg_season(cost)
## Plot variable not specified, automatically selected `y = Cost`
# Seasonal subseries plot.
gg_subseries(cost)
## Plot variable not specified, automatically selected `y = Cost`
# NOTE(review): lag plot left disabled, presumably because `cost` holds more
# than one series -- confirm.
# gg_lag(cost)
# Autocorrelation function (variable chosen automatically).
autoplot(ACF(cost))
## Response variable not specified, automatically selected `var = Cost`
There is clear seasonality around the summer months for Concessional Co-payments, with March–June being the peak. Concessional safety net has seasonality around winter and peaks in January. The same is true of General safety net. However, General Co-payments appear to be more consistent.
# Print the us_gasoline tsibble to inspect its columns.
us_gasoline
## # A tsibble: 1,355 x 2 [1W]
## Week Barrels
## <week> <dbl>
## 1 1991 W06 6.62
## 2 1991 W07 6.43
## 3 1991 W08 6.58
## 4 1991 W09 7.22
## 5 1991 W10 6.88
## 6 1991 W11 6.95
## 7 1991 W12 7.33
## 8 1991 W13 6.78
## 9 1991 W14 7.50
## 10 1991 W15 6.92
## # ℹ 1,345 more rows
# Time plot of Barrels.
autoplot(us_gasoline)
## Plot variable not specified, automatically selected `.vars = Barrels`
# Seasonal plot.
gg_season(us_gasoline)
## Plot variable not specified, automatically selected `y = Barrels`
# Seasonal subseries plot.
gg_subseries(us_gasoline)
## Plot variable not specified, automatically selected `y = Barrels`
# Lag plot.
gg_lag(us_gasoline)
## Plot variable not specified, automatically selected `y = Barrels`
# Autocorrelation function (variable chosen automatically).
autoplot(ACF(us_gasoline))
## Response variable not specified, automatically selected `var = Barrels`
The US gasoline tsibble shows a steady trend of increasing gasoline production year on year, with a slight drop around the 2008 financial crisis. There appear to be large drops in production followed by sudden ramp-ups. This could be to keep the price of oil competitive by not producing too many barrels. The seasonal plot shows more barrels produced each year overall, indicative of an increased reliance on cars for a growing population.
# Print the aus_livestock tsibble to inspect its keys and columns.
aus_livestock
## # A tsibble: 29,364 x 4 [1M]
## # Key: Animal, State [54]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
## 7 1977 Jan Bulls, bullocks and steers Australian Capital Territory 1800
## 8 1977 Feb Bulls, bullocks and steers Australian Capital Territory 1900
## 9 1977 Mar Bulls, bullocks and steers Australian Capital Territory 2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory 2300
## # ℹ 29,354 more rows
# Pig counts for Victoria from January 1990 to December 1995.
# NOTE(review): the object name says 1972-2018 but the filter keeps 1990-1995;
# the name is left unchanged in case it is referenced elsewhere.
pigs_1972_2018 <- aus_livestock |>
  filter(
    Month >= yearmonth("1990 Jan") & Month <= yearmonth("1995 Dec"),
    Animal == "Pigs",
    State == "Victoria"
  )
# Autocorrelation function (variable chosen automatically).
autoplot(ACF(pigs_1972_2018))
## Response variable not specified, automatically selected `var = Count`
# Time plot.
autoplot(pigs_1972_2018)
## Plot variable not specified, automatically selected `.vars = Count`
The ACF plot shows a steady decay with each lag, with an increase at lag 12, which could indicate some seasonality in the data.
If a longer period of time were used, the ACF would still show a steady decay and might even show a jump around lag 24.
# GOOG prices from 2018 onwards, re-indexed by trading-day number so the
# series is regularly spaced, with `diff` holding the day-to-day change in the
# closing price.
dgoog <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE) |>
  mutate(diff = difference(Close))

# Name the variable explicitly. With no argument, ACF() fell back to `Date`
# and autoplot() to `Open`, so neither plot showed the differenced closing
# price that this block computes.
dgoog |> ACF(diff) |> autoplot()
dgoog |> autoplot(diff)
The tsibble was re-indexed on `trading_day` because the original dates are irregularly spaced; a regular index is needed for time series operations such as the ACF.
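Because the ACF decreases steadily over time, this implies that there is some trend present in this year. However, the stock market is unpredictable and open to many different global factors. I believe that if more than this one year were used, white noise might be identified more clearly. Note that this reading is based on an ACF whose variable was selected automatically rather than set to `diff`, so the conclusion should be checked against the ACF of the daily changes in closing price.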