library(fpp3)
library(tsibble)
library(dplyr)
Data_624_lab01
- Explore four time series:
# Quarterly brick production pulled out of aus_production; a tsibble keeps
# its index column, so Quarter travels along with Bricks.
bricks <- select(aus_production, Bricks)
bricks
# A tsibble: 218 x 2 [1Q]
Bricks Quarter
<dbl> <qtr>
1 189 1956 Q1
2 204 1956 Q2
3 208 1956 Q3
4 197 1956 Q4
5 187 1957 Q1
6 214 1957 Q2
7 227 1957 Q3
8 222 1957 Q4
9 199 1958 Q1
10 229 1958 Q2
# ℹ 208 more rows
# Lynx pelt counts from the pelt data set (the Year index is retained).
lynx <- select(pelt, Lynx)
lynx
# A tsibble: 91 x 2 [1Y]
Lynx Year
<dbl> <dbl>
1 30090 1845
2 45150 1846
3 49150 1847
4 39520 1848
5 21230 1849
6 8420 1850
7 5560 1851
8 5080 1852
9 10170 1853
10 19600 1854
# ℹ 81 more rows
# Closing prices from gafa_stock; the Date index and Symbol key are retained.
close <- select(gafa_stock, Close)
close
# A tsibble: 5,032 x 3 [!]
# Key: Symbol [4]
Close Date Symbol
<dbl> <date> <chr>
1 79.0 2014-01-02 AAPL
2 77.3 2014-01-03 AAPL
3 77.7 2014-01-06 AAPL
4 77.1 2014-01-07 AAPL
5 77.6 2014-01-08 AAPL
6 76.6 2014-01-09 AAPL
7 76.1 2014-01-10 AAPL
8 76.5 2014-01-13 AAPL
9 78.1 2014-01-14 AAPL
10 79.6 2014-01-15 AAPL
# ℹ 5,022 more rows
# Electricity demand from vic_elec (the Time index is retained).
demand <- select(vic_elec, Demand)
demand
# A tsibble: 52,608 x 2 [30m] <Australia/Melbourne>
Demand Time
<dbl> <dttm>
1 4383. 2012-01-01 00:00:00
2 4263. 2012-01-01 00:30:00
3 4049. 2012-01-01 01:00:00
4 3878. 2012-01-01 01:30:00
5 4036. 2012-01-01 02:00:00
6 3866. 2012-01-01 02:30:00
7 3694. 2012-01-01 03:00:00
8 3562. 2012-01-01 03:30:00
9 3433. 2012-01-01 04:00:00
10 3359. 2012-01-01 04:30:00
# ℹ 52,598 more rows
# Open the help page of each data set to read off its time interval.
help("aus_production") # The time interval is Quarter.
help("pelt") # The time interval is Year.
help("gafa_stock") # The time interval is Irregular Days.
help("vic_elec") # The time interval is Half Hour (every 30 minutes).
# autoplot
# Each plot is its own statement. The measured column is named explicitly so
# autoplot() does not have to pick it automatically for every series.
bricks |>
  autoplot(Bricks) +
  geom_point()
lynx |>
  autoplot(Lynx) +
  geom_point()
close |>
  autoplot(Close) +
  geom_point()
# Modify the axis labels and title
demand |>
  autoplot(Demand) +
  geom_point() +
  labs(
    title = "Half-hourly electricity demand for Victoria, Australia",
    x = "Frequency of demand (Every 30 Minutes)",
    y = "Demand of electricity"
  )
- Use filter for peak closing price day
# For every stock symbol, keep the trading day(s) on which Close reached its
# maximum. The result is still grouped by Symbol.
peak_close <- filter(group_by(gafa_stock, Symbol), Close == max(Close))
peak_close
# A tsibble: 4 x 8 [!]
# Key: Symbol [4]
# Groups: Symbol [4]
Symbol Date Open High Low Close Adj_Close Volume
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
- tute1.csv
#a- Load data
getwd()
[1] "C:/Users/month/OneDrive/Documents/Cuny MSDS/Data 624_Predictive Analytics"
# Read the file through its full path instead of calling setwd(), so the
# working directory reported above is left untouched and the read does not
# depend on a directory change made earlier in the chunk.
tute1 <- file.path("C:/Users/month/Downloads", "tute1.csv") |>
  readr::read_csv() |>
  print()
# A tibble: 100 × 4
Quarter Sales AdBudget GDP
<date> <dbl> <dbl> <dbl>
1 1981-03-01 1020. 659. 252.
2 1981-06-01 889. 589 291.
3 1981-09-01 795 512. 291.
4 1981-12-01 1004. 614. 292.
5 1982-03-01 1058. 647. 279.
6 1982-06-01 944. 602 254
7 1982-09-01 778. 531. 296.
8 1982-12-01 932. 608. 272.
9 1983-03-01 996. 638. 260.
10 1983-06-01 908. 582. 280.
# ℹ 90 more rows
# b. Convert the data to a time series: turn the date column into a
# year-quarter value and use it as the tsibble index.
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
print(mytimeseries)
# A tsibble: 100 x 4 [1Q]
Quarter Sales AdBudget GDP
<qtr> <dbl> <dbl> <dbl>
1 1981 Q1 1020. 659. 252.
2 1981 Q2 889. 589 291.
3 1981 Q3 795 512. 291.
4 1981 Q4 1004. 614. 292.
5 1982 Q1 1058. 647. 279.
6 1982 Q2 944. 602 254
7 1982 Q3 778. 531. 296.
8 1982 Q4 932. 608. 272.
9 1983 Q1 996. 638. 260.
10 1983 Q2 908. 582. 280.
# ℹ 90 more rows
# Construct time series plots: reshape to long form, then draw one panel per
# variable, each with its own y-axis.
ggplot(
  pivot_longer(mytimeseries, cols = -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
Comment: When facet_grid() is not included, all the time series are plotted on the same graph and share a single y-axis, which can make the plot hard to interpret.
- “USgas” exploration
#a install.packages("USgas")
library(USgas)
# b. Create a tsibble from us_total: `year` is the index and `state` is the
# key, giving one series per state.
us_total_tsibble <- as_tsibble(us_total, key = state, index = year)
print(us_total_tsibble)
# A tsibble: 1,266 x 3 [1Y]
# Key: state [53]
year state y
<int> <chr> <int>
1 1997 Alabama 324158
2 1998 Alabama 329134
3 1999 Alabama 337270
4 2000 Alabama 353614
5 2001 Alabama 332693
6 2002 Alabama 379343
7 2003 Alabama 350345
8 2004 Alabama 382367
9 2005 Alabama 353156
10 2006 Alabama 391093
# ℹ 1,256 more rows
# c. Plot the annual gas consumption for the six New England states, one
# panel per state with independent y-axes.
us_total_NE <- filter(
  us_total_tsibble,
  state %in% c(
    "Maine", "Vermont", "New Hampshire", "Massachusetts",
    "Connecticut", "Rhode Island"
  )
)
autoplot(us_total_NE, y) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  labs(
    title = "Annual natural gas consumption by state for the New England area",
    y = "Gas consumption in a million cubic feet"
  ) +
  facet_grid(rows = vars(state), scales = "free_y") +
  # Applied after theme_minimal() so the strip-label settings are not reset.
  theme(strip.text = element_text(size = 7.5, angle = 45, hjust = 1))
- “tourism.xlsx” exploration
#a Load tourism.xlsx
getwd()
[1] "C:/Users/month/OneDrive/Documents/Cuny MSDS/Data 624_Predictive Analytics"
# Read the workbook through its full path instead of calling setwd(), so the
# working directory reported above is left untouched and the read does not
# depend on a directory change made earlier in the chunk.
tourism <- file.path("C:/Users/month/Downloads", "tourism.xlsx") |>
  readxl::read_excel() |>
  print()
# A tibble: 24,320 × 5
Quarter Region State Purpose Trips
<chr> <chr> <chr> <chr> <dbl>
1 1998-01-01 Adelaide South Australia Business 135.
2 1998-04-01 Adelaide South Australia Business 110.
3 1998-07-01 Adelaide South Australia Business 166.
4 1998-10-01 Adelaide South Australia Business 127.
5 1999-01-01 Adelaide South Australia Business 137.
6 1999-04-01 Adelaide South Australia Business 200.
7 1999-07-01 Adelaide South Australia Business 169.
8 1999-10-01 Adelaide South Australia Business 134.
9 2000-01-01 Adelaide South Australia Business 154.
10 2000-04-01 Adelaide South Australia Business 169.
# ℹ 24,310 more rows
# b. Create a tsibble identical to 'tourism': Quarter becomes a year-quarter
# index, and Region, State and Purpose together identify each series.
tourism_tsibble <- tourism |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)
print(tourism_tsibble)
# A tsibble: 24,320 x 5 [1Q]
# Key: Region, State, Purpose [304]
Quarter Region State Purpose Trips
<qtr> <chr> <chr> <chr> <dbl>
1 1998 Q1 Adelaide South Australia Business 135.
2 1998 Q2 Adelaide South Australia Business 110.
3 1998 Q3 Adelaide South Australia Business 166.
4 1998 Q4 Adelaide South Australia Business 127.
5 1999 Q1 Adelaide South Australia Business 137.
6 1999 Q2 Adelaide South Australia Business 200.
7 1999 Q3 Adelaide South Australia Business 169.
8 1999 Q4 Adelaide South Australia Business 134.
9 2000 Q1 Adelaide South Australia Business 154.
10 2000 Q2 Adelaide South Australia Business 169.
# ℹ 24,310 more rows
#c Combination of 'Region' and 'Purpose' with the maximum number of overnight trips on average
# summarise() on a tsibble always keeps the time index, so the mean would be
# taken within each Quarter rather than over time. Dropping to a plain tibble
# first gives one average per Region/Purpose pair. `.groups = "drop"` removes
# the leftover Region grouping, so filter() compares all pairs against the
# single overall maximum instead of returning one row per region.
average_trips <- tourism_tsibble |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(Average = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  filter(Average == max(Average)) |>
  print()
# NOTE: the table printed below was produced by the earlier per-quarter,
# per-region version of this chunk; re-run the chunk to refresh it.
# A tsibble: 76 x 4 [1Q]
# Key: Region, Purpose [76]
# Groups: Region [76]
Region Purpose Quarter Average
<chr> <chr> <qtr> <dbl>
1 Adelaide Visiting 2017 Q1 270.
2 Adelaide Hills Visiting 2002 Q4 81.1
3 Alice Springs Holiday 1998 Q3 76.5
4 Australia's Coral Coast Holiday 2014 Q3 198.
5 Australia's Golden Outback Business 2017 Q3 174.
6 Australia's North West Business 2016 Q3 297.
7 Australia's South West Holiday 2016 Q1 612.
8 Ballarat Visiting 2004 Q1 103.
9 Barkly Holiday 1998 Q3 37.9
10 Barossa Holiday 2006 Q1 51.0
# ℹ 66 more rows
#d Create new tsibble which combines the Purposes and the Regions, and just have total trips by State
# Grouping a tsibble by State and summarising already yields a tsibble keyed
# by State with Quarter as the index. The former as_tibble(index = State)
# step is removed: it converted the result to a plain tibble, and `index` is
# not an argument of as_tibble().
trips_by_state <- tourism_tsibble |>
  group_by(State) |>
  summarise(Total_Trips = sum(Trips)) |>
  print()
# NOTE: the header and table printed below come from the earlier version that
# converted the result to a tibble; re-run the chunk to refresh them.
# A tibble: 640 × 3
State Quarter Total_Trips
<chr> <qtr> <dbl>
1 ACT 1998 Q1 551.
2 ACT 1998 Q2 416.
3 ACT 1998 Q3 436.
4 ACT 1998 Q4 450.
5 ACT 1999 Q1 379.
6 ACT 1999 Q2 558.
7 ACT 1999 Q3 449.
8 ACT 1999 Q4 595.
9 ACT 2000 Q1 600.
10 ACT 2000 Q2 557.
# ℹ 630 more rows
- Use of graphics functions
# Time plots. us_employment and PBS each hold many series, so they are
# filtered down to the series named in the plot title before plotting.
us_employment |>
  filter(Title == "Total Private") |>
  autoplot(Employed) +
  ggtitle("Total Private Employed")
autoplot(aus_production, Bricks) + ggtitle("Bricks Production")
autoplot(pelt, Hare) + ggtitle("Hare Pelts")
PBS |>
  filter(ATC2 == "H02") |>
  autoplot(Cost) +
  ggtitle("H02 Cost")
#autoplot(us_gasoline, Barrels) + ggtitle("Gasoline Barrels")

# Seasonal plots.
us_employment |>
  filter(Title == "Total Private") |>
  gg_season(Employed) +
  ggtitle("Seasonality in Total Private Employed")
gg_season(aus_production, Bricks) + ggtitle("Seasonality in Bricks Production")
#gg_season(pelt, Hare) + ggtitle("Seasonality in Hare Pelts")
#gg_season(PBS, cost) + ggtitle("Seasonality in H02 Cost")
gg_season(us_gasoline, Barrels) + ggtitle("Seasonality in Gasoline Barrels")

# Subseries plots.
#gg_subseries(us_employment, Employed) + ggtitle("Subseries Plot for Total Private Employed")
gg_subseries(aus_production, Bricks) + ggtitle("Subseries Plot for Bricks Production")
gg_subseries(pelt, Hare) + ggtitle("Subseries Plot for Hare Pelts")
#gg_subseries(PBS, Cost) + ggtitle("Subseries Plot for H02 Cost")
#gg_subseries(us_gasoline, Barrels) + ggtitle("Subseries Plot for Gasoline Barrels")

# Lag plots.
#gg_lag(us_employment, Employed) + ggtitle("Lag Plot for Total Private Employed")
gg_lag(aus_production, Bricks) + ggtitle("Lag Plot for Bricks Production")
gg_lag(pelt, Hare) + ggtitle("Lag Plot for Hare Pelts")
#gg_lag(PBS, cost) + ggtitle("Lag Plot for H02 Cost")
gg_lag(us_gasoline, Barrels) + ggtitle("Lag Plot for Gasoline Barrels")

# Autocorrelation (left disabled, as in the original).
#ACF(us_employment, Employed) + ggtitle("ACF for Total Private Employed")
#ACF(aus_production, Bricks) + ggtitle("ACF for Bricks Production")
#ACF(pelt, Hare) + ggtitle("ACF for Hare Pelts")
#ACF(PBS, Cost) + ggtitle("ACF for H02 Cost")
#ACF(us_gasoline, Barrels) + ggtitle("ACF for Gasoline Barrels")
#Note: the ACF plots do not give useful information to exploit.
Interpretation:
-Seasonality:
The autoplot of the pelt trading records shows seasonality and cyclic behaviour, with each cycle spanning roughly 20 years.
-Cyclicity:
Overall, the autoplot of quarterly brick production shows two big cycles: the period before 1980, when production follows an increasing trend, and the period after 1980, when production follows a declining trend.
-Trend:
The autoplot of us_gasoline shows an increasing trend in US finished motor gasoline product supplied over the years.