# Download the tourism workbook from the fpp3 site into a temporary file.
# mode = "wb" transfers the .xlsx as binary so it is not corrupted on Windows.
tourism_file <- tempfile(fileext = ".xlsx")
download.file(
  "http://OTexts.com/fpp3/extrafiles/tourism.xlsx",
  tourism_file,
  mode = "wb"
)
# Read the workbook that was just downloaded (previously a separate local
# copy, "tourism-4.xlsx", was read and the download was never used).
tourism_4 <- read_excel(tourism_file)
## Your turn: create a tsibble format of the data below, and rename it as my_tourism:
# A1.Answer:
# Convert Quarter to a yearquarter index, then build a tsibble keyed by
# Region, State and Purpose.
my_tourism <- as_tsibble(
  mutate(tourism_4, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
my_tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# A2.Answer
#1:
# Open the data in the viewer, then print its first six rows.
view(my_tourism)
head(my_tourism, n = 6L)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
#This is a quarterly data and starts from year 1998
# 2.
# Cross-tabulate the number of rows for every State / Purpose combination.
table(my_tourism[["State"]], my_tourism[["Purpose"]])
##
## Business Holiday Other Visiting
## ACT 80 80 80 80
## New South Wales 1040 1040 1040 1040
## Northern Territory 560 560 560 560
## Queensland 960 960 960 960
## South Australia 960 960 960 960
## Tasmania 400 400 400 400
## Victoria 1680 1680 1680 1680
## Western Australia 400 400 400 400
# There are 8 states and 4 purpose of trip categories
# 3.
# Drop the tsibble structure and group the rows by Region and Purpose.
GroupedData <- group_by(as_tibble(my_tourism), Region, Purpose)
GroupedData
## # A tibble: 24,320 × 5
## # Groups: Region, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# 4.
# Mean of Trips within each Region / Purpose group.
AverageTrips <- summarise(GroupedData, Trips = mean(Trips))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
AverageTrips
## # A tibble: 304 × 3
## # Groups: Region [76]
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Adelaide Business 156.
## 2 Adelaide Holiday 157.
## 3 Adelaide Other 56.6
## 4 Adelaide Visiting 205.
## 5 Adelaide Hills Business 2.66
## 6 Adelaide Hills Holiday 10.5
## 7 Adelaide Hills Other 1.40
## 8 Adelaide Hills Visiting 14.2
## 9 Alice Springs Business 14.6
## 10 Alice Springs Holiday 31.9
## # ℹ 294 more rows
# 5.
# Remove the remaining grouping so the maximum is taken over all rows, then
# keep the row(s) whose average Trips equals that overall maximum.
OvernightTripsMax <- filter(ungroup(AverageTrips), Trips == max(Trips))
OvernightTripsMax
## # A tibble: 1 × 3
## Region Purpose Trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
#Sydney (purpose: Visiting) has the highest average overnight trips, at 747.27
# A3.Answer:
# Total Trips per State, computed on a plain tibble and returned ungrouped.
state_tourism <- ungroup(
  summarise(
    group_by(as_tibble(my_tourism), State),
    Trips = sum(Trips)
  )
)
state_tourism
## # A tibble: 8 × 2
## State Trips
## <chr> <dbl>
## 1 ACT 41007.
## 2 New South Wales 557367.
## 3 Northern Territory 28614.
## 4 Queensland 386643.
## 5 South Australia 118151.
## 6 Tasmania 54137.
## 7 Victoria 390463.
## 8 Western Australia 147820.
# Time plot of the Bricks series in aus_production.
aus_production %>% autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Time plot of the Lynx series in pelt.
pelt %>% autoplot(Lynx)
# Time plot of the Close series in gafa_stock.
gafa_stock %>% autoplot(Close)
# Time plot of the Demand series in vic_elec.
vic_elec %>% autoplot(Demand)
# B1.Answer:
#1. Bricks from aus_production
#There is a significant rise in bricks till 1980 and later gradually decreases.
#2.Lynx from pelt
#It follows a pattern of rising and falling over years
#3.Close from gafa_stock
#The closing price rises rapidly until 2018 and then starts falling from late 2018 into 2019.
#4.Demand from vic_elec
#There are fluctuations in consumption as demand changes during specific seasons.
# Keep only the rows of tourism for the Snowy Mountains region.
snowy <- filter(tourism, Region == "Snowy Mountains")
snowy
## # A tsibble: 320 x 5 [1Q]
## # Key: Region, State, Purpose [4]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Snowy Mountains New South Wales Business 15.9
## 2 1998 Q2 Snowy Mountains New South Wales Business 20.3
## 3 1998 Q3 Snowy Mountains New South Wales Business 36.2
## 4 1998 Q4 Snowy Mountains New South Wales Business 9.15
## 5 1999 Q1 Snowy Mountains New South Wales Business 20.9
## 6 1999 Q2 Snowy Mountains New South Wales Business 33.3
## 7 1999 Q3 Snowy Mountains New South Wales Business 21.6
## 8 1999 Q4 Snowy Mountains New South Wales Business 8.45
## 9 2000 Q1 Snowy Mountains New South Wales Business 20.1
## 10 2000 Q2 Snowy Mountains New South Wales Business 15.2
## # ℹ 310 more rows
# Question: Take the snowy data and sum all trips across State and Purpose for each quarter using the summarise() command. Then use autoplot(), gg_season() and gg_subseries() to explore the quarterly trips of the snowy data. What do you observe? What type of pattern do you see? Write your comments in the answer below:
# C2.Answer:
# Time, seasonal and subseries plots of Trips for the keyed snowy data.
snowy %>% autoplot(Trips)
snowy %>% gg_season(Trips)
snowy %>% gg_subseries(Trips)
# Add up Trips over all keys within each Quarter.
summarise_snowy <- summarise(index_by(snowy, Quarter), Trips = sum(Trips))
summarise_snowy
## # A tsibble: 80 x 2 [1Q]
## Quarter Trips
## <qtr> <dbl>
## 1 1998 Q1 141.
## 2 1998 Q2 167.
## 3 1998 Q3 373.
## 4 1998 Q4 126.
## 5 1999 Q1 183.
## 6 1999 Q2 163.
## 7 1999 Q3 279.
## 8 1999 Q4 123.
## 9 2000 Q1 161.
## 10 2000 Q2 171.
## # ℹ 70 more rows
# The same three plots for the quarterly totals.
summarise_snowy %>% autoplot(Trips)
summarise_snowy %>% gg_season(Trips)
summarise_snowy %>% gg_subseries(Trips)
#The three plots clearly show a strong seasonal pattern, with Q3 being the peak season, while Q1, Q2, and Q4 remain relatively stable.
# D1.Answer:
# Bricks from aus_production: lag plot followed by the ACF plot.
aus_production %>% gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).
autoplot(ACF(aus_production, Bricks))
# Lynx from pelt: lag plot followed by the ACF plot.
# Uses the Lynx column of the pelt tsibble, the same series plotted earlier
# with autoplot(pelt, Lynx). The previous version converted the built-in
# `lynx` ts object instead, which is a separate dataset, and overwrote that
# name in the workspace.
# NOTE(review): the ts method of as_tsibble() does not appear to take an
# `index` argument, so `index = year` had no effect there - confirm.
gg_lag(pelt, Lynx)
pelt %>%
  ACF(Lynx) %>%
  autoplot()
# Demand from vic_elec: lag plot followed by the ACF plot.
vic_elec %>% gg_lag(Demand)
autoplot(ACF(vic_elec, Demand))
#After analyzing the three graphs, a clear seasonal pattern emerges, with trends that are stable and predictable.
# D2.Answer:
#Yes, we can identify both seasonality and cyclicity, but there is no long-term trend. We can see that the production is influenced by seasons, and this pattern is predictable
# D2.Answer:
#A clear cyclic pattern was observed, with no long-term trends.
# D2.Answer:
#Primarily depends on the seasons and is also highly predictable.
# Rows of gafa_stock for the GOOG symbol dated in 2018 or later.
goog <- filter(gafa_stock, Symbol == "GOOG", year(Date) >= 2018)
# E1. Answer:
# Re-index the Google series by trading day and compute daily price changes.
dgoog <- goog %>%
  # The dates have gaps, so number the rows to get consecutive trading days.
  mutate(trading_day = row_number()) %>%
  # Make trading_day the (regular) index of the tsibble.
  update_tsibble(index = trading_day, regular = TRUE) %>%
  # First difference of Close, i.e. the change in price between trading days.
  mutate(diff = difference(Close))
# E2.Answer:
# Time plot, ACF and histogram of the differenced closing price.
gg_tsdisplay(dgoog, diff)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#After analyzing the graph, it doesn't look like white noise