library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate 1.8.0 ✔ feasts 0.2.2
## ✔ tsibbledata 0.4.0 ✔ fable 0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(openxlsx)
library(ggplot2)
library(tidyr)
library(timeSeries)
## Loading required package: timeDate
library(readr)
library(dplyr)
#Question 1
tute1 <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Step 1: Turn the data set into a tsibble
tute2 <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)
#Step 2: Construct a time series plot for each of the three series
tuteg1 <- ggplot(data = tute2, aes(x = Quarter, y = Sales, group = 1)) +
  geom_line()
tuteg1

tuteg2 <- ggplot(data = tute2, aes(x = Quarter, y = AdBudget, group = 1)) +
  geom_line()
tuteg2

tuteg3 <- ggplot(data = tute2, aes(x = Quarter, y = GDP, group = 1)) +
  geom_line()
tuteg3
#Step 3: Construct a time series plot for all three time series together
supertute <- tute2 %>%
  pivot_longer(cols = c('Sales', 'AdBudget', 'GDP'),
               names_to = 'measure',
               values_to = 'money')
ggplot(data = supertute) +
  geom_line(mapping = aes(x = Quarter, y = money)) +
  facet_grid(~ measure)
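#Sales, AdBudget and GDP sit on quite different scales, so a variant of the
#combined plot with free y-axes makes each panel easier to read (a minimal
#sketch of the same data, not run above):
ggplot(data = supertute) +
  geom_line(mapping = aes(x = Quarter, y = money)) +
  facet_grid(measure ~ ., scales = "free_y")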

#Question 2
library(USgas)
head(us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
ngus <- us_total %>%
  as_tsibble(index = year, key = state)
ngus %>%
  dplyr::filter(state %in% c("Maine", "Vermont", "New Hampshire",
                             "Massachusetts", "Connecticut", "Rhode Island")) %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = y`
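#The message above appears because autoplot() picked the plotted variable
#automatically; naming the column silences it (a minimal sketch of the same plot):
ngus %>%
  dplyr::filter(state %in% c("Maine", "Vermont", "New Hampshire",
                             "Massachusetts", "Connecticut", "Rhode Island")) %>%
  autoplot(y)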

#Question 3
#Round-trip the Quarter index through Date and back to yearquarter
tourism <- tourism %>%
  mutate(quarter = as.Date(Quarter)) %>%
  select(-Quarter)
tourism2 <- tourism %>%
  mutate(Quarter = yearquarter(quarter)) %>%
  as_tsibble(index = Quarter, key = c("Region", "State", "Purpose")) %>%
  select(-quarter)
#Average trips by Region and Purpose, with the overall maximum average attached
tourism3 <- tourism2 %>%
  group_by(Region, Purpose) %>%
  summarise(Avg = mean(Trips)) %>%
  ungroup() %>%
  mutate(Max = max(Avg))
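#To see which Region and Purpose combination attains that maximum average,
#keep the rows where Avg equals Max (a minimal sketch, not run above):
tourism3 %>%
  dplyr::filter(Avg == Max)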
#Total trips by State, with the time dimension collapsed
tourism4 <- tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()
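#Summarising inside the tsibble instead keeps Quarter as the index, so the
#result is a new tsibble keyed by State (a minimal sketch; tourism5 is a
#name introduced here for illustration):
tourism5 <- tourism2 %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()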
#Question 4
aus_arrivals
## # A tsibble: 508 x 3 [1Q]
## # Key: Origin [4]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 Japan 14763
## 2 1981 Q2 Japan 9321
## 3 1981 Q3 Japan 10166
## 4 1981 Q4 Japan 19509
## 5 1982 Q1 Japan 17117
## 6 1982 Q2 Japan 10617
## 7 1982 Q3 Japan 11737
## 8 1982 Q4 Japan 20961
## 9 1983 Q1 Japan 20671
## 10 1983 Q2 Japan 12235
## # … with 498 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(aus_arrivals)
## Plot variable not specified, automatically selected `.vars = Arrivals`

gg_season(aus_arrivals, y = Arrivals)

gg_subseries(aus_arrivals, y = Arrivals)

#In the autoplot of aus_arrivals, New Zealand shows a massive spike just before 1990, whereas arrivals from the other origins increase steadily along their usual pattern.
#Japan plateaued from 1995 to 2000 and then began declining, while arrivals from the other origins continued to increase over time.
#Question 5
set.seed(123)
aus_retail
## # A tsibble: 64,532 x 5 [1M]
## # Key: State, Industry [152]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Apr 4.4
## 2 Australian Capital Territory Cafes, restaurants and… A33498… 1982 May 3.4
## 3 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jun 3.6
## 4 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jul 4
## 5 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Aug 3.6
## 6 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Sep 4.2
## 7 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Oct 4.8
## 8 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Nov 5.4
## 9 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Dec 6.9
## 10 Australian Capital Territory Cafes, restaurants and… A33498… 1983 Jan 3.8
## # … with 64,522 more rows, and abbreviated variable names ¹`Series ID`,
## # ²Turnover
## # ℹ Use `print(n = ...)` to see more rows
australian <- aus_retail
colnames(australian)
## [1] "State" "Industry" "Series ID" "Month" "Turnover"
names(australian)[3] <- "ID" # renamed because the space in the original `Series ID` name was breaking the filter call
myseries <- australian %>%
  dplyr::filter(ID == sample(australian$ID, 1))
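#For reference, the rename is avoidable: backticks let dplyr address the
#original `Series ID` column directly (a minimal sketch; myseries_alt is a
#name introduced here for illustration):
myseries_alt <- aus_retail %>%
  dplyr::filter(`Series ID` == sample(aus_retail$`Series ID`, 1))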
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`

gg_season(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

gg_subseries(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

gg_lag(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

#Trend - There is a positive, roughly linear trend in the data from the early 1980s through to just before 2020.
#Seasonal - The regular spikes suggest a seasonal peak each year (or every two years; it is hard to tell from this plot alone), each followed by an immediate downturn.
#Cyclic - The rises and falls resemble a business cycle of roughly two years, which lines up with the seasonal pattern noted above.
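#A correlogram helps separate the seasonal pattern from the cyclic one:
#seasonal behaviour produces spikes at fixed lags (multiples of 12 months
#here), while cyclic behaviour does not (a minimal sketch, output omitted):
myseries %>%
  ACF(Turnover, lag_max = 48) %>%
  autoplot()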
#Question 6
#Calculating the mean and standard deviation of the FB closing stock price
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # … with 5,022 more rows
## # ℹ Use `print(n = ...)` to see more rows
colnames(gafa_stock)
## [1] "Symbol" "Date" "Open" "High" "Low" "Close"
## [7] "Adj_Close" "Volume"
names(gafa_stock)[1] <- "symbol"
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: symbol [4]
## symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # … with 5,022 more rows
## # ℹ Use `print(n = ...)` to see more rows
fbclose <- as.data.frame(gafa_stock) %>%
  dplyr::filter(symbol == "FB") %>%
  select(symbol, Date, Close)
fbcheck <- pull(fbclose, Close)
fb_mean <- mean(fbcheck, na.rm = TRUE)
fb_mean
## [1] 120.4625
fb_median <- median(fbcheck)
fb_median
## [1] 117.675
fb_sd <- sd(fbcheck, na.rm = TRUE)
fb_sd
## [1] 41.32364
fb_k <- kurtosis(fbcheck, na.rm = TRUE) # timeDate's kurtosis() reports excess kurtosis by default
fb_k
## [1] -1.166207
## attr(,"method")
## [1] "excess"
#Calculating the mean, standard deviation, kurtosis, and skewness of the first difference of the FB closing stock price
library(moments)
##
## Attaching package: 'moments'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
fb_diff <- diff(fbcheck)
fb_meann <- mean(fb_diff, na.rm = TRUE)
fb_meann
## [1] 0.06076372
fb_sdd <- sd(fb_diff, na.rm = TRUE)
fb_sdd
## [1] 2.414555
fb_kk <- kurtosis(fb_diff, na.rm = TRUE)
fb_kk
## [1] 74.02921
fb_sck <- skewness(fb_diff, na.rm = TRUE)
fb_sck
## [1] -3.973192
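#Daily log returns are a common alternative to raw first differences for a
#stock price; the same summary statistics apply (a minimal sketch, output
#omitted; fb_ret is a name introduced here for illustration):
fb_ret <- diff(log(fbcheck))
mean(fb_ret, na.rm = TRUE)
sd(fb_ret, na.rm = TRUE)
kurtosis(fb_ret, na.rm = TRUE)
skewness(fb_ret, na.rm = TRUE)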
#Calculating the mean, standard deviation, kurtosis, and skewness of the FB closing stock price by hand
meanhand <- sum(fbcheck)/length(fbcheck)
meanhand
## [1] 120.4625
sdhand <- sqrt(sum((fbcheck-mean(fbcheck))^2/(length(fbcheck)-1)))
sdhand
## [1] 41.32364
#Raw (non-excess) kurtosis; subtracting 3 gives approximately the excess value reported above
kurthand <- (sum((fbcheck-mean(fbcheck))^4))/length(fbcheck)/(sum((fbcheck-mean(fbcheck))^2/length(fbcheck))^2)
kurthand
## [1] 1.836712
skewhand <- (3*(meanhand-fb_median))/sdhand
skewhand
## [1] 0.2023634
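#The hand skewness above is Pearson's second skewness coefficient,
#3*(mean - median)/sd, which differs from the moment-based definition used
#by moments::skewness(). The moment version by hand, for comparison (a
#minimal sketch; skewmoment is a name introduced here for illustration):
skewmoment <- (sum((fbcheck - mean(fbcheck))^3)/length(fbcheck)) /
  (sum((fbcheck - mean(fbcheck))^2)/length(fbcheck))^(3/2)
skewmoment # should match moments::skewness(fbcheck)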
#Question 7
LULU <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\LULU.csv")
## Rows: 252 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
LULU
## # A tibble: 252 × 7
## Date Open High Low Close `Adj Close` Volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-09-03 388. 391. 385. 388. 388. 712300
## 2 2021-09-07 395. 395. 383. 386. 386. 1080300
## 3 2021-09-08 385. 386. 376. 381. 381. 2878300
## 4 2021-09-09 434. 434. 420. 421. 421. 7789600
## 5 2021-09-10 424 430. 417. 426. 426. 2150600
## 6 2021-09-13 428. 428. 419. 425. 425. 1244600
## 7 2021-09-14 427. 428. 419. 420. 420. 853700
## 8 2021-09-15 421. 421. 414. 419. 419. 818700
## 9 2021-09-16 417. 430. 417. 426. 426. 978600
## 10 2021-09-17 428. 433. 424. 427. 427. 1546500
## # … with 242 more rows
## # ℹ Use `print(n = ...)` to see more rows
Lulu <- LULU %>%
  select(-(Open:Close)) %>%
  select(-Volume)
Lululu <- Lulu %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = "Date", key = "Adj Close")
Lululu
## # A tsibble: 252 x 2 [1D]
## # Key: Adj Close [250]
## Date `Adj Close`
## <date> <dbl>
## 1 2022-05-24 258.
## 2 2022-05-25 261.
## 3 2022-07-01 263.
## 4 2022-05-19 271.
## 5 2022-06-16 271.
## 6 2022-05-18 273.
## 7 2022-06-30 273.
## 8 2022-05-23 273.
## 9 2022-05-20 274.
## 10 2022-06-21 276.
## # … with 242 more rows
## # ℹ Use `print(n = ...)` to see more rows
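#The rows above print out of date order because `Adj Close` was used as the
#key, and a tsibble sorts by key before index. Indexing on Date alone keeps
#the series chronological (a minimal sketch; Lululu2 is a name introduced
#here for illustration):
Lululu2 <- Lulu %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = Date)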
Luluplot <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30")) %>%
  ggplot(aes(x = Date, y = `Adj Close`)) +
  geom_line()
Luluplot

#Finding the mean and standard deviation for each month of the Lululemon stock
LuluJAN <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-01-31"))
LuluFEB <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-02-01") & Date <= as.Date("2022-02-28"))
LuluMAR <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-03-01") & Date <= as.Date("2022-03-31"))
LuluAPR <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-04-01") & Date <= as.Date("2022-04-30"))
LuluMAY <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-05-01") & Date <= as.Date("2022-05-31"))
LuluJUN <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30"))
LuluJUL <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-07-01") & Date <= as.Date("2022-07-31"))
LuluAUG <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-08-01") & Date <= as.Date("2022-08-31"))
#January - Mean
LuJAN_Vec <- pull(LuluJAN, "Adj Close")
LuJAN_M <- mean(LuJAN_Vec)
LuJAN_M
## [1] 337.821
#February - Mean
LuFEB_Vec <- pull(LuluFEB, "Adj Close")
LuFEB_M <- mean(LuFEB_Vec)
LuFEB_M
## [1] 322.9658
#March - Mean (the original pulled from LuluFEB here, which repeated the February value)
LuMAR_Vec <- pull(LuluMAR, "Adj Close")
LuMAR_M <- mean(LuMAR_Vec)
LuMAR_M
#April - Mean
LuAPR_Vec <- pull(LuluAPR, "Adj Close")
LuAPR_M <- mean(LuAPR_Vec)
LuAPR_M
## [1] 374.6035
#May - Mean
LuMAY_Vec <- pull(LuluMAY, "Adj Close")
LuMAY_M <- mean(LuMAY_Vec)
LuMAY_M
## [1] 301.1181
#June - Mean
LuJUN_Vec <- pull(LuluJUN, "Adj Close")
LuJUN_M <- mean(LuJUN_Vec)
LuJUN_M
## [1] 288.2257
#July - Mean
LuJUL_Vec <- pull(LuluJUL, "Adj Close")
LuJUL_M <- mean(LuJUL_Vec)
LuJUL_M
## [1] 290.465
#August - Mean
LuAUG_Vec <- pull(LuluAUG, "Adj Close")
LuAUG_M <- mean(LuAUG_Vec)
LuAUG_M
## [1] 317.0404
#January - Standard Deviation
LuJAN_sd <- sd(LuJAN_Vec)
LuJAN_sd
## [1] 24.51494
#February - Standard Deviation
LuFEB_sd <- sd(LuFEB_Vec)
LuFEB_sd
## [1] 14.64515
#March - Standard Deviation
LuMAR_sd <- sd(LuMAR_Vec)
LuMAR_sd
#April - Standard Deviation
LuAPR_sd <- sd(LuAPR_Vec)
LuAPR_sd
## [1] 14.0462
#May - Standard Deviation
LuMAY_sd <- sd(LuMAY_Vec)
LuMAY_sd
## [1] 31.66502
#June - Standard Deviation
LuJUN_sd <- sd(LuJUN_Vec)
LuJUN_sd
## [1] 11.50608
#July - Standard Deviation
LuJUL_sd <- sd(LuJUL_Vec)
LuJUL_sd
## [1] 12.15012
#August - Standard Deviation
LuAUG_sd <- sd(LuAUG_Vec)
LuAUG_sd
## [1] 8.871572
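#The eight per-month blocks above can be collapsed into a single grouped
#summary with lubridate::month() (a minimal sketch, output omitted):
Lululu %>%
  as_tibble() %>%
  dplyr::filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-08-31")) %>%
  group_by(Month = lubridate::month(Date, label = TRUE)) %>%
  summarise(Mean = mean(`Adj Close`), SD = sd(`Adj Close`))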