library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate 1.8.0 ✔ feasts 0.2.2
## ✔ tsibbledata 0.4.0 ✔ fable 0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(openxlsx)
library(ggplot2)
library(tidyr)
library(timeSeries)
## Loading required package: timeDate
library(readr)
library(dplyr)
#Question 1
tute1 <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Step 1: Turn the data set into a tsibble
tute2 <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)
#Step 2: Construct a time series plot for each of the three series
tuteg1 <- ggplot(data = tute2, aes(x = Quarter, y = Sales, group = 1)) +
  geom_line()
tuteg1

tuteg2 <- ggplot(data = tute2, aes(x = Quarter, y = AdBudget, group = 1)) +
  geom_line()
tuteg2

tuteg3 <- ggplot(data = tute2, aes(x = Quarter, y = GDP, group = 1)) +
  geom_line()
tuteg3
#Step 3: Construct a time series plot for all three time series together
supertute <- tute2 %>%
  pivot_longer(cols = c('Sales', 'AdBudget', 'GDP'),
               names_to = 'measure',
               values_to = 'money')
ggplot(data = supertute) +
  geom_line(mapping = aes(x = Quarter, y = money)) +
  facet_grid(~ measure)
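#Sales, AdBudget and GDP sit on quite different scales, so a variant of the
#combined plot with free y-axes makes each panel easier to read (a minimal
#sketch of the same data, not run above):
ggplot(data = supertute) +
  geom_line(mapping = aes(x = Quarter, y = money)) +
  facet_grid(measure ~ ., scales = "free_y")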

#Question 2
library(USgas)
head(us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
ngus <- us_total %>%
  as_tsibble(index = year, key = state)
ngus %>%
  dplyr::filter(state %in% c("Maine", "Vermont", "New Hampshire",
                             "Massachusetts", "Connecticut", "Rhode Island")) %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = y`
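#The message above appears because autoplot() picked the plotted variable
#automatically; naming the column silences it (a minimal sketch of the same plot):
ngus %>%
  dplyr::filter(state %in% c("Maine", "Vermont", "New Hampshire",
                             "Massachusetts", "Connecticut", "Rhode Island")) %>%
  autoplot(y)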

#Question 3
#Round-trip the Quarter index through Date and back to yearquarter
tourism <- tourism %>%
  mutate(quarter = as.Date(Quarter)) %>%
  select(-Quarter)
tourism2 <- tourism %>%
  mutate(Quarter = yearquarter(quarter)) %>%
  as_tsibble(index = Quarter, key = c("Region", "State", "Purpose")) %>%
  select(-quarter)
#Average trips by Region and Purpose, with the overall maximum average attached
tourism3 <- tourism2 %>%
  group_by(Region, Purpose) %>%
  summarise(Avg = mean(Trips)) %>%
  ungroup() %>%
  mutate(Max = max(Avg))
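#To see which Region and Purpose combination attains that maximum average,
#keep the rows where Avg equals Max (a minimal sketch, not run above):
tourism3 %>%
  dplyr::filter(Avg == Max)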
#Total trips by State, with the time dimension collapsed
tourism4 <- tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()
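#Summarising inside the tsibble instead keeps Quarter as the index, so the
#result is a new tsibble keyed by State (a minimal sketch; tourism5 is a
#name introduced here for illustration):
tourism5 <- tourism2 %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()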
#Question 4
aus_arrivals
## # A tsibble: 508 x 3 [1Q]
## # Key: Origin [4]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 Japan 14763
## 2 1981 Q2 Japan 9321
## 3 1981 Q3 Japan 10166
## 4 1981 Q4 Japan 19509
## 5 1982 Q1 Japan 17117
## 6 1982 Q2 Japan 10617
## 7 1982 Q3 Japan 11737
## 8 1982 Q4 Japan 20961
## 9 1983 Q1 Japan 20671
## 10 1983 Q2 Japan 12235
## # … with 498 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(aus_arrivals)
## Plot variable not specified, automatically selected `.vars = Arrivals`

gg_season(aus_arrivals, y = Arrivals)

gg_subseries(aus_arrivals, y = Arrivals)

#In the autoplot of aus_arrivals, New Zealand shows a massive spike just before 1990, whereas arrivals from the other origins increase steadily along their usual pattern.
#Japan plateaued from 1995 to 2000 and then began declining, while arrivals from the other origins continued to increase over time.
#Question 5
set.seed(123)
aus_retail
## # A tsibble: 64,532 x 5 [1M]
## # Key: State, Industry [152]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Apr 4.4
## 2 Australian Capital Territory Cafes, restaurants and… A33498… 1982 May 3.4
## 3 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jun 3.6
## 4 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jul 4
## 5 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Aug 3.6
## 6 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Sep 4.2
## 7 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Oct 4.8
## 8 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Nov 5.4
## 9 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Dec 6.9
## 10 Australian Capital Territory Cafes, restaurants and… A33498… 1983 Jan 3.8
## # … with 64,522 more rows, and abbreviated variable names ¹`Series ID`,
## # ²Turnover
## # ℹ Use `print(n = ...)` to see more rows
australian <- aus_retail
colnames(australian)
## [1] "State" "Industry" "Series ID" "Month" "Turnover"
names(australian)[3] <- "ID" # renamed because the space in the original `Series ID` name was breaking the filter call
myseries <- australian %>%
  dplyr::filter(ID == sample(australian$ID, 1))
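#For reference, the rename is avoidable: backticks let dplyr address the
#original `Series ID` column directly (a minimal sketch; myseries_alt is a
#name introduced here for illustration):
myseries_alt <- aus_retail %>%
  dplyr::filter(`Series ID` == sample(aus_retail$`Series ID`, 1))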
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`

gg_season(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

gg_subseries(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

gg_lag(myseries)
## Plot variable not specified, automatically selected `y = Turnover`

#Trend - There is a positive, roughly linear trend in the data from the early 1980s through to just before 2020.
#Seasonal - The regular spikes suggest a seasonal peak each year (or every two years; it is hard to tell from this plot alone), each followed by an immediate downturn.
#Cyclic - The rises and falls resemble a business cycle of roughly two years, which lines up with the seasonal pattern noted above.
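#A correlogram helps separate the seasonal pattern from the cyclic one:
#seasonal behaviour produces spikes at fixed lags (multiples of 12 months
#here), while cyclic behaviour does not (a minimal sketch, output omitted):
myseries %>%
  ACF(Turnover, lag_max = 48) %>%
  autoplot()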
#Question 6
#Calculating the mean and standard deviation of the FB closing stock price
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # … with 5,022 more rows
## # ℹ Use `print(n = ...)` to see more rows
colnames(gafa_stock)
## [1] "Symbol" "Date" "Open" "High" "Low" "Close"
## [7] "Adj_Close" "Volume"
names(gafa_stock)[1] <- "symbol"
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: symbol [4]
## symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # … with 5,022 more rows
## # ℹ Use `print(n = ...)` to see more rows
fbclose <- as.data.frame(gafa_stock) %>%
  dplyr::filter(symbol == "FB") %>%
  select(symbol, Date, Close)
fbcheck <- pull(fbclose, Close)
fb_mean <- mean(fbcheck, na.rm = TRUE)
fb_mean
## [1] 120.4625
fb_median <- median(fbcheck)
fb_median
## [1] 117.675
fb_sd <- sd(fbcheck, na.rm = TRUE)
fb_sd
## [1] 41.32364
fb_k <- kurtosis(fbcheck, na.rm = TRUE) # timeDate's kurtosis() reports excess kurtosis by default
fb_k
## [1] -1.166207
## attr(,"method")
## [1] "excess"
#Calculating the mean, standard deviation, kurtosis, and skewness of the first difference of the FB closing stock price
library(moments)
##
## Attaching package: 'moments'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
fb_diff <- diff(fbcheck)
fb_meann <- mean(fb_diff, na.rm = TRUE)
fb_meann
## [1] 0.06076372
fb_sdd <- sd(fb_diff, na.rm = TRUE)
fb_sdd
## [1] 2.414555
fb_kk <- kurtosis(fb_diff, na.rm = TRUE)
fb_kk
## [1] 74.02921
fb_sck <- skewness(fb_diff, na.rm = TRUE)
fb_sck
## [1] -3.973192
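#Daily log returns are a common alternative to raw first differences for a
#stock price; the same summary statistics apply (a minimal sketch, output
#omitted; fb_ret is a name introduced here for illustration):
fb_ret <- diff(log(fbcheck))
mean(fb_ret, na.rm = TRUE)
sd(fb_ret, na.rm = TRUE)
kurtosis(fb_ret, na.rm = TRUE)
skewness(fb_ret, na.rm = TRUE)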
#Calculating the mean, standard deviation, kurtosis, and skewness of the FB closing stock price by hand
meanhand <- sum(fbcheck)/length(fbcheck)
meanhand
## [1] 120.4625
sdhand <- sqrt(sum((fbcheck-mean(fbcheck))^2/(length(fbcheck)-1)))
sdhand
## [1] 41.32364
#Raw (non-excess) kurtosis; subtracting 3 gives approximately the excess value reported above
kurthand <- (sum((fbcheck-mean(fbcheck))^4))/length(fbcheck)/(sum((fbcheck-mean(fbcheck))^2/length(fbcheck))^2)
kurthand
## [1] 1.836712
skewhand <- (3*(meanhand-fb_median))/sdhand
skewhand
## [1] 0.2023634
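#The hand skewness above is Pearson's second skewness coefficient,
#3*(mean - median)/sd, which differs from the moment-based definition used
#by moments::skewness(). The moment version by hand, for comparison (a
#minimal sketch; skewmoment is a name introduced here for illustration):
skewmoment <- (sum((fbcheck - mean(fbcheck))^3)/length(fbcheck)) /
  (sum((fbcheck - mean(fbcheck))^2)/length(fbcheck))^(3/2)
skewmoment # should match moments::skewness(fbcheck)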
#Question 7
LULU <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\LULU.csv")
## Rows: 252 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
LULU
## # A tibble: 252 × 7
## Date Open High Low Close `Adj Close` Volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-09-03 388. 391. 385. 388. 388. 712300
## 2 2021-09-07 395. 395. 383. 386. 386. 1080300
## 3 2021-09-08 385. 386. 376. 381. 381. 2878300
## 4 2021-09-09 434. 434. 420. 421. 421. 7789600
## 5 2021-09-10 424 430. 417. 426. 426. 2150600
## 6 2021-09-13 428. 428. 419. 425. 425. 1244600
## 7 2021-09-14 427. 428. 419. 420. 420. 853700
## 8 2021-09-15 421. 421. 414. 419. 419. 818700
## 9 2021-09-16 417. 430. 417. 426. 426. 978600
## 10 2021-09-17 428. 433. 424. 427. 427. 1546500
## # … with 242 more rows
## # ℹ Use `print(n = ...)` to see more rows
Lulu <- LULU %>%
  select(-(Open:Close)) %>%
  select(-Volume)
Lululu <- Lulu %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = "Date", key = "Adj Close")
Lululu
## # A tsibble: 252 x 2 [1D]
## # Key: Adj Close [250]
## Date `Adj Close`
## <date> <dbl>
## 1 2022-05-24 258.
## 2 2022-05-25 261.
## 3 2022-07-01 263.
## 4 2022-05-19 271.
## 5 2022-06-16 271.
## 6 2022-05-18 273.
## 7 2022-06-30 273.
## 8 2022-05-23 273.
## 9 2022-05-20 274.
## 10 2022-06-21 276.
## # … with 242 more rows
## # ℹ Use `print(n = ...)` to see more rows
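#The rows above print out of date order because `Adj Close` was used as the
#key, and a tsibble sorts by key before index. Indexing on Date alone keeps
#the series chronological (a minimal sketch; Lululu2 is a name introduced
#here for illustration):
Lululu2 <- Lulu %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = Date)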
Luluplot <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30")) %>%
  ggplot(aes(x = Date, y = `Adj Close`)) +
  geom_line()
Luluplot

#Finding the mean and standard deviation for each month of the Lululemon stock
LuluJAN <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-01-31"))
LuluFEB <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-02-01") & Date <= as.Date("2022-02-28"))
LuluMAR <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-03-01") & Date <= as.Date("2022-03-31"))
LuluAPR <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-04-01") & Date <= as.Date("2022-04-30"))
LuluMAY <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-05-01") & Date <= as.Date("2022-05-31"))
LuluJUN <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30"))
LuluJUL <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-07-01") & Date <= as.Date("2022-07-31"))
LuluAUG <- Lululu %>%
  dplyr::filter(Date >= as.Date("2022-08-01") & Date <= as.Date("2022-08-31"))
#January - Mean
LuJAN_Vec <- pull(LuluJAN, "Adj Close")
LuJAN_M <- mean(LuJAN_Vec)
LuJAN_M
## [1] 337.821
#February - Mean
LuFEB_Vec <- pull(LuluFEB, "Adj Close")
LuFEB_M <- mean(LuFEB_Vec)
LuFEB_M
## [1] 322.9658
#March - Mean (the original pulled from LuluFEB here, which repeated the February value)
LuMAR_Vec <- pull(LuluMAR, "Adj Close")
LuMAR_M <- mean(LuMAR_Vec)
LuMAR_M
#April - Mean
LuAPR_Vec <- pull(LuluAPR, "Adj Close")
LuAPR_M <- mean(LuAPR_Vec)
LuAPR_M
## [1] 374.6035
#May - Mean
LuMAY_Vec <- pull(LuluMAY, "Adj Close")
LuMAY_M <- mean(LuMAY_Vec)
LuMAY_M
## [1] 301.1181
#June - Mean
LuJUN_Vec <- pull(LuluJUN, "Adj Close")
LuJUN_M <- mean(LuJUN_Vec)
LuJUN_M
## [1] 288.2257
#July - Mean
LuJUL_Vec <- pull(LuluJUL, "Adj Close")
LuJUL_M <- mean(LuJUL_Vec)
LuJUL_M
## [1] 290.465
#August - Mean
LuAUG_Vec <- pull(LuluAUG, "Adj Close")
LuAUG_M <- mean(LuAUG_Vec)
LuAUG_M
## [1] 317.0404
#January - Standard Deviation
LuJAN_sd <- sd(LuJAN_Vec)
LuJAN_sd
## [1] 24.51494
#February - Standard Deviation
LuFEB_sd <- sd(LuFEB_Vec)
LuFEB_sd
## [1] 14.64515
#March - Standard Deviation
LuMAR_sd <- sd(LuMAR_Vec)
LuMAR_sd
#April - Standard Deviation
LuAPR_sd <- sd(LuAPR_Vec)
LuAPR_sd
## [1] 14.0462
#May - Standard Deviation
LuMAY_sd <- sd(LuMAY_Vec)
LuMAY_sd
## [1] 31.66502
#June - Standard Deviation
LuJUN_sd <- sd(LuJUN_Vec)
LuJUN_sd
## [1] 11.50608
#July - Standard Deviation
LuJUL_sd <- sd(LuJUL_Vec)
LuJUL_sd
## [1] 12.15012
#August - Standard Deviation
LuAUG_sd <- sd(LuAUG_Vec)
LuAUG_sd
## [1] 8.871572
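#The eight per-month blocks above can be collapsed into a single grouped
#summary with lubridate::month() (a minimal sketch, output omitted):
Lululu %>%
  as_tibble() %>%
  dplyr::filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-08-31")) %>%
  group_by(Month = lubridate::month(Date, label = TRUE)) %>%
  summarise(Mean = mean(`Adj Close`), SD = sd(`Adj Close`))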