library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)
path_bike_orderlines <- "bike_orderlines (2).rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
monthly_sales <- bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(month = order_date %>% floor_date("month") %>% month(label = TRUE)) %>%
group_by(month) %>%
summarise(sales = sum(total_price)) %>%
ungroup() %>%
mutate(sales = scales::dollar(sales)) %>%
arrange(month) %>%
rename(
"Month" = month,
"Sales" = sales
)
print(monthly_sales)
## # A tibble: 12 × 2
## Month Sales
## <ord> <chr>
## 1 Jan $4,089,460
## 2 Feb $5,343,295
## 3 Mar $7,282,280
## 4 Apr $8,386,170
## 5 May $7,935,055
## 6 Jun $7,813,105
## 7 Jul $7,602,005
## 8 Aug $5,346,125
## 9 Sep $5,556,055
## 10 Oct $4,394,300
## 11 Nov $4,169,755
## 12 Dec $3,114,725
midterm_data <- read_delim("MidtermDataTEJ.csv", delim = "\t") # For tab-delimited files
## Rows: 11817 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, CoName, Close
## dbl (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 1: Summarize duplicates to ensure uniqueness
midterm_data_clean <- midterm_data %>%
mutate(
Close = as.numeric(str_trim(Close)), # Clean and convert Close to numeric
Date = ymd(Date) # Convert Date to proper format
) %>%
group_by(Date, CO_ID) %>% # Group by Date and CO_ID
summarise(Close = mean(Close, na.rm = TRUE), .groups = "drop") # Resolve duplicates by taking the mean
# Step 2: Pivot data to wide format
midterm_data_wide <- midterm_data_clean %>%
pivot_wider(names_from = CO_ID, values_from = Close) %>% # Pivot to wide format
arrange(Date) # Arrange by Date
# Print the resulting dataset
print(midterm_data_wide)
## # A tibble: 3,939 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-02 34.7 22.7 10.2
## 2 2008-01-03 34.1 21.8 10.1
## 3 2008-01-04 34.1 21.7 10.1
## 4 2008-01-07 32.6 20.2 9.95
## 5 2008-01-08 32.9 20.2 10.1
## 6 2008-01-09 33.5 20.3 10.2
## 7 2008-01-10 33.4 20.3 10.2
## 8 2008-01-11 33.3 20.4 10.3
## 9 2008-01-14 33.9 20.3 10.3
## 10 2008-01-15 35.2 20.6 10.4
## # ℹ 3,929 more rows
midterm_data_wide %>%
filter(year(Date) == 2023)
## # A tibble: 239 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2023-01-03 104. 86.2 21.8
## 2 2023-01-04 104. 85.8 21.8
## 3 2023-01-05 104. 86.5 21.8
## 4 2023-01-06 105. 87.0 21.9
## 5 2023-01-09 109. 90.5 22.2
## 6 2023-01-10 109. 91.2 22.3
## 7 2023-01-11 109. 91.0 22.3
## 8 2023-01-12 109. 91.0 22.2
## 9 2023-01-13 110. 92.4 22.2
## 10 2023-01-16 111. 93.0 22.2
## # ℹ 229 more rows
daily_returns <- midterm_data_wide %>%
mutate(across(where(is.numeric),
~round((. - lag(.)) / lag(.), 8),
.names = "ret_{col}")) %>%
select(Date, starts_with("ret"))
print(daily_returns, n = 10)
## # A tibble: 3,939 × 4
## Date `ret_0050 ` `ret_0052 ` `ret_0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-02 NA NA NA
## 2 2008-01-03 -0.0166 -0.0380 -0.0136
## 3 2008-01-04 0 -0.00510 0.00197
## 4 2008-01-07 -0.0457 -0.0692 -0.0157
## 5 2008-01-08 0.00975 0 0.0124
## 6 2008-01-09 0.0184 0.00551 0.00789
## 7 2008-01-10 -0.00172 0 0.00743
## 8 2008-01-11 -0.00432 0.00246 0.00194
## 9 2008-01-14 0.0199 -0.00246 0.00388
## 10 2008-01-15 0.0383 0.0110 0.0124
## # ℹ 3,929 more rows
weekly_prices <- midterm_data_wide %>%
mutate(week = floor_date(Date, unit = "week")) %>%
group_by(week) %>%
slice_tail(n = 1) %>%
ungroup() %>%
select(-Date) %>%
rename(Date = week) %>%
select(Date, everything()) # This puts Date first
# Show 2008 period
weekly_prices %>%
filter(Date >= as.Date("2008-01-04"),
Date <= as.Date("2008-02-15"))
## # A tibble: 5 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-06 33.3 20.4 10.3
## 2 2008-01-13 33.8 20.0 9.95
## 3 2008-01-20 32.6 19.8 9.46
## 4 2008-01-27 32.6 19.7 9.16
## 5 2008-02-10 33.1 20.1 9.34
# Show 2020 period
weekly_prices %>%
filter(Date >= as.Date("2020-03-27"),
Date <= as.Date("2020-04-24"))
## # A tibble: 4 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2020-03-29 66.4 47.8 17.2
## 2 2020-04-05 69.4 49.8 18.1
## 3 2020-04-12 72.8 53.6 18.6
## 4 2020-04-19 70.8 52.5 18.5
# Step 1: Convert daily prices to weekly (using last price of each week)
weekly_prices <- midterm_data_wide %>%
mutate(week = floor_date(Date, unit = "week")) %>% # Create week grouping
group_by(week) %>%
slice_tail(n = 1) %>% # Take the last price of each week
ungroup() %>%
select(-Date) %>%
rename(Date = week)
# Step 2: Calculate weekly returns
weekly_returns <- weekly_prices %>%
mutate(across(
where(is.numeric),
~(. - lag(.)) / lag(.), # Calculate returns
.names = "{.col}"
)) %>%
select(Date, everything()) %>%
# Round to 8 decimal places as shown in the image
mutate(across(where(is.numeric), ~round(., 8)))
# To display specific time periods as shown in the image:
# For 2008 period
weekly_returns %>%
filter(Date >= as.Date("2008-01-04"),
Date <= as.Date("2008-02-15")) %>%
print(n = 10)
## # A tibble: 5 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-06 -0.0245 -0.0618 0.0137
## 2 2008-01-13 0.0147 -0.0194 -0.0291
## 3 2008-01-20 -0.0359 -0.0106 -0.0499
## 4 2008-01-27 0 -0.00676 -0.0315
## 5 2008-02-10 0.0160 0.0210 0.0195
# For 2020 period
weekly_returns %>%
filter(Date >= as.Date("2020-03-27"),
Date <= as.Date("2020-04-24")) %>%
print(n = 10)
## # A tibble: 4 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2020-03-29 -0.00914 -0.0143 0.00699
## 2 2020-04-05 0.0448 0.0417 0.0493
## 3 2020-04-12 0.0498 0.0752 0.0299
## 4 2020-04-19 -0.0282 -0.0203 -0.00641
monthly_prices <- midterm_data_wide %>%
mutate(month = ceiling_date(Date, unit = "month") - days(1)) %>% # Assign end-of-month dates
group_by(month) %>%
slice_tail(n = 1) %>% # Get the last row for each month
ungroup() %>%
select(-Date) %>% # Drop the original Date column
rename(Date = month) %>% # Rename 'month' to 'Date'
select(Date, everything()) # Put Date as the first column
# Show 2008 period
monthly_prices %>%
filter(Date >= as.Date("2008-01-31"),
Date <= as.Date("2008-06-30"))
## # A tibble: 6 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-31 31.7 19.5 9.03
## 2 2008-02-29 35.1 21.4 9.83
## 3 2008-03-31 34.9 21.1 10.1
## 4 2008-04-30 36.9 22.4 10.4
## 5 2008-05-31 35.9 21.9 10.2
## 6 2008-06-30 31.7 19.4 9.10
# Show 2020 period
monthly_prices %>%
filter(Date >= as.Date("2019-11-30"),
Date <= as.Date("2020-04-30"))
## # A tibble: 6 × 4
## Date `0050 ` `0052 ` `0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2019-11-30 77.5 54.3 19.5
## 2 2019-12-31 82.2 58.8 20.3
## 3 2020-01-31 78.7 56.7 19.6
## 4 2020-02-29 77.6 55.3 19.6
## 5 2020-03-31 66.6 47.5 17.2
## 6 2020-04-30 74.8 54.3 19.3
# Compute monthly returns
monthly_returns <- monthly_prices %>%
mutate(across(-Date, ~ (. - lag(.)) / lag(.), .names = "return_{.col}")) %>%
select(Date, starts_with("return"))
# Show 2008 period
monthly_returns %>%
filter(Date >= as.Date("2008-02-29"),
Date <= as.Date("2008-07-31"))
## # A tibble: 6 × 4
## Date `return_0050 ` `return_0052 ` `return_0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2008-02-29 0.107 0.0975 0.0889
## 2 2008-03-31 -0.00657 -0.0156 0.0303
## 3 2008-04-30 0.0571 0.0635 0.0314
## 4 2008-05-31 -0.0266 -0.0236 -0.0217
## 5 2008-06-30 -0.116 -0.113 -0.110
## 6 2008-07-31 -0.0473 -0.0621 -0.0624
# Show 2020 period
monthly_returns %>%
filter(Date >= as.Date("2019-11-30"),
Date <= as.Date("2020-04-30"))
## # A tibble: 6 × 4
## Date `return_0050 ` `return_0052 ` `return_0056 `
## <date> <dbl> <dbl> <dbl>
## 1 2019-11-30 0.0155 0.0191 0.00616
## 2 2019-12-31 0.0596 0.0825 0.0440
## 3 2020-01-31 -0.0421 -0.0346 -0.0362
## 4 2020-02-29 -0.0145 -0.0258 -0.000714
## 5 2020-03-31 -0.141 -0.140 -0.122
## 6 2020-04-30 0.123 0.143 0.121