# Load some relevant libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)
path_bike_orderlines <- "bike_orderlines.rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
# Which month has the highest bike sales?
colnames(bike_orderlines_tbl)
## [1] "order_date" "order_id" "order_line" "quantity"
## [5] "price" "total_price" "model" "category_1"
## [9] "category_2" "frame_material" "bikeshop_name" "city"
## [13] "state"
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
bike_orderlines_tbl <- bike_orderlines_tbl %>%
mutate(sales = price * quantity) # Replace 'price' and 'quantity' with correct column names
bike_sales_by_month <- bike_orderlines_tbl %>%
mutate(Month = month(order_date, label = TRUE, abbr = FALSE)) %>%
group_by(Month) %>%
summarise(Sales = sum(sales, na.rm = TRUE)) %>%
arrange(Month) %>%
mutate(Sales = scales::dollar(Sales))
print(bike_sales_by_month)
## # A tibble: 12 × 2
## Month Sales
## <ord> <chr>
## 1 January $4,089,460
## 2 February $5,343,295
## 3 March $7,282,280
## 4 April $8,386,170
## 5 May $7,935,055
## 6 June $7,813,105
## 7 July $7,602,005
## 8 August $5,346,125
## 9 September $5,556,055
## 10 October $4,394,300
## 11 November $4,169,755
## 12 December $3,114,725
#2. Download daily closing prices of ETF0050, 0052 and 0056 from TEJ database in AU library from 2008 to 2023.
#2.1 Following outputs are based on data from 2008-2020. You need to update the outputs based on data 2008-2023.
library(quantmod)
# For example, download the data for ETF0050 from 2008 to 2023
getSymbols("0050.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")
## Warning: 0050.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.
## [1] "0050.TW"
getSymbols("0052.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")
## Warning: 0052.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.
## [1] "0052.TW"
getSymbols("0056.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")
## Warning: 0056.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.
## [1] "0056.TW"
# Extract closing prices
ETF_0050 <- Cl(`0050.TW`)
ETF_0052 <- Cl(`0052.TW`)
ETF_0056 <- Cl(`0056.TW`)
# Combine them into one data frame
ETF_data <- data.frame(
date = index(ETF_0050),
`0050` = coredata(ETF_0050),
`0052` = coredata(ETF_0052),
`0056` = coredata(ETF_0056)
)
# Convert date to proper format
ETF_data$date <- as.Date(ETF_data$date)
# View the first few rows
head(ETF_data)
## date X0050.TW.Close X0052.TW.Close X0056.TW.Close
## 1 2008-01-02 60.01 40.14 25.70
## 2 2008-01-03 58.89 39.10 25.23
## 3 2008-01-04 59.01 38.64 25.51
## 4 2008-01-07 56.39 36.16 24.85
## 5 2008-01-08 56.98 35.82 25.38
## 6 2008-01-09 58.20 36.69 25.83