It also calculates summary statistics and visualizes trends.
# Load the required packages (install them first with install.packages() if needed)
library(ggplot2)
library(dplyr)
library(readr)
# Load the QQQ (NASDAQ ETF) historical price table from its CSV file
qqq_data = readr::read_csv(file = "QQQHistoricalPrices.csv")
# Print the tibble to inspect the columns and their parsed types
qqq_data
## # A tibble: 2,516 × 6
## Date Open High Low Close Volume
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12/31/19 212. 213. 211. 213. 18201590
## 2 12/30/19 214. 214. 211. 212. 21829910
## 3 12/27/19 215. 215. 213. 214. 18142240
## 4 12/26/19 212. 214. 212. 214. 17159311
## 5 12/24/19 212. 212. 211. 212. 7089020
## 6 12/23/19 212. 212. 212. 212. 23798670
## 7 12/20/19 212. 213. 211. 212. 27690930
## 8 12/19/19 210. 211. 210. 211. 17892061
## 9 12/18/19 210. 210. 209. 210. 15490830
## 10 12/17/19 210. 210. 209. 209. 16034650
## # ℹ 2,506 more rows
# Load the TQQQ (3x leveraged NASDAQ ETF) historical price table from its CSV file
tqqq_data = readr::read_csv(file = "TQQQHistoricalPrices.csv")
# Print the tibble to inspect the columns and their parsed types
tqqq_data
## # A tibble: 2,489 × 6
## Date Open High Low Close Volume
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12/31/19 21.3 21.7 21.2 21.6 42212960
## 2 12/30/19 21.9 22.0 21.2 21.5 56569360
## 3 12/27/19 22.3 22.3 21.8 22.0 49935040
## 4 12/26/19 21.6 22.0 21.6 22.0 32387676
## 5 12/24/19 21.5 21.5 21.3 21.5 19949364
## 6 12/23/19 21.5 21.5 21.4 21.4 36277508
## 7 12/20/19 21.3 21.4 21.2 21.3 49841200
## 8 12/19/19 20.7 21.0 20.7 21.0 41775400
## 9 12/18/19 20.7 20.8 20.6 20.6 35854924
## 10 12/17/19 20.6 20.7 20.5 20.6 42341800
## # ℹ 2,479 more rows
# Clean both price tables with mutate():
#  - as.Date() parses the 'Date' strings. The CSV stores dates as m/d/yy with
#    a two-digit year (e.g. "12/31/19"), so the format must use %y. With %Y
#    the "19" is taken as the literal year 0019 instead of 2019.
#  - gsub(",", "", Close) removes any commas from 'Close' and as.numeric()
#    converts the result back to a number. read_csv() already parsed 'Close'
#    as <dbl> here, so this only matters if a file contains comma-formatted
#    prices.
qqq_data = qqq_data %>%
  mutate(
    # Parse the two-digit-year date strings into Date values
    Date = as.Date(Date, format = "%m/%d/%y"),
    # Remove commas in 'Close' and convert the values to numeric
    Close = as.numeric(gsub(",", "", Close))
  )
qqq_data
## # A tibble: 2,516 × 6
## Date Open High Low Close Volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019-12-31 212. 213. 211. 213. 18201590
## 2 2019-12-30 214. 214. 211. 212. 21829910
## 3 2019-12-27 215. 215. 213. 214. 18142240
## 4 2019-12-26 212. 214. 212. 214. 17159311
## 5 2019-12-24 212. 212. 211. 212. 7089020
## 6 2019-12-23 212. 212. 212. 212. 23798670
## 7 2019-12-20 212. 213. 211. 212. 27690930
## 8 2019-12-19 210. 211. 210. 211. 17892061
## 9 2019-12-18 210. 210. 209. 210. 15490830
## 10 2019-12-17 210. 210. 209. 209. 16034650
## # ℹ 2,506 more rows
# Apply the same cleaning to TQQQ
tqqq_data = tqqq_data %>%
  mutate(
    # Parse the two-digit-year date strings into Date values
    Date = as.Date(Date, format = "%m/%d/%y"),
    # Remove commas in 'Close' and convert the values to numeric
    Close = as.numeric(gsub(",", "", Close))
  )
tqqq_data
## # A tibble: 2,489 × 6
## Date Open High Low Close Volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019-12-31 21.3 21.7 21.2 21.6 42212960
## 2 2019-12-30 21.9 22.0 21.2 21.5 56569360
## 3 2019-12-27 22.3 22.3 21.8 22.0 49935040
## 4 2019-12-26 21.6 22.0 21.6 22.0 32387676
## 5 2019-12-24 21.5 21.5 21.3 21.5 19949364
## 6 2019-12-23 21.5 21.5 21.4 21.4 36277508
## 7 2019-12-20 21.3 21.4 21.2 21.3 49841200
## 8 2019-12-19 20.7 21.0 20.7 21.0 41775400
## 9 2019-12-18 20.7 20.8 20.6 20.6 35854924
## 10 2019-12-17 20.6 20.7 20.5 20.6 42341800
## # ℹ 2,479 more rows
# Add a percent-change column to both datasets.
# Rows are sorted by Date (oldest first) so that first(Close) is the earliest
# closing price; each row's Close is then expressed as a percentage change
# relative to that first value.
qqq_data = qqq_data %>%
  arrange(Date) %>%
  mutate(Percent_Change = (Close / first(Close) - 1) * 100)
tqqq_data = tqqq_data %>%
  arrange(Date) %>%
  mutate(Percent_Change = (Close / first(Close) - 1) * 100)
# Preview the data
head(qqq_data, 5)
## # A tibble: 5 × 7
## Date Open High Low Close Volume Percent_Change
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-04 46.3 46.5 46.3 46.4 62822801 0
## 2 2010-01-05 46.4 46.5 46.2 46.4 62935594 0
## 3 2010-01-06 46.4 46.6 46.1 46.1 96033000 -0.603
## 4 2010-01-07 46.2 46.3 45.9 46.2 77094031 -0.539
## 5 2010-01-08 46.0 46.6 45.9 46.6 88886602 0.280
head(tqqq_data, 5)
## # A tibble: 5 × 7
## Date Open High Low Close Volume Percent_Change
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-02-11 0.407 0.435 0.406 0.433 3454080. 0
## 2 2010-02-12 0.421 0.438 0.418 0.434 8597760. 0.393
## 3 2010-02-16 0.444 0.452 0.438 0.451 9615744. 4.28
## 4 2010-02-17 0.457 0.459 0.450 0.459 19176575. 6.06
## 5 2010-02-18 0.458 0.470 0.456 0.468 38866559. 8.07
# Summary statistics for the 'Close' column of each dataset: mean, median
# and standard deviation, each computed inside summarise().
# na.rm = TRUE drops missing values before each statistic is computed.
# The one-row results are stored in qqq_summary and tqqq_summary.
qqq_summary = summarise(
  qqq_data,
  Mean = mean(Close, na.rm = TRUE),      # average closing price
  Median = median(Close, na.rm = TRUE),  # middle closing price
  SD = sd(Close, na.rm = TRUE)           # spread of closing prices
)
tqqq_summary = summarise(
  tqqq_data,
  Mean = mean(Close, na.rm = TRUE),      # average closing price
  Median = median(Close, na.rm = TRUE),  # middle closing price
  SD = sd(Close, na.rm = TRUE)           # spread of closing prices
)
# Show both summary tables
qqq_summary
## # A tibble: 1 × 3
## Mean Median SD
## <dbl> <dbl> <dbl>
## 1 105. 102. 45.7
tqqq_summary
## # A tibble: 1 × 3
## Mean Median SD
## <dbl> <dbl> <dbl>
## 1 5.49 3.79 5.33
# Plot both closing-price series on shared axes with ggplot2.
# The colour is mapped inside aes() to a constant label so that ggplot draws a
# legend identifying each ETF; scale_color_manual() pins the labels to the
# blue/red colours. `linewidth` is used for line thickness because `size` is
# deprecated for line geoms since ggplot2 3.4.0.
ggplot() +
  geom_line(
    data = qqq_data,
    aes(x = Date, y = Close, color = "QQQ"),
    linewidth = 1
  ) +
  geom_line(
    data = tqqq_data,
    aes(x = Date, y = Close, color = "TQQQ"),
    linewidth = 1
  ) +
  scale_color_manual(name = "ETF", values = c(QQQ = "blue", TQQQ = "red")) +
  labs(title = "QQQ vs TQQQ Closing Prices", x = "Date", y = "Close Price") +
  theme_minimal()

# Plot the percent change of each ETF relative to its first closing price.
# The colour is mapped inside aes() to a constant label so that ggplot draws a
# legend identifying each ETF; scale_color_manual() pins the labels to the
# blue/red colours. `linewidth` is used for line thickness because `size` is
# deprecated for line geoms since ggplot2 3.4.0.
ggplot() +
  geom_line(
    data = qqq_data,
    aes(x = Date, y = Percent_Change, color = "QQQ"),
    linewidth = 1
  ) +
  geom_line(
    data = tqqq_data,
    aes(x = Date, y = Percent_Change, color = "TQQQ"),
    linewidth = 1
  ) +
  scale_color_manual(name = "ETF", values = c(QQQ = "blue", TQQQ = "red")) +
  labs(
    title = "Percentage Increase: QQQ vs TQQQ",
    x = "Date",
    y = "Percentage Change (%)"
  ) +
  theme_minimal()
