Project Summary

This R Markdown document compares the historical performance of the QQQ and TQQQ ETFs.

It also calculates summary statistics and visualizes trends.

# Load the required packages (install them first with install.packages() if needed)
library(ggplot2)
library(dplyr)
library(readr)
# Read the QQQ (NASDAQ-100 ETF) price history from CSV, then print it
qqq_data <- read_csv(file = "QQQHistoricalPrices.csv")
qqq_data
## # A tibble: 2,516 × 6
##    Date      Open  High   Low Close   Volume
##    <chr>    <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 12/31/19  212.  213.  211.  213. 18201590
##  2 12/30/19  214.  214.  211.  212. 21829910
##  3 12/27/19  215.  215.  213.  214. 18142240
##  4 12/26/19  212.  214.  212.  214. 17159311
##  5 12/24/19  212.  212.  211.  212.  7089020
##  6 12/23/19  212.  212.  212.  212. 23798670
##  7 12/20/19  212.  213.  211.  212. 27690930
##  8 12/19/19  210.  211.  210.  211. 17892061
##  9 12/18/19  210.  210.  209.  210. 15490830
## 10 12/17/19  210.  210.  209.  209. 16034650
## # ℹ 2,506 more rows
# Read the TQQQ (3x leveraged NASDAQ ETF) price history from CSV, then print it
tqqq_data <- read_csv(file = "TQQQHistoricalPrices.csv")
tqqq_data
## # A tibble: 2,489 × 6
##    Date      Open  High   Low Close   Volume
##    <chr>    <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 12/31/19  21.3  21.7  21.2  21.6 42212960
##  2 12/30/19  21.9  22.0  21.2  21.5 56569360
##  3 12/27/19  22.3  22.3  21.8  22.0 49935040
##  4 12/26/19  21.6  22.0  21.6  22.0 32387676
##  5 12/24/19  21.5  21.5  21.3  21.5 19949364
##  6 12/23/19  21.5  21.5  21.4  21.4 36277508
##  7 12/20/19  21.3  21.4  21.2  21.3 49841200
##  8 12/19/19  20.7  21.0  20.7  21.0 41775400
##  9 12/18/19  20.7  20.8  20.6  20.6 35854924
## 10 12/17/19  20.6  20.7  20.5  20.6 42341800
## # ℹ 2,479 more rows
# Clean the QQQ data with mutate():
# - Date:  parse the "mm/dd/yy" strings into Date values. The CSV stores
#          two-digit years (e.g. "12/31/19"), so the format must use %y;
#          %Y would read "19" as the literal year 0019.
# - Close: gsub(",", "", Close) strips any commas, then as.numeric()
#          converts the result to numeric.

qqq_data <- qqq_data %>%
  mutate(
    # Two-digit year -> %y (lower case)
    Date = as.Date(Date, format = "%m/%d/%y"),
    # Remove commas in 'Close' and convert the values to numeric
    Close = as.numeric(gsub(",", "", Close))
  )

qqq_data
## # A tibble: 2,516 × 6
##    Date        Open  High   Low Close   Volume
##    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 2019-12-31  212.  213.  211.  213. 18201590
##  2 2019-12-30  214.  214.  211.  212. 21829910
##  3 2019-12-27  215.  215.  213.  214. 18142240
##  4 2019-12-26  212.  214.  212.  214. 17159311
##  5 2019-12-24  212.  212.  211.  212.  7089020
##  6 2019-12-23  212.  212.  212.  212. 23798670
##  7 2019-12-20  212.  213.  211.  212. 27690930
##  8 2019-12-19  210.  211.  210.  211. 17892061
##  9 2019-12-18  210.  210.  209.  210. 15490830
## 10 2019-12-17  210.  210.  209.  209. 16034650
## # ℹ 2,506 more rows
# Clean the TQQQ data the same way as QQQ:
# - Date:  parse the "mm/dd/yy" strings into Date values. The CSV stores
#          two-digit years (e.g. "12/31/19"), so the format must use %y;
#          %Y would read "19" as the literal year 0019.
# - Close: strip any commas and convert the values to numeric.
tqqq_data <- tqqq_data %>%
  mutate(
    # Two-digit year -> %y (lower case)
    Date = as.Date(Date, format = "%m/%d/%y"),
    # Remove commas in 'Close' and convert the values to numeric
    Close = as.numeric(gsub(",", "", Close))
  )

tqqq_data
## # A tibble: 2,489 × 6
##    Date        Open  High   Low Close   Volume
##    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 2019-12-31  21.3  21.7  21.2  21.6 42212960
##  2 2019-12-30  21.9  22.0  21.2  21.5 56569360
##  3 2019-12-27  22.3  22.3  21.8  22.0 49935040
##  4 2019-12-26  21.6  22.0  21.6  22.0 32387676
##  5 2019-12-24  21.5  21.5  21.3  21.5 19949364
##  6 2019-12-23  21.5  21.5  21.4  21.4 36277508
##  7 2019-12-20  21.3  21.4  21.2  21.3 49841200
##  8 2019-12-19  20.7  21.0  20.7  21.0 41775400
##  9 2019-12-18  20.7  20.8  20.6  20.6 35854924
## 10 2019-12-17  20.6  20.7  20.5  20.6 42341800
## # ℹ 2,479 more rows
# Add a Percent_Change column to each data set: every Close is expressed as
# a percentage gain/loss relative to the first Close after sorting the rows
# by Date in ascending order.
qqq_data <- qqq_data %>%
  arrange(Date) %>%
  mutate(Percent_Change = 100 * (Close / first(Close) - 1))

tqqq_data <- tqqq_data %>%
  arrange(Date) %>%
  mutate(Percent_Change = 100 * (Close / first(Close) - 1))

# Show the first five rows of the QQQ data
head(qqq_data, 5)
## # A tibble: 5 × 7
##   Date        Open  High   Low Close   Volume Percent_Change
##   <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>          <dbl>
## 1 0010-01-04  46.3  46.5  46.3  46.4 62822801          0    
## 2 0010-01-05  46.4  46.5  46.2  46.4 62935594          0    
## 3 0010-01-06  46.4  46.6  46.1  46.1 96033000         -0.603
## 4 0010-01-07  46.2  46.3  45.9  46.2 77094031         -0.539
## 5 0010-01-08  46.0  46.6  45.9  46.6 88886602          0.280
# Show the first five rows of the TQQQ data
head(tqqq_data, 5)
## # A tibble: 5 × 7
##   Date        Open  High   Low Close    Volume Percent_Change
##   <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>          <dbl>
## 1 0010-02-11 0.407 0.435 0.406 0.433  3454080.          0    
## 2 0010-02-12 0.421 0.438 0.418 0.434  8597760.          0.393
## 3 0010-02-16 0.444 0.452 0.438 0.451  9615744.          4.28 
## 4 0010-02-17 0.457 0.459 0.450 0.459 19176575.          6.06 
## 5 0010-02-18 0.458 0.470 0.456 0.468 38866559.          8.07
# Summary statistics for the 'Close' column of each ETF.
# summarise() collapses each data set into a single row holding the mean,
# median, and standard deviation; na.rm = TRUE ignores any NAs.
# The results are stored in qqq_summary and tqqq_summary.

qqq_summary <- summarise(
  qqq_data,
  Mean = mean(Close, na.rm = TRUE),
  Median = median(Close, na.rm = TRUE),
  SD = sd(Close, na.rm = TRUE)
)

tqqq_summary <- summarise(
  tqqq_data,
  Mean = mean(Close, na.rm = TRUE),
  Median = median(Close, na.rm = TRUE),
  SD = sd(Close, na.rm = TRUE)
)

# Print the QQQ summary
qqq_summary
## # A tibble: 1 × 3
##    Mean Median    SD
##   <dbl>  <dbl> <dbl>
## 1  105.   102.  45.7
# Print the TQQQ summary
tqqq_summary
## # A tibble: 1 × 3
##    Mean Median    SD
##   <dbl>  <dbl> <dbl>
## 1  5.49   3.79  5.33
# Plot the closing prices of both ETFs on one chart with ggplot2.
# Colour is mapped inside aes() to a constant label so that ggplot2 draws a
# legend identifying each line; scale_color_manual() keeps the blue/red
# colours. `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.

ggplot() +
  geom_line(
    data = qqq_data,
    aes(x = Date, y = Close, color = "QQQ"),
    linewidth = 1
  ) +
  geom_line(
    data = tqqq_data,
    aes(x = Date, y = Close, color = "TQQQ"),
    linewidth = 1
  ) +
  scale_color_manual(name = "ETF", values = c(QQQ = "blue", TQQQ = "red")) +
  labs(title = "QQQ vs TQQQ Closing Prices", x = "Date", y = "Close Price") +
  theme_minimal()

# Plot the Percent_Change column of both ETFs on one chart.
# Colour is mapped inside aes() to a constant label so that ggplot2 draws a
# legend identifying each line; scale_color_manual() keeps the blue/red
# colours. `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot() +
  geom_line(
    data = qqq_data,
    aes(x = Date, y = Percent_Change, color = "QQQ"),
    linewidth = 1
  ) +
  geom_line(
    data = tqqq_data,
    aes(x = Date, y = Percent_Change, color = "TQQQ"),
    linewidth = 1
  ) +
  scale_color_manual(name = "ETF", values = c(QQQ = "blue", TQQQ = "red")) +
  labs(title = "Percentage Increase: QQQ vs TQQQ", x = "Date", y = "Percentage Change (%)") +
  theme_minimal()