library(httr)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(lubridate)

NOTE: If you are going to run this R Markdown file, please create your own API key by going to https://massive.com and clicking the “create API key” button.

Loading Data from a stock API called Polygon.io/Massive.com

# My API key is set in a hidden code chunk so that it is not leaked on RPubs.
# If you have the actual .Rmd file you will be able to see it, so please replace it with your own key before running.

# A few symbols to test:
# "AAPL" is Apple
# "NVDA" is Nvidia
# "AMZN" is Amazon
# It's important to note that it does not work for all symbols because the JSON it returns is not formatted the same as the rest
# Example of this issue: "UBI.PA" is ubisoft entertainment
# However, "UBSFY" is also Ubisoft Entertainment but "UBI.PA" won't work while "UBSFY" will
symbol <- "AAPL"

# Notice the usage of <- as the assignment operator.
# This is standard practice in R; the = operator is mostly used for passing
# arguments within a function call.

# Request daily aggregate bars for the chosen symbol.
# The URL currently covers all of 2024; change the date range or the
# "1/day" bucket size in the URL to pull whatever data you need.
# `api_key` is expected to be defined before this point.
url <- paste0(
  "https://api.polygon.io/v2/aggs/ticker/", symbol,
  "/range/1/day/2024-01-01/2024-12-31?adjusted=true&sort=asc&limit=5000&apiKey=",
  api_key
)

response <- GET(url)

# Stop right here with a readable HTTP error if the request was rejected,
# instead of trying to parse an error page as stock data.
stop_for_status(response)

# Stating the encoding explicitly avoids httr having to guess it.
data_json <- fromJSON(content(response, as = "text", encoding = "UTF-8"))

# Keep just the table of results from the parsed response
stock_data <- data_json$results

# Some symbols come back without a usable `results` table; fail with a clear
# message rather than a confusing error inside mutate() below.
if (is.null(stock_data) || NROW(stock_data) == 0) {
  stop(
    "No results were returned for symbol '", symbol,
    "'. Check the symbol, the date range and the API key.",
    call. = FALSE
  )
}

# Clean the data and give the columns easier-to-read names using mutate
stock_df <- stock_data %>%
  # The %>% operator above is the R syntax for a pipe operator
  # Mutate at its simplest does "new column name" = "old column name."
  # It can also contain logic; there is an example of this later on.
  # Inside mutate, c, o, h, l, v and t refer to columns of stock_data.
  mutate(
    # t is divided by 1000 to turn milliseconds into seconds since 1970-01-01.
    # The conversion is pinned to UTC so the calendar date never depends on
    # the time zone of the machine running this code.
    date = as.Date(
      as.POSIXct(t / 1000, origin = "1970-01-01", tz = "UTC"),
      tz = "UTC"
    ),
    close = c,
    open = o,
    high = h,
    low = l,
    volume = v
  ) %>%
  # Keep only the renamed columns, in a conventional order
  select(date, open, high, low, close, volume)

Previewing the Data

# Preview the first six rows of the cleaned data (much like head() in Python's pandas)
head(stock_df, n = 6L)
##         date    open   high      low  close   volume
## 1 2024-04-22 165.515 167.26 164.7700 165.84 48116443
## 2 2024-04-23 165.350 167.05 164.9200 166.90 49537761
## 3 2024-04-24 166.540 169.30 166.2100 169.02 48251835
## 4 2024-04-25 169.525 170.61 168.1511 169.89 50558329
## 5 2024-04-26 169.880 171.34 169.1800 169.30 44838354
## 6 2024-04-29 173.370 176.03 173.1000 173.50 68169419

Very very basic chart

# A simple line chart is one example of the plots R can draw.
# The closing price is mapped against its date and the points are joined by a line.
# aes() is where columns are assigned to visual roles: here the date column
# goes on x and the close column goes on y.
# Each + adds another layer or setting to the plot.
stock_df %>%
  ggplot(mapping = aes(x = date, y = close)) +
  # geom_line() is what makes this a line chart
  geom_line() +
  # labs() is short for labels: the title and the axis names
  labs(
    title = paste("Closing Price for", symbol),
    x = "Date",
    y = "Price (USD)"
  ) +
  theme_classic()

NOTE: the plus signs in this case mean: Start the plot + add a line to the plot + add labels to the plot + add a theme to the plot

Volume Over Time

# Another example of a chart that R can do is a column (bar) chart

# The fill aesthetic inside aes() adds a color scale, which helps
# differentiate columns that are packed closely together

# NOTE: ggplot offers two different types of coloring: fill and color.
# fill changes the color of the columns themselves; color only changes
# their outline

ggplot(stock_df, aes(x = date, y = volume, fill = volume)) +
  # Above we used geom_line for a line chart; geom_col draws a column chart
  geom_col() +
  scale_fill_gradient2(
    low = "black",
    mid = "yellow",
    high = "green",
    # The midpoint defaults to 0. Volume is always positive, so without this
    # every bar would sit on the yellow-to-green half and black would never
    # appear. Centering on the median uses the full three-color range.
    midpoint = median(stock_df$volume, na.rm = TRUE)
  ) +
  labs(
    title = paste("Trading Volume for", symbol),
    x = "Date",
    y = "Volume"
  ) +
  theme_gray()

Simple example of overlaying plots

# Columns and a line drawn from the same data on one chart
ggplot(data = stock_df, mapping = aes(x = date, y = volume)) +
  geom_col(alpha = 0.6) +
  # Adding + geom_line() is all it takes to overlay a second plot type;
  # it reuses the x and y mapping declared in ggplot() above
  geom_line(color = "orange") +
  labs(
    title = paste("Trading Volume for", symbol),
    x = "Date",
    y = "Volume"
  ) +
  theme_minimal()

Note: Notice how hard it is to see the orange line in this graph. This can be solved in two ways, both demonstrated below

Solution 1

# Same overlay as before, but with a thicker line so it stands out
stock_df %>%
  ggplot(mapping = aes(x = date, y = volume)) +
  geom_col(alpha = 0.6) +
  # linewidth controls how thick the line is drawn
  geom_line(color = "orange", linewidth = 1) +
  labs(
    title = paste("Trading Volume for", symbol),
    x = "Date",
    y = "Volume"
  ) +
  theme_classic()

Solution 2

# Same overlay again; the line itself is left at its default thickness
ggplot(data = stock_df, mapping = aes(x = date, y = volume)) +
  geom_col(alpha = 0.6) +
  geom_line(color = "orange") +
  labs(
    title = paste("Trading Volume for", symbol),
    x = "Date",
    y = "Volume"
  ) +
  # Changing the theme of the whole chart can drastically improve
  # readability without tinkering with anything else
  theme_dark()

Advanced example of overlaying plots

# This is a more complex pipeline, so we'll go through it step by step

# Step 1: take stock_df and pipe it forward to the next function.
# The "stock_df_arranged <-" at the start means the result of the whole
# pipeline is saved to that variable.
stock_df_arranged <- stock_df %>%
  # Step 2: sort the piped data frame by the date column, then pipe the
  # sorted data frame forward
  arrange(date) %>%
  # Step 3: add a new column called ma20 by calling rollmean from the zoo
  # library: a 20-row rolling mean of close, aligned to the right, with NA
  # used as the fill value for rows that cannot be computed
  mutate(
    ma20 = zoo::rollmean(x = close, k = 20, fill = NA, align = "right")
  )
# Note: this is R's way of calling a function from a library without loading
# it. Python uses library.function but R uses library::function

# Another line chart, this time with two lines on the same axes.
# Only x is mapped in ggplot(); each geom_line() supplies its own y,
# which is how different columns are overlaid on one chart.
stock_df_arranged %>%
  ggplot(mapping = aes(x = date)) +
  # Daily closing price, slightly transparent
  geom_line(mapping = aes(y = close), alpha = 0.6) +
  # 20-day moving average drawn on top in blue
  geom_line(mapping = aes(y = ma20), color = "blue") +
  labs(
    title = paste("Price with 20-Day Moving Average:", symbol),
    x = "Date",
    y = "Price"
  ) +
  theme_bw()
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

Much more advanced example

stock_df_arranged <- stock_df_arranged %>%
  mutate(
    # lag() picks the value from the row before the current one, so
    # prev_close is the close from the previous row and prev_date is its date.
    # The first row has no previous row, so both are NA there.
    prev_close = lag(close, n = 1),
    prev_date = lag(date, n = 1),
    # An example of logic in R. ifelse() works like an if-else in Python,
    # but it is a function with three inputs:
    #   1. the check: is the current close higher than the previous close?
    #   2. the value to assign when the check is true
    #   3. the value to assign when the check is false
    direction = ifelse(close > prev_close, "up", "down"),
    # 20-row rolling mean of close, computed the same way as before
    ma20 = zoo::rollmean(x = close, k = 20, fill = NA, align = "right")
  )

ggplot(stock_df_arranged) +
  # A key thing to note is that now we have one geom_segment and one geom_line
  # geom_segment draws the price line piece by piece, which matters because
  # each piece can then be colored based on its own properties
  geom_segment(
    aes(
      # This aes is a little different from the previous examples because a
      # segment needs a start (x, y) and an end (xend, yend).
      # Here each segment starts at the previous row's date and close and
      # ends at the current row's date and close.
      x = prev_date, xend = date,
      y = prev_close, yend = close,
      color = direction
    )
  ) +
  geom_line(
    # Mapping color to the constant "MA20" gives this line its own entry in
    # the color legend
    aes(x = date, y = ma20, color = "MA20"),
    linewidth = 1,
    # na.rm means "NA remove": drop NA values without a warning.
    # No trailing comma after the last argument, so no empty argument is
    # passed into the call.
    na.rm = TRUE
  ) +
  # Instead of a gradient, this scale states exactly which color each value
  # maps to
  scale_color_manual(
    # c() in R stands for "combine" (or "concatenate"); in simple terms it
    # creates a vector from the values inside the parentheses
    values = c(
      up = "green",
      down = "red",
      MA20 = "yellow"
    ),
    # labels controls the names shown in the legend, so it shows these
    # instead of the raw data frame values
    labels = c(
      up = "Up",
      down = "Down",
      MA20 = "Moving Average"
    ),
    # With this argument, an NA value will not be given an entry in the legend
    na.translate = FALSE
  ) +
  labs(
    title = "Chart of Closing Values Over Time",
    x = "Date",
    y = "Closing Value",
    color = "Price movement"
  ) +
  theme_dark()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).