#install.packages("forecast")
library(forecast)
#install.packages("tidyverse")
library(tidyverse)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("quantmod")
library(quantmod)
#install.packages("dplyr")
library(dplyr)
#install.packages("Metrics")
library(Metrics)
# Download daily AAPL quotes from Yahoo Finance for the study window.
# `auto.assign = FALSE` makes getSymbols() return the xts object instead of
# silently creating a variable in the calling environment, so the assignment
# to `AAPL` is explicit and visible here.
AAPL <- getSymbols(
  "AAPL",
  src = "yahoo",
  from = "2004-01-01",
  to = "2024-01-03",
  auto.assign = FALSE
)

# Convert the xts object to a plain data frame once; the trading dates end up
# in the row names, so copy them into a proper Date column for plotting.
apple <- as.data.frame(AAPL)
apple$date <- as.Date(rownames(apple))
# Per-column distribution summary (min, quartiles, mean, max) of the data.
apple %>% summary()
## AAPL.Open AAPL.High AAPL.Low AAPL.Close
## Min. : 0.3825 Min. : 0.3884 Min. : 0.3782 Min. : 0.380
## 1st Qu.: 5.8538 1st Qu.: 5.9504 1st Qu.: 5.7532 1st Qu.: 5.854
## Median : 21.4473 Median : 21.6714 Median : 21.2441 Median : 21.527
## Mean : 42.9220 Mean : 43.3884 Mean : 42.4735 Mean : 42.951
## 3rd Qu.: 47.9694 3rd Qu.: 48.5425 3rd Qu.: 47.7531 3rd Qu.: 48.126
## Max. :198.0200 Max. :199.6200 Max. :197.0000 Max. :198.110
## AAPL.Volume AAPL.Adjusted date
## Min. :2.405e+07 Min. : 0.3202 Min. :2004-01-02
## 1st Qu.:1.122e+08 1st Qu.: 4.9329 1st Qu.:2008-12-31
## Median :2.739e+08 Median : 18.2892 Median :2014-01-01
## Mean :4.071e+08 Mean : 41.1501 Mean :2013-12-31
## 3rd Qu.:5.760e+08 3rd Qu.: 45.8199 3rd Qu.:2019-01-01
## Max. :3.373e+09 Max. :196.9277 Max. :2024-01-02
# Compact structure listing: dimensions plus the type of every column.
apple %>% str()
## 'data.frame': 5034 obs. of 7 variables:
## $ AAPL.Open : num 0.385 0.382 0.397 0.395 0.408 ...
## $ AAPL.High : num 0.388 0.4 0.4 0.408 0.424 ...
## $ AAPL.Low : num 0.378 0.382 0.388 0.392 0.404 ...
## $ AAPL.Close : num 0.38 0.396 0.394 0.403 0.417 ...
## $ AAPL.Volume : num 1.45e+08 3.95e+08 5.09e+08 5.87e+08 4.60e+08 ...
## $ AAPL.Adjusted: num 0.32 0.334 0.332 0.34 0.351 ...
## $ date : Date, format: "2004-01-02" "2004-01-05" ...
# First six trading days in the sample.
head(apple, n = 6L)
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2004-01-02 0.384821 0.388393 0.378214 0.380000 144642400 0.3201959
## 2004-01-05 0.382500 0.399821 0.382500 0.395893 395018400 0.3335877
## 2004-01-06 0.397321 0.400357 0.387679 0.394464 509348000 0.3323835
## 2004-01-07 0.394643 0.407679 0.391607 0.403393 586874400 0.3399073
## 2004-01-08 0.407857 0.423750 0.404464 0.417143 460303200 0.3514933
## 2004-01-09 0.414821 0.430893 0.406964 0.410714 427459200 0.3460762
## date
## 2004-01-02 2004-01-02
## 2004-01-05 2004-01-05
## 2004-01-06 2004-01-06
## 2004-01-07 2004-01-07
## 2004-01-08 2004-01-08
## 2004-01-09 2004-01-09
# Last six trading days in the sample.
tail(apple, n = 6L)
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2023-12-22 195.18 195.41 192.97 193.60 37122800 192.4446
## 2023-12-26 193.61 193.89 192.83 193.05 28919300 191.8979
## 2023-12-27 192.49 193.50 191.09 193.15 48087700 191.9973
## 2023-12-28 194.14 194.66 193.17 193.58 34049900 192.4247
## 2023-12-29 193.90 194.40 191.73 192.53 42628800 191.3810
## 2024-01-02 187.15 188.44 183.89 185.64 82488700 184.5321
## date
## 2023-12-22 2023-12-22
## 2023-12-26 2023-12-26
## 2023-12-27 2023-12-27
## 2023-12-28 2023-12-28
## 2023-12-29 2023-12-29
## 2024-01-02 2024-01-02
# Centre of the simulated distribution: the average observed closing price
# (missing closes are ignored).
mean_price <- mean(apple[["AAPL.Close"]], na.rm = TRUE)

# Seed the RNG so the simulated series is reproducible, then draw one normal
# variate per row of `apple` around `mean_price`. The spread is a fixed 10,
# not a value estimated from the data.
set.seed(123)
simulated_prices <- rnorm(nrow(apple), mean_price, 10)

# Copy of the real data with the simulated closes attached as a new column.
simulated_stock <- mutate(apple, Simulated.Close = simulated_prices)

# Preview the first six rows of the combined data.
head(simulated_stock, n = 6L)
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2004-01-02 0.384821 0.388393 0.378214 0.380000 144642400 0.3201959
## 2004-01-05 0.382500 0.399821 0.382500 0.395893 395018400 0.3335877
## 2004-01-06 0.397321 0.400357 0.387679 0.394464 509348000 0.3323835
## 2004-01-07 0.394643 0.407679 0.391607 0.403393 586874400 0.3399073
## 2004-01-08 0.407857 0.423750 0.404464 0.417143 460303200 0.3514933
## 2004-01-09 0.414821 0.430893 0.406964 0.410714 427459200 0.3460762
## date Simulated.Close
## 2004-01-02 2004-01-02 37.34610
## 2004-01-05 2004-01-05 40.64908
## 2004-01-06 2004-01-06 58.53794
## 2004-01-07 2004-01-07 43.65594
## 2004-01-08 2004-01-08 44.24374
## 2004-01-09 2004-01-09 60.10151
# Make sure the date column is of class Date before it is used for plotting.
simulated_stock[["date"]] <- as.Date(simulated_stock[["date"]])

# Flag the rows where both the real and the simulated close are present, so
# the accuracy metrics are computed on matching pairs only.
valid_indices <- complete.cases(
  simulated_stock[c("AAPL.Close", "Simulated.Close")]
)

# Keep only the complete pairs as plain vectors.
real_prices <- simulated_stock[["AAPL.Close"]][valid_indices]
simulated_prices <- simulated_stock[["Simulated.Close"]][valid_indices]
# Accuracy of the simulated closes against the real closes. Each metric is
# computed and reported in turn; the Metrics helpers take the observed series
# as `actual` and the simulated series as `predicted`.

# Mean Absolute Error (MAE): average size of the gap, in price units.
mae_value <- Metrics::mae(actual = real_prices, predicted = simulated_prices)
cat("Mean Absolute Error (MAE):", mae_value, "\n")
## Mean Absolute Error (MAE): 41.33864

# Mean Squared Error (MSE): average squared gap.
mse_value <- Metrics::mse(actual = real_prices, predicted = simulated_prices)
cat("Mean Squared Error (MSE):", mse_value, "\n")
## Mean Squared Error (MSE): 2944.238

# Root Mean Squared Error (RMSE): square root of the MSE, in price units.
rmse_value <- Metrics::rmse(actual = real_prices, predicted = simulated_prices)
cat("Root Mean Squared Error (RMSE):", rmse_value, "\n")
## Root Mean Squared Error (RMSE): 54.26083

# R-squared (R²): computed here as the squared Pearson correlation between
# the two series.
r_squared <- cor(x = real_prices, y = simulated_prices)^2
cat("R-squared Score (R²):", r_squared, "\n")
## R-squared Score (R²): 0.0002366267

# Mean Absolute Percentage Error (MAPE): average absolute gap relative to the
# real price, expressed in percent.
mape_value <- 100 * mean(abs((real_prices - simulated_prices) / real_prices))
cat("Mean Absolute Percentage Error (MAPE):", mape_value, "%\n")
## Mean Absolute Percentage Error (MAPE): 782.793 %
# Per-observation error measures for the complete (real, simulated) pairs.
errors <- simulated_prices - real_prices        # signed: simulated minus real
abs_errors <- abs(errors)                       # magnitude of the gap
squared_errors <- errors^2                      # squared gap
percentage_errors <- abs_errors / real_prices * 100  # gap as % of real price

# Write each measure back onto the data frame. Every column starts as NA and
# is then filled only at the rows flagged by `valid_indices`, so rows that
# were excluded from the comparison keep NA.
error_columns <- list(
  Error = errors,
  Absolute_Error = abs_errors,
  Squared_Error = squared_errors,
  Percentage_Error = percentage_errors
)
for (column_name in names(error_columns)) {
  simulated_stock[[column_name]] <- NA
  simulated_stock[[column_name]][valid_indices] <- error_columns[[column_name]]
}
# Bar chart of the signed simulation error (simulated minus real) per date.
# The sign of the error is turned into an explicitly labelled factor and the
# colours are matched to those labels by name, so the legend stays correct
# even if only one of the two categories occurs in the data. Errors that are
# not strictly positive (including exactly zero) fall under "Underestimated".
ggplot(
  simulated_stock,
  aes(
    x = date,
    y = Error,
    fill = factor(
      Error > 0,
      levels = c(FALSE, TRUE),
      labels = c("Underestimated", "Overestimated")
    )
  )
) +
  geom_col() +
  # Zero line separating over- from under-estimates.
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  scale_fill_manual(
    values = c("Underestimated" = "red", "Overestimated" = "blue")
  ) +
  labs(
    title = "Stock Price Simulation Errors Over Time",
    x = "Date",
    y = "Error (Simulated - Real)",
    fill = "Error Type"
  ) +
  theme_minimal()
How close was the simulated data to real stock
prices?
The simulated stock prices, drawn from a normal distribution centred on the
mean closing price, were considerably off from the real stock prices. This is
because the simulation uses only a single mean and a fixed standard deviation
to represent the real stock prices. Because the simulation is so simple and
ignores the time-series structure of the data (the real prices trend upward
over time), the simulated prices cannot be considered accurate.
What does the R² value indicate?
The R² calculated between the real and the simulated stock prices is
0.0002366267. This indicates that the simulated prices explain almost none of
the variation in the real stock prices.
How high is the MAPE percentage?
The MAPE of the simulated prices against the real stock prices is 782.793 %.
This means that, on average, a simulated price deviates from the real price
by nearly 8 times the real price. This indicates that the simulation is very
inaccurate compared to the real prices.
# Plot real vs. simulated stock prices
# Both series are drawn against the date: the real close as a solid line and
# the simulated close as a dashed line. Line thickness is set with
# `linewidth`, which replaces the `size` aesthetic for lines deprecated in
# ggplot2 3.4.0. The colour names used inside aes() are legend keys that
# scale_color_manual() maps to the actual colours.
ggplot(simulated_stock, aes(x = date)) +
  geom_line(aes(y = AAPL.Close, color = "Real Prices"), linewidth = 1) +
  geom_line(
    aes(y = Simulated.Close, color = "Simulated Prices"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  labs(
    # The downloaded series starts in January 2004.
    title = "Real vs Simulated Apple Stock Prices (2004-2024)",
    x = "Date",
    y = "Stock Price (USD)",
    color = "Legend"
  ) +
  scale_color_manual(
    values = c("Real Prices" = "blue", "Simulated Prices" = "red")
  ) +
  theme_minimal()
# Plot real vs. simulated stock prices