# Load necessary libraries
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Step 1: Data Collection
symbol <- "AAPL" # Use Apple stock as an example
# With its default auto-assignment, getSymbols() creates an object named after
# the ticker in the calling environment and returns that name.
getSymbols(
  symbol,
  src = "yahoo", from = "2018-01-01", to = "2023-12-31"
)
## [1] "AAPL"
# Extract Closing Prices
# Cl() selects the "Close" column, not the "Adjusted" one; use Ad() instead if
# adjusted prices are wanted (the results recorded in this file come from
# Cl()). na.omit() then drops any dates whose close is missing.
prices <- na.omit(Cl(get(symbol)))
# Step 2: Prepare Data with Multiple Lagged Features
# Start from a plain data frame holding each date and its price.
data <- data.frame(Date = index(prices), Price = as.numeric(prices))
# Lag k is the price column shifted down by k rows: k leading NAs followed by
# every price except the last k. This fills Lag1, Lag2 and Lag3 in turn.
data[paste0("Lag", 1:3)] <- lapply(1:3, function(k) {
  c(rep(NA, k), head(data$Price, -k))
})
# Rows without a complete set of lags (the leading ones) are discarded.
data <- na.omit(data)
# Step 3: Split Data into Training and Testing Sets
# The split is positional, not random: the first 80% of rows train the model
# and the remaining rows are held out for testing.
set.seed(123) # For reproducibility (the split itself draws no random numbers)
train_size <- floor(0.8 * nrow(data)) # 80% for training
# A logical mask is used instead of 1:train_size and
# (train_size + 1):nrow(data): those ranges count backwards when train_size is
# 0 (e.g. 1:0 is c(1, 0)), which would put the same row in both sets.
in_train <- seq_len(nrow(data)) <= train_size
train_data <- data[in_train, ] # First 80% as training data
test_data <- data[!in_train, ] # Remaining 20% as testing data
# Step 4: Build the Multiple Regression Model
# Regress the current price on its three lagged values, fitting on the
# training rows only so the test rows stay unseen by the model.
model <- lm(Price ~ Lag1 + Lag2 + Lag3, data = train_data)
# Print Model Summary
# (residual quantiles, coefficient table and overall fit statistics)
summary(model)
##
## Call:
## lm(formula = Price ~ Lag1 + Lag2 + Lag3, data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6632 -0.8162 -0.0270 0.8664 11.0606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.19819 0.14255 1.390 0.165
## Lag1 0.93376 0.02890 32.311 <2e-16 ***
## Lag2 0.04823 0.03962 1.217 0.224
## Lag3 0.01688 0.02895 0.583 0.560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.144 on 1200 degrees of freedom
## Multiple R-squared: 0.9979, Adjusted R-squared: 0.9979
## F-statistic: 1.865e+05 on 3 and 1200 DF, p-value: < 2.2e-16
# Step 5: Make Predictions
# Apply the fitted model to the held-out test rows.
predictions <- predict(model, newdata = test_data)
# Step 6: Evaluate the Model
# Compare each prediction with the price actually observed on that row.
actuals <- test_data$Price
squared_errors <- (predictions - actuals)^2
mse <- mean(squared_errors) # Mean Squared Error
cat("Mean Squared Error (MSE):", mse, "\n")
## Mean Squared Error (MSE): 6.139336
# Step 7: Visualize Results
# Size the y-axis from both series. Left to its default, plot() would fit the
# axis to the actual prices only and clip any prediction outside that range.
y_limits <- range(actuals, predictions, na.rm = TRUE)
plot(
  test_data$Date, actuals,
  type = "l", col = "blue", lwd = 2, ylim = y_limits,
  main = "Actual vs Predicted Prices", xlab = "Date", ylab = "Price"
)
# Overlay the model's predictions on the same axes.
lines(test_data$Date, predictions, col = "red", lwd = 2)
legend(
  "topright",
  legend = c("Actual", "Predicted"),
  col = c("blue", "red"), lty = 1, lwd = 2
)
