# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(Metrics)
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
### Financial Application Example ###
# Build a reproducible synthetic table of 100 daily price records.
# The seed is fixed and the columns are drawn in a fixed order; every draw
# advances the RNG stream, so reordering the columns would change the values.
# Each column is sampled independently, so rows are not guaranteed to satisfy
# Low <= Open/Close <= High.
set.seed(123)
stock_df <- data.frame(
  Date = seq(from = as.Date("2023-01-01"), by = "day", length.out = 100),
  Open = rnorm(n = 100, mean = 100, sd = 5),
  High = rnorm(n = 100, mean = 105, sd = 5),
  Low = rnorm(n = 100, mean = 95, sd = 5),
  Close = rnorm(n = 100, mean = 100, sd = 5),
  Volume = rpois(n = 100, lambda = 1000)
)
# Show the first six rows of the generated table
head(stock_df, n = 6L)
## Date Open High Low Close Volume
## 1 2023-01-01 97.19762 101.4480 105.99405 96.42379 997
## 2 2023-01-02 98.84911 106.2844 101.56206 96.23656 947
## 3 2023-01-03 107.79354 103.7665 93.67427 95.30731 999
## 4 2023-01-04 100.35254 103.2623 97.71597 94.73743 1021
## 5 2023-01-05 100.64644 100.2419 92.92830 97.81420 947
## 6 2023-01-06 108.57532 104.7749 92.61877 101.65590 1037
# Split data into training and test sets
# Chronological split: the first 70% of rows train the model and the remaining
# rows are held out for testing. Sizes are derived from the row count rather
# than hard-coded, so a shorter table cannot yield all-NA test rows and a
# longer one is not silently truncated. Integer arithmetic avoids
# floating-point rounding surprises; for 100 rows this gives 1:70 and 71:100.
n_obs <- nrow(stock_df)
n_train <- (n_obs * 7L) %/% 10L
train_index <- seq_len(n_train)
test_index <- n_train + seq_len(n_obs - n_train)
train_data <- stock_df[train_index, ]
test_data <- stock_df[test_index, ]
# Linear Regression Model
# Ordinary least squares fit of Close on the other four numeric columns,
# using the training rows only. The formula is written inline so that the
# call recorded in the fitted model reads the same as before.
lm_model <- lm(
  formula = Close ~ Open + High + Low + Volume,
  data = train_data
)
# Predict on test set
# Store the fitted model's predictions next to the actual values.
test_data$pred_Close <- predict(object = lm_model, newdata = test_data)
# Evaluate Model
# Test-set mean squared error: the average squared gap between the actual
# and the predicted Close.
mse <- with(test_data, mean((Close - pred_Close)^2))
cat("Mean Squared Error:", mse, "\n")
## Mean Squared Error: 27.08185
# Plot actual vs. predicted closing prices
# Line chart over the test dates: the actual series is drawn first (blue),
# then the predictions are overlaid (red) on the same axes.
plot(
  x = test_data$Date,
  y = test_data$Close,
  type = "l",
  col = "blue",
  xlab = "Date",
  ylab = "Price",
  main = "Actual vs. Predicted Closing Prices"
)
lines(x = test_data$Date, y = test_data$pred_Close, col = "red")
legend(
  x = "topright",
  legend = c("Actual", "Predicted"),
  col = c("blue", "red"),
  lty = 1
)
# Summary of the linear regression model
# Prints the call, residual quantiles, coefficient table and fit statistics.
summary(lm_model)
##
## Call:
## lm(formula = Close ~ Open + High + Low + Volume, data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.6649 -3.2726 0.3966 3.8520 12.1650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 92.16773 35.02980 2.631 0.0106 *
## Open -0.05196 0.14677 -0.354 0.7245
## High -0.03438 0.12886 -0.267 0.7905
## Low 0.03132 0.13858 0.226 0.8219
## Volume 0.01374 0.02197 0.625 0.5340
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.356 on 65 degrees of freedom
## Multiple R-squared: 0.01039, Adjusted R-squared: -0.05051
## F-statistic: 0.1707 on 4 and 65 DF, p-value: 0.9526
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <https://rmarkdown.rstudio.com>.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics of the `cars` data set
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.