# Introduction:
# This project analyzes and forecasts NVIDIA's stock prices using historical data. By leveraging data visualization, statistical modeling, and predictive analytics, this study aims to uncover patterns in stock price movements and provide actionable insights for strategic decision-making.
# Problem Statement:
# NVIDIA's stock prices have shown considerable volatility and growth over time. Investors and stakeholders need a reliable forecast to make informed decisions on investments and risk management. This project addresses the need for a data-driven approach to understand past trends and predict future price behavior.
# Objectives:
# - To visualize NVIDIA's historical adjusted closing prices and identify trends.
# - To build predictive models using linear regression and exponential smoothing.
# - To compare model performance and provide recommendations for stock price forecasting.
# Methods:
# The analysis involves creating a time-series plot for visual analysis, building 
# statistical models like linear regression for trend analysis, and exponential 
# smoothing for short-term forecasting. Performance is evaluated using metrics like 
# Mean Squared Error (MSE) and Mean Absolute Percentage Error (MAPE).
# Load necessary libraries
library(ggplot2)
library(lubridate)
library(scales)
library(forecast)
# Read the NVIDIA price history from a fixed CSV path (relative to the
# current working directory)
nvidia_data <- read.csv(file = "nvidia_stock_data.csv")
# Preview the first six rows to check how the columns were parsed
head(nvidia_data, n = 6L)
##         Date   Open   High    Low  Close Adj.Close    Volume
## 1 2023-01-03 14.851 14.996 14.096 14.315  14.30558 401277000
## 2 2023-01-04 14.567 14.853 14.241 14.749  14.73929 431324000
## 3 2023-01-05 14.491 14.564 14.148 14.265  14.25561 389168000
## 4 2023-01-06 14.474 15.010 14.034 14.859  14.84922 405044000
## 5 2023-01-09 15.284 16.056 15.141 15.628  15.61772 504231000
## 6 2023-01-10 15.507 15.962 15.472 15.909  15.89853 384101000
# Parse the 'Date' strings (year-month-day) into Date objects so the column
# can drive the time axis and the date-based steps that follow
nvidia_data[["Date"]] <- as.Date(nvidia_data[["Date"]], format = "%Y-%m-%d")
# Line chart of the adjusted closing price against trading date
ggplot(nvidia_data, mapping = aes(x = Date, y = Adj.Close)) +
  geom_line(colour = "orange") +
  scale_y_continuous(
    # Tick every 20 USD from zero up to the highest observed price
    breaks = seq(from = 0, to = max(nvidia_data$Adj.Close), by = 20)
  ) +
  scale_x_date(
    date_labels = "%Y-%m",   # Year-Month tick labels
    date_breaks = "1 month"  # one tick per calendar month
  ) +
  labs(
    title = "NVIDIA Adjusted Close Price Over Time",
    x = "Date",
    y = "Adjusted Close Price (USD)"
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),                      # drop all grid lines
    axis.text.x = element_text(hjust = 1, angle = 45)  # slant the date labels
  )

# Interpretation: The time series plot highlights NVIDIA's stock price trends over the years, revealing periods of growth, stability, or volatility.
# Add a numeric time index for regression analysis.
# Period is the date expressed as a fractional year (e.g. mid-2023 is about
# 2023.5). Extracting only the calendar year would collapse every trading day
# of a year onto a single value, so the "trend" would merely compare yearly
# mean prices; the fractional year keeps the day-level ordering while the
# slope is still read as price change per year.
# NOTE: regression output recorded with the integer-year Period will differ
# from what this version prints when the script is re-run.
nvidia_data$Period <- lubridate::decimal_date(nvidia_data$Date)

# Fit a simple linear regression of adjusted close on the time index
model <- lm(Adj.Close ~ Period, data = nvidia_data)
summary(model)  # Display regression results
## 
## Call:
## lm(formula = Adj.Close ~ Period, data = nvidia_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -49.91 -10.84   4.45  10.52  38.10 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.232e+05  3.389e+03  -36.35   <2e-16 ***
## Period       6.091e+01  1.675e+00   36.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.32 on 435 degrees of freedom
## Multiple R-squared:  0.7525, Adjusted R-squared:  0.7519 
## F-statistic:  1323 on 1 and 435 DF,  p-value: < 2.2e-16
# Interpretation: 
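# The regression output reports the intercept and slope; the slope is the average change in stock price per year. A statistically significant p-value for the slope (typically <0.05) indicates a non-zero linear trend, although it does not by itself show that a straight line describes the series well.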
# Test various alpha values for simple exponential smoothing.
# For each candidate alpha, fit ses() to the adjusted close series and score
# the fitted values against the observed prices.
alpha_values <- c(0.2, 0.8, 0.9)
exponential_results <- data.frame(
  Alpha = alpha_values,
  MSE = NA_real_,   # typed NA so the columns are numeric from the start
  MAPE = NA_real_
)

# Observed series; hoisted because it does not change across iterations
actual_prices <- nvidia_data$Adj.Close

# Calculate Mean Squared Error (MSE) and Mean Absolute Percentage Error (MAPE)
# for each alpha. seq_along() is used instead of 1:length() so the loop body
# is skipped, rather than run with invalid indices, if alpha_values is empty.
for (i in seq_along(alpha_values)) {
  alpha <- alpha_values[i]
  exp_model <- ses(actual_prices, alpha = alpha)
  exp_predictions <- fitted(exp_model)  # in-sample fitted values

  mse <- mean((actual_prices - exp_predictions)^2)
  mape <- mean(abs((actual_prices - exp_predictions) / actual_prices)) * 100

  exponential_results$MSE[i] <- mse
  exponential_results$MAPE[i] <- mape
}
exponential_results  # Display results
##   Alpha       MSE     MAPE
## 1   0.2 16.655292 4.246071
## 2   0.8  6.255753 2.398323
## 3   0.9  6.159363 2.359094
# Interpretation:
# Lower MSE and MAPE values indicate better predictive performance. This comparison helps identify the most accurate model for forecasting NVIDIA's stock prices.
# In-sample accuracy of the linear model: refit, predict on the same rows,
# then score the predictions against the observed adjusted closes
linear_model <- lm(Adj.Close ~ Period, data = nvidia_data)
linear_predictions <- predict(object = linear_model, newdata = nvidia_data)
linear_mse <- with(nvidia_data, mean((Adj.Close - linear_predictions)^2))
linear_mape <- 100 * with(
  nvidia_data,
  mean(abs((Adj.Close - linear_predictions) / Adj.Close))
)

# Combine results for comparison.
# The exponential-smoothing entry is the alpha with the lowest MSE, looked up
# in exponential_results instead of by a fixed row number, so the label and
# the metrics stay in sync if the candidate alphas are changed.
best_row <- which.min(exponential_results$MSE)
best_alpha <- exponential_results$Alpha[best_row]
comparison <- data.frame(
  Model = c(
    "Linear Regression",
    paste0("Exponential Smoothing (α=", best_alpha, ")")
  ),
  MSE = c(linear_mse, exponential_results$MSE[best_row]),
  MAPE = c(linear_mape, exponential_results$MAPE[best_row])
)
comparison
##                           Model        MSE      MAPE
## 1             Linear Regression 298.768267 29.376348
## 2 Exponential Smoothing (α=0.9)   6.159363  2.359094
# Bar chart contrasting the mean squared error of the compared models
ggplot(comparison, mapping = aes(x = Model, y = MSE, fill = Model)) +
  geom_col(width = 0.5) +  # bar heights are taken directly from MSE
  theme_minimal() +
  labs(title = "Comparison of MSE", y = "Mean Squared Error (MSE)")

# Bar chart contrasting the mean absolute percentage error of the models
ggplot(comparison, mapping = aes(x = Model, y = MAPE, fill = Model)) +
  geom_col(width = 0.5) +  # bar heights are taken directly from MAPE
  theme_minimal() +
  labs(
    title = "Comparison of MAPE",
    y = "Mean Absolute Percentage Error (MAPE)"
  )

# Conclusion:
# The analysis revealed significant trends in NVIDIA's stock prices. Measured by in-sample MSE and MAPE, the exponential smoothing model with α=0.9 provided the most accurate predictions and is best suited to short-term forecasts, while linear regression captured the overall long-term trend.

# Recommendations:
# 1. Use exponential smoothing for short-term trading strategies and volatility predictions.
# 2. Rely on linear regression for long-term investment planning and growth projections.
# 3. Regularly update models with new data to maintain forecasting accuracy.