# First, I load the necessary libraries for time series analysis and plotting.
library(quantmod) # For financial data
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast) # For ARIMA and time series modeling
library(ggplot2) # For plotting
# i. Plot the daily closing prices for IBM stock and the ACF and PACF.
# I fetch daily IBM stock prices from Yahoo Finance starting from January 1, 2020.
getSymbols("IBM", from = "2020-01-01", to = Sys.Date())
## [1] "IBM"
# I extract the adjusted closing prices since they account for dividends and splits.
ibm_prices <- Ad(IBM)
# I plot the adjusted daily closing prices to observe any trends or seasonality.
price_plot <- ggplot(
data.frame(Date = index(ibm_prices), Price = as.numeric(ibm_prices)),
aes(x = Date, y = Price)
) +
geom_line() +
labs(
title = "Daily Adjusted Closing Prices of IBM Stock",
y = "Price",
x = "Date"
) +
theme_minimal()
print(price_plot) # Display the price plot

# I calculate log returns to stabilize variance and check for stationarity.
ibm_returns <- diff(log(ibm_prices))[-1]
# I check if there are any missing values in the return series.
sum(is.na(ibm_returns)) # Count of NA values
## [1] 0
# I remove any missing values from the returns.
ibm_returns_cleaned <- na.omit(ibm_returns)
# I plot the ACF of the cleaned returns to examine autocorrelation.
acf(ibm_returns_cleaned, main = "ACF of IBM Daily Returns (Cleaned)")

# I also plot the PACF to understand direct lags affecting the series.
pacf(ibm_returns_cleaned, main = "PACF of IBM Daily Returns (Cleaned)")

# --- Explanation (ii) ---
# - The price plot shows a clear trend over time, indicating non-stationarity.
# - ACF of returns drops quickly, indicating that returns are more stationary.
# - PACF confirms that only a few lags have strong influence, which is typical of stationary data.
# iii. Apply differencing on price series to achieve stationarity.
# I apply first-order differencing to the price series to remove the trend.
ibm_prices_diff <- diff(ibm_prices)
# I check for and remove any missing values in the differenced series.
sum(is.na(ibm_prices_diff))
## [1] 1
ibm_prices_diff_cleaned <- na.omit(ibm_prices_diff)
# I plot the differenced prices to visually confirm if stationarity is achieved.
diff_price_plot <- ggplot(
data.frame(Date = index(ibm_prices_diff_cleaned), Diff_Price = as.numeric(ibm_prices_diff_cleaned)),
aes(x = Date, y = Diff_Price)
) +
geom_line() +
labs(
title = "First-Order Differenced Adjusted Closing Prices of IBM Stock",
y = "Differenced Price",
x = "Date"
) +
theme_minimal()
print(diff_price_plot) # Show the differenced price plot

# I plot ACF and PACF of the differenced series to verify if it’s now stationary.
acf(ibm_prices_diff_cleaned, main = "ACF of Differenced IBM Prices")

pacf(ibm_prices_diff_cleaned, main = "PACF of Differenced IBM Prices")

# Apply ARIMA model to capture the patterns in the cleaned returns data.
# I use auto.arima to automatically determine the best ARIMA model for the returns.
arima_model_returns_cleaned <- auto.arima(ibm_returns_cleaned)
summary(arima_model_returns_cleaned) # Display the ARIMA model summary
## Series: ibm_returns_cleaned
## ARIMA(3,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 mean
## -1.6220 -0.6881 0.0926 1.5894 0.6795 -0.0444 7e-04
## s.e. 0.6938 1.2439 0.6449 0.6997 1.2236 0.6094 5e-04
##
## sigma^2 = 0.0002932: log likelihood = 3578.72
## AIC=-7141.43 AICc=-7141.32 BIC=-7099.77
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5.14115e-06 0.01707879 0.01133028 -Inf Inf 0.6829957 0.0004746699
# If I want to fit ARIMA on the differenced prices instead, I could uncomment the code below:
# arima_model_prices_diff_cleaned <- auto.arima(ibm_prices_diff_cleaned)
# summary(arima_model_prices_diff_cleaned)