# Set the values for p and q
p <- 2; q <- 1
# Set the AR and MA coefficients
ar_coefs <- c(0.8, 0.5)
ma_coefs <- 0.3
# Set the length of the simulated process
n <- 1000
# Generate random series
random_series <- rnorm(n, mean = 0, sd = 1)
# Create an empty vector to store the simulated process
arma_process <- numeric(n)
# Simulate the ARMA process
for (i in 1:n) {
if (i <= p) {
arma_process[i] <- random_series[i]
} else {
arma_process[i] <- sum(ar_coefs * arma_process[(i - p):(i - 1)]) +
ma_coefs * random_series[i - q]
}
}
# Plot the simulated ARMA process
plot(arma_process, type = "l", main = "Simulated Explosive ARMA(2,1) Process")
acf(arma_process, lag.max = 20)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Convert the arma_process to a time series object
arma_ts <- ts(arma_process)
# Fit an ARIMA model to the arma_ts
arma_model <- auto.arima(arma_ts)
# Make a forecast for 12 periods ahead
forecast_result <- forecast(arma_model, h = 12)
# Plot the forecasted values and interval confidences
plot(forecast_result, main = "Forecasted Values and Interval Confidences")
# Load the required libraries
library(readxl)
# Load the Excel file
excel_data <- read_excel("transportation_index.xlsx")
## New names:
## • `` -> `...1`
# Check the structure of the data
str(excel_data)
## tibble [516 × 4] (S3: tbl_df/tbl/data.frame)
## $ ...1 : POSIXct[1:516], format: NA "2000-01-31" ...
## $ USTRASERV: chr [1:516] NA "101.7" NA "101.1" ...
## $ USTRAFREI: chr [1:516] NA "105.1" NA "103.1" ...
## $ USTRAPASS: chr [1:516] NA "95.1" NA "97" ...
# handle with the NA values
excel_data <- na.omit(excel_data)
# Convert the data into time series
time_series_USTRASERV <- ts((as.numeric(excel_data$USTRASERV)), start = c(2000, 1), frequency = 12)
time_series_USTRAFREI <- ts((as.numeric(excel_data$USTRAFREI)), start = c(2000, 1), frequency = 12)
time_series_USTRAPASS <- ts((as.numeric(excel_data$USTRAPASS)), start = c(2000, 1), frequency = 12)
# Check the structure of the time series
str(time_series_USTRASERV)
## Time-Series [1:258] from 2000 to 2021: 101.7 101.1 99.4 98.3 99.9 ...
str(time_series_USTRAFREI)
## Time-Series [1:258] from 2000 to 2021: 105.1 103.1 99.7 97.9 98.8 ...
str(time_series_USTRAPASS)
## Time-Series [1:258] from 2000 to 2021: 95.1 97 98.7 99.2 102.2 ...
# Generate a date series
date_series <- seq(as.Date("2000-01-31"), by = "month", length.out = nrow(excel_data))
# Check the structure of the date series
str(date_series)
## Date[1:258], format: "2000-01-31" "2000-03-02" "2000-03-31" "2000-05-01" "2000-05-31" ...
#Combine the three time series into a data frame
plot_data <- data.frame(
Date = date_series,
USTRASERV = time_series_USTRASERV,
USTRAFREI = time_series_USTRAFREI,
USTRAPASS = time_series_USTRAPASS
)
# Plot using base R plot
plot(plot_data$Date, plot_data$USTRASERV, type = "l", col = "blue", xlab = "Date", ylab = "Indicator Values", ylim = range(c(plot_data$USTRASERV, plot_data$USTRAFREI, plot_data$USTRAPASS)))
lines(plot_data$Date, plot_data$USTRAFREI, col = "red")
lines(plot_data$Date, plot_data$USTRAPASS, col = "green")
legend("topright", legend = c("USTRASERV", "USTRAFREI", "USTRAPASS"), col = c("blue", "red", "green"), lty = 1)
# Add a title
title("Transportation Indicators Over Time")
Autocorrelation Function (ACF): - The ACF measures the correlation between a time series and its lagged values. - It calculates the correlation coefficient between the series at different lags (time points in the past). - ACF helps identify patterns or dependencies in the time series.
Partial Autocorrelation Function (PACF): - The PACF measures the correlation between a time series and its lagged values, controlling for the effects of intermediate lags. - It measures the direct relationship between two data points at a specific lag, excluding the influence of other lags in between. - PACF helps identify the direct linear relationship between a data point and its lagged values. - It provides insights into the specific lags that contribute significantly to the time series.
# Plot ACF
acf(time_series_USTRAPASS, type="correlation", main = "ACF Plot of Passenger Index Data")
# Perform Ljung-Box test
ljung_box_test <- Box.test(time_series_USTRAPASS, lag = 20, type = "Ljung-Box")
# Print the test results
print(ljung_box_test)
##
## Box-Ljung test
##
## data: time_series_USTRAPASS
## X-squared = 1131.4, df = 20, p-value < 2.2e-16
With the ACF we find that the process is not stationary so we could not modelise this time series
# Import the data of births using scan
births_url <- "http://robjhyndman.com/tsdldata/data/nybirths.dat"
births <- scan(births_url)
# Import the data of sales using scan
sales_url <- "http://robjhyndman.com/tsdldata/data/fancy.dat"
sales <- scan(sales_url)
# Print the imported data
head(births)
## [1] 26.663 23.598 26.931 24.740 25.806 24.364
head(sales)
## [1] 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74
births.ts <- ts(births, frequency=12, start=c(1946,1))
sales.ts <- ts(sales, frequency=12, start=c(1946,1))
# Plot the births data
plot(births.ts, type = "l", xlab = "Time", ylab = "Births", main = "Births Data")
# Plot the sales data
plot(sales.ts, type = "l", xlab = "Time", ylab = "Sales", main = "Sales Data")
the approriate method is the logarithmic transfromation
# Apply logarithmic transformation to the births data
births_log <- log(births.ts)
# Plot the transformed births data
plot(births_log, type = "l", xlab = "Time", ylab = "Log(Births)", main = "Transformed log Births Data")
# Apply logarithmic transformation to the sales data
sales_log <- log(sales.ts)
# Plot the transformed sales data
plot(sales_log, type = "l", xlab = "Time", ylab = "Log(Sales)", main = "Transformed log Sales Data")
# Plot ACF for births data
acf(births, lag.max = 36, main = "ACF Plot of Births Data")
# Plot ACF for sales data
acf(sales, lag.max = 36, main = "ACF Plot of Sales Data")
months <- rep(1:12, length.out = length(births))
# The factor() function is used to convert the months variable into a factor, which is then used to create the seasonal dummies using the model.matrix() function.
seasonal_dummies <- model.matrix(~factor(months))
# - births_ts is the response variable
# - seasonal_dummies are the regressors
model <- lm(births ~ seasonal_dummies)
summary(model)
##
## Call:
## lm(formula = births ~ seasonal_dummies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1096 -1.9188 -0.0052 1.8997 3.7131
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.3786 0.5744 42.442 <2e-16 ***
## seasonal_dummies(Intercept) NA NA NA NA
## seasonal_dummiesfactor(months)2 -1.4931 0.8123 -1.838 0.0680 .
## seasonal_dummiesfactor(months)3 1.5164 0.8123 1.867 0.0638 .
## seasonal_dummiesfactor(months)4 -0.1501 0.8123 -0.185 0.8536
## seasonal_dummiesfactor(months)5 0.9371 0.8123 1.154 0.2504
## seasonal_dummiesfactor(months)6 0.4919 0.8123 0.606 0.5457
## seasonal_dummiesfactor(months)7 2.0946 0.8123 2.578 0.0108 *
## seasonal_dummiesfactor(months)8 1.9083 0.8123 2.349 0.0201 *
## seasonal_dummiesfactor(months)9 1.4235 0.8123 1.752 0.0817 .
## seasonal_dummiesfactor(months)10 1.4891 0.8123 1.833 0.0687 .
## seasonal_dummiesfactor(months)11 -0.4001 0.8123 -0.493 0.6230
## seasonal_dummiesfactor(months)12 0.3504 0.8123 0.431 0.6668
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.149 on 156 degrees of freedom
## Multiple R-squared: 0.1975, Adjusted R-squared: 0.1409
## F-statistic: 3.49 on 11 and 156 DF, p-value: 0.000222
# The trend variable is created to represent a linear trend from 1 to the length of the births data.
trend <- 1:length(births)
model_with_trend <- lm(births ~ seasonal_dummies + trend)
summary(model_with_trend)
##
## Call:
## lm(formula = births ~ seasonal_dummies + trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1819 -0.5458 -0.1180 0.4999 5.1607
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.465397 0.323663 66.320 < 2e-16 ***
## seasonal_dummies(Intercept) NA NA NA NA
## seasonal_dummiesfactor(months)2 -1.529948 0.414028 -3.695 0.000304 ***
## seasonal_dummiesfactor(months)3 1.442604 0.414039 3.484 0.000642 ***
## seasonal_dummiesfactor(months)4 -0.260772 0.414058 -0.630 0.529755
## seasonal_dummiesfactor(months)5 0.789637 0.414084 1.907 0.058377 .
## seasonal_dummiesfactor(months)6 0.307546 0.414117 0.743 0.458815
## seasonal_dummiesfactor(months)7 1.873312 0.414157 4.523 1.2e-05 ***
## seasonal_dummiesfactor(months)8 1.650150 0.414205 3.984 0.000104 ***
## seasonal_dummiesfactor(months)9 1.128488 0.414260 2.724 0.007188 **
## seasonal_dummiesfactor(months)10 1.157254 0.414323 2.793 0.005879 **
## seasonal_dummiesfactor(months)11 -0.768908 0.414393 -1.856 0.065423 .
## seasonal_dummiesfactor(months)12 -0.055213 0.414470 -0.133 0.894197
## trend 0.036877 0.001747 21.108 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.095 on 155 degrees of freedom
## Multiple R-squared: 0.7929, Adjusted R-squared: 0.7768
## F-statistic: 49.44 on 12 and 155 DF, p-value: < 2.2e-16
# Calculate the residuals
residuals <- residuals(model_with_trend)
# Plot a histogram of residuals to check for normality
hist(residuals, main = "Histogram of Residuals", xlab = "Residuals")
# Perform a Q-Q plot to further assess normality
qqnorm(residuals)
qqline(residuals)
# Plot the ACF of residuals to check for autocorrelation
acf(residuals, main = "Autocorrelation Function of Residuals")
# Set the lag value for the Ljung-Box test
lag_value <- 12 # You can adjust this value based on the seasonality
# Perform Ljung-Box test
ljung_box_test <- Box.test(residuals, lag = lag_value, type = "Ljung-Box")
# Print the results of the Ljung-Box test
ljung_box_test
##
## Box-Ljung test
##
## data: residuals
## X-squared = 260.6, df = 12, p-value < 2.2e-16
This suggests that there is still significant autocorrelation in the residuals, meaning that the model has not fully captured the underlying structure of the data, including both trend and seasonal patterns. The autocorrelation present in the residuals implies that there are systematic patterns that the model has not accounted for, and the residuals are not behaving like white noise.
library(TSstudio)
head(USgas)
## Jan Feb Mar Apr May Jun
## 2000 2510.5 2330.7 2050.6 1783.3 1632.9 1513.1
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
ts_plot(USgas)
# Apply one-sided moving average smoothing with a window size of 11
one_side_ma <- ts_ma(ts.obj = USgas, n_left = 11,n=NULL,plot=TRUE)
# Apply two-sided moving average smoothing with appropriate window sizes
two_side_ma <- ts_ma(ts.obj = USgas, n=6,plot=TRUE)
USgas_ts <- ts(USgas,frequency=12)
one_side_ma_ts<-ts(one_side_ma$unbalanced_ma_12,frequency=12)
two_side_ma_ts<-ts(two_side_ma$ma_6,frequency=12)
plot(USgas_ts, type = "l", col = "black", xlab = "Time", ylab = "Gas Consumption", main = "Gas Consumption with Moving Averages")
lines(one_side_ma_ts, col = "red")
lines(two_side_ma_ts, col = "blue")
legend("topright", legend = c("Gas Consumption", "One-sided Moving Average", "Two-sided Moving Average"), col = c("black", "red", "blue"), lty = 1,cex=0.6)
# ACF plot of the original data
acf(USgas, main = "ACF of Original Data")
# ACF plot of the one-sided moving average smoothed data
acf(one_side_ma_ts, main = "ACF of One-sided Moving Average")
# ACF plot of the two-sided moving average smoothed data
acf(two_side_ma_ts, main = "ACF of Two-sided Moving Average")
# Load necessary libraries
library(ggplot2)
# Load the Mauna Loa Atmospheric CO2 Concentration dataset
co2<-datasets::co2
# Display the first few rows of the dataset
head(co2)
## Jan Feb Mar Apr May Jun
## 1959 315.42 316.31 316.50 317.56 318.13 318.00
By doing this, it remove the seasonal pattern
co2_diff <- diff(co2, differences = 1)
acf(co2_diff)
co2_seasonal_diff <- diff(co2, lag=1, differences = 12)
acf(co2_seasonal_diff)
To neutralize the seasonal factor, you can apply seasonal differencing by subtracting the observation from the previous observation from the same season. In this case, the Mauna Loa Atmospheric CO2 Concentration data is measured monthly, so the seasonal difference is the difference between an observation and the observation from the previous month.
# Calculate and plot the ACF of the time series
plot(co2_diff, main ="Mauna Loa Atmospheric CO2 Concentration")
plot(co2_seasonal_diff, main = "Mauna Loa Atmospheric CO2 Concentration (Seasonal Difference)")
# Load the CPI data from the CSV file
cpi <- read.csv("us_cpi.csv", sep=";", stringsAsFactors = FALSE)
# Convert the date column to a proper date format
cpi$Date <- as.Date(cpi$date, format = "%d/%m/%Y")
# Convert the CPI column to numeric type and replace commas with decimal points
cpi$CPI <- as.numeric(gsub(",", ".", cpi$CPI))
# Convert the data into a time series object
cpi_ts <- ts(cpi$CPI, start = c(1971, 1), frequency = 12)
plot(cpi_ts)
# Subset the CPI data for the specified period
cpi_subset <- cpi[cpi$Date >= as.Date("1990-01-01") & cpi$Date <= as.Date("2005-12-31"), ]
# Calculate the return
monthly_growth <- numeric(length = length(cpi_subset$CPI) - 1)
for (i in 2:length(cpi_subset$CPI)) {
monthly_growth[i-1] <- ((cpi_subset$CPI[i] - cpi_subset$CPI[i-1]) / cpi_subset$CPI[i-1]) * 100
}
# Plot the returns
plot(monthly_growth, type = "l")
acf(monthly_growth)
After calculating the monthly growth rate, we can model the CPI
inflation rate using an ARMA(p, q) model.
# Plot ACF and PACF
acf_result <- acf(monthly_growth, lag.max = 24, plot = FALSE)
pacf_result <- pacf(monthly_growth, lag.max = 24, plot = FALSE)
# Plot ACF
plot(acf_result, main = "ACF of Monthly CPI Growth Rate", xlab = "Lag", ylab = "Autocorrelation")
# Plot PACF
plot(pacf_result, main = "PACF of Monthly CPI Growth Rate", xlab = "Lag", ylab = "Partial Autocorrelation")
# Define the range of p and q values to explore
p_values <- 0:3
q_values <- 0:3
# Create an empty matrix to store the likelihood values
likelihood_matrix <- matrix(NA, nrow = length(p_values), ncol = length(q_values))
# Loop over each combination of p and q
for (i in 1:length(p_values)) {
for (j in 1:length(q_values)) {
# Fit an ARMA model to the differenced time series
arma_model <- arima(monthly_growth, order = c(p_values[i], 1, q_values[j]))
# Calculate the log-likelihood value
likelihood_value <- logLik(arma_model)
# Store the log-likelihood value in the likelihood matrix
likelihood_matrix[i, j] <- likelihood_value
}
}
# Calculate the AIC and BIC values
AIC_matrix <- -2 * likelihood_matrix + 2 * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
BIC_matrix <- -2 * likelihood_matrix + log(length(monthly_growth)) * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
# Find the indices of the minimum AIC and BIC values
min_AIC_indices <- which(AIC_matrix == min(AIC_matrix), arr.ind = TRUE)
min_BIC_indices <- which(BIC_matrix == min(BIC_matrix), arr.ind = TRUE)
# Extract the corresponding p and q values
qualified_AIC_p <- p_values[min_AIC_indices[, 1]]
qualified_AIC_q <- q_values[min_AIC_indices[, 2]]
qualified_BIC_p <- p_values[min_BIC_indices[, 1]]
qualified_BIC_q <- q_values[min_BIC_indices[, 2]]
# Print the qualified combinations according to AIC
cat("Qualified combination according to AIC: p =", qualified_AIC_p, ", q =", qualified_AIC_q, "\n")
## Qualified combination according to AIC: p = 3 , q = 1
# Print the qualified combinations according to BIC
cat("Qualified combination according to BIC: p =", qualified_BIC_p, ", q =", qualified_BIC_q, "\n")
## Qualified combination according to BIC: p = 3 , q = 1
library(forecast)
# Fit an ARIMA model to the differenced monthly growth rate
arima_model <- Arima(monthly_growth, order = c(3, 1, 1), include.mean = FALSE)
# Generate simulated data based on the ARIMA model
simulated_data <- simulate(arima_model, nsim = length(monthly_growth))
# Calculate the predicted values using the estimated ARIMA model
predicted_values <- fitted(arima_model)
# Calculate the estimated residuals
estimated_residuals <- residuals(arima_model)
# Print the estimated coefficients
cat("Estimated Coefficients:\n")
## Estimated Coefficients:
head(coef(arima_model))
## ar1 ar2 ar3 ma1
## 0.26583995 -0.30441125 -0.03864879 -0.91776518
# Print the predicted values
cat("Predicted Values:\n")
## Predicted Values:
head(predicted_values)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## [1] 0.3917647 0.4098791 0.4059460 0.2528447 0.3123343 0.5166998
# Print the estimated residuals
cat("Estimated Residuals:\n")
## Estimated Residuals:
head(estimated_residuals)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## [1] 0.0003921565 0.0588709173 -0.1726645316 -0.0976856850 0.3073403927
## [6] -0.0548060014
# Plot the simulated data and the dependent variable
plot(monthly_growth, type = "l", col = "blue", ylab = "Monthly Growth Rate")
lines(predicted_values, col = "red")
legend("topleft", legend = c("Monthly Growth Rate", "Predicted Data"), col = c("blue", "red"), lty = 1)
# ACF and PACF Plots
acf(residuals,lag.max = 60)
pacf(residuals, lag.max = 60)
# Ljung-Box Test
Box.test(residuals, lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals
## X-squared = 261.85, df = 20, p-value < 2.2e-16
# Normality Test
shapiro.test(residuals)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.90277, p-value = 4.343e-09
qqnorm(residuals)
qqline(residuals)
# Stability Test
split_point <- floor(length(monthly_growth)/2)
first_half <- monthly_growth[1:split_point]
second_half <- monthly_growth[(split_point+1):length(monthly_growth)]
arima_model_1st_half <- arima(first_half, order = c(3, 1, 1))
## Warning in log(s2): NaNs produced
arima_model_2nd_half <- arima(second_half, order = c(3, 1, 1))
coef_comparison <- cbind(coef(arima_model_1st_half), coef(arima_model_2nd_half))
colnames(coef_comparison) <- c("1st Half", "2nd Half")
print(coef_comparison)
## 1st Half 2nd Half
## ar1 0.297214660 0.1947362
## ar2 -0.005112823 -0.4067466
## ar3 0.111002942 -0.1465214
## ma1 -0.935528686 -0.8859582
# Fit an ARIMA model to the monthly growth rate
arima_model <- arima(monthly_growth, order = c(3, 1, 1))
# Forecast the CPI for the next three months
forecast_result <- forecast(arima_model, h = 3)
# Extract the forecasted values
forecast_values <- forecast_result$mean
# Print the forecasted values
print(forecast_values)
## Time Series:
## Start = 192
## End = 194
## Frequency = 1
## [1] 0.4456701 0.4364974 0.2789802
# Subset the CPI data for the specified period
cpi_subset <- cpi[cpi$Date >= as.Date("1990-01-01") & cpi$Date <= as.Date("2021-12-31"), ]
# Calculate the return
monthly_growth_2021 <- numeric(length = length(cpi_subset$CPI) - 1)
for (i in 2:length(cpi_subset$CPI)) {
monthly_growth_2021[i-1] <- ((cpi_subset$CPI[i] - cpi_subset$CPI[i-1]) / cpi_subset$CPI[i-1]) * 100
}
# Plot the returns
plot(monthly_growth_2021, type = "l")
acf(monthly_growth_2021)
# Define the range of p and q values to explore
p_values <- 0:3
q_values <- 0:3
# Create an empty matrix to store the likelihood values
likelihood_matrix_2021 <- matrix(NA, nrow = length(p_values), ncol = length(q_values))
# Loop over each combination of p and q
for (i in 1:length(p_values)) {
for (j in 1:length(q_values)) {
# Fit an ARMA model to the differenced time series
arma_model_2021 <- arima(monthly_growth_2021, order = c(p_values[i], 1, q_values[j]))
# Calculate the log-likelihood value
likelihood_value <- logLik(arma_model_2021)
# Store the log-likelihood value in the likelihood matrix
likelihood_matrix_2021[i, j] <- likelihood_value
}
}
# Calculate the AIC and BIC values
AIC_matrix_2021 <- -2 * likelihood_matrix_2021 + 2 * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
BIC_matrix_2021 <- -2 * likelihood_matrix_2021 + log(length(monthly_growth_2021)) * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
# Find the indices of the minimum AIC and BIC values
min_AIC_indices_2021 <- which(AIC_matrix_2021 == min(AIC_matrix_2021), arr.ind = TRUE)
min_BIC_indices_2021 <- which(BIC_matrix_2021 == min(BIC_matrix_2021), arr.ind = TRUE)
# Extract the corresponding p and q values
qualified_AIC_p_2021 <- p_values[min_AIC_indices[, 1]]
qualified_AIC_q_2021 <- q_values[min_AIC_indices[, 2]]
qualified_BIC_p_2021 <- p_values[min_BIC_indices[, 1]]
qualified_BIC_q_2021 <- q_values[min_BIC_indices[, 2]]
# Print the qualified combinations according to AIC
cat("Qualified combination according to AIC: p =", qualified_AIC_p_2021, ", q =", qualified_AIC_q_2021, "\n")
## Qualified combination according to AIC: p = 3 , q = 1
# Print the qualified combinations according to BIC
cat("Qualified combination according to BIC: p =", qualified_BIC_p_2021, ", q =", qualified_BIC_q_2021, "\n")
## Qualified combination according to BIC: p = 3 , q = 1
# Read the CSV file
df <- read.csv("s&p500.csv")
# Convert the 'Date' column to Date type
df$Date <- as.Date(df$Date)
plot(df$Close, type = "l")
# Calculate the monthly returns
df$Returns <- c(NA, diff(log(df$Close)))
plot(df$Returns, type = "l", main = "monthly returns S&P500")
sp500_returns_ts <- na.omit(ts(df$Returns))
# Plot the ACF and PACF
acf(sp500_returns_ts, main = "ACF Plot of S&P500 Index", lag.max = 60)
pacf(sp500_returns_ts, main = "PACF Plot of S&P500 Index", lag.max = 60)
# Perform Ljung-Box test
ljung_box_test <- Box.test(sp500_returns_ts, lag = 20, type = "Ljung-Box")
# Print the test results
print(ljung_box_test)
##
## Box-Ljung test
##
## data: sp500_returns_ts
## X-squared = 19.989, df = 20, p-value = 0.4586
# Define the range of p and q values to explore
p_values <- 0:4
q_values <- 0:4
# Create an empty matrix to store the likelihood values
likelihood_matrix_2021 <- matrix(NA, nrow = length(p_values), ncol = length(q_values))
# Loop over each combination of p and q
for (i in 1:length(p_values)) {
for (j in 1:length(q_values)) {
# Fit an ARMA model to the differenced time series
arma_model_2021 <- arima(sp500_returns_ts, order = c(p_values[i], 0, q_values[j]))
# Calculate the log-likelihood value
likelihood_value <- logLik(arma_model_2021)
# Store the log-likelihood value in the likelihood matrix
likelihood_matrix_2021[i, j] <- likelihood_value
}
}
## Warning in arima(sp500_returns_ts, order = c(p_values[i], 0, q_values[j])):
## possible convergence problem: optim gave code = 1
## Warning in arima(sp500_returns_ts, order = c(p_values[i], 0, q_values[j])):
## possible convergence problem: optim gave code = 1
# Calculate the AIC and BIC values
AIC_matrix_2021 <- -2 * likelihood_matrix_2021 + 2 * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
BIC_matrix_2021 <- -2 * likelihood_matrix_2021 + log(length(sp500_returns_ts)) * matrix(rep(p_values + 1, each = length(p_values)), nrow = length(p_values))
# Find the indices of the minimum AIC and BIC values
min_AIC_indices_2021 <- which(AIC_matrix_2021 == min(AIC_matrix_2021), arr.ind = TRUE)
min_BIC_indices_2021 <- which(BIC_matrix_2021 == min(BIC_matrix_2021), arr.ind = TRUE)
# Extract the corresponding p and q values
qualified_AIC_p_2021 <- p_values[min_AIC_indices[, 1]]
qualified_AIC_q_2021 <- q_values[min_AIC_indices[, 2]]
qualified_BIC_p_2021 <- p_values[min_BIC_indices[, 1]]
qualified_BIC_q_2021 <- q_values[min_BIC_indices[, 2]]
# Print the qualified combinations according to AIC
cat("Qualified combination according to AIC: p =", qualified_AIC_p_2021, ", q =", qualified_AIC_q_2021, "\n")
## Qualified combination according to AIC: p = 3 , q = 1
# Print the qualified combinations according to BIC
cat("Qualified combination according to BIC: p =", qualified_BIC_p_2021, ", q =", qualified_BIC_q_2021, "\n")
## Qualified combination according to BIC: p = 3 , q = 1
library(forecast)
# Fit an ARIMA model to the differenced monthly growth rate
arima_model <- Arima(sp500_returns_ts, order = c(3, 0, 1), include.mean = FALSE)
# Generate simulated data based on the ARIMA model
simulated_data <- simulate(arima_model, nsim = length(sp500_returns_ts))
# Calculate the predicted values using the estimated ARIMA model
predicted_values <- fitted(arima_model)
# Calculate the estimated residuals
estimated_residuals <- residuals(arima_model)
# Print the estimated coefficients
cat("Estimated Coefficients:\n")
## Estimated Coefficients:
head(coef(arima_model))
## ar1 ar2 ar3 ma1
## 0.42939463 -0.08037551 -0.03693234 -0.38258299
# Print the predicted values
cat("Predicted Values:\n")
## Predicted Values:
head(predicted_values)
## Time Series:
## Start = 2
## End = 7
## Frequency = 1
## [1] 7.314067e-05 6.863118e-04 -5.487718e-04 -1.199745e-03 -1.384428e-03
## [6] 6.574403e-04
# Print the estimated residuals
cat("Estimated Residuals:\n")
## Estimated Residuals:
head(estimated_residuals)
## Time Series:
## Start = 2
## End = 7
## Frequency = 1
## [1] 0.0134154657 0.0052109543 0.0002656875 -0.0143649520 -0.0002087052
## [6] 0.0298212030
# Plot the simulated data and the dependent variable
plot(sp500_returns_ts, type = "l", col = "blue", ylab = "Monthly Growth Rate")
lines(predicted_values, col = "red")
legend("topleft", legend = c("Monthly Growth Rate", "Predicted Data"), col = c("blue", "red"), lty = 1)