#Installing packages
install.packages(c("readxl", "Matrix", "corrplot"))
##
## The downloaded binary packages are in
## /var/folders/d2/w90xhjgx0wgcxgdd_0pmdpjh0000gn/T//RtmpGYltM1/downloaded_packages
##Loading Libraries
library(readxl)
library(Matrix)
library(corrplot)
## corrplot 0.95 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Data loading ----
# The three Excel workbooks are read from one shared folder.
file_path <- "~/Downloads/Data 5"
set_1 <- file.path(file_path, "set_1.xlsx") %>% read_excel()
set_2 <- file.path(file_path, "set_2.xlsx") %>% read_excel()
set_3 <- file.path(file_path, "set_3.xlsx") %>% read_excel()
# Show the column types and the first values of every data set.
cat("Structure of Set 1:\n")
## Structure of Set 1:
set_1 %>% str()
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
## $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
## $ NDAQ: num [1:756] 85 85.1 85.5 -86 -86.8 ...
## $ DJI : num [1:756] 24640 24749 25366 NA NA ...
## $ FTM : num [1:756] 18719 18661 18640 18609 18481 ...
## $ SPX : num [1:756] 2673 2682 2744 NA NA ...
cat("\nStructure of Set 2:\n")
##
## Structure of Set 2:
set_2 %>% str()
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
## $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
## $ NDAQ: num [1:756] 85 85.1 85.5 86 86.8 ...
## $ DJI : num [1:756] 24640 24749 25366 25339 25538 ...
## $ FTM : num [1:756] 18719 18661 18640 18609 18481 ...
## $ SPX : num [1:756] 2673 2682 2744 2738 2760 ...
cat("\nStructure of Set 3:\n")
##
## Structure of Set 3:
set_3 %>% str()
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
## $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
## $ NDAQ: num [1:756] 85 85.1 85.5 86 86.8 ...
## $ DJI : num [1:756] -170 -170 -171 -172 -174 ...
## $ FTM : num [1:756] 170 170 171 172 174 ...
## $ SPX : num [1:756] 170 170 171 172 174 ...
# Exercise 1 ----
# Correlation matrix
# Remove the date column so only the numeric series are correlated.
set_1_numeric <- select(set_1, -Data)
set_2_numeric <- select(set_2, -Data)
set_3_numeric <- select(set_3, -Data)
# use = "complete.obs" drops every row that has a missing value in any column.
cor_matrix_1 <- set_1_numeric %>% cor(use = "complete.obs")
cor_matrix_2 <- set_2_numeric %>% cor(use = "complete.obs")
cor_matrix_3 <- set_3_numeric %>% cor(use = "complete.obs")
# Print the three matrices one after another.
cat("Correlation Matrix for Set 1:\n")
## Correlation Matrix for Set 1:
print(cor_matrix_1)
## NDAQ DJI FTM SPX
## NDAQ 1.0000000 0.9356364 0.8145226 0.9767219
## DJI 0.9356364 1.0000000 0.9442291 0.9849028
## FTM 0.8145226 0.9442291 1.0000000 0.8903784
## SPX 0.9767219 0.9849028 0.8903784 1.0000000
cat("\nCorrelation Matrix for Set 2:\n")
##
## Correlation Matrix for Set 2:
print(cor_matrix_2)
## NDAQ DJI FTM SPX
## NDAQ 1.0000000 0.9187433 0.6924651 0.9713451
## DJI 0.9187433 1.0000000 0.8853569 0.9787758
## FTM 0.6924651 0.8853569 1.0000000 0.7916723
## SPX 0.9713451 0.9787758 0.7916723 1.0000000
cat("\nCorrelation Matrix for Set 3:\n")
##
## Correlation Matrix for Set 3:
print(cor_matrix_3)
## NDAQ DJI FTM SPX
## NDAQ 1 -1 1 1
## DJI -1 1 -1 -1
## FTM 1 -1 1 1
## SPX 1 -1 1 1
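Set 1 and Set 2: The correlation values are within the valid range of -1 to 1. The matrices appear reasonable and do not require adjustment. A valid correlation matrix must also be positive semi-definite (and positive definite if it has to be invertible), which is checked below using the eigenvalues.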
# Keep only the eigenvalues of each correlation matrix; the eigenvectors
# are not needed for the definiteness check.
eigen_1 <- eigen(cor_matrix_1)[["values"]]
eigen_2 <- eigen(cor_matrix_2)[["values"]]
cat("Eigenvalues for Set 1 Correlation Matrix:\n")
## Eigenvalues for Set 1 Correlation Matrix:
print(eigen_1)
## [1] 3.77535583 0.20177405 0.02043119 0.00243893
cat("\nEigenvalues for Set 2 Correlation Matrix:\n")
##
## Eigenvalues for Set 2 Correlation Matrix:
print(eigen_2)
## [1] 3.626803507 0.341935198 0.028137692 0.003123603
Based on the results above, all eigenvalues are positive, which confirms that both matrices are positive definite. Hence the correlation matrices for Set 1 and Set 2 are valid and do not need adjustment.
Set 3: All values in the correlation matrix are either 1 or -1, which is unusual. This suggests an issue with the dataset, such as a lack of independent variability in the columns or exact linear dependencies among the variables. Hence the correlation matrix is unrealistic and likely invalid.
#Exercise 2
##Spectral Decomposition Method
#' Floor the eigenvalues of a symmetric matrix (spectral decomposition fix)
#'
#' Decomposes `cor_matrix` into eigenvalues and eigenvectors, raises every
#' eigenvalue below `epsilon` up to `epsilon`, and rebuilds the matrix from
#' the adjusted eigenvalues. The diagonal is not rescaled afterwards, so it
#' may move away from exactly 1 when eigenvalues are changed.
#'
#' @param cor_matrix Square numeric matrix (or an object `as.matrix()` can
#'   convert to one). It is treated as symmetric: only its lower triangle is
#'   used by the eigen-decomposition.
#' @param epsilon Smallest eigenvalue allowed in the rebuilt matrix.
#' @return A symmetric numeric matrix with the same dimensions as
#'   `cor_matrix`, without dimnames.
spectral_decomposition <- function(cor_matrix, epsilon = 1e-8) {
  cor_matrix <- as.matrix(cor_matrix)
  stopifnot(
    is.numeric(cor_matrix),
    nrow(cor_matrix) == ncol(cor_matrix)
  )
  # symmetric = TRUE guarantees real eigenvalues and orthonormal eigenvectors
  # even when the input carries small asymmetries from rounding.
  eig <- eigen(cor_matrix, symmetric = TRUE)
  adjusted_values <- pmax(eig$values, epsilon)
  # diag(x) with a length-one x builds an identity matrix of size x instead of
  # a 1 x 1 diagonal matrix; passing nrow pins the intended size.
  adjusted_matrix <- eig$vectors %*%
    diag(adjusted_values, nrow = length(adjusted_values)) %*%
    t(eig$vectors)
  # Average with the transpose to remove floating-point asymmetry.
  (adjusted_matrix + t(adjusted_matrix)) / 2
}
##Alternating Projection Method
#' Repair a correlation matrix by alternating projections
#'
#' Repeats two steps until successive iterates differ by at most `tolerance`
#' in Frobenius norm: (1) raise every eigenvalue below `tau` up to `tau` and
#' rebuild the matrix, (2) reset the diagonal to 1.
#'
#' @param cor_matrix Square numeric matrix (or an object `as.matrix()` can
#'   convert to one). It is treated as symmetric: only its lower triangle is
#'   used by the eigen-decomposition.
#' @param tolerance Stop once the Frobenius distance between two successive
#'   iterates is at most this value.
#' @param tau Smallest eigenvalue allowed after the eigenvalue step.
#' @param max_iterations Upper bound on the number of iterations; a warning
#'   is raised when it is reached before `tolerance` is met.
#' @return A list with `adjusted_matrix` (the last iterate, without
#'   dimnames), `iterations` (iterations performed), `distance` (Frobenius
#'   distance between the last two iterates) and `converged` (`TRUE` when
#'   `distance <= tolerance`).
alternating_projection <- function(cor_matrix, tolerance = 1e-12, tau = 1e-8,
                                   max_iterations = 10000L) {
  adjusted_matrix <- as.matrix(cor_matrix)
  stopifnot(
    is.numeric(adjusted_matrix),
    nrow(adjusted_matrix) == ncol(adjusted_matrix),
    max_iterations >= 1
  )
  iterations <- 0
  distance <- Inf
  # The cap keeps the loop from running forever when the iterates stall
  # above the requested tolerance.
  while (distance > tolerance && iterations < max_iterations) {
    iterations <- iterations + 1
    previous_matrix <- adjusted_matrix
    # Step 1: floor the eigenvalues at tau and rebuild the matrix.
    eig <- eigen(adjusted_matrix, symmetric = TRUE)
    adjusted_values <- pmax(eig$values, tau)
    # nrow is required: diag(x) with a length-one x builds an identity matrix
    # of size x rather than a 1 x 1 diagonal matrix.
    adjusted_matrix <- eig$vectors %*%
      diag(adjusted_values, nrow = length(adjusted_values)) %*%
      t(eig$vectors)
    adjusted_matrix <- (adjusted_matrix + t(adjusted_matrix)) / 2
    # Step 2: restore the unit diagonal.
    diag(adjusted_matrix) <- 1
    # Frobenius norm of the change made by this iteration.
    distance <- sqrt(sum((adjusted_matrix - previous_matrix)^2))
  }
  converged <- distance <= tolerance
  if (!converged) {
    warning(
      "alternating_projection() stopped after ", iterations,
      " iterations without reaching the tolerance (distance = ",
      format(distance), ").",
      call. = FALSE
    )
  }
  list(
    adjusted_matrix = adjusted_matrix,
    iterations = iterations,
    distance = distance,
    converged = converged
  )
}
## Testing the functions
# Run the eigenvalue-flooring fix on the Set 3 correlation matrix.
adjusted_matrix_spectral <- cor_matrix_3 %>% spectral_decomposition()
cat("Adjusted Matrix (Spectral Decomposition):\n")
## Adjusted Matrix (Spectral Decomposition):
print(adjusted_matrix_spectral)
## [,1] [,2] [,3] [,4]
## [1,] 1 -1 1 1
## [2,] -1 1 -1 -1
## [3,] 1 -1 1 1
## [4,] 1 -1 1 1
# Run the iterative fix on the same matrix and report its diagnostics.
adjusted_results <- cor_matrix_3 %>% alternating_projection()
cat("\nAdjusted Matrix (Alternating Projection):\n")
##
## Adjusted Matrix (Alternating Projection):
print(adjusted_results[["adjusted_matrix"]])
## [,1] [,2] [,3] [,4]
## [1,] 1 -1 1 1
## [2,] -1 1 -1 -1
## [3,] 1 -1 1 1
## [4,] 1 -1 1 1
cat("\nNumber of Iterations:", adjusted_results[["iterations"]], "\n")
##
## Number of Iterations: 33
cat("Distance Between Matrices:", adjusted_results[["distance"]], "\n")
## Distance Between Matrices: 8.714869e-13
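Spectral Decomposition Method: The matrix is unchanged, showing only 1 and -1 values. This is because the input correlation matrix (cor_matrix_3) contains only 1s and -1s: it has rank one, so its eigenvalues are 4, 0, 0, 0 (up to rounding) and none of them is meaningfully negative. Raising the zero eigenvalues to epsilon = 1e-8 changes the entries by far less than the printed precision, so spectral decomposition doesn’t inherently fix unrealistic correlation values in this specific scenario.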
Alternating Projection Method: Adjusted matrix: The matrix is also unchanged, with only 1s and -1s, indicating the method could not address the issue of perfect correlation values directly. Iterations: The method converged in 33 iterations, which is a reasonable number for such a small matrix. Distance between matrices: The reported distance (about 8.7e-13) is the Frobenius distance between the last two iterates; it is below the 1e-12 tolerance, meaning the iteration has stopped changing and has converged to a solution very close to the original matrix.
The results indicate that Set 3’s correlation matrix is inherently problematic due to its unrealistic values (all 1 and -1). Neither method could fix this fundamental issue because: