#Installing packages

# Install only the packages that are not available yet, so re-running the
# script does not download everything again. dplyr is listed as well because
# it is loaded further down with library(dplyr).
required_packages <- c("readxl", "Matrix", "corrplot", "dplyr")
already_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!already_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
## 
## The downloaded binary packages are in
##  /var/folders/d2/w90xhjgx0wgcxgdd_0pmdpjh0000gn/T//RtmpGYltM1/downloaded_packages

##Loading Libraries

library(readxl)  
library(Matrix)   
library(corrplot)
## corrplot 0.95 loaded
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#Data Loading

# Folder that contains the three Excel workbooks used in the exercises.
file_path <- "~/Downloads/Data 5"

# Build each workbook path and hand it straight to readxl::read_excel().
set_1 <- file.path(file_path, "set_1.xlsx") %>% read_excel()
set_2 <- file.path(file_path, "set_2.xlsx") %>% read_excel()
set_3 <- file.path(file_path, "set_3.xlsx") %>% read_excel()
# Print a labelled str() summary of each imported table. The lines starting
# with "##" are the output recorded when the document was rendered.
cat("Structure of Set 1:\n")
## Structure of Set 1:
str(set_1)
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
##  $ NDAQ: num [1:756] 85 85.1 85.5 -86 -86.8 ...
##  $ DJI : num [1:756] 24640 24749 25366 NA NA ...
##  $ FTM : num [1:756] 18719 18661 18640 18609 18481 ...
##  $ SPX : num [1:756] 2673 2682 2744 NA NA ...
# Note: the recorded Set 1 output shows negative NDAQ values and NA entries in
# DJI and SPX, which the Set 2 output below does not show for the same dates.
cat("\nStructure of Set 2:\n")
## 
## Structure of Set 2:
str(set_2)
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
##  $ NDAQ: num [1:756] 85 85.1 85.5 86 86.8 ...
##  $ DJI : num [1:756] 24640 24749 25366 25339 25538 ...
##  $ FTM : num [1:756] 18719 18661 18640 18609 18481 ...
##  $ SPX : num [1:756] 2673 2682 2744 2738 2760 ...
cat("\nStructure of Set 3:\n")
## 
## Structure of Set 3:
str(set_3)
## tibble [756 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Data: chr [1:756] "2018-11-26" "2018-11-27" "2018-11-28" "2018-11-29" ...
##  $ NDAQ: num [1:756] 85 85.1 85.5 86 86.8 ...
##  $ DJI : num [1:756] -170 -170 -171 -172 -174 ...
##  $ FTM : num [1:756] 170 170 171 172 174 ...
##  $ SPX : num [1:756] 170 170 171 172 174 ...
# Note: in the recorded Set 3 output the first FTM and SPX values coincide and
# the first DJI values are their negatives.

#Exercise 1

#Correlation Matrix

# Drop the date column ("Data") so that only the index columns remain.
set_1_numeric <- select(set_1, -Data)
set_2_numeric <- select(set_2, -Data)
set_3_numeric <- select(set_3, -Data)

# Correlation matrix of each set; use = "complete.obs" makes cor() discard
# every row that contains a missing value before computing the correlations.
cor_matrix_1 <- set_1_numeric %>% cor(use = "complete.obs")
cor_matrix_2 <- set_2_numeric %>% cor(use = "complete.obs")
cor_matrix_3 <- set_3_numeric %>% cor(use = "complete.obs")

# Print each correlation matrix under its own label. The lines starting with
# "##" are the output recorded when the document was rendered.
cat("Correlation Matrix for Set 1:\n")
## Correlation Matrix for Set 1:
print(cor_matrix_1)
##           NDAQ       DJI       FTM       SPX
## NDAQ 1.0000000 0.9356364 0.8145226 0.9767219
## DJI  0.9356364 1.0000000 0.9442291 0.9849028
## FTM  0.8145226 0.9442291 1.0000000 0.8903784
## SPX  0.9767219 0.9849028 0.8903784 1.0000000
cat("\nCorrelation Matrix for Set 2:\n")
## 
## Correlation Matrix for Set 2:
print(cor_matrix_2)
##           NDAQ       DJI       FTM       SPX
## NDAQ 1.0000000 0.9187433 0.6924651 0.9713451
## DJI  0.9187433 1.0000000 0.8853569 0.9787758
## FTM  0.6924651 0.8853569 1.0000000 0.7916723
## SPX  0.9713451 0.9787758 0.7916723 1.0000000
cat("\nCorrelation Matrix for Set 3:\n")
## 
## Correlation Matrix for Set 3:
print(cor_matrix_3)
##      NDAQ DJI FTM SPX
## NDAQ    1  -1   1   1
## DJI    -1   1  -1  -1
## FTM     1  -1   1   1
## SPX     1  -1   1   1
# Note: every recorded Set 3 entry prints as exactly 1 or -1.

Set 1 and Set 2: The correlation values are within the valid range of -1 to 1. The matrices appear reasonable and do not seem to require adjustment. A valid correlation matrix must also be positive semi-definite (no negative eigenvalues), so the eigenvalues are checked below.

# Keep only the eigenvalues of each decomposition; they are what the
# definiteness check below looks at.
eigen_1 <- eigen(cor_matrix_1)[["values"]]
eigen_2 <- eigen(cor_matrix_2)[["values"]]

# Print the eigenvalues of the Set 1 and Set 2 correlation matrices. The lines
# starting with "##" are the recorded output; every value shown is positive.
cat("Eigenvalues for Set 1 Correlation Matrix:\n")
## Eigenvalues for Set 1 Correlation Matrix:
print(eigen_1)
## [1] 3.77535583 0.20177405 0.02043119 0.00243893
cat("\nEigenvalues for Set 2 Correlation Matrix:\n")
## 
## Eigenvalues for Set 2 Correlation Matrix:
print(eigen_2)
## [1] 3.626803507 0.341935198 0.028137692 0.003123603

Based on the results above, all eigenvalues are positive, which confirms that both matrices are positive definite. Hence the correlation matrices for Set 1 and Set 2 are valid and do not need adjustment.

Set 3: All values in the correlation matrix are either 1 or -1, which is unusual. This suggests an issue with the dataset, such as a lack of independent variability in the columns or exact linear dependencies among the variables. Hence the correlation matrix is unrealistic and likely invalid.

#Exercise 2

##Spectral Decomposition Method

#' Repair a correlation matrix by spectral decomposition (eigenvalue clipping)
#'
#' Eigenvalues smaller than `epsilon` are raised to `epsilon`, the matrix is
#' rebuilt from its eigenvectors, and the result is rescaled so that the
#' diagonal is one again.
#'
#' @param cor_matrix A square numeric matrix (or an object `as.matrix()` can
#'   convert to one). It is expected to be symmetric with a unit diagonal; it
#'   is symmetrised before the decomposition.
#' @param epsilon Lower bound applied to the eigenvalues before rescaling.
#' @return A symmetric numeric matrix with unit diagonal that carries the
#'   dimnames of `cor_matrix`.
spectral_decomposition <- function(cor_matrix, epsilon = 1e-8) {
  cor_matrix <- as.matrix(cor_matrix)
  stopifnot(
    is.numeric(cor_matrix),
    nrow(cor_matrix) == ncol(cor_matrix)
  )

  # Symmetrise and say so explicitly: a slightly asymmetric input would
  # otherwise be sent to the general solver, which may return complex values.
  symmetric_input <- (cor_matrix + t(cor_matrix)) / 2
  eig <- eigen(symmetric_input, symmetric = TRUE)

  adjusted_values <- pmax(eig$values, epsilon)
  size <- length(adjusted_values)

  # diag(x) with a length-one x builds an identity matrix of size x, so the
  # dimension is passed explicitly to keep 1 x 1 inputs working.
  adjusted_matrix <- eig$vectors %*%
    diag(adjusted_values, nrow = size) %*%
    t(eig$vectors)

  # Raising eigenvalues also changes the diagonal; rescale rows and columns
  # by 1 / sqrt(diagonal) so the result is a correlation matrix again.
  scaling <- diag(1 / sqrt(diag(adjusted_matrix)), nrow = size)
  adjusted_matrix <- scaling %*% adjusted_matrix %*% scaling

  # Remove rounding asymmetry and pin the diagonal to exactly one.
  adjusted_matrix <- (adjusted_matrix + t(adjusted_matrix)) / 2
  diag(adjusted_matrix) <- 1

  # Matrix products drop the row/column labels; restore them for printing.
  dimnames(adjusted_matrix) <- dimnames(cor_matrix)

  adjusted_matrix
}

##Alternating Projection Method

#' Repair a correlation matrix by alternating projections
#'
#' Each iteration first raises every eigenvalue below `tau` to `tau` and
#' rebuilds the matrix, then resets the diagonal to one. The loop stops when
#' the Frobenius distance between two successive iterates is at most
#' `tolerance`, or when `max_iterations` is reached (with a warning).
#'
#' @param cor_matrix A square numeric matrix (or an object `as.matrix()` can
#'   convert to one); it is symmetrised before the first iteration.
#' @param tolerance Convergence threshold on the Frobenius distance between
#'   successive iterates.
#' @param tau Lower bound applied to the eigenvalues in every iteration.
#' @param max_iterations Upper bound on the number of iterations, so that a
#'   non-converging input cannot loop forever.
#' @return A list with `adjusted_matrix` (unit diagonal, dimnames of
#'   `cor_matrix`), `iterations`, `distance` (last Frobenius distance) and
#'   `converged` (`TRUE` when `distance <= tolerance`).
alternating_projection <- function(cor_matrix, tolerance = 1e-12, tau = 1e-8,
                                   max_iterations = 10000L) {
  cor_matrix <- as.matrix(cor_matrix)
  stopifnot(
    is.numeric(cor_matrix),
    nrow(cor_matrix) == ncol(cor_matrix),
    length(max_iterations) == 1,
    max_iterations >= 1
  )

  # Start from the symmetric part so eigen() can use the symmetric solver.
  adjusted_matrix <- (cor_matrix + t(cor_matrix)) / 2
  iterations <- 0L
  distance <- Inf

  while (distance > tolerance && iterations < max_iterations) {
    iterations <- iterations + 1L
    previous_matrix <- adjusted_matrix

    # Step 1: clip the spectrum from below and rebuild the matrix.
    eig <- eigen(adjusted_matrix, symmetric = TRUE)
    adjusted_values <- pmax(eig$values, tau)
    # nrow is explicit because diag(x) with a length-one x builds an identity
    # matrix of size x instead of a 1 x 1 matrix holding x.
    adjusted_matrix <- eig$vectors %*%
      diag(adjusted_values, nrow = length(adjusted_values)) %*%
      t(eig$vectors)

    # Step 2: remove rounding asymmetry and restore the unit diagonal.
    adjusted_matrix <- (adjusted_matrix + t(adjusted_matrix)) / 2
    diag(adjusted_matrix) <- 1

    # Frobenius norm of the change made by this iteration.
    distance <- sqrt(sum((adjusted_matrix - previous_matrix)^2))
  }

  converged <- distance <= tolerance
  if (!converged) {
    warning(
      "alternating_projection() stopped after ", iterations,
      " iterations without reaching the tolerance (distance = ",
      format(distance), ").",
      call. = FALSE
    )
  }

  # Matrix products drop the row/column labels; restore them for printing.
  dimnames(adjusted_matrix) <- dimnames(cor_matrix)

  list(
    adjusted_matrix = adjusted_matrix,
    iterations = iterations,
    distance = distance,
    converged = converged
  )
}

##Testing the Functions

# Apply both adjustment functions to the Set 3 correlation matrix and print
# the results. The lines starting with "##" are the output recorded when the
# document was rendered.
adjusted_matrix_spectral <- spectral_decomposition(cor_matrix_3)
cat("Adjusted Matrix (Spectral Decomposition):\n")
## Adjusted Matrix (Spectral Decomposition):
print(adjusted_matrix_spectral)
##      [,1] [,2] [,3] [,4]
## [1,]    1   -1    1    1
## [2,]   -1    1   -1   -1
## [3,]    1   -1    1    1
## [4,]    1   -1    1    1
# alternating_projection() returns a list; the adjusted matrix, the iteration
# count and the final distance are printed separately below.
adjusted_results <- alternating_projection(cor_matrix_3)
cat("\nAdjusted Matrix (Alternating Projection):\n")
## 
## Adjusted Matrix (Alternating Projection):
print(adjusted_results$adjusted_matrix)
##      [,1] [,2] [,3] [,4]
## [1,]    1   -1    1    1
## [2,]   -1    1   -1   -1
## [3,]    1   -1    1    1
## [4,]    1   -1    1    1
cat("\nNumber of Iterations:", adjusted_results$iterations, "\n")
## 
## Number of Iterations: 33
cat("Distance Between Matrices:", adjusted_results$distance, "\n")
## Distance Between Matrices: 8.714869e-13

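Spectral Decomposition Method: The printed matrix is unchanged, showing only 1 and -1 values. This is because the input correlation matrix (cor_matrix_3) consists entirely of 1s and -1s, and spectral decomposition doesn’t inherently fix unrealistic correlation values in this specific scenario. Such a matrix is effectively already positive semi-definite (it has rank one), so raising its near-zero eigenvalues to 1e-8 changes the entries only far below the printed precision.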

Alternating Projection Method: Adjusted Matrix: The matrix is also unchanged, with only 1s and -1s, indicating that the method could not address the issue of perfect correlation values directly. Iterations: The method converged in 33 iterations, which is a reasonable number for such a small matrix. Distance Between Matrices: The distance is extremely small, meaning the method has converged to a solution very close to the original matrix.

The results indicate that Set 3’s correlation matrix is inherently problematic due to its unrealistic values (all 1 and -1). Neither method could fix this fundamental issue because: