The objective here is to check whether the independent variables are correlated with each other. Such correlation can lead to a problem called multicollinearity, which gives rise to:
- wrong estimation of coefficients
- overfitting
# Multicollinearity screening ----
#
# Reads BostonHousing.csv, plots the correlation matrix of the independent
# variables, uses caret::findCorrelation() to flag variables whose pairwise
# correlation exceeds the cutoff, removes them, and then plots the
# correlation matrix of what remains and shows the reduced data set.

library(data.table)
suppressPackageStartupMessages(library(corrplot))
suppressPackageStartupMessages(library(caret))
library(DT)

Data <- read.csv("BostonHousing.csv")

# Correlations between independent variables ----
# Columns 1 and 15 (ID and medv) are left out of the screening.
predictors <- Data[, -c(1, 15)]
correlations <- cor(predictors)
corrplot(correlations, order = "hclust")

# Flag highly correlated variables ----
# The cutoff can be higher too, e.g. 0.7.
highCorr <- findCorrelation(correlations, cutoff = 0.50)

# highCorr holds column positions within `correlations` (i.e. within
# `predictors`), NOT within `Data`, whose first column is the ID. Using the
# positions directly on `Data` would remove the wrong columns, so translate
# them to column names first. Dropping by name also keeps every column when
# nothing is flagged (a negative index with an empty vector would instead
# select zero columns).
dropped_cols <- colnames(correlations)[highCorr]
reduced_Data <- Data[, setdiff(names(Data), dropped_cols), drop = FALSE]

# After the highly correlated independent variables are removed ----
# Restrict to the remaining independent variables so this plot is directly
# comparable with the first one (ID and medv are excluded there as well).
kept_predictors <- setdiff(colnames(correlations), dropped_cols)
corMatfterFiler <- cor(reduced_Data[, kept_predictors, drop = FALSE])
corrplot(corMatfterFiler, order = "hclust")

datatable(reduced_Data)