# Load MASS library
library(MASS)
# Bring the Boston housing data frame into the workspace
data(Boston)
# Keep every column except 'chas' and 'rad' (original column order preserved)
Boston <- Boston[setdiff(names(Boston), c("chas", "rad"))]
# summary(Boston)
# Load psych library
library(psych)
## Warning: package 'psych' was built under R version 4.1.3
# Q3: per-feature summary statistics (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se) for every remaining Boston column
describe(x = Boston)
## vars n mean sd median trimmed mad min max range skew
## crim 1 506 3.61 8.60 0.26 1.68 0.33 0.01 88.98 88.97 5.19
## zn 2 506 11.36 23.32 0.00 5.08 0.00 0.00 100.00 100.00 2.21
## indus 3 506 11.14 6.86 9.69 10.93 9.37 0.46 27.74 27.28 0.29
## nox 4 506 0.55 0.12 0.54 0.55 0.13 0.38 0.87 0.49 0.72
## rm 5 506 6.28 0.70 6.21 6.25 0.51 3.56 8.78 5.22 0.40
## age 6 506 68.57 28.15 77.50 71.20 28.98 2.90 100.00 97.10 -0.60
## dis 7 506 3.80 2.11 3.21 3.54 1.91 1.13 12.13 11.00 1.01
## tax 8 506 408.24 168.54 330.00 400.04 108.23 187.00 711.00 524.00 0.67
## ptratio 9 506 18.46 2.16 19.05 18.66 1.70 12.60 22.00 9.40 -0.80
## black 10 506 356.67 91.29 391.44 383.17 8.09 0.32 396.90 396.58 -2.87
## lstat 11 506 12.65 7.14 11.36 11.90 7.11 1.73 37.97 36.24 0.90
## medv 12 506 22.53 9.20 21.20 21.56 5.93 5.00 50.00 45.00 1.10
## kurtosis se
## crim 36.60 0.38
## zn 3.95 1.04
## indus -1.24 0.30
## nox -0.09 0.01
## rm 1.84 0.03
## age -0.98 1.25
## dis 0.46 0.09
## tax -1.15 7.49
## ptratio -0.30 0.10
## black 7.10 4.06
## lstat 0.46 0.32
## medv 1.45 0.41
# Load corrplot library
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.1.3
## corrplot 0.92 loaded
# Pairwise correlation matrix across all remaining Boston columns
corr_matrix <- cor(Boston)
# Lower-triangle colour heatmap without the diagonal. Variables are ordered by
# first principal component ("FPC") and each cell is annotated with its
# coefficient shown as a percentage.
# addCoef.col expects a colour, so name it explicitly rather than passing TRUE
# and relying on implicit coercion to a palette index.
corrplot(
  corr_matrix,
  method = "color",
  type = "lower",
  order = "FPC",
  diag = FALSE,
  addCoef.col = "black",
  addCoefasPercent = TRUE
)

# Squared correlation (r^2) between every column and the target 'medv',
# sorted from strongest to weakest. 'medv' itself is still part of this vector
# (r^2 = 1 with itself).
correlations <- cor(Boston)
corr_mdev <- correlations[, "medv"]^2
corr_mdev <- sort(corr_mdev, decreasing = TRUE)
# Exclude the target by name before ranking, so the picks do not depend on
# 'medv' happening to sort into the first position.
feature_r2 <- corr_mdev[names(corr_mdev) != "medv"]
# Names of the two features with the largest r^2 against 'medv'
highest_corr_feature <- names(feature_r2)[1]
second_highest_corr_feature <- names(feature_r2)[2]
# Scatter plot of the strongest feature vs. 'medv'
plot(Boston[[highest_corr_feature]], Boston$medv,
     xlab = highest_corr_feature, ylab = "medv")

# Scatter plot of the second-strongest feature vs. 'medv'
plot(Boston[[second_highest_corr_feature]], Boston$medv,
     xlab = second_highest_corr_feature, ylab = "medv")
