library(ISLR2)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
# Bring the Boston data set from ISLR2 into the workspace
data("Boston", package = "ISLR2")
# (a) Preview the first six rows
head(Boston, n = 6L)
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 4.98 24.0
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 9.14 21.6
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 4.03 34.7
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 2.94 33.4
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 5.33 36.2
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 5.21 28.7
# (b) Scatterplot matrix limited to the first six columns for simplicity
ggpairs(data = Boston[seq_len(6L)])

# (c) Correlation of crim with every other column. crim itself is dropped
# from the right-hand side so the trivial self-correlation of 1 is not
# reported alongside the real predictors.
cor(Boston$crim, Boston[, names(Boston) != "crim"])
## zn indus chas nox rm age dis
## [1,] -0.2004692 0.4065834 -0.05589158 0.4209717 -0.2192467 0.3527343 -0.3796701
## rad tax ptratio lstat medv
## [1,] 0.6255051 0.5827643 0.2899456 0.4556215 -0.3883046
# (d) Distribution summaries for crim, tax and ptratio
summary(Boston[["crim"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00632 0.08204 0.25651 3.61352 3.67708 88.97620
summary(Boston[["tax"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 187.0 279.0 330.0 408.2 666.0 711.0
summary(Boston[["ptratio"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.60 17.40 19.05 18.46 20.20 22.00
# Number of rows where chas equals 1
sum(Boston[["chas"]] == 1)
## [1] 35
# Median of ptratio on its own
median(Boston[["ptratio"]])
## [1] 19.05
# Rows attaining the minimum medv. which.min() reports only the first
# minimum, so compare against min() to keep every tied row; which() drops
# any NA comparisons so missing medv values cannot select rows.
Boston[which(Boston$medv == min(Boston$medv, na.rm = TRUE)), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 399 38.3518 0 18.1 0 0.693 5.453 100 1.4896 24 666 20.2 30.59 5
## 406 67.9208 0 18.1 0 0.693 5.683 100 1.4254 24 666 20.2 22.98 5
# Count of rows whose rm exceeds 7
sum(Boston[["rm"]] > 7)
## [1] 64