Question 9

library(ISLR2)
# Load the Auto data and keep only the rows without missing values.
data("Auto")
auto <- na.omit(Auto)
# Show the type of every column and the number of observations.
str(auto)
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : int  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : int  3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
##   ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...

# Quantitative columns used for every summary below. The same set is used for
# range, mean and sd so the three tables are comparable. `origin` (values 1-3,
# a country-of-origin code per the ISLR2 documentation) and `name` (a factor)
# are left out; `horsepower` is an integer column and is kept.
quant_cols <- c(
  "mpg", "cylinders", "displacement", "horsepower",
  "weight", "acceleration", "year"
)

# NOTE(review): the printed values for the corrected column set were updated
# by hand (horsepower added, origin removed) -- re-knit to confirm.

# Range (row 1 = minimum, row 2 = maximum) of each quantitative column.
vapply(auto[, quant_cols], range, numeric(2))
##       mpg cylinders displacement horsepower weight acceleration year
## [1,]  9.0         3           68         46   1613          8.0   70
## [2,] 46.6         8          455        230   5140         24.8   82
# Mean of each quantitative column.
vapply(auto[, quant_cols], mean, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##    23.445918     5.471939   194.411990   104.469388  2977.584184    15.541327 
##         year 
##    75.979592
# Standard deviation of each quantitative column.
vapply(auto[, quant_cols], sd, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##     7.805007     1.705783   104.644004    38.491160   849.402560     2.758864 
##         year 
##     3.683737
# Same summaries after dropping observations 10 through 85. The result is
# named auto_subset so that it does not mask base::subset().
auto_subset <- auto[-(10:85), quant_cols]
vapply(auto_subset, range, numeric(2))
##       mpg cylinders displacement horsepower weight acceleration year
## [1,] 11.0         3           68         46   1649          8.5   70
## [2,] 46.6         8          455        230   4997         24.8   82
vapply(auto_subset, mean, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##    24.404430     5.373418   187.240506   100.721519  2935.971519    15.726899 
##         year 
##    77.145570
vapply(auto_subset, sd, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##     7.867283     1.654179    99.678367    35.708853   811.300208     2.693721 
##         year 
##     3.106217
# Scatterplot matrix of every column of the full data set.
pairs(auto)

- From the scatterplot matrix we can see that mpg decreases as displacement and the number of cylinders increase. - Newer models tend to have higher mpg than older ones; this can be seen in the mpg-versus-year panel.

# Store horsepower as double before computing correlations.
auto[["horsepower"]] <- as.numeric(auto[["horsepower"]])

# Pairwise correlations among weight, horsepower and displacement.
with(auto, cor(weight, horsepower))
## [1] 0.8645377
with(auto, cor(weight, displacement))
## [1] 0.9329944
with(auto, cor(displacement, horsepower))
## [1] 0.897257

Question 10

# Load the Boston data set; data() already places `Boston` in the workspace,
# so no further assignment is needed.
data("Boston")

# Size of the data set: number of rows and number of columns.
dim(Boston)
## [1] 506  13
nrow(Boston)
## [1] 506
ncol(Boston)
## [1] 13
# Column names and types.
str(Boston)
## 'data.frame':    506 obs. of  13 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Scatterplot matrix of crim against a handful of selected variables.
pairs(~ crim + nox + dis + tax + medv, data = Boston)

# Correlation of every other column with crim (one row per column).
cor(Boston[, -1], Boston[["crim"]])
##                [,1]
## zn      -0.20046922
## indus    0.40658341
## chas    -0.05589158
## nox      0.42097171
## rm      -0.21924670
## age      0.35273425
## dis     -0.37967009
## rad      0.62550515
## tax      0.58276431
## ptratio  0.28994558
## lstat    0.45562148
## medv    -0.38830461
# Full scatterplot matrix restricted to rows with crim below 20.
crim_below_20 <- Boston$crim < 20
pairs(Boston[crim_below_20, ])

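## - crim has a negative linear relationship with medv and dis (and, more weakly, with rm and zn); its correlation with chas is close to zero (-0.06). - crim has a positive linear relationship with rad, tax, lstat, nox, indus and age.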

# Rows whose crime rate is more than two standard deviations above the mean.
crim_cutoff <- mean(Boston$crim) + 2 * sd(Boston$crim)
is_high_crime <- Boston$crim > crim_cutoff
High.Crime <- Boston[which(is_high_crime), ]
print(High.Crime)
##        crim zn indus chas   nox    rm   age    dis rad tax ptratio lstat medv
## 379 23.6482  0  18.1    0 0.671 6.380  96.2 1.3861  24 666    20.2 23.69 13.1
## 381 88.9762  0  18.1    0 0.671 6.968  91.9 1.4165  24 666    20.2 17.21 10.4
## 387 24.3938  0  18.1    0 0.700 4.652 100.0 1.4672  24 666    20.2 28.28 10.5
## 388 22.5971  0  18.1    0 0.700 5.000  89.5 1.5184  24 666    20.2 31.99  7.4
## 399 38.3518  0  18.1    0 0.693 5.453 100.0 1.4896  24 666    20.2 30.59  5.0
## 401 25.0461  0  18.1    0 0.693 5.987 100.0 1.5888  24 666    20.2 26.77  5.6
## 404 24.8017  0  18.1    0 0.693 5.349  96.0 1.7028  24 666    20.2 19.77  8.3
## 405 41.5292  0  18.1    0 0.693 5.531  85.4 1.6074  24 666    20.2 27.38  8.5
## 406 67.9208  0  18.1    0 0.693 5.683 100.0 1.4254  24 666    20.2 22.98  5.0
## 411 51.1358  0  18.1    0 0.597 5.757 100.0 1.4130  24 666    20.2 10.11 15.0
## 414 28.6558  0  18.1    0 0.597 5.155 100.0 1.5894  24 666    20.2 20.08 16.3
## 415 45.7461  0  18.1    0 0.693 4.519 100.0 1.6582  24 666    20.2 36.98  7.0
## 418 25.9406  0  18.1    0 0.679 5.304  89.1 1.6475  24 666    20.2 26.64 10.4
## 419 73.5341  0  18.1    0 0.679 5.957 100.0 1.8026  24 666    20.2 20.62  8.8
## 428 37.6619  0  18.1    0 0.679 6.202  78.7 1.8629  24 666    20.2 14.52 10.9
## 441 22.0511  0  18.1    0 0.740 5.818  92.4 1.8662  24 666    20.2 22.11 10.5
# Spread of the crime rate: extremes, mean and standard deviation.
range(Boston[["crim"]])
## [1]  0.00632 88.97620
mean(Boston[["crim"]])
## [1] 3.613524
sd(Boston[["crim"]])
## [1] 8.601545
# Number of rows flagged as high-crime.
nrow(High.Crime)
## [1] 16

- There are 16 suburbs (about 3% of the 506) whose crime rate is more than two standard deviations above the mean, i.e. above roughly 20.8. - The range is very wide: it goes from a rate near zero (0.006) to about 89.

# Rows whose tax value is more than two standard deviations above the mean.
tax_cutoff <- mean(Boston$tax) + 2 * sd(Boston$tax)
High.Tax <- Boston[which(Boston$tax > tax_cutoff), ]
nrow(High.Tax)
## [1] 0
range(Boston$tax)
## [1] 187 711
# Same rule applied to the pupil-teacher ratio.
ptratio_cutoff <- mean(Boston$ptratio) + 2 * sd(Boston$ptratio)
High.PT <- Boston[which(Boston$ptratio > ptratio_cutoff), ]
range(Boston$ptratio)
## [1] 12.6 22.0
nrow(High.PT)
## [1] 0

- No suburb has a tax rate or a pupil-teacher ratio more than 2 standard deviations above the mean. For the pupil-teacher ratio this is a plausible outcome, considering that educational regulations impose limits on the number of teachers or students per class or school. - The range of the pupil-teacher ratio is relatively narrow (12.6 to 22.0). - There are, however, suburbs whose pupil-teacher ratio exceeds the mean by more than 1 standard deviation.

# Number of rows with chas equal to 1.
sum(Boston$chas == 1)
## [1] 35
# Median pupil-teacher ratio.
median(Boston$ptratio)
## [1] 19.05
# Row indices attaining the minimum medv; there can be more than one, so
# every such row is printed instead of a single hard-coded index.
lowest_medv <- which(Boston$medv == min(Boston$medv))
lowest_medv
## [1] 399 406
Boston[lowest_medv, ]
##        crim zn indus chas   nox    rm age    dis rad tax ptratio lstat medv
## 399 38.3518  0  18.1    0 0.693 5.453 100 1.4896  24 666    20.2 30.59    5
## 406 67.9208  0  18.1    0 0.693 5.683 100 1.4254  24 666    20.2 22.98    5
# Overall ranges of lstat and ptratio, for comparison with the rows above.
range(Boston$lstat)
## [1]  1.73 37.97
range(Boston$ptratio)
## [1] 12.6 22.0

Suburbs 399 and 406 have the lowest median property value (medv = 5).

# Count the rows with rm above 7 and above 8.
with(Boston, sum(rm > 7))
## [1] 64
with(Boston, sum(rm > 8))
## [1] 13
# Summary statistics for every column of the full data set.
summary(Boston)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          lstat      
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   : 1.73  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.: 6.95  
##  Median : 5.000   Median :330.0   Median :19.05   Median :11.36  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :12.65  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:16.95  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :37.97  
##       medv      
##  Min.   : 5.00  
##  1st Qu.:17.02  
##  Median :21.20  
##  Mean   :22.53  
##  3rd Qu.:25.00  
##  Max.   :50.00
# Summary statistics restricted to the rows with rm above 8.
many_rooms <- Boston[which(Boston$rm > 8), ]
summary(many_rooms)
##       crim               zn            indus             chas       
##  Min.   :0.02009   Min.   : 0.00   Min.   : 2.680   Min.   :0.0000  
##  1st Qu.:0.33147   1st Qu.: 0.00   1st Qu.: 3.970   1st Qu.:0.0000  
##  Median :0.52014   Median : 0.00   Median : 6.200   Median :0.0000  
##  Mean   :0.71879   Mean   :13.62   Mean   : 7.078   Mean   :0.1538  
##  3rd Qu.:0.57834   3rd Qu.:20.00   3rd Qu.: 6.200   3rd Qu.:0.0000  
##  Max.   :3.47428   Max.   :95.00   Max.   :19.580   Max.   :1.0000  
##       nox               rm             age             dis       
##  Min.   :0.4161   Min.   :8.034   Min.   : 8.40   Min.   :1.801  
##  1st Qu.:0.5040   1st Qu.:8.247   1st Qu.:70.40   1st Qu.:2.288  
##  Median :0.5070   Median :8.297   Median :78.30   Median :2.894  
##  Mean   :0.5392   Mean   :8.349   Mean   :71.54   Mean   :3.430  
##  3rd Qu.:0.6050   3rd Qu.:8.398   3rd Qu.:86.50   3rd Qu.:3.652  
##  Max.   :0.7180   Max.   :8.780   Max.   :93.90   Max.   :8.907  
##       rad              tax           ptratio          lstat           medv     
##  Min.   : 2.000   Min.   :224.0   Min.   :13.00   Min.   :2.47   Min.   :21.9  
##  1st Qu.: 5.000   1st Qu.:264.0   1st Qu.:14.70   1st Qu.:3.32   1st Qu.:41.7  
##  Median : 7.000   Median :307.0   Median :17.40   Median :4.14   Median :48.3  
##  Mean   : 7.462   Mean   :325.1   Mean   :16.36   Mean   :4.31   Mean   :44.2  
##  3rd Qu.: 8.000   3rd Qu.:307.0   3rd Qu.:17.40   3rd Qu.:5.12   3rd Qu.:50.0  
##  Max.   :24.000   Max.   :666.0   Max.   :20.20   Max.   :7.44   Max.   :50.0