Applied section in 2.4 Exercises

Question 8

8a, 8b

Load the data from csv file, and remove college name column:

rawdata <- read.table('data/College.csv', header = TRUE, sep = ',', quote = '')
# here `quote = ''` is mandatory, or the single quote in the source file will be misinterpreted
head(rawdata)
college <- rawdata
rownames(college) <- college[,1]  # overwrite the row names with the values in the first column
college <- college[,-1]           # remove the first column (which has been set as row names)
head(college)

8c

i ~ iii:

summary(college)
 Private        Apps               Accept              Enroll           Top10perc          Top25perc        
 No :212   Min.   :   81.000   Min.   :   72.000   Min.   :  35.000   Min.   : 1.00000   Min.   :  9.00000  
 Yes:565   1st Qu.:  776.000   1st Qu.:  604.000   1st Qu.: 242.000   1st Qu.:15.00000   1st Qu.: 41.00000  
           Median : 1558.000   Median : 1110.000   Median : 434.000   Median :23.00000   Median : 54.00000  
           Mean   : 3001.638   Mean   : 2018.804   Mean   : 779.973   Mean   :27.55856   Mean   : 55.79665  
           3rd Qu.: 3624.000   3rd Qu.: 2424.000   3rd Qu.: 902.000   3rd Qu.:35.00000   3rd Qu.: 69.00000  
           Max.   :48094.000   Max.   :26330.000   Max.   :6392.000   Max.   :96.00000   Max.   :100.00000  
  F.Undergrad         P.Undergrad            Outstate          Room.Board           Books         
 Min.   :  139.000   Min.   :    1.0000   Min.   : 2340.00   Min.   :1780.000   Min.   :  96.000  
 1st Qu.:  992.000   1st Qu.:   95.0000   1st Qu.: 7320.00   1st Qu.:3597.000   1st Qu.: 470.000  
 Median : 1707.000   Median :  353.0000   Median : 9990.00   Median :4200.000   Median : 500.000  
 Mean   : 3699.907   Mean   :  855.2986   Mean   :10440.67   Mean   :4357.526   Mean   : 549.381  
 3rd Qu.: 4005.000   3rd Qu.:  967.0000   3rd Qu.:12925.00   3rd Qu.:5050.000   3rd Qu.: 600.000  
 Max.   :31643.000   Max.   :21836.0000   Max.   :21700.00   Max.   :8124.000   Max.   :2340.000  
    Personal             PhD               Terminal          S.F.Ratio        perc.alumni      
 Min.   : 250.000   Min.   :  8.00000   Min.   : 24.0000   Min.   : 2.5000   Min.   : 0.00000  
 1st Qu.: 850.000   1st Qu.: 62.00000   1st Qu.: 71.0000   1st Qu.:11.5000   1st Qu.:13.00000  
 Median :1200.000   Median : 75.00000   Median : 82.0000   Median :13.6000   Median :21.00000  
 Mean   :1340.642   Mean   : 72.66023   Mean   : 79.7027   Mean   :14.0897   Mean   :22.74389  
 3rd Qu.:1700.000   3rd Qu.: 85.00000   3rd Qu.: 92.0000   3rd Qu.:16.5000   3rd Qu.:31.00000  
 Max.   :6800.000   Max.   :103.00000   Max.   :100.0000   Max.   :39.8000   Max.   :64.00000  
     Expend            Grad.Rate        
 Min.   : 3186.000   Min.   : 10.00000  
 1st Qu.: 6751.000   1st Qu.: 53.00000  
 Median : 8377.000   Median : 65.00000  
 Mean   : 9660.171   Mean   : 65.46332  
 3rd Qu.:10830.000   3rd Qu.: 78.00000  
 Max.   :56233.000   Max.   :118.00000  
pairs(college[,1:10])

plot(college$Private, college$Outstate)

iv:

college$Elite = "No"
college$Elite[college$Top10perc > 50] = "Yes"
college$Elite = as.factor(college$Elite)
summary(college$Elite)  # there are 27 Elite colleges here
 No Yes 
312  27 
plot(college$Elite, college$Outstate)

v:

par(mfrow=c(2,2))
hist(college$Apps)
hist(college$perc.alumni, col = 2)
hist(college$S.F.Ratio, breaks = 20, col = 3)
hist(college$Expend, breaks = 100)

Question 9

Make sure the ISLR library is installed. Or install it with install.packages("ISLR").

library(ISLR)
data("Auto")
head(Auto)
str(Auto)
'data.frame':   392 obs. of  9 variables:
 $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
 $ cylinders   : num  8 8 8 8 8 8 8 8 8 8 ...
 $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
 $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
 $ weight      : num  3504 3693 3436 3433 3449 ...
 $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
 $ year        : num  70 70 70 70 70 70 70 70 70 70 ...
 $ origin      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...

9a

Quantitative predictors: mpg, cylinder, displacement, horsepower, weight, acceleration, year.

Qualitative predictors: origin, name.

9b ~ 9d

# (b)
sapply(Auto[,1:7], range)
      mpg cylinders displacement horsepower weight acceleration year
[1,]  9.0         3           68         46   1613          8.0   70
[2,] 46.6         8          455        230   5140         24.8   82
# (c)
sapply(Auto[,1:7], mean)
         mpg    cylinders displacement   horsepower       weight acceleration         year 
   23.445918     5.471939   194.411990   104.469388  2977.584184    15.541327    75.979592 
sapply(Auto[,1:7], sd)
         mpg    cylinders displacement   horsepower       weight acceleration         year 
    7.805007     1.705783   104.644004    38.491160   849.402560     2.758864     3.683737 
# (d)
sbset <- Auto[-(10:85),]
sapply(sbset[,1:7], range)
      mpg cylinders displacement horsepower weight acceleration year
[1,] 11.0         3           68         46   1649          8.5   70
[2,] 46.6         8          455        230   4997         24.8   82
sapply(sbset[,1:7], mean)
         mpg    cylinders displacement   horsepower       weight acceleration         year 
   24.404430     5.373418   187.240506   100.721519  2935.971519    15.726899    77.145570 
sapply(sbset[,1:7], sd)
         mpg    cylinders displacement   horsepower       weight acceleration         year 
    7.867283     1.654179    99.678367    35.708853   811.300208     2.693721     3.106217 

9e

pairs(Auto)

plot(Auto$weight, Auto$mpg)

The plot shows that with the increasing of the weight, the mpg is decrease. In other words, lighter cars are more energy effective.

9f

Learn from the output graph of pairs(Auto) above.

Question 10

10a ~ 10c

# (a)
library(MASS)
?Boston
# (b)
pairs(Boston)

# (c)
plot(Boston$age, Boston$crim)

Older house with more crime.

10d

library(dplyr)

Attaching package: ‘dplyr’

The following object is masked from ‘package:neuralnet’:

    compute

The following object is masked from ‘package:MASS’:

    select

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
head(arrange(Boston, desc(crim)))

10e

nrow(Boston[Boston$chas > 0,])
[1] 35

10f

median(Boston$ptratio)
[1] 19.05

10g

subset(Boston, medv == min(Boston$medv))

10h

nrow(Boston[Boston$rm > 7,])
[1] 64
summary(subset(Boston, rm > 8))
      crim               zn            indus             chas             nox        
 Min.   :0.02009   Min.   : 0.00   Min.   : 2.680   Min.   :0.0000   Min.   :0.4161  
 1st Qu.:0.33147   1st Qu.: 0.00   1st Qu.: 3.970   1st Qu.:0.0000   1st Qu.:0.5040  
 Median :0.52014   Median : 0.00   Median : 6.200   Median :0.0000   Median :0.5070  
 Mean   :0.71879   Mean   :13.62   Mean   : 7.078   Mean   :0.1538   Mean   :0.5392  
 3rd Qu.:0.57834   3rd Qu.:20.00   3rd Qu.: 6.200   3rd Qu.:0.0000   3rd Qu.:0.6050  
 Max.   :3.47428   Max.   :95.00   Max.   :19.580   Max.   :1.0000   Max.   :0.7180  
       rm             age             dis             rad              tax       
 Min.   :8.034   Min.   : 8.40   Min.   :1.801   Min.   : 2.000   Min.   :224.0  
 1st Qu.:8.247   1st Qu.:70.40   1st Qu.:2.288   1st Qu.: 5.000   1st Qu.:264.0  
 Median :8.297   Median :78.30   Median :2.894   Median : 7.000   Median :307.0  
 Mean   :8.349   Mean   :71.54   Mean   :3.430   Mean   : 7.462   Mean   :325.1  
 3rd Qu.:8.398   3rd Qu.:86.50   3rd Qu.:3.652   3rd Qu.: 8.000   3rd Qu.:307.0  
 Max.   :8.780   Max.   :93.90   Max.   :8.907   Max.   :24.000   Max.   :666.0  
    ptratio          black           lstat           medv     
 Min.   :13.00   Min.   :354.6   Min.   :2.47   Min.   :21.9  
 1st Qu.:14.70   1st Qu.:384.5   1st Qu.:3.32   1st Qu.:41.7  
 Median :17.40   Median :386.9   Median :4.14   Median :48.3  
 Mean   :16.36   Mean   :385.2   Mean   :4.31   Mean   :44.2  
 3rd Qu.:17.40   3rd Qu.:389.7   3rd Qu.:5.12   3rd Qu.:50.0  
 Max.   :20.20   Max.   :396.9   Max.   :7.44   Max.   :50.0  
summary(Boston)
      crim                zn             indus            chas              nox        
 Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000   Min.   :0.3850  
 1st Qu.: 0.08204   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000   1st Qu.:0.4490  
 Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000   Median :0.5380  
 Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917   Mean   :0.5547  
 3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000   3rd Qu.:0.6240  
 Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000   Max.   :0.8710  
       rm             age              dis              rad              tax       
 Min.   :3.561   Min.   :  2.90   Min.   : 1.130   Min.   : 1.000   Min.   :187.0  
 1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100   1st Qu.: 4.000   1st Qu.:279.0  
 Median :6.208   Median : 77.50   Median : 3.207   Median : 5.000   Median :330.0  
 Mean   :6.285   Mean   : 68.57   Mean   : 3.795   Mean   : 9.549   Mean   :408.2  
 3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188   3rd Qu.:24.000   3rd Qu.:666.0  
 Max.   :8.780   Max.   :100.00   Max.   :12.127   Max.   :24.000   Max.   :711.0  
    ptratio          black            lstat            medv      
 Min.   :12.60   Min.   :  0.32   Min.   : 1.73   Min.   : 5.00  
 1st Qu.:17.40   1st Qu.:375.38   1st Qu.: 6.95   1st Qu.:17.02  
 Median :19.05   Median :391.44   Median :11.36   Median :21.20  
 Mean   :18.46   Mean   :356.67   Mean   :12.65   Mean   :22.53  
 3rd Qu.:20.20   3rd Qu.:396.23   3rd Qu.:16.95   3rd Qu.:25.00  
 Max.   :22.00   Max.   :396.90   Max.   :37.97   Max.   :50.00  

Suburbs with more average number of rooms per dwelling have lower crime rate (comparing the mean and median), etc.

LS0tCnRpdGxlOiAiQ2hhcHRlciAyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoqQXBwbGllZCogc2VjdGlvbiBpbiAqMi40IEV4ZXJjaXNlcyoKCiMgUXVlc3Rpb24gOAoKIyMgOGEsIDhiCgpMb2FkIHRoZSBkYXRhIGZyb20gY3N2IGZpbGUsIGFuZCByZW1vdmUgKmNvbGxlZ2UgbmFtZSogY29sdW1uOgpgYGB7cn0KcmF3ZGF0YSA8LSByZWFkLnRhYmxlKCdkYXRhL0NvbGxlZ2UuY3N2JywgaGVhZGVyID0gVFJVRSwgc2VwID0gJywnLCBxdW90ZSA9ICcnKQojIGhlcmUgYHF1b3RlID0gJydgIGlzIG1hbmRhdG9yeSwgb3IgdGhlIHNpbmdsZSBxdW90ZSBpbiB0aGUgc291cmNlIGZpbGUgd2lsbCBiZSBtaXNpbnRlcnByZXRlZApoZWFkKHJhd2RhdGEpCmNvbGxlZ2UgPC0gcmF3ZGF0YQpyb3duYW1lcyhjb2xsZWdlKSA8LSBjb2xsZWdlWywxXSAgIyBvdmVyd3JpdGUgdGhlIHJvdyBuYW1lcyB3aXRoIHRoZSB2YWx1ZXMgaW4gdGhlIGZpcnN0IGNvbHVtbgpjb2xsZWdlIDwtIGNvbGxlZ2VbLC0xXSAgICAgICAgICAgIyByZW1vdmUgdGhlIGZpcnN0IGNvbHVtbiAod2hpY2ggaGFzIGJlZW4gc2V0IGFzIHJvdyBuYW1lcykKaGVhZChjb2xsZWdlKQpgYGAKCiMjIDhjCgppIH4gaWlpOgpgYGB7cn0Kc3VtbWFyeShjb2xsZWdlKQpwYWlycyhjb2xsZWdlWywxOjEwXSkKcGxvdChjb2xsZWdlJFByaXZhdGUsIGNvbGxlZ2UkT3V0c3RhdGUpCmBgYAoKaXY6CmBgYHtyfQpjb2xsZWdlJEVsaXRlID0gIk5vIgpjb2xsZWdlJEVsaXRlW2NvbGxlZ2UkVG9wMTBwZXJjID4gNTBdID0gIlllcyIKY29sbGVnZSRFbGl0ZSA9IGFzLmZhY3Rvcihjb2xsZWdlJEVsaXRlKQpzdW1tYXJ5KGNvbGxlZ2UkRWxpdGUpICAjIHRoZXJlIGFyZSAyNyBFbGl0ZSBjb2xsZWdlcyBoZXJlCnBsb3QoY29sbGVnZSRFbGl0ZSwgY29sbGVnZSRPdXRzdGF0ZSkKYGBgCgp2OgpgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkKaGlzdChjb2xsZWdlJEFwcHMpCmhpc3QoY29sbGVnZSRwZXJjLmFsdW1uaSwgY29sID0gMikKaGlzdChjb2xsZWdlJFMuRi5SYXRpbywgYnJlYWtzID0gMjAsIGNvbCA9IDMpCmhpc3QoY29sbGVnZSRFeHBlbmQsIGJyZWFrcyA9IDEwMCkKYGBgCgojIFF1ZXN0aW9uIDkKCk1ha2Ugc3VyZSB0aGUgKklTTFIqIGxpYnJhcnkgaXMgaW5zdGFsbGVkLgpPciBpbnN0YWxsIGl0IHdpdGggYGluc3RhbGwucGFja2FnZXMoIklTTFIiKWAuCmBgYHtyfQpsaWJyYXJ5KElTTFIpCmRhdGEoIkF1dG8iKQpoZWFkKEF1dG8pCnN0cihBdXRvKQpgYGAKCiMjIDlhCgpRdWFudGl0YXRpdmUgcHJlZGljdG9yczoKbXBnLCBjeWxpbmRlciwgZGlzcGxhY2VtZW50LCBob3JzZXBvd2VyLCB3ZWlnaHQsIGFjY2VsZXJhdGlvbiwgeWVhci4KClF1YWxpdGF0aXZlIHByZWRpY3RvcnM6IG9yaWdpbiwgbmFtZS4KCiMjIDliIH4gOWQKCmBgYHtyfQojIChiKQpzYXBwbHkoQXV0b1ssMTo3XSwgcmFuZ2UpCgojIChjKQpzYXBwbHkoQXV0b1ssMTo3XSwgbWVhbikKc2FwcGx5KEF1dG9bLDE6N10sIHNkKQoKIyAoZCkKc2JzZXQgPC0gQXV0b1stKDEwOjg1KSxdCnNhcHBseShzYnNldFssMTo3XSwgcmFuZ2UpCnNhcHBseShzYnNldFssMTo3XSwgbWVhbikKc2FwcGx5KHNic2V0WywxOjddLCBzZCkKYGBgCgojIyA5ZQoKYGBge3J9CnBhaXJzKEF1dG8pCnBsb3QoQXV0byR3ZWlnaHQsIEF1dG8kbXBnKQpgYGAKVGhlIHBsb3Qgc2hvd3MgdGhhdCB3aXRoIHRoZSBpbmNyZWFzaW5nIG9mIHRoZSB3ZWlnaHQsIHRoZSBtcGcgaXMgZGVjcmVhc2UuCkluIG90aGVyIHdvcmRzLCBsaWdodGVyIGNhcnMgYXJlIG1vcmUgZW5lcmd5IGVmZmVjdGl2ZS4KCiMjIDlmCgpMZWFybiBmcm9tIHRoZSBvdXRwdXQgZ3JhcGggb2YgYHBhaXJzKEF1dG8pYCBhYm92ZS4KCiMgUXVlc3Rpb24gMTAKCiMjIDEwYSB+IDEwYwoKYGBge3J9CiMgKGEpCmxpYnJhcnkoTUFTUykKP0Jvc3RvbgoKIyAoYikKcGFpcnMoQm9zdG9uKQoKIyAoYykKcGxvdChCb3N0b24kYWdlLCBCb3N0b24kY3JpbSkKYGBgCgpPbGRlciBob3VzZSB3aXRoIG1vcmUgY3JpbWUuCgojIyAxMGQKCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpoZWFkKGFycmFuZ2UoQm9zdG9uLCBkZXNjKGNyaW0pKSkKYGBgCgojIyAxMGUKCmBgYHtyfQpucm93KEJvc3RvbltCb3N0b24kY2hhcyA+IDAsXSkKYGBgCgojIyAxMGYKCmBgYHtyfQptZWRpYW4oQm9zdG9uJHB0cmF0aW8pCmBgYAoKIyMgMTBnCgpgYGB7cn0Kc3Vic2V0KEJvc3RvbiwgbWVkdiA9PSBtaW4oQm9zdG9uJG1lZHYpKQpgYGAKCiMjIDEwaAoKYGBge3J9Cm5yb3coQm9zdG9uW0Jvc3RvbiRybSA+IDcsXSkKc3VtbWFyeShzdWJzZXQoQm9zdG9uLCBybSA+IDgpKQpzdW1tYXJ5KEJvc3RvbikKYGBgCgpTdWJ1cmJzIHdpdGggbW9yZSBhdmVyYWdlIG51bWJlciBvZiByb29tcyBwZXIgZHdlbGxpbmcgaGF2ZSBsb3dlciBjcmltZSByYXRlIChjb21wYXJpbmcgdGhlIG1lYW4gYW5kIG1lZGlhbiksIGV0Yy4=