Question 8

8a, 8b

Load the data from the CSV file, use the college-name column as the row names, and then drop that column:

# Read the raw CSV.
# * `quote = ""` disables quote handling; otherwise the single-quote characters
#   in the source file are treated as string delimiters and rows are misparsed.
# * `stringsAsFactors = TRUE` keeps text columns (e.g. Private) as factors.
#   This was the default before R 4.0; the summary and plots further down rely
#   on Private being a factor.
rawdata <- read.table(
  "data/College.csv",
  header = TRUE, sep = ",", quote = "", stringsAsFactors = TRUE
)
head(rawdata)
college <- rawdata
# Use the values of the first column as row names (converted to plain strings).
rownames(college) <- as.character(college[, 1])
# Remove the first column, which is now redundant with the row names.
college <- college[, -1]
head(college)

8c

i ~ iii:

# (i) Per-column summary of the data set.
summary(college)
 Private        Apps           Accept          Enroll       Top10perc    
 No :212   Min.   :   81   Min.   :   72   Min.   :  35   Min.   : 1.00  
 Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00  
           Median : 1558   Median : 1110   Median : 434   Median :23.00  
           Mean   : 3002   Mean   : 2019   Mean   : 780   Mean   :27.56  
           3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00  
           Max.   :48094   Max.   :26330   Max.   :6392   Max.   :96.00  
   Top25perc      F.Undergrad     P.Undergrad         Outstate       Room.Board  
 Min.   :  9.0   Min.   :  139   Min.   :    1.0   Min.   : 2340   Min.   :1780  
 1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320   1st Qu.:3597  
 Median : 54.0   Median : 1707   Median :  353.0   Median : 9990   Median :4200  
 Mean   : 55.8   Mean   : 3700   Mean   :  855.3   Mean   :10441   Mean   :4358  
 3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925   3rd Qu.:5050  
 Max.   :100.0   Max.   :31643   Max.   :21836.0   Max.   :21700   Max.   :8124  
     Books           Personal         PhD            Terminal       S.F.Ratio    
 Min.   :  96.0   Min.   : 250   Min.   :  8.00   Min.   : 24.0   Min.   : 2.50  
 1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50  
 Median : 500.0   Median :1200   Median : 75.00   Median : 82.0   Median :13.60  
 Mean   : 549.4   Mean   :1341   Mean   : 72.66   Mean   : 79.7   Mean   :14.09  
 3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50  
 Max.   :2340.0   Max.   :6800   Max.   :103.00   Max.   :100.0   Max.   :39.80  
  perc.alumni        Expend        Grad.Rate     
 Min.   : 0.00   Min.   : 3186   Min.   : 10.00  
 1st Qu.:13.00   1st Qu.: 6751   1st Qu.: 53.00  
 Median :21.00   Median : 8377   Median : 65.00  
 Mean   :22.74   Mean   : 9660   Mean   : 65.46  
 3rd Qu.:31.00   3rd Qu.:10830   3rd Qu.: 78.00  
 Max.   :64.00   Max.   :56233   Max.   :118.00  
# (ii) Scatterplot matrix of the first ten columns.
pairs(college[,1:10])

# (iii) Outstate against Private.
plot(college$Private, college$Outstate)

iv:

# (iv) Add an Elite column: "Yes" where Top10perc exceeds 50, "No" otherwise,
# stored as a factor.
college$Elite <- "No"
college$Elite[college$Top10perc > 50] <- "Yes"
college$Elite <- as.factor(college$Elite)
summary(college$Elite)  # 78 colleges are Elite, 699 are not
 No Yes 
699  78 
# Outstate against the Elite factor created above.
plot(college$Elite, college$Outstate)

v:

# (v) Four histograms with differing colours and bin counts on a 2 x 2 grid.
# par() returns the previous settings, which are restored afterwards so that
# later plots are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
hist(college$Apps)
hist(college$perc.alumni, col = 2)
hist(college$S.F.Ratio, breaks = 20, col = 3)
hist(college$Expend, breaks = 100)
par(old_par)

Question 9

Make sure the ISLR package is installed; if it is not, install it with install.packages("ISLR").

library(ISLR)
data("Auto")  # load the Auto data set into the workspace
head(Auto)    # first few rows
str(Auto)     # column types and dimensions
'data.frame':   392 obs. of  9 variables:
 $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
 $ cylinders   : num  8 8 8 8 8 8 8 8 8 8 ...
 $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
 $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
 $ weight      : num  3504 3693 3436 3433 3449 ...
 $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
 $ year        : num  70 70 70 70 70 70 70 70 70 70 ...
 $ origin      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...

9a

Quantitative predictors: mpg, cylinders, displacement, horsepower, weight, acceleration, year.

Qualitative predictors: origin, name.

9b ~ 9d

# (b) Range (minimum, maximum) of each of the first seven columns.
vapply(Auto[, 1:7], range, numeric(2))
      mpg cylinders displacement horsepower weight acceleration year
[1,]  9.0         3           68         46   1613          8.0   70
[2,] 46.6         8          455        230   5140         24.8   82
# (c) Mean of each of the first seven columns.
vapply(Auto[, 1:7], mean, numeric(1))
         mpg    cylinders displacement   horsepower       weight acceleration 
   23.445918     5.471939   194.411990   104.469388  2977.584184    15.541327 
        year 
   75.979592 
# Standard deviation of each of the first seven columns.
vapply(Auto[, 1:7], sd, numeric(1))
         mpg    cylinders displacement   horsepower       weight acceleration 
    7.805007     1.705783   104.644004    38.491160   849.402560     2.758864 
        year 
    3.683737 
# (d) Drop the 10th through 85th observations, then recompute the ranges.
sbset <- Auto[-seq(10, 85), ]
vapply(sbset[, 1:7], range, numeric(2))
      mpg cylinders displacement horsepower weight acceleration year
[1,] 11.0         3           68         46   1649          8.5   70
[2,] 46.6         8          455        230   4997         24.8   82
# Mean of each of the first seven columns in the reduced data.
vapply(sbset[, 1:7], mean, numeric(1))
         mpg    cylinders displacement   horsepower       weight acceleration 
   24.404430     5.373418   187.240506   100.721519  2935.971519    15.726899 
        year 
   77.145570 
# Standard deviation of each of the first seven columns in the reduced data.
vapply(sbset[, 1:7], sd, numeric(1))
         mpg    cylinders displacement   horsepower       weight acceleration 
    7.867283     1.654179    99.678367    35.708853   811.300208     2.693721 
        year 
    3.106217 

9e

# Scatterplot matrix of every column of Auto.
pairs(Auto)

# Single pair of interest: mpg against weight.
plot(Auto$weight, Auto$mpg)

The plot shows that mpg decreases as weight increases. In other words, lighter cars are more fuel efficient.

9f

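See the scatterplot matrix produced by pairs(Auto) above. For example, the plot of mpg against weight in 9e shows a clear decreasing relationship, so weight is a useful predictor of mpg.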

Question 10

10a ~ 10c

# (a) Attach MASS and open the help page for Boston.
library(MASS)
?Boston
# (b) Scatterplot matrix of every column of Boston.
pairs(Boston)

# (c) crim against age.
plot(Boston$age, Boston$crim)

Suburbs with older houses tend to have more crime.

10d

# Show the rows of Boston with the largest crim values, highest first.
# Uses base R ordering so the step does not depend on dplyr, which is not
# installed in this environment.
head(Boston[order(Boston$crim, decreasing = TRUE), ])

10e

10f

10g

10h

Suburbs with a higher average number of rooms per dwelling have a lower crime rate (comparing the mean and median), etc.

LS0tCnRpdGxlOiAiQXBwbGllZCBFeGVyY2lzZXMgb2YgQ2hhcHRlciAyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIFF1ZXN0aW9uIDgKCiMjIDhhLCA4YgoKTG9hZCB0aGUgZGF0YSBmcm9tIGNzdiBmaWxlLCBhbmQgcmVtb3ZlICpjb2xsZWdlIG5hbWUqIGNvbHVtbjoKYGBge3J9CnJhd2RhdGEgPC0gcmVhZC50YWJsZSgnZGF0YS9Db2xsZWdlLmNzdicsIGhlYWRlciA9IFRSVUUsIHNlcCA9ICcsJywgcXVvdGUgPSAnJykKIyBoZXJlIGBxdW90ZSA9ICcnYCBpcyBtYW5kYXRvcnksIG9yIHRoZSBzaW5nbGUgcXVvdGUgaW4gdGhlIHNvdXJjZSBmaWxlIHdpbGwgYmUgbWlzaW50ZXJwcmV0ZWQKaGVhZChyYXdkYXRhKQpjb2xsZWdlIDwtIHJhd2RhdGEKcm93bmFtZXMoY29sbGVnZSkgPC0gY29sbGVnZVssMV0gICMgb3ZlcndyaXRlIHRoZSByb3cgbmFtZXMgd2l0aCB0aGUgdmFsdWVzIGluIHRoZSBmaXJzdCBjb2x1bW4KY29sbGVnZSA8LSBjb2xsZWdlWywtMV0gICAgICAgICAgICMgcmVtb3ZlIHRoZSBmaXJzdCBjb2x1bW4gKHdoaWNoIGhhcyBiZWVuIHNldCBhcyByb3cgbmFtZXMpCmhlYWQoY29sbGVnZSkKYGBgCgojIyA4YwoKaSB+IGlpaToKYGBge3J9CnN1bW1hcnkoY29sbGVnZSkKcGFpcnMoY29sbGVnZVssMToxMF0pCnBsb3QoY29sbGVnZSRQcml2YXRlLCBjb2xsZWdlJE91dHN0YXRlKQpgYGAKCml2OgpgYGB7cn0KY29sbGVnZSRFbGl0ZSA9ICJObyIKY29sbGVnZSRFbGl0ZVtjb2xsZWdlJFRvcDEwcGVyYyA+IDUwXSA9ICJZZXMiCmNvbGxlZ2UkRWxpdGUgPSBhcy5mYWN0b3IoY29sbGVnZSRFbGl0ZSkKc3VtbWFyeShjb2xsZWdlJEVsaXRlKSAgIyB0aGVyZSBhcmUgMjcgRWxpdGUgY29sbGVnZXMgaGVyZQpwbG90KGNvbGxlZ2UkRWxpdGUsIGNvbGxlZ2UkT3V0c3RhdGUpCmBgYAoKdjoKYGBge3J9CnBhcihtZnJvdz1jKDIsMikpCmhpc3QoY29sbGVnZSRBcHBzKQpoaXN0KGNvbGxlZ2UkcGVyYy5hbHVtbmksIGNvbCA9IDIpCmhpc3QoY29sbGVnZSRTLkYuUmF0aW8sIGJyZWFrcyA9IDIwLCBjb2wgPSAzKQpoaXN0KGNvbGxlZ2UkRXhwZW5kLCBicmVha3MgPSAxMDApCmBgYAoKIyBRdWVzdGlvbiA5CgpNYWtlIHN1cmUgdGhlICpJU0xSKiBsaWJyYXJ5IGlzIGluc3RhbGxlZC4KT3IgaW5zdGFsbCBpdCB3aXRoIGBpbnN0YWxsLnBhY2thZ2VzKCJJU0xSIilgLgpgYGB7cn0KbGlicmFyeShJU0xSKQpkYXRhKCJBdXRvIikKaGVhZChBdXRvKQpzdHIoQXV0bykKYGBgCgojIyA5YQoKUXVhbnRpdGF0aXZlIHByZWRpY3RvcnM6Cm1wZywgY3lsaW5kZXIsIGRpc3BsYWNlbWVudCwgaG9yc2Vwb3dlciwgd2VpZ2h0LCBhY2NlbGVyYXRpb24sIHllYXIuCgpRdWFsaXRhdGl2ZSBwcmVkaWN0b3JzOiBvcmlnaW4sIG5hbWUuCgojIyA5YiB+IDlkCgpgYGB7cn0KIyAoYikKc2FwcGx5KEF1dG9bLDE6N10sIHJhbmdlKQoKIyAoYykKc2FwcGx5KEF1dG9bLDE6N10sIG1lYW4pCnNhcHBseShBdXRvWywxOjddLCBzZCkKCiMgKGQpCnNi
c2V0IDwtIEF1dG9bLSgxMDo4NSksXQpzYXBwbHkoc2JzZXRbLDE6N10sIHJhbmdlKQpzYXBwbHkoc2JzZXRbLDE6N10sIG1lYW4pCnNhcHBseShzYnNldFssMTo3XSwgc2QpCmBgYAoKIyMgOWUKCmBgYHtyfQpwYWlycyhBdXRvKQpwbG90KEF1dG8kd2VpZ2h0LCBBdXRvJG1wZykKYGBgClRoZSBwbG90IHNob3dzIHRoYXQgd2l0aCB0aGUgaW5jcmVhc2luZyBvZiB0aGUgd2VpZ2h0LCB0aGUgbXBnIGlzIGRlY3JlYXNlLgpJbiBvdGhlciB3b3JkcywgbGlnaHRlciBjYXJzIGFyZSBtb3JlIGVuZXJneSBlZmZlY3RpdmUuCgojIyA5ZgoKTGVhcm4gZnJvbSB0aGUgb3V0cHV0IGdyYXBoIG9mIGBwYWlycyhBdXRvKWAgYWJvdmUuCgojIFF1ZXN0aW9uIDEwCgojIyAxMGEgfiAxMGMKCmBgYHtyfQojIChhKQpsaWJyYXJ5KE1BU1MpCj9Cb3N0b24KCiMgKGIpCnBhaXJzKEJvc3RvbikKCiMgKGMpCnBsb3QoQm9zdG9uJGFnZSwgQm9zdG9uJGNyaW0pCmBgYAoKT2xkZXIgaG91c2Ugd2l0aCBtb3JlIGNyaW1lLgoKIyMgMTBkCgpgYGB7cn0KbGlicmFyeShkcGx5cikKaGVhZChhcnJhbmdlKEJvc3RvbiwgZGVzYyhjcmltKSkpCmBgYAoKIyMgMTBlCgpgYGB7cn0KbnJvdyhCb3N0b25bQm9zdG9uJGNoYXMgPiAwLF0pCmBgYAoKIyMgMTBmCgpgYGB7cn0KbWVkaWFuKEJvc3RvbiRwdHJhdGlvKQpgYGAKCiMjIDEwZwoKYGBge3J9CnN1YnNldChCb3N0b24sIG1lZHYgPT0gbWluKEJvc3RvbiRtZWR2KSkKYGBgCgojIyAxMGgKCmBgYHtyfQpucm93KEJvc3RvbltCb3N0b24kcm0gPiA3LF0pCnN1bW1hcnkoc3Vic2V0KEJvc3Rvbiwgcm0gPiA4KSkKc3VtbWFyeShCb3N0b24pCmBgYAoKU3VidXJicyB3aXRoIG1vcmUgYXZlcmFnZSBudW1iZXIgb2Ygcm9vbXMgcGVyIGR3ZWxsaW5nIGhhdmUgbG93ZXIgY3JpbWUgcmF0ZSAoY29tcGFyaW5nIHRoZSBtZWFuIGFuZCBtZWRpYW4pLCBldGMu