library(ISLR)
library(ggplot2)

# Load the College data set shipped with the ISLR package into the workspace.
data("College", package = "ISLR")

What are the variables?

# Open the help page for the College data set (same as typing ?College).
help("College")

What does the data look like?

# Peek at the first six rows; the row labels are the school names.
head(College, n = 6L)
##                              Private Apps Accept Enroll Top10perc
## Abilene Christian University     Yes 1660   1232    721        23
## Adelphi University               Yes 2186   1924    512        16
## Adrian College                   Yes 1428   1097    336        22
## Agnes Scott College              Yes  417    349    137        60
## Alaska Pacific University        Yes  193    146     55        16
## Albertson College                Yes  587    479    158        38
##                              Top25perc F.Undergrad P.Undergrad Outstate
## Abilene Christian University        52        2885         537     7440
## Adelphi University                  29        2683        1227    12280
## Adrian College                      50        1036          99    11250
## Agnes Scott College                 89         510          63    12960
## Alaska Pacific University           44         249         869     7560
## Albertson College                   62         678          41    13500
##                              Room.Board Books Personal PhD Terminal
## Abilene Christian University       3300   450     2200  70       78
## Adelphi University                 6450   750     1500  29       30
## Adrian College                     3750   400     1165  53       66
## Agnes Scott College                5450   450      875  92       97
## Alaska Pacific University          4120   800     1500  76       72
## Albertson College                  3335   500      675  67       73
##                              S.F.Ratio perc.alumni Expend Grad.Rate
## Abilene Christian University      18.1          12   7041        60
## Adelphi University                12.2          16  10527        56
## Adrian College                    12.9          30   8735        54
## Agnes Scott College                7.7          37  19016        59
## Alaska Pacific University         11.9           2  10922        15
## Albertson College                  9.4          11   9727        55
# or View(College) interactively

What are the top schools by some of these metrics? (I would use dplyr here; I think it’s easier to read than using order and dollar signs.)

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# smallest schools: sort ascending by enrollment, then show the first six rows
# NOTE(review): the output recorded below is labelled 1-6 rather than by school
# name, so the row names appear to be dropped here -- consider storing the
# school name in a real column before sorting.
College %>%
  arrange(Enroll) %>%
  head()
##   Private Apps Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad
## 1     Yes  100     90     35        10        52         282         331
## 2     Yes  167    130     46        16        50         199         676
## 3     Yes   81     72     51        33        71         139           3
## 4     Yes  193    146     55        16        44         249         869
## 5     Yes  141    118     55        12        21         201         173
## 6     Yes  191    165     63         5        25         494         574
##   Outstate Room.Board Books Personal PhD Terminal S.F.Ratio perc.alumni
## 1     8400       2812   300     2134  10       50      12.1          24
## 2    11428       5084   450      475  67       76       8.3          43
## 3     8730       3600   400      800  92       92       9.3          17
## 4     7560       4120   800     1500  76       72      11.9           2
## 5     8300       4850   450     1300  53       53       9.5          19
## 6    11550       4270   300      500  43       77      14.5           8
##   Expend Grad.Rate
## 1   7976        52
## 2  10291        67
## 3  10922        58
## 4  10922        15
## 5   6936        76
## 6   9209        40
# largest schools: sort descending by enrollment, then show the first six rows
College %>%
  arrange(desc(Enroll)) %>%
  head()
##   Private  Apps Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad
## 1      No 14474  10519   6392        49        85       31643        2798
## 2      No 18114  15096   6180        23        57       26640        4120
## 3      No 21804  18744   5874        29        60       26213        4065
## 4      No 16587  13243   5873        25        72       24763        2717
## 5      No 14939  11652   5705        52        88       25422         911
## 6      No 14752   9572   5329        48        85       30017        5189
##   Outstate Room.Board Books Personal PhD Terminal S.F.Ratio perc.alumni
## 1     5130       3412   600     2144  89       91      23.1          29
## 2    10658       3734   504      600  93       95      14.0           9
## 3     9556       3990   570     1060  86       86      18.2          15
## 4     9766       3990   600     2000  77       88      21.3          24
## 5     7560       4574   500     1982  87       90      17.4          13
## 6     5130       3309   650     3140  91       99      19.7          11
##   Expend Grad.Rate
## 1   8471        69
## 2  10520        71
## 3   8604        67
## 4   8686        68
## 5   8559        81
## 6   7837        65
# highest % of PhDs: sort descending by the PhD column, then show the first
# six rows
College %>%
  arrange(desc(PhD)) %>%
  head()
##   Private  Apps Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad
## 1      No   529    481    243        22        47        1206         134
## 2     Yes  1465    810    313        71        95        1088          16
## 3     Yes  1377    572    178        95       100         654           5
## 4     Yes  1133    630    220        37        73         750          30
## 5     Yes 12586   3239   1462        87        95        5643         349
## 6     Yes  1860    767    227        71        93         887           1
##   Outstate Room.Board Books Personal PhD Terminal S.F.Ratio perc.alumni
## 1     4860       3122   600      650 103       88      17.4          16
## 2    18165       6750   500     1200 100      100      12.3          49
## 3    17230       6690   700      900 100      100       8.2          46
## 4    17688       5900   650      850 100      100      10.4          11
## 5    19528       5926   720     1100  99      100       7.6          39
## 6    17000       6010   500      850  99       99       9.6          52
##   Expend Grad.Rate
## 1   6415        43
## 2  17449        89
## 3  21569       100
## 4  14820        73
## 5  20440        97
## 6  18443        87

Hey, does it really think one school has 103% of its faculty holding PhDs? This shows that data is sometimes bad. We’ll get into data cleaning in a future class.

How are the number of applications and enrollments related?

# Scatterplot of applications (x) against enrollment (y), one point per school.
ggplot(data = College, mapping = aes(x = Apps, y = Enroll)) +
  geom_point()

How does size differ between private and public schools?

# Boxplots of enrollment, one box per level of Private (Yes / No).
ggplot(data = College, mapping = aes(x = Private, y = Enroll)) +
  geom_boxplot()

Does the percentage of faculty with PhDs predict the (out-of-state) tuition?

# Scatterplot of the PhD percentage (x) against out-of-state tuition (y).
ggplot(data = College, mapping = aes(x = PhD, y = Outstate)) +
  geom_point()

Does the fraction of new students who were in the top 10% of their high-school class predict what fraction of students graduates?

# Scatterplot of Top10perc (x) against graduation rate (y).
ggplot(data = College, mapping = aes(x = Top10perc, y = Grad.Rate)) +
  geom_point()