1.

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the Auto data, treating "?" as a missing value, and drop every row
# that contains a missing value.
Auto <- na.omit(read.csv("auto.csv.xls", na.strings = "?"))

1a)

# Show the structure of Auto: dimensions, column types, and leading values.
str(Auto)
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : int  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : int  3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : chr  "chevrolet chevelle malibu" "buick skylark 320" "plymouth satellite" "amc rebel sst" ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
##   ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...

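The quantitative predictors in the Auto data set are mpg, cylinders, displacement, horsepower, weight, acceleration and year. Name is qualitative. Origin is stored as an integer code (1–3), so it is summarised numerically below, but it identifies a category and is better regarded as qualitative.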

1b)

The range of each quantitative predictor is as follows:

# Minimum and maximum of each numeric column, one column per call.
# NOTE(review): horsepower is an int column in the str(Auto) output but has no
# range() call here — confirm whether it was left out by accident.
range(Auto$mpg)
## [1]  9.0 46.6
range(Auto$cylinders)
## [1] 3 8
range(Auto$displacement)
## [1]  68 455
range(Auto$weight)
## [1] 1613 5140
range(Auto$acceleration)
## [1]  8.0 24.8
range(Auto$year)
## [1] 70 82
range(Auto$origin)
## [1] 1 3

1c)

Mean and standard deviations:

# Mean and standard deviation of each numeric column, one column per call;
# each call returns a one-row data frame.
# NOTE(review): horsepower is an int column in the str(Auto) output but is not
# summarised here — confirm whether it was left out by accident.
Auto %>% summarise(mean(mpg), sd(mpg))
##   mean(mpg)  sd(mpg)
## 1  23.44592 7.805007
Auto %>% summarise(mean(cylinders), sd(cylinders))
##   mean(cylinders) sd(cylinders)
## 1        5.471939      1.705783
Auto %>% summarise(mean(displacement), sd(displacement))
##   mean(displacement) sd(displacement)
## 1            194.412          104.644
Auto %>% summarise(mean(weight), sd(weight))
##   mean(weight) sd(weight)
## 1     2977.584   849.4026
Auto %>% summarise(mean(acceleration), sd(acceleration))
##   mean(acceleration) sd(acceleration)
## 1           15.54133         2.758864
Auto %>% summarise(mean(year), sd(year))
##   mean(year) sd(year)
## 1   75.97959 3.683737
Auto %>% summarise(mean(origin), sd(origin))
##   mean(origin) sd(origin)
## 1     1.576531  0.8055182

1d)

# Build a copy of Auto without its 10th and 85th rows, then recompute the mean
# and standard deviation of each numeric column on that copy.
# NOTE(review): -c(10, 85) removes only rows 10 and 85. If the intent was to
# remove the 10th through 85th observations, the index should be -(10:85) and
# every output below would need to be regenerated — confirm.
# NOTE(review): horsepower is not summarised here either — confirm.
autoSubset <- Auto[-c(10,85),]
autoSubset %>% summarise(mean(mpg), sd(mpg))
##   mean(mpg)  sd(mpg)
## 1  23.49436 7.795198
autoSubset %>% summarise(mean(cylinders), sd(cylinders))
##   mean(cylinders) sd(cylinders)
## 1        5.458974      1.700479
autoSubset %>% summarise(mean(displacement), sd(displacement))
##   mean(displacement) sd(displacement)
## 1           193.5115         104.1407
autoSubset %>% summarise(mean(weight), sd(weight))
##   mean(weight) sd(weight)
## 1     2972.469   848.5121
autoSubset %>% summarise(mean(acceleration), sd(acceleration))
##   mean(acceleration) sd(acceleration)
## 1            15.5659         2.739672
autoSubset %>% summarise(mean(year), sd(year))
##   mean(year) sd(year)
## 1   76.00256 3.677556
autoSubset %>% summarise(mean(origin), sd(origin))
##   mean(origin) sd(origin)
## 1     1.579487  0.8065221

1e)

# Scatter plot of mpg against year, with points coloured by origin.
ggplot(data = autoSubset, mapping = aes(x = year, y = mpg, colour = origin)) +
  geom_point() +
  labs(title = "Year vs. MPG")

This scatter plot compares year to mpg. You can see a rough positive correlation between year and mpg.

# Histogram of year using bins that are one year wide.
ggplot(data = autoSubset, mapping = aes(x = year)) +
  geom_histogram(binwidth = 1) +
  labs(title = "Year of Car")

This histogram shows how many cars in the data set come from each model year.

# Box plot of weight, with one box (and fill value) per cylinder count.
ggplot(
  data = autoSubset,
  mapping = aes(x = cylinders, y = weight, fill = cylinders, group = cylinders)
) +
  geom_boxplot() +
  labs(title = "Cylinders vs. Weight Boxplot")

This boxplot compares the number of cylinders in a car with its weight. It can be seen that generally, as cylinders increase so does weight.

1f)

Based on the first plot, mpg seems to be roughly positively correlated with the model year of the car, meaning that the newer the car, the higher its mpg is likely to be.

2.

2a)

# Download the College data; header = TRUE reads the first line of the file as
# the column names.
college <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE
)

2b)

# Open the data in the viewer, then use the values of the first column as the
# row names. The first column itself is kept in the data frame.
view(college)
rownames(college) <- college[[1]]

2c)

# Per-column summary of the college data.
summary(college)
##       X               Private               Apps           Accept     
##  Length:777         Length:777         Min.   :   81   Min.   :   72  
##  Class :character   Class :character   1st Qu.:  776   1st Qu.:  604  
##  Mode  :character   Mode  :character   Median : 1558   Median : 1110  
##                                        Mean   : 3002   Mean   : 2019  
##                                        3rd Qu.: 3624   3rd Qu.: 2424  
##                                        Max.   :48094   Max.   :26330  
##      Enroll       Top10perc       Top25perc      F.Undergrad   
##  Min.   :  35   Min.   : 1.00   Min.   :  9.0   Min.   :  139  
##  1st Qu.: 242   1st Qu.:15.00   1st Qu.: 41.0   1st Qu.:  992  
##  Median : 434   Median :23.00   Median : 54.0   Median : 1707  
##  Mean   : 780   Mean   :27.56   Mean   : 55.8   Mean   : 3700  
##  3rd Qu.: 902   3rd Qu.:35.00   3rd Qu.: 69.0   3rd Qu.: 4005  
##  Max.   :6392   Max.   :96.00   Max.   :100.0   Max.   :31643  
##   P.Undergrad         Outstate       Room.Board       Books       
##  Min.   :    1.0   Min.   : 2340   Min.   :1780   Min.   :  96.0  
##  1st Qu.:   95.0   1st Qu.: 7320   1st Qu.:3597   1st Qu.: 470.0  
##  Median :  353.0   Median : 9990   Median :4200   Median : 500.0  
##  Mean   :  855.3   Mean   :10441   Mean   :4358   Mean   : 549.4  
##  3rd Qu.:  967.0   3rd Qu.:12925   3rd Qu.:5050   3rd Qu.: 600.0  
##  Max.   :21836.0   Max.   :21700   Max.   :8124   Max.   :2340.0  
##     Personal         PhD            Terminal       S.F.Ratio    
##  Min.   : 250   Min.   :  8.00   Min.   : 24.0   Min.   : 2.50  
##  1st Qu.: 850   1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50  
##  Median :1200   Median : 75.00   Median : 82.0   Median :13.60  
##  Mean   :1341   Mean   : 72.66   Mean   : 79.7   Mean   :14.09  
##  3rd Qu.:1700   3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50  
##  Max.   :6800   Max.   :103.00   Max.   :100.0   Max.   :39.80  
##   perc.alumni        Expend        Grad.Rate     
##  Min.   : 0.00   Min.   : 3186   Min.   : 10.00  
##  1st Qu.:13.00   1st Qu.: 6751   1st Qu.: 53.00  
##  Median :21.00   Median : 8377   Median : 65.00  
##  Mean   :22.74   Mean   : 9660   Mean   : 65.46  
##  3rd Qu.:31.00   3rd Qu.:10830   3rd Qu.: 78.00  
##  Max.   :64.00   Max.   :56233   Max.   :118.00
# Scatter-plot matrix of columns 3 through 13.
pairs(college[, 3:13])

# Box plot of Outstate for each value of Private.
ggplot(
  data = college,
  mapping = aes(x = Private, y = Outstate, group = Private, fill = Private)
) +
  geom_boxplot()

2d)

# Build an Elite flag: start with "No" for every college, switch to "Yes"
# where Top10perc exceeds 50, convert the flag to a factor, and append it to
# the college data frame as a new column.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
college <- data.frame(college, Elite)
# Count how many colleges fall into each level of the Elite factor.
summary(Elite)
##  No Yes 
## 699  78
# Box plot of Outstate for each level of Elite.
ggplot(
  data = college,
  mapping = aes(x = Elite, y = Outstate, group = Elite, fill = Elite)
) +
  geom_boxplot() +
  labs(title = "Elite University vs Outstate Tuition")

Based on our criteria, there are 699 universities not considered elite, and 78 considered elite.