Problem 1

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
# Load the Auto data; the file marks missing values with "?".
Auto <- read.table(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
  header = TRUE,
  na.strings = "?"
)
# View(Auto)
# Drop incomplete rows. na.omit() keeps the original row names, so after this
# step row names no longer match row positions (see the na.action attribute
# in the str() output).
Auto <- na.omit(Auto)

1a.

# Show each column's storage type to help separate quantitative from
# qualitative predictors.
str(Auto)
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : num  3504 3693 3436 3433 3449 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : chr  "chevrolet chevelle malibu" "buick skylark 320" "plymouth satellite" "amc rebel sst" ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
##   ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...

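The quantitative predictors are mpg, cylinders, displacement, horsepower, weight, acceleration, and year. The qualitative predictors are name and origin: origin is stored as an integer, but its values 1, 2, and 3 are codes for where the car was made rather than measurements (its range, mean, and standard deviation are still reported below for completeness).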

1b.

# range() returns the minimum and maximum of a column; it is called once per
# numeric column of Auto.
range(Auto$mpg)
## [1]  9.0 46.6

The range of Mpg is from 9 to 46.6

range(Auto$cylinders)
## [1] 3 8

The range of Cylinders is from 3 to 8

range(Auto$displacement)
## [1]  68 455

The range of displacement is from 68 to 455

range(Auto$horsepower)
## [1]  46 230

The range of horsepower is from 46 to 230

range(Auto$weight)
## [1] 1613 5140

The range of Weight is from 1613 to 5140

range(Auto$acceleration)
## [1]  8.0 24.8

The range of Acceleration is from 8.0 to 24.8

range(Auto$year)
## [1] 70 82

The range of Year is from 70 to 82

range(Auto$origin)
## [1] 1 3

The range of Origin is from 1 to 3

1c.

# Mean and sample standard deviation for mpg, cylinders, displacement, and
# horsepower, computed on the full (NA-free) Auto data.
mean(Auto$mpg) #23.44592
## [1] 23.44592
sd(Auto$mpg) #7.805007
## [1] 7.805007

The mean for mpg is 23.44592 and the standard deviation for mpg is 7.805007

mean(Auto$cylinders)
## [1] 5.471939
sd(Auto$cylinders)
## [1] 1.705783

The mean for cylinders is 5.471939 and the standard deviation for cylinders is 1.705783

mean(Auto$displacement) 
## [1] 194.412
sd(Auto$displacement) 
## [1] 104.644

The mean for displacement is 194.412 and the standard deviation for displacement is 104.644

mean(Auto$horsepower) 
## [1] 104.4694
sd(Auto$horsepower) 
## [1] 38.49116

The mean for horsepower is 104.4694 and the standard deviation for horsepower is 38.49116

# Mean and sample standard deviation for weight, acceleration, year, and
# origin, computed on the full (NA-free) Auto data.
mean(Auto$weight) 
## [1] 2977.584
sd(Auto$weight) 
## [1] 849.4026

The mean for weight is 2977.584 and the standard deviation for weight is 849.4026

mean(Auto$acceleration) 
## [1] 15.54133
sd(Auto$acceleration) 
## [1] 2.758864

The mean for acceleration is 15.54133 and the standard deviation for acceleration is 2.758864

mean(Auto$year) 
## [1] 75.97959
sd(Auto$year) 
## [1] 3.683737

The mean for year is 75.97959 and the standard deviation for year is 3.683737

mean(Auto$origin) 
## [1] 1.576531
sd(Auto$origin) 
## [1] 0.8055182

The mean for origin is 1.576531 and the standard deviation for origin is 0.8055182

1d.

# Drop the 10th through 85th observations. Negative indices in `[` select by
# row position, not by row name. Because na.omit() removed original row 33,
# the row at position 85 carries the row name "86", so -(10:85) is the
# correct index even though the label 86 disappears from the viewer.
r <- Auto[-(10:85), ]
view(r)

I used 10:85 because negative indexing removes rows by position. In the viewer the row labels jump from 9 to 87, which makes it look as if one row too many was removed, but that is only because na.omit() dropped original row 33 earlier: the 85th remaining row still carries the label 86. Exactly 76 rows (the 10th through 85th) are removed, leaving 316.

range(r$mpg)
## [1] 11.0 46.6
mean(r$mpg)
## [1] 24.40443
sd(r$mpg)
## [1] 7.867283

range = 11 to 46.6, mean = 24.40443, standard deviation = 7.867283

range(r$cylinders)
## [1] 3 8
mean(r$cylinders)
## [1] 5.373418
sd(r$cylinders)
## [1] 1.654179

range = 3 to 8, mean = 5.373418, standard deviation = 1.654179

range(r$displacement)
## [1]  68 455
mean(r$displacement)
## [1] 187.2405
sd(r$displacement)
## [1] 99.67837

range = 68 to 455, mean = 187.2405, standard deviation = 99.67837

range(r$horsepower)
## [1]  46 230
mean(r$horsepower)
## [1] 100.7215
sd(r$horsepower)
## [1] 35.70885

range = 46 to 230, mean = 100.7215, standard deviation = 35.70885

range(r$weight)
## [1] 1649 4997
mean(r$weight)
## [1] 2935.972
sd(r$weight)
## [1] 811.3002

range = 1649 to 4997, mean = 2935.972, standard deviation = 811.3002

range(r$acceleration)
## [1]  8.5 24.8
mean(r$acceleration)
## [1] 15.7269
sd(r$acceleration)
## [1] 2.693721

range = 8.5 to 24.8, mean = 15.7269, standard deviation = 2.693721

range(r$year)
## [1] 70 82
mean(r$year)
## [1] 77.14557
sd(r$year)
## [1] 3.106217

range = 70 to 82, mean = 77.14557, standard deviation = 3.106217

range(r$origin)
## [1] 1 3
mean(r$origin)
## [1] 1.601266
sd(r$origin)
## [1] 0.81991

range = 1 to 3, mean = 1.601266, standard deviation = 0.81991

1e.

pairs(~mpg + weight + acceleration, Auto)

We can see from the graph that as weight increases, mpg decreases. Heavier cars also tend to have lower acceleration values. Finally, there is a slight positive linear relationship between mpg and acceleration.

1f.

My plot suggests that if you want a car with good mpg, you should not get a heavy car. Also, a car with a higher acceleration value is somewhat more likely to have good mpg, since mpg and acceleration show a slight positive linear relationship.

Problem 2

# Read the College data; the first line of the file supplies the column names.
college <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE
)
view(college)

2b.

# Use the values of the first column as row names, then drop that column so
# it is no longer treated as data.
rownames(college) <- college[[1]]
college <- college[, -1]
# view(college)

2c(a).

# Per-column summary; Private is still a character column at this point, so
# it is reported by length/class rather than by counts.
summary(college)
##    Private               Apps           Accept          Enroll    
##  Length:777         Min.   :   81   Min.   :   72   Min.   :  35  
##  Class :character   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242  
##  Mode  :character   Median : 1558   Median : 1110   Median : 434  
##                     Mean   : 3002   Mean   : 2019   Mean   : 780  
##                     3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902  
##                     Max.   :48094   Max.   :26330   Max.   :6392  
##    Top10perc       Top25perc      F.Undergrad     P.Undergrad     
##  Min.   : 1.00   Min.   :  9.0   Min.   :  139   Min.   :    1.0  
##  1st Qu.:15.00   1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0  
##  Median :23.00   Median : 54.0   Median : 1707   Median :  353.0  
##  Mean   :27.56   Mean   : 55.8   Mean   : 3700   Mean   :  855.3  
##  3rd Qu.:35.00   3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0  
##  Max.   :96.00   Max.   :100.0   Max.   :31643   Max.   :21836.0  
##     Outstate       Room.Board       Books           Personal   
##  Min.   : 2340   Min.   :1780   Min.   :  96.0   Min.   : 250  
##  1st Qu.: 7320   1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850  
##  Median : 9990   Median :4200   Median : 500.0   Median :1200  
##  Mean   :10441   Mean   :4358   Mean   : 549.4   Mean   :1341  
##  3rd Qu.:12925   3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700  
##  Max.   :21700   Max.   :8124   Max.   :2340.0   Max.   :6800  
##       PhD            Terminal       S.F.Ratio      perc.alumni   
##  Min.   :  8.00   Min.   : 24.0   Min.   : 2.50   Min.   : 0.00  
##  1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00  
##  Median : 75.00   Median : 82.0   Median :13.60   Median :21.00  
##  Mean   : 72.66   Mean   : 79.7   Mean   :14.09   Mean   :22.74  
##  3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00  
##  Max.   :103.00   Max.   :100.0   Max.   :39.80   Max.   :64.00  
##      Expend        Grad.Rate     
##  Min.   : 3186   Min.   : 10.00  
##  1st Qu.: 6751   1st Qu.: 53.00  
##  Median : 8377   Median : 65.00  
##  Mean   : 9660   Mean   : 65.46  
##  3rd Qu.:10830   3rd Qu.: 78.00  
##  Max.   :56233   Max.   :118.00

2c(b).

# Check the column types; Private is read in as character.
str(college)
## 'data.frame':    777 obs. of  18 variables:
##  $ Private    : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Apps       : int  1660 2186 1428 417 193 587 353 1899 1038 582 ...
##  $ Accept     : int  1232 1924 1097 349 146 479 340 1720 839 498 ...
##  $ Enroll     : int  721 512 336 137 55 158 103 489 227 172 ...
##  $ Top10perc  : int  23 16 22 60 16 38 17 37 30 21 ...
##  $ Top25perc  : int  52 29 50 89 44 62 45 68 63 44 ...
##  $ F.Undergrad: int  2885 2683 1036 510 249 678 416 1594 973 799 ...
##  $ P.Undergrad: int  537 1227 99 63 869 41 230 32 306 78 ...
##  $ Outstate   : int  7440 12280 11250 12960 7560 13500 13290 13868 15595 10468 ...
##  $ Room.Board : int  3300 6450 3750 5450 4120 3335 5720 4826 4400 3380 ...
##  $ Books      : int  450 750 400 450 800 500 500 450 300 660 ...
##  $ Personal   : int  2200 1500 1165 875 1500 675 1500 850 500 1800 ...
##  $ PhD        : int  70 29 53 92 76 67 90 89 79 40 ...
##  $ Terminal   : int  78 30 66 97 72 73 93 100 84 41 ...
##  $ S.F.Ratio  : num  18.1 12.2 12.9 7.7 11.9 9.4 11.5 13.7 11.3 11.5 ...
##  $ perc.alumni: int  12 16 30 37 2 11 26 37 23 15 ...
##  $ Expend     : int  7041 10527 8735 19016 10922 9727 8861 11487 11644 8991 ...
##  $ Grad.Rate  : int  60 56 54 59 15 55 63 73 80 52 ...
# Turn Private into a factor, then draw a scatterplot matrix of the first ten
# columns.
college[["Private"]] <- as.factor(college[["Private"]])
pairs(college[, 1:10])

2c(c).

# Side-by-side boxplots of Outstate, one box per level of Private.
ggplot(data = college, mapping = aes(x = Private, y = Outstate)) +
  geom_boxplot()

2c(d).

# Build the Elite indicator: start with "No" for every row, switch to "Yes"
# where Top10perc exceeds 50, and store it as a factor column on college.
Elite <- rep("No", times = nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
college <- data.frame(college, Elite = Elite)

# Count of rows per Elite level, then boxplots of Outstate by Elite.
summary(Elite)
##  No Yes 
## 699  78
ggplot(data = college, mapping = aes(x = Elite, y = Outstate)) +
  geom_boxplot()