library(tidyverse)
## ── Attaching packages ────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Auto <- read.table("http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
header=TRUE,
na.strings = "?")
Auto = na.omit(Auto)
head(Auto)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
Problem 1a:
The quantitative predictors are mpg, displacement, horsepower, weight, acceleration, and cylinders. The qualitative predictors are year, origin, and name. Year can be considered either.
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : int 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : num 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : num 3504 3693 3436 3433 3449 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : int 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : int 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
## - attr(*, "na.action")= 'omit' Named int 33 127 331 337 355
## ..- attr(*, "names")= chr "33" "127" "331" "337" ...
Problem 1b:
range(Auto$mpg)
## [1] 9.0 46.6
range(Auto$cylinders)
## [1] 3 8
range(Auto$displacement)
## [1] 68 455
range(Auto$horsepower)
## [1] 46 230
range(Auto$weight)
## [1] 1613 5140
range(Auto$acceleration)
## [1] 8.0 24.8
range(Auto$year)
## [1] 70 82
Problem 1c:
mean(Auto$mpg)
## [1] 23.44592
sd(Auto$mpg)
## [1] 7.805007
mean(Auto$cylinders)
## [1] 5.471939
sd(Auto$cylinders)
## [1] 1.705783
mean(Auto$displacement)
## [1] 194.412
sd(Auto$displacement)
## [1] 104.644
mean(Auto$horsepower)
## [1] 104.4694
sd(Auto$horsepower)
## [1] 38.49116
mean(Auto$weight)
## [1] 2977.584
sd(Auto$weight)
## [1] 849.4026
mean(Auto$acceleration)
## [1] 15.54133
sd(Auto$acceleration)
## [1] 2.758864
mean(Auto$year)
## [1] 75.97959
sd(Auto$year)
## [1] 3.683737
Problem 1d:
subAuto=Auto[-c(10:85),]
range(subAuto$mpg)
## [1] 11.0 46.6
mean(subAuto$mpg)
## [1] 24.40443
sd(subAuto$mpg)
## [1] 7.867283
range(subAuto$cylinders)
## [1] 3 8
mean(subAuto$cylinders)
## [1] 5.373418
sd(subAuto$cylinders)
## [1] 1.654179
range(subAuto$displacement)
## [1] 68 455
mean(subAuto$displacement)
## [1] 187.2405
sd(subAuto$displacement)
## [1] 99.67837
range(subAuto$horsepower)
## [1] 46 230
mean(subAuto$horsepower)
## [1] 100.7215
sd(subAuto$horsepower)
## [1] 35.70885
range(subAuto$weight)
## [1] 1649 4997
mean(subAuto$weight)
## [1] 2935.972
sd(subAuto$weight)
## [1] 811.3002
range(subAuto$acceleration)
## [1] 8.5 24.8
mean(subAuto$acceleration)
## [1] 15.7269
sd(subAuto$acceleration)
## [1] 2.693721
range(subAuto$year)
## [1] 70 82
mean(subAuto$year)
## [1] 77.14557
sd(subAuto$year)
## [1] 3.106217
Problem 1e:
#pairs(Auto)
hist(Auto$mpg)
plot(Auto$mpg, Auto$cylinders,)
boxplot(mpg~cylinders, data=Auto)
Problem 1f: Year, horsepower, and the number of cylinders all have somewhat linear relationships with mpg. MPG has a positive relationship with year and a negative relationship with cylinders and horsepower. You could use these other variable to predict mpg.
Problem 2a:
college=read.csv("College.csv", header=TRUE)
Problem 2b:
rownames(college) <- college[,1]
#View(college)
college <- college[,-1]
#View(college)
Problem 2c:
summary(college)
## Private Apps Accept Enroll Top10perc
## No :212 Min. : 81 Min. : 72 Min. : 35 Min. : 1.00
## Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00
## Median : 1558 Median : 1110 Median : 434 Median :23.00
## Mean : 3002 Mean : 2019 Mean : 780 Mean :27.56
## 3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00
## Max. :48094 Max. :26330 Max. :6392 Max. :96.00
## Top25perc F.Undergrad P.Undergrad Outstate
## Min. : 9.0 Min. : 139 Min. : 1.0 Min. : 2340
## 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320
## Median : 54.0 Median : 1707 Median : 353.0 Median : 9990
## Mean : 55.8 Mean : 3700 Mean : 855.3 Mean :10441
## 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925
## Max. :100.0 Max. :31643 Max. :21836.0 Max. :21700
## Room.Board Books Personal PhD
## Min. :1780 Min. : 96.0 Min. : 250 Min. : 8.00
## 1st Qu.:3597 1st Qu.: 470.0 1st Qu.: 850 1st Qu.: 62.00
## Median :4200 Median : 500.0 Median :1200 Median : 75.00
## Mean :4358 Mean : 549.4 Mean :1341 Mean : 72.66
## 3rd Qu.:5050 3rd Qu.: 600.0 3rd Qu.:1700 3rd Qu.: 85.00
## Max. :8124 Max. :2340.0 Max. :6800 Max. :103.00
## Terminal S.F.Ratio perc.alumni Expend
## Min. : 24.0 Min. : 2.50 Min. : 0.00 Min. : 3186
## 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751
## Median : 82.0 Median :13.60 Median :21.00 Median : 8377
## Mean : 79.7 Mean :14.09 Mean :22.74 Mean : 9660
## 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830
## Max. :100.0 Max. :39.80 Max. :64.00 Max. :56233
## Grad.Rate
## Min. : 10.00
## 1st Qu.: 53.00
## Median : 65.00
## Mean : 65.46
## 3rd Qu.: 78.00
## Max. :118.00
#pairs(college[,1:10])
boxplot(college$Outstate~college$Private,
main="Outstate vs Private",
ylab="Out of State",
xlab="Private",
col="pink")
Problem 2d:
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] = "Yes"
Elite <- as.factor(Elite)
college <- data.frame(college, Elite)
summary(college)
## Private Apps Accept Enroll Top10perc
## No :212 Min. : 81 Min. : 72 Min. : 35 Min. : 1.00
## Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00
## Median : 1558 Median : 1110 Median : 434 Median :23.00
## Mean : 3002 Mean : 2019 Mean : 780 Mean :27.56
## 3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00
## Max. :48094 Max. :26330 Max. :6392 Max. :96.00
## Top25perc F.Undergrad P.Undergrad Outstate
## Min. : 9.0 Min. : 139 Min. : 1.0 Min. : 2340
## 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320
## Median : 54.0 Median : 1707 Median : 353.0 Median : 9990
## Mean : 55.8 Mean : 3700 Mean : 855.3 Mean :10441
## 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925
## Max. :100.0 Max. :31643 Max. :21836.0 Max. :21700
## Room.Board Books Personal PhD
## Min. :1780 Min. : 96.0 Min. : 250 Min. : 8.00
## 1st Qu.:3597 1st Qu.: 470.0 1st Qu.: 850 1st Qu.: 62.00
## Median :4200 Median : 500.0 Median :1200 Median : 75.00
## Mean :4358 Mean : 549.4 Mean :1341 Mean : 72.66
## 3rd Qu.:5050 3rd Qu.: 600.0 3rd Qu.:1700 3rd Qu.: 85.00
## Max. :8124 Max. :2340.0 Max. :6800 Max. :103.00
## Terminal S.F.Ratio perc.alumni Expend
## Min. : 24.0 Min. : 2.50 Min. : 0.00 Min. : 3186
## 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751
## Median : 82.0 Median :13.60 Median :21.00 Median : 8377
## Mean : 79.7 Mean :14.09 Mean :22.74 Mean : 9660
## 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830
## Max. :100.0 Max. :39.80 Max. :64.00 Max. :56233
## Grad.Rate Elite
## Min. : 10.00 No :699
## 1st Qu.: 53.00 Yes: 78
## Median : 65.00
## Mean : 65.46
## 3rd Qu.: 78.00
## Max. :118.00
boxplot(Outstate~Elite, data=college,
main = "Elite vs Out of State",
col = "lightblue")