# Load the Auto data set; "?" entries in the raw file are read as NA.
# stringsAsFactors = TRUE keeps text columns (e.g. `name`) as factors on
# R >= 4.0, matching the recorded str() output for that column.
Auto <- read.table(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)

Problem #1

A. Which of the predictors are quantitative, and which are qualitative?

#### Qualitative:

# Structure of the columns treated as qualitative.
str(Auto[["name"]])
##  Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
str(Auto[["origin"]])
##  int [1:397] 1 1 1 1 1 1 1 1 1 1 ...

#### Quantitative

# Structure of the columns treated as quantitative.
str(Auto[["mpg"]])
##  num [1:397] 18 15 18 16 17 15 14 14 14 15 ...
str(Auto[["cylinders"]])
##  int [1:397] 8 8 8 8 8 8 8 8 8 8 ...
str(Auto[["displacement"]])
##  num [1:397] 307 350 318 304 302 429 454 440 455 390 ...
str(Auto[["horsepower"]])
##  num [1:397] 130 165 150 150 140 198 220 215 225 190 ...
str(Auto[["weight"]])
##  num [1:397] 3504 3693 3436 3433 3449 ...
str(Auto[["acceleration"]])
##  num [1:397] 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
str(Auto[["year"]])
##  int [1:397] 70 70 70 70 70 70 70 70 70 70 ...

B. What is the range, mean, and standard deviation of each quantitative predictor?

Range

# Range (minimum, maximum) of each quantitative predictor.
range(Auto$mpg)
## [1]  9.0 46.6
range(Auto$cylinders)
## [1] 3 8
range(Auto$displacement)
## [1]  68 455
# horsepower contains missing values (read in as NA); without na.rm = TRUE
# range() returns NA NA instead of the observed minimum and maximum.
range(Auto$horsepower, na.rm = TRUE)
## [1]  46 230
range(Auto$weight)
## [1] 1613 5140
range(Auto$acceleration)
## [1]  8.0 24.8
range(Auto$year)
## [1] 70 82

C.

Mean

# Mean of each quantitative predictor.
mean(Auto$mpg)
## [1] 23.51587
mean(Auto$cylinders)
## [1] 5.458438
mean(Auto$displacement)
## [1] 193.5327
# horsepower contains missing values (read in as NA); without na.rm = TRUE
# mean() returns NA.
mean(Auto$horsepower, na.rm = TRUE)
## [1] 104.4694
mean(Auto$weight)
## [1] 2970.262
mean(Auto$acceleration)
## [1] 15.55567
mean(Auto$year)
## [1] 75.99496

Standard Deviation

# Standard deviation of each quantitative predictor.
sd(Auto$mpg)
## [1] 7.825804
sd(Auto$cylinders)
## [1] 1.701577
sd(Auto$displacement)
## [1] 104.3796
# horsepower contains missing values (read in as NA); without na.rm = TRUE
# sd() returns NA.
sd(Auto$horsepower, na.rm = TRUE)
## [1] 38.49116
sd(Auto$weight)
## [1] 847.9041
sd(Auto$acceleration)
## [1] 2.749995
sd(Auto$year)
## [1] 3.690005

D.

# Keep rows 1-9 and 85-392 of Auto for the subset statistics that follow.
# NOTE(review): the str() output shows Auto has 397 rows, so this selection
# also drops rows 393-397 — confirm that is intended.
AutoC <- Auto[c(1:9,85:392),]

Range

# Range (minimum, maximum) of each quantitative predictor in the AutoC subset.
range(AutoC$mpg)
## [1] 11.0 46.6
range(AutoC$cylinders)
## [1] 3 8
range(AutoC$displacement)
## [1]  68 455
# NOTE(review): the result is NA NA, which range() gives when the input
# contains missing values; pass na.rm = TRUE and refresh the recorded output
# to get the observed range.
range(AutoC$horsepower)
## [1] NA NA
range(AutoC$weight)
## [1] 1649 4997
range(AutoC$acceleration)
## [1]  8.5 24.8
range(AutoC$year)
## [1] 70 82

Mean

# Mean of each quantitative predictor in the AutoC subset.
mean(AutoC$mpg)
## [1] 24.32114
mean(AutoC$cylinders)
## [1] 5.388013
mean(AutoC$displacement)
## [1] 187.7886
# NOTE(review): the result is NA, which mean() gives when the input contains
# missing values; pass na.rm = TRUE and refresh the recorded output.
mean(AutoC$horsepower)
## [1] NA
mean(AutoC$weight)
## [1] 2937.987
mean(AutoC$acceleration)
## [1] 15.69022
mean(AutoC$year)
## [1] 77.05994

Standard Deviation

# Standard deviation of each quantitative predictor in the AutoC subset.
sd(AutoC$mpg)
## [1] 7.85805
sd(AutoC$cylinders)
## [1] 1.656665
sd(AutoC$displacement)
## [1] 100.0393
# NOTE(review): the result is NA, which sd() gives when the input contains
# missing values; pass na.rm = TRUE and refresh the recorded output.
sd(AutoC$horsepower)
## [1] NA
sd(AutoC$weight)
## [1] 814.7361
sd(AutoC$acceleration)
## [1] 2.627688
sd(AutoC$year)
## [1] 3.083162

E.

Horsepower and Acceleration
# Scatterplot of horsepower (y) against acceleration (x); explicit axis labels
# replace the default `Auto$...` expressions.
plot(Auto$acceleration, Auto$horsepower,
     xlab = "Acceleration", ylab = "Horsepower")

##### Weight and Displacement

# Scatterplot of displacement (y) against weight (x); explicit axis labels
# replace the default `Auto$...` expressions.
plot(Auto$weight, Auto$displacement,
     xlab = "Weight", ylab = "Displacement")

Cylinders and Horsepower
# Scatterplot of horsepower (y) against cylinders (x); explicit axis labels
# replace the default `Auto$...` expressions.
plot(Auto$cylinders, Auto$horsepower,
     xlab = "Cylinders", ylab = "Horsepower")

F.

Other variables that could be useful in predicting mpg include weight, displacement, cylinders, and horsepower. The plots below (weight, cylinders, and horsepower against mpg) illustrate linear relationships between these variables and a car's gas mileage. Weight/mpg highlights how increasing a car's weight leads to reduced gas mileage and vice versa. Cylinders/mpg shows a relationship between fewer cylinders and increased gas mileage, while the plot of Horsepower/mpg shows a reduction in gas mileage as horsepower is increased.
# mpg (y) against three candidate predictors; explicit axis labels replace the
# default `Auto$...` expressions.
plot(Auto$weight, Auto$mpg, xlab = "Weight", ylab = "MPG")

plot(Auto$cylinders, Auto$mpg, xlab = "Cylinders", ylab = "MPG")

plot(Auto$horsepower, Auto$mpg, xlab = "Horsepower", ylab = "MPG")

Problem #2

Box office Star Wars (in millions!)

# Box office figures per film; each vector holds two values in the order
# given by `region`.
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# Vectors region and titles, used for naming
region <- c("US", "non-US")
# Each title is kept on a single source line: a line break inside the quotes
# would become a literal newline in the string.
titles <- c(
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi"
)

A.

# One row per film, one column per region. Each film vector holds two values,
# so the combined vector is laid out film by film and has to be filled
# row-wise into three rows; nrow = 2 would mix values from different films
# within a row.
starWars <- matrix(
  data = c(new_hope, empire_strikes, return_jedi),
  nrow = 3,
  byrow = TRUE
)
print(starWars)
##         [,1]  [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8

B.

# Films are rows and regions are columns. The input vector is laid out film by
# film, so it must be filled row-wise; a column-wise fill would pair values
# with the wrong film and region.
starWars <- matrix(
  data = c(new_hope, empire_strikes, return_jedi),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("A New Hope", "The Empire Strikes Back", "Return of the Jedi"),
    c("US", "Non-US")
  )
)
print(starWars)
##                              US Non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8

C.

# Total box office per film (sum across both regions).
boxoffice <- rowSums(starWars)
print(boxoffice)
##              A New Hope The Empire Strikes Back      Return of the Jedi 
##                 775.398                 538.375                 475.106

D.

# Flattens the matrix column by column into a single-column matrix.
# NOTE(review): if the intent was to append the per-film totals as a new
# column, that would be cbind(starWars, boxoffice) — confirm.
cbind(c(starWars))
##         [,1]
## [1,] 460.998
## [2,] 290.475
## [3,] 309.306
## [4,] 314.400
## [5,] 247.900
## [6,] 165.800

E.

# Prequels
phantom_menace <- c(474.5, 552.5)
attack_clones <- c(310.7, 338.7)
revenge_sith <- c(380.3, 468.5)
titles2 <- c("The Phantom Menace", "Attack of The Clones", "Revenge of The Sith")

# Same layout as starWars: one row per film, filled row-wise.
starWars2 <- matrix(
  data = c(phantom_menace, attack_clones, revenge_sith),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(titles2, c("US", "Non-US"))
)

print(starWars2)
##                         US Non-US
## The Phantom Menace   474.5  552.5
## Attack of The Clones 310.7  338.7
## Revenge of The Sith  380.3  468.5

F.

# Stack both trilogies into one six-row matrix.
allStarWars <- rbind(starWars, starWars2)
print(allStarWars)
##                              US Non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8
## The Phantom Menace      474.500  552.5
## Attack of The Clones    310.700  338.7
## Revenge of The Sith     380.300  468.5

G.

# Total box office per region across all six films.
colSums(allStarWars)
##       US   Non-US 
## 2226.279 2087.800

Problem #3

A.

# Read the College data. stringsAsFactors = TRUE keeps text columns such as
# `Private` as factors on R >= 4.0, which is what the recorded summary()
# output (level counts for `Private`) reflects.
college <- read.csv("College.csv", header = TRUE, stringsAsFactors = TRUE)

B.

C.

a.
# Per-column summary of `college`; the recorded output follows.
summary(college)
##                             X       Private        Apps      
##  Abilene Christian University:  1   No :212   Min.   :   81  
##  Adelphi University          :  1   Yes:565   1st Qu.:  776  
##  Adrian College              :  1             Median : 1558  
##  Agnes Scott College         :  1             Mean   : 3002  
##  Alaska Pacific University   :  1             3rd Qu.: 3624  
##  Albertson College           :  1             Max.   :48094  
##  (Other)                     :771                            
##      Accept          Enroll       Top10perc       Top25perc    
##  Min.   :   72   Min.   :  35   Min.   : 1.00   Min.   :  9.0  
##  1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00   1st Qu.: 41.0  
##  Median : 1110   Median : 434   Median :23.00   Median : 54.0  
##  Mean   : 2019   Mean   : 780   Mean   :27.56   Mean   : 55.8  
##  3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00   3rd Qu.: 69.0  
##  Max.   :26330   Max.   :6392   Max.   :96.00   Max.   :100.0  
##                                                                
##   F.Undergrad     P.Undergrad         Outstate       Room.Board  
##  Min.   :  139   Min.   :    1.0   Min.   : 2340   Min.   :1780  
##  1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320   1st Qu.:3597  
##  Median : 1707   Median :  353.0   Median : 9990   Median :4200  
##  Mean   : 3700   Mean   :  855.3   Mean   :10441   Mean   :4358  
##  3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925   3rd Qu.:5050  
##  Max.   :31643   Max.   :21836.0   Max.   :21700   Max.   :8124  
##                                                                  
##      Books           Personal         PhD            Terminal    
##  Min.   :  96.0   Min.   : 250   Min.   :  8.00   Min.   : 24.0  
##  1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00   1st Qu.: 71.0  
##  Median : 500.0   Median :1200   Median : 75.00   Median : 82.0  
##  Mean   : 549.4   Mean   :1341   Mean   : 72.66   Mean   : 79.7  
##  3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00   3rd Qu.: 92.0  
##  Max.   :2340.0   Max.   :6800   Max.   :103.00   Max.   :100.0  
##                                                                  
##    S.F.Ratio      perc.alumni        Expend        Grad.Rate     
##  Min.   : 2.50   Min.   : 0.00   Min.   : 3186   Min.   : 10.00  
##  1st Qu.:11.50   1st Qu.:13.00   1st Qu.: 6751   1st Qu.: 53.00  
##  Median :13.60   Median :21.00   Median : 8377   Median : 65.00  
##  Mean   :14.09   Mean   :22.74   Mean   : 9660   Mean   : 65.46  
##  3rd Qu.:16.50   3rd Qu.:31.00   3rd Qu.:10830   3rd Qu.: 78.00  
##  Max.   :39.80   Max.   :64.00   Max.   :56233   Max.   :118.00  
## 
b.
# Scatterplot matrix of the first ten columns of `college`.
pairs(college[, 1:10])

##### c.

# Side-by-side boxplots of Outstate for each level of Private. Passing the
# numeric column as x and the grouping column as y draws a scatter of the
# group codes (and fails when the grouping column is character); the formula
# interface groups correctly for both factor and character columns.
boxplot(Outstate ~ Private, data = college,
        xlab = "Private", ylab = "Outstate")

##### d.

# Label every row "No", switch the rows whose Top10perc exceeds 50 to "Yes",
# and store the result as a factor appended to `college`.
Elite <- factor(
  replace(rep("No", nrow(college)), which(college$Top10perc > 50), "Yes")
)
college <- data.frame(college, Elite = Elite)

# Count of rows per Elite level.
summary(Elite)
##  No Yes 
## 699  78
# Side-by-side boxplots of Outstate for each level of Elite. Passing the
# numeric column as x and the factor as y draws a scatter of the factor codes
# rather than one box per group.
boxplot(Outstate ~ Elite, data = college,
        xlab = "Elite", ylab = "Outstate")