Homework #1

Problem 1

# Read the Auto data from the course site, treating "?" entries as NA, and
# keep only the rows that have no missing values.
Auto <- na.omit(
  read.table(
    file = "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
    header = TRUE,
    na.strings = "?"
  )
)

Part A

# Show the type and the first few values of every column in Auto.
str(Auto)
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : num  3504 3693 3436 3433 3449 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
##  - attr(*, "na.action")= 'omit' Named int  33 127 331 337 355
##   ..- attr(*, "names")= chr  "33" "127" "331" "337" ...

Quantitative: mpg, cylinders, displacement, horsepower, weight, acceleration, year

Qualitative: origin, name

Part B

# Print the range (minimum, maximum) of each quantitative column of Auto,
# one output line per column, in the order listed.
invisible(lapply(
  c("mpg", "cylinders", "displacement", "horsepower",
    "weight", "acceleration", "year"),
  function(col_name) print(range(Auto[[col_name]]))
))
## [1]  9.0 46.6
## [1] 3 8
## [1]  68 455
## [1]  46 230
## [1] 1613 5140
## [1]  8.0 24.8
## [1] 70 82

Ranges of Quantitative Predictors:

mpg: 9.0 - 46.6

cylinders: 3 - 8

displacement: 68 - 455

horsepower: 46 - 230

weight: 1613 - 5140

acceleration: 8.0 - 24.8

year: 70 - 82

Part C

# Print the mean of every quantitative column of Auto, then the standard
# deviation of every quantitative column, one output line per value.
invisible(lapply(list(mean, sd), function(stat_fun) {
  for (col_name in c("mpg", "cylinders", "displacement", "horsepower",
                     "weight", "acceleration", "year")) {
    print(stat_fun(Auto[[col_name]]))
  }
}))
# means
## [1] 23.44592
## [1] 5.471939
## [1] 194.412
## [1] 104.4694
## [1] 2977.584
## [1] 15.54133
## [1] 75.97959
# standard deviations
## [1] 7.805007
## [1] 1.705783
## [1] 104.644
## [1] 38.49116
## [1] 849.4026
## [1] 2.758864
## [1] 3.683737

Means of Quantitative Predictors:

mpg: 23.44592

cylinders: 5.471939

displacement: 194.412

horsepower: 104.4694

weight: 2977.584

acceleration: 15.54133

year: 75.97959

Standard Deviations of Quantitative Predictors:

mpg: 7.805007

cylinders: 1.705783

displacement: 104.644

horsepower: 38.49116

weight: 849.4026

acceleration: 2.758864

year: 3.683737

Part D

# Drop rows 10 through 85 of Auto (negative indices exclude rows).
Auto_trunc <- Auto[-(10:85), ]

# For each statistic in turn (range, mean, standard deviation), print its
# value for every quantitative column of the truncated data.
invisible(lapply(list(range, mean, sd), function(stat_fun) {
  for (col_name in c("mpg", "cylinders", "displacement", "horsepower",
                     "weight", "acceleration", "year")) {
    print(stat_fun(Auto_trunc[[col_name]]))
  }
}))
#range
## [1] 11.0 46.6
## [1] 3 8
## [1]  68 455
## [1]  46 230
## [1] 1649 4997
## [1]  8.5 24.8
## [1] 70 82
#mean
## [1] 24.40443
## [1] 5.373418
## [1] 187.2405
## [1] 100.7215
## [1] 2935.972
## [1] 15.7269
## [1] 77.14557
#Standard Deviation
## [1] 7.867283
## [1] 1.654179
## [1] 99.67837
## [1] 35.70885
## [1] 811.3002
## [1] 2.693721
## [1] 3.106217

Ranges of Quantitative Predictors:

mpg: 11.0 - 46.6

cylinders: 3 - 8

displacement: 68 - 455

horsepower: 46 - 230

weight: 1649 - 4997

acceleration: 8.5 - 24.8

year: 70 - 82

Means of Quantitative Predictors:

mpg: 24.40443

cylinders: 5.373418

displacement: 187.2405

horsepower: 100.7215

weight: 2935.972

acceleration: 15.7269

year: 77.14557

Standard Deviations of Quantitative Predictors:

mpg: 7.867283

cylinders: 1.654179

displacement: 99.67837

horsepower: 35.70885

weight: 811.3002

acceleration: 2.693721

year: 3.106217

Parts E & F

As shown in the following scatter plots, the number of cylinders in a car’s engine is correlated with many characteristics of the car’s performance. There is a positive correlation with horsepower and weight, but a negative correlation with mpg and acceleration. This suggests that cars with more cylinders are less fuel-efficient.

# Scatterplot matrix of every pair of columns in Auto.
pairs(Auto)

# NOTE(review): ggplot(), aes() and geom_point() are ggplot2 functions and no
# library(ggplot2) call is visible in this file - confirm it is loaded in a
# setup chunk.
# Each plot below puts cylinders on the x axis against one other column;
# shape = 1 draws the points as open circles.

ggplot(data = Auto, mapping = aes(x = cylinders, y = mpg)) +
  geom_point(shape = 1)

ggplot(data = Auto, mapping = aes(x = cylinders, y = horsepower)) +
  geom_point(shape = 1)

ggplot(data = Auto, mapping = aes(x = cylinders, y = weight)) +
  geom_point(shape = 1)

ggplot(data = Auto, mapping = aes(x = cylinders, y = acceleration)) +
  geom_point(shape = 1)

Problem 2

# Star Wars box-office revenue in millions, one two-element vector per film.
new_hope       <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.9)
return_jedi    <- c(309.306, 165.8)

# Labels applied later as matrix dimnames: `region` for the columns,
# `titles` for the rows.
region <- c("US", "non-US")
titles <- c(
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi"
)

Part A

# Stack the three per-film vectors as rows of a 3 x 2 matrix;
# deparse.level = 0 keeps the rows unlabelled.
starWars <- rbind(new_hope, empire_strikes, return_jedi, deparse.level = 0)
starWars
##         [,1]  [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8

Part B

# Label the rows with the film titles and the columns with the regions.
dimnames(starWars) <- list(titles, region)
starWars
##                              US non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8

Part C

# Add up the US and non-US columns for each film (one total per matrix row).
worldwide_vector <- rowSums(starWars)
worldwide_vector
##              A New Hope The Empire Strikes Back      Return of the Jedi 
##                 775.398                 538.375                 475.106

Part D

# Append the per-film totals as a third column named "worldwide_vector".
all_wars_matrix <- cbind(starWars, worldwide_vector = worldwide_vector)
all_wars_matrix
##                              US non-US worldwide_vector
## A New Hope              460.998  314.4          775.398
## The Empire Strikes Back 290.475  247.9          538.375
## Return of the Jedi      309.306  165.8          475.106

Part E

# Revenue vectors for the second set of three films, in the same column order
# as `region`.
phantom_menace <- c(474.5, 552.5)
attack_clones  <- c(310.7, 338.7)
revenge_sith   <- c(380.3, 468.5)

titles2 <- c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith")

# Build the 3 x 2 matrix row by row and label it in the same call:
# rows by film title, columns by region.
starWars2 <- matrix(
  c(phantom_menace, attack_clones, revenge_sith),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(titles2, region)
)

Part F

# Stack the two three-film matrices row-wise into a single six-film matrix.
allStarWars <- rbind(starWars, starWars2)
allStarWars
##                              US non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8
## The Phantom Menace      474.500  552.5
## Attack of the Clones    310.700  338.7
## Revenge of the Sith     380.300  468.5

Part G

# Total revenue per region across all six films.
# colSums() always sums over columns; its second positional argument is
# na.rm, not an apply()-style margin, so the stray `2` that used to be passed
# here only switched on NA removal by accident and has been dropped.
# Despite its name, nonUS_revenue holds both column totals; the non-US figure
# is the element named "non-US".
nonUS_revenue <- colSums(allStarWars)
nonUS_revenue
##       US   non-US 
## 2226.279 2087.800

Problem 3

Part A

# Read the College data from the course site and use its first column as the
# row names of the data frame.
college <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE
)
row.names(college) <- college[[1]]

Part B

# View(college)

# The first column was already copied into the row names immediately after
# the data was read, so repeating that assignment here would be a no-op; only
# the now-redundant first column still has to be removed.
college <- college[, -1]
# View(college)

Part C

A

# Print summary statistics for every column of college.
summary(college)
##  Private        Apps           Accept          Enroll       Top10perc    
##  No :212   Min.   :   81   Min.   :   72   Min.   :  35   Min.   : 1.00  
##  Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00  
##            Median : 1558   Median : 1110   Median : 434   Median :23.00  
##            Mean   : 3002   Mean   : 2019   Mean   : 780   Mean   :27.56  
##            3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00  
##            Max.   :48094   Max.   :26330   Max.   :6392   Max.   :96.00  
##    Top25perc      F.Undergrad     P.Undergrad         Outstate    
##  Min.   :  9.0   Min.   :  139   Min.   :    1.0   Min.   : 2340  
##  1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320  
##  Median : 54.0   Median : 1707   Median :  353.0   Median : 9990  
##  Mean   : 55.8   Mean   : 3700   Mean   :  855.3   Mean   :10441  
##  3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925  
##  Max.   :100.0   Max.   :31643   Max.   :21836.0   Max.   :21700  
##    Room.Board       Books           Personal         PhD        
##  Min.   :1780   Min.   :  96.0   Min.   : 250   Min.   :  8.00  
##  1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00  
##  Median :4200   Median : 500.0   Median :1200   Median : 75.00  
##  Mean   :4358   Mean   : 549.4   Mean   :1341   Mean   : 72.66  
##  3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00  
##  Max.   :8124   Max.   :2340.0   Max.   :6800   Max.   :103.00  
##     Terminal       S.F.Ratio      perc.alumni        Expend     
##  Min.   : 24.0   Min.   : 2.50   Min.   : 0.00   Min.   : 3186  
##  1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00   1st Qu.: 6751  
##  Median : 82.0   Median :13.60   Median :21.00   Median : 8377  
##  Mean   : 79.7   Mean   :14.09   Mean   :22.74   Mean   : 9660  
##  3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00   3rd Qu.:10830  
##  Max.   :100.0   Max.   :39.80   Max.   :64.00   Max.   :56233  
##    Grad.Rate     
##  Min.   : 10.00  
##  1st Qu.: 53.00  
##  Median : 65.00  
##  Mean   : 65.46  
##  3rd Qu.: 78.00  
##  Max.   :118.00

B

# Scatterplot matrix of the first ten columns of college.
pairs(college[, 1:10])

C

# Side-by-side boxplots of Outstate, one box per level of Private.
# NOTE(review): relies on ggplot2 being loaded; no library(ggplot2) call is
# visible in this file - confirm.
ggplot(data = college, mapping = aes(y = Outstate, fill = Private)) +
  geom_boxplot() +
  theme_bw()

D

# Build a factor that is "Yes" where Top10perc exceeds 50 and "No" elsewhere.
# which() returns only the positions where the comparison is TRUE, so rows
# with a missing Top10perc keep the default "No".
Elite <- as.factor(
  replace(rep("No", nrow(college)), which(college$Top10perc > 50), "Yes")
)

# Attach the factor to the data frame as a new column named Elite.
college <- data.frame(college, Elite = Elite)

# Count of schools at each level.
summary(Elite)
##  No Yes 
## 699  78

# Side-by-side boxplots of Outstate, one box per level of Elite.
ggplot(data = college, mapping = aes(y = Outstate, fill = Elite)) +
  geom_boxplot() +
  theme_bw()