# Load the ISL Auto data set. "?" marks a missing value in the raw file and is
# read as NA. stringsAsFactors = TRUE is set explicitly so that text columns
# are read as factors on every R version (the default became FALSE in R 4.0).
Auto <- read.table(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
# Qualitative variables ----
# Print the compact structure of each column treated as categorical.
invisible(lapply(Auto[c("name", "origin")], str))
## Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
## int [1:397] 1 1 1 1 1 1 1 1 1 1 ...
# Quantitative variables ----
# Same inspection for the numeric measurements, in column order.
invisible(lapply(
  Auto[c(
    "mpg", "cylinders", "displacement", "horsepower",
    "weight", "acceleration", "year"
  )],
  str
))
## num [1:397] 18 15 18 16 17 15 14 14 14 15 ...
## int [1:397] 8 8 8 8 8 8 8 8 8 8 ...
## num [1:397] 307 350 318 304 302 429 454 440 455 390 ...
## num [1:397] 130 165 150 150 140 198 220 215 225 190 ...
## num [1:397] 3504 3693 3436 3433 3449 ...
## num [1:397] 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## int [1:397] 70 70 70 70 70 70 70 70 70 70 ...
# Range, mean and standard deviation of every quantitative variable ----
# horsepower contains missing values ("?" in the raw file is read as NA), so
# its summaries pass na.rm = TRUE; without it each result is NA.

# Range of each variable
range(Auto$mpg)
## [1] 9.0 46.6
range(Auto$cylinders)
## [1] 3 8
range(Auto$displacement)
## [1] 68 455
range(Auto$horsepower, na.rm = TRUE)
# (the recorded output was "NA NA" before na.rm = TRUE; re-run to refresh)
range(Auto$weight)
## [1] 1613 5140
range(Auto$acceleration)
## [1] 8.0 24.8
range(Auto$year)
## [1] 70 82

# Mean of each variable
mean(Auto$mpg)
## [1] 23.51587
mean(Auto$cylinders)
## [1] 5.458438
mean(Auto$displacement)
## [1] 193.5327
mean(Auto$horsepower, na.rm = TRUE)
# (the recorded output was "NA" before na.rm = TRUE; re-run to refresh)
mean(Auto$weight)
## [1] 2970.262
mean(Auto$acceleration)
## [1] 15.55567
mean(Auto$year)
## [1] 75.99496

# Standard deviation of each variable
sd(Auto$mpg)
## [1] 7.825804
sd(Auto$cylinders)
## [1] 1.701577
sd(Auto$displacement)
## [1] 104.3796
sd(Auto$horsepower, na.rm = TRUE)
# (the recorded output was "NA" before na.rm = TRUE; re-run to refresh)
sd(Auto$weight)
## [1] 847.9041
sd(Auto$acceleration)
## [1] 2.749995
sd(Auto$year)
## [1] 3.690005
# Summaries after removing observations 10 through 85 ----
# Negative indexing drops exactly rows 10-85 and keeps everything else. The
# previous selection, c(1:9, 85:392), retained row 85 and discarded the final
# rows of the data (str() earlier in this file reports 397 rows).
# NOTE(review): the intent "remove the 10th through 85th observations" is
# assumed from the shape of the old index -- confirm against the assignment.
AutoC <- Auto[-(10:85), ]

# The outputs recorded previously were computed on the old row selection and
# no longer apply; re-run this section to regenerate them.
# horsepower contains missing values, hence na.rm = TRUE on its summaries.

# Range of each variable
range(AutoC$mpg)
range(AutoC$cylinders)
range(AutoC$displacement)
range(AutoC$horsepower, na.rm = TRUE)
range(AutoC$weight)
range(AutoC$acceleration)
range(AutoC$year)

# Mean of each variable
mean(AutoC$mpg)
mean(AutoC$cylinders)
mean(AutoC$displacement)
mean(AutoC$horsepower, na.rm = TRUE)
mean(AutoC$weight)
mean(AutoC$acceleration)
mean(AutoC$year)

# Standard deviation of each variable
sd(AutoC$mpg)
sd(AutoC$cylinders)
sd(AutoC$displacement)
sd(AutoC$horsepower, na.rm = TRUE)
sd(AutoC$weight)
sd(AutoC$acceleration)
sd(AutoC$year)
# Pairwise scatterplots of selected Auto variables ----
# Acceleration (x) against horsepower (y)
plot(x = Auto$acceleration, y = Auto$horsepower)
##### Weight and Displacement
plot(x = Auto$weight, y = Auto$displacement)
# Cylinders (x) against horsepower (y)
plot(x = Auto$cylinders, y = Auto$horsepower)
# mpg (y) against weight, cylinders and horsepower in turn
plot(x = Auto$weight, y = Auto$mpg)
plot(x = Auto$cylinders, y = Auto$mpg)
plot(x = Auto$horsepower, y = Auto$mpg)
# Star Wars box office: original trilogy ----
# One vector per film holding two figures. They are taken to be ordered
# (US, non-US) to match `region` -- NOTE(review): confirm against the data
# source; the row-wise layout below depends on it.
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# Vectors region and titles, used for naming
region <- c("US", "non-US")
# The third title previously contained an embedded line break.
titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")

# One row per film. byrow = TRUE with nrow = 3 keeps each film's two figures
# on the same row; the earlier nrow = 2 layout mixed figures from different
# films within a row.
starWars <- matrix(
  data = c(new_hope, empire_strikes, return_jedi),
  nrow = 3,
  byrow = TRUE
)
print(starWars)
##         [,1]  [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8

# Same matrix with row and column labels. Column labels keep the existing
# "US" / "Non-US" spelling so downstream column names are unchanged.
starWars <- matrix(
  data = c(new_hope, empire_strikes, return_jedi),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(titles, c("US", "Non-US"))
)
print(starWars)
##                              US Non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8

# Total per film (sum across the two regions). na.rm = TRUE spells out what
# the previous positional argument `1` was being coerced to.
boxoffice <- rowSums(starWars, na.rm = TRUE)
print(boxoffice)
##              A New Hope The Empire Strikes Back      Return of the Jedi
##                 775.398                 538.375                 475.106

# Flatten the matrix (column-major) into a single-column matrix.
cbind(c(starWars))
##         [,1]
## [1,] 460.998
## [2,] 290.475
## [3,] 309.306
## [4,] 314.400
## [5,] 247.900
## [6,] 165.800

# Prequels
phantom_menace <- c(474.5, 552.5)
attack_clones <- c(310.7, 338.7)
revenge_sith <- c(380.3, 468.5)
# "Menace" was previously misspelled "Menance" in the row label.
titles2 <- c("The Phantom Menace", "Attack of The Clones", "Revenge of The Sith")
# Row-wise fill, as above, so each film's figures stay on its own row.
starWars2 <- matrix(
  data = c(phantom_menace, attack_clones, revenge_sith),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(titles2, c("US", "Non-US"))
)
print(starWars2)
##                         US Non-US
## The Phantom Menace   474.5  552.5
## Attack of The Clones 310.7  338.7
## Revenge of The Sith  380.3  468.5

# Stack both trilogies into one six-row matrix.
allStarWars <- rbind(starWars, starWars2)
print(allStarWars)
##                              US Non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8
## The Phantom Menace      474.500  552.5
## Attack of The Clones    310.700  338.7
## Revenge of The Sith     380.300  468.5

# Total per region across all six films.
colSums(allStarWars)
##       US   Non-US
## 2226.279 2087.800
# Read the College data from the working directory and summarise each column.
# stringsAsFactors = TRUE is set explicitly so text columns (e.g. Private) are
# factors on every R version; the default became FALSE in R 4.0, which would
# change the summary below and break the factor-based plots that follow.
college <- read.csv("College.csv", header = TRUE, stringsAsFactors = TRUE)
summary(college)
## X Private Apps
## Abilene Christian University: 1 No :212 Min. : 81
## Adelphi University : 1 Yes:565 1st Qu.: 776
## Adrian College : 1 Median : 1558
## Agnes Scott College : 1 Mean : 3002
## Alaska Pacific University : 1 3rd Qu.: 3624
## Albertson College : 1 Max. :48094
## (Other) :771
## Accept Enroll Top10perc Top25perc
## Min. : 72 Min. : 35 Min. : 1.00 Min. : 9.0
## 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00 1st Qu.: 41.0
## Median : 1110 Median : 434 Median :23.00 Median : 54.0
## Mean : 2019 Mean : 780 Mean :27.56 Mean : 55.8
## 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00 3rd Qu.: 69.0
## Max. :26330 Max. :6392 Max. :96.00 Max. :100.0
##
## F.Undergrad P.Undergrad Outstate Room.Board
## Min. : 139 Min. : 1.0 Min. : 2340 Min. :1780
## 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320 1st Qu.:3597
## Median : 1707 Median : 353.0 Median : 9990 Median :4200
## Mean : 3700 Mean : 855.3 Mean :10441 Mean :4358
## 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925 3rd Qu.:5050
## Max. :31643 Max. :21836.0 Max. :21700 Max. :8124
##
## Books Personal PhD Terminal
## Min. : 96.0 Min. : 250 Min. : 8.00 Min. : 24.0
## 1st Qu.: 470.0 1st Qu.: 850 1st Qu.: 62.00 1st Qu.: 71.0
## Median : 500.0 Median :1200 Median : 75.00 Median : 82.0
## Mean : 549.4 Mean :1341 Mean : 72.66 Mean : 79.7
## 3rd Qu.: 600.0 3rd Qu.:1700 3rd Qu.: 85.00 3rd Qu.: 92.0
## Max. :2340.0 Max. :6800 Max. :103.00 Max. :100.0
##
## S.F.Ratio perc.alumni Expend Grad.Rate
## Min. : 2.50 Min. : 0.00 Min. : 3186 Min. : 10.00
## 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751 1st Qu.: 53.00
## Median :13.60 Median :21.00 Median : 8377 Median : 65.00
## Mean :14.09 Mean :22.74 Mean : 9660 Mean : 65.46
## 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830 3rd Qu.: 78.00
## Max. :39.80 Max. :64.00 Max. :56233 Max. :118.00
##
# Scatterplot matrix of the first ten columns. data.matrix() turns factor and
# character columns into integer codes, so the call works whether text columns
# were read as factors or as character.
# NOTE(review): column 1 (X) holds the college names according to the summary
# output in this file, so its panels only show level codes -- confirm it is
# meant to be included.
pairs(data.matrix(college[, 1:10]))
##### c.
# Side-by-side boxplots of Outstate for each level of Private. plot() draws
# boxplots when the x argument is a factor; with the arguments the other way
# round it draws a scatter of the factor's integer codes instead.
plot(as.factor(college$Private), college$Outstate,
     xlab = "Private", ylab = "Outstate")
##### d.
# Flag colleges where more than 50 (Top10perc) is recorded as "Yes".
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
# Assigning by name keeps a single Elite column if this section is re-run
# (data.frame(college, Elite) would append Elite.1 the second time).
college$Elite <- Elite
summary(Elite)
## No Yes
## 699 78
# Side-by-side boxplots of Outstate for Elite vs. non-Elite colleges.
plot(college$Elite, college$Outstate, xlab = "Elite", ylab = "Outstate")