#Problem 1A
The name and origin variables are qualitative. The rest are quantitative.
#1B
# Observed minimum and maximum of each quantitative column of auto2.
range(auto2[["cylinders"]])
## [1] 3 8
range(auto2[["displacement"]])
## [1] 68 455
range(auto2[["horsepower"]])
## [1] 46 230
range(auto2[["mpg"]])
## [1] 9.0 46.6
range(auto2[["weight"]])
## [1] 1613 5140
range(auto2[["acceleration"]])
## [1] 8.0 24.8
range(auto2[["year"]])
## [1] 70 82
cyl:3-8 disp: 68-455 hp: 46-230 mpg:9.0-46.6 weight:1613-5140 acc:8.0-24.8 year:70-82
#1C mean
# Arithmetic mean of each quantitative column of auto2.
mean(auto2[["cylinders"]])
## [1] 5.471939
mean(auto2[["displacement"]])
## [1] 194.412
mean(auto2[["horsepower"]])
## [1] 104.4694
mean(auto2[["mpg"]])
## [1] 23.44592
mean(auto2[["weight"]])
## [1] 2977.584
mean(auto2[["acceleration"]])
## [1] 15.54133
mean(auto2[["year"]])
## [1] 75.97959
cyl:5.4719 disp: 194.412 hp: 104.4694 mpg:23.44592 weight:2977.584 acc:15.54133 year:75.97959
#1C SD
# Sample standard deviation of each quantitative column of auto2.
sd(auto2[["cylinders"]])
## [1] 1.705783
sd(auto2[["displacement"]])
## [1] 104.644
sd(auto2[["horsepower"]])
## [1] 38.49116
sd(auto2[["mpg"]])
## [1] 7.805007
sd(auto2[["weight"]])
## [1] 849.4026
sd(auto2[["acceleration"]])
## [1] 2.758864
sd(auto2[["year"]])
## [1] 3.683737
cyl:1.705 disp:104.644 hp: 38.49116 mpg:7.805007 weight:849.4026 acc:2.758864 year:3.683737
#1D
# auto3 is auto2 with rows 10 through 85 removed.
auto3 <- auto2[-seq(10, 85), ]
#1D range
# Observed minimum and maximum of each quantitative column of auto3.
range(auto3[["cylinders"]])
## [1] 3 8
range(auto3[["displacement"]])
## [1] 68 455
range(auto3[["horsepower"]])
## [1] 46 230
range(auto3[["mpg"]])
## [1] 11.0 46.6
range(auto3[["weight"]])
## [1] 1649 4997
range(auto3[["acceleration"]])
## [1] 8.5 24.8
range(auto3[["year"]])
## [1] 70 82
cyl:3-8 disp:68-455 hp:46-230 mpg:11.0-46.6 weight:1649-4997 acc:8.5-24.8 year:70-82
#1D mean
# Arithmetic mean of each quantitative column of auto3.
mean(auto3[["cylinders"]])
## [1] 5.373418
mean(auto3[["displacement"]])
## [1] 187.2405
mean(auto3[["horsepower"]])
## [1] 100.7215
mean(auto3[["mpg"]])
## [1] 24.40443
mean(auto3[["weight"]])
## [1] 2935.972
mean(auto3[["acceleration"]])
## [1] 15.7269
mean(auto3[["year"]])
## [1] 77.14557
cyl:5.37341 disp:187.2405 hp:100.7215 mpg:24.40443 weight:2935.972 acc:15.7269 year:77.14557
#1D SD
# Sample standard deviation of each quantitative column of auto3.
sd(auto3[["cylinders"]])
## [1] 1.654179
sd(auto3[["displacement"]])
## [1] 99.67837
sd(auto3[["horsepower"]])
## [1] 35.70885
sd(auto3[["mpg"]])
## [1] 7.867283
sd(auto3[["weight"]])
## [1] 811.3002
sd(auto3[["acceleration"]])
## [1] 2.693721
sd(auto3[["year"]])
## [1] 3.106217
cyl:1.654179 disp:99.67837 hp:35.70885 mpg:7.867283 weight:811.3002 acc:2.693721 year:3.106217
#1E
# Scatterplots of pairs of quantitative variables. They read from auto2, the
# data frame all the summary statistics above were computed on, instead of a
# separate `auto` object, so the plots and the statistics describe the same
# data.
plot(auto2$weight, auto2$mpg, col = "red")
plot(auto2$year, auto2$mpg, col = "blue")
plot(auto2$displacement, auto2$horsepower, col = "orange")
We can see relationships between different variables, such as how weight affects mpg, or how more displacement is correlated with more horsepower.
#1F
Mpg seems closely related to the weight of the car, as it decreases when the car is heavier. There is also a noticeable relationship between model year and mpg, with newer cars having better mpg than older ones. Plotting a best-fit line can show these relationships.
#2A
# Box-office figures per film, each given as c(US, non-US).
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# byrow = TRUE makes each film's (US, non-US) pair one row. matrix() fills
# column by column by default, which would split a film's two figures across
# different rows and columns.
starWars <- matrix(
  data = c(new_hope, empire_strikes, return_jedi),
  nrow = 3, ncol = 2, byrow = TRUE
)
starWars
## [,1] [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8
#2B
region <- c("US", "non-US")
titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
colnames(starWars) <- region
rownames(starWars) <- titles
starWars
## US non-US
## A New Hope 460.998 314.4
## The Empire Strikes Back 290.475 247.9
## Return of the Jedi 309.306 165.8
#2C
# Total (US + non-US) per film.
sales <- rowSums(starWars)
sales
## A New Hope The Empire Strikes Back Return of the Jedi
## 775.398 538.375 475.106
#2D
cbind(starWars, sales)
## US non-US sales
## A New Hope 460.998 314.4 775.398
## The Empire Strikes Back 290.475 247.9 538.375
## Return of the Jedi 309.306 165.8 475.106
#2E
phantom_menace <- c(474.5, 552.5)
attack_clones <- c(310.7, 338.7)
revenge_sith <- c(380.3, 468.5)
titles2 <- c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith")
# Same row-wise fill as starWars so both matrices share one layout.
starWars2 <- matrix(
  data = c(phantom_menace, attack_clones, revenge_sith),
  nrow = 3, ncol = 2, byrow = TRUE
)
colnames(starWars2) <- region
rownames(starWars2) <- titles2
starWars2
## US non-US
## The Phantom Menace 474.5 552.5
## Attack of the Clones 310.7 338.7
## Revenge of the Sith 380.3 468.5
#2F
# Stack both trilogies; the column layout (US, non-US) is identical.
allstarWars <- rbind(starWars, starWars2)
allstarWars
## US non-US
## A New Hope 460.998 314.4
## The Empire Strikes Back 290.475 247.9
## Return of the Jedi 309.306 165.8
## The Phantom Menace 474.500 552.5
## Attack of the Clones 310.700 338.7
## Revenge of the Sith 380.300 468.5
#2G
# Total per region across all six films.
colSums(allstarWars)
## US non-US
## 2226.279 2087.800
#3A
# Read the College data. stringsAsFactors = TRUE keeps text columns as factors
# on R >= 4.0, where the read.csv default became FALSE; the factor-style
# summary of Private and the plots keyed on Private depend on it being a
# factor.
college <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) View(college)
#3B
# Use the first column as the row names, then drop it from the data.
rownames(college) <- college[, 1]
college <- college[, -1]
if (interactive()) View(college)
#3Ca
# Per-column summary of the college data frame; the printed result follows.
summary(college)
## Private Apps Accept Enroll Top10perc
## No :212 Min. : 81 Min. : 72 Min. : 35 Min. : 1.00
## Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00
## Median : 1558 Median : 1110 Median : 434 Median :23.00
## Mean : 3002 Mean : 2019 Mean : 780 Mean :27.56
## 3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00
## Max. :48094 Max. :26330 Max. :6392 Max. :96.00
## Top25perc F.Undergrad P.Undergrad Outstate
## Min. : 9.0 Min. : 139 Min. : 1.0 Min. : 2340
## 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320
## Median : 54.0 Median : 1707 Median : 353.0 Median : 9990
## Mean : 55.8 Mean : 3700 Mean : 855.3 Mean :10441
## 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925
## Max. :100.0 Max. :31643 Max. :21836.0 Max. :21700
## Room.Board Books Personal PhD
## Min. :1780 Min. : 96.0 Min. : 250 Min. : 8.00
## 1st Qu.:3597 1st Qu.: 470.0 1st Qu.: 850 1st Qu.: 62.00
## Median :4200 Median : 500.0 Median :1200 Median : 75.00
## Mean :4358 Mean : 549.4 Mean :1341 Mean : 72.66
## 3rd Qu.:5050 3rd Qu.: 600.0 3rd Qu.:1700 3rd Qu.: 85.00
## Max. :8124 Max. :2340.0 Max. :6800 Max. :103.00
## Terminal S.F.Ratio perc.alumni Expend
## Min. : 24.0 Min. : 2.50 Min. : 0.00 Min. : 3186
## 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751
## Median : 82.0 Median :13.60 Median :21.00 Median : 8377
## Mean : 79.7 Mean :14.09 Mean :22.74 Mean : 9660
## 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830
## Max. :100.0 Max. :39.80 Max. :64.00 Max. :56233
## Grad.Rate
## Min. : 10.00
## 1st Qu.: 53.00
## Median : 65.00
## Mean : 65.46
## 3rd Qu.: 78.00
## Max. :118.00
#3Cb
# Scatterplot matrix of the first ten columns of college.
pairs(college[, seq_len(10)])
#3Cc
# Outstate plotted against Private.
plot(college[["Private"]], college[["Outstate"]], col = "blue")
#3Cd
# Label each row "Yes" when Top10perc exceeds 50 and "No" otherwise, store the
# labels as a factor, and append them to college as the Elite column.
Elite <- as.factor(
  replace(
    rep("No", nrow(college)),
    which(college[["Top10perc"]] > 50),
    "Yes"
  )
)
college <- data.frame(college, Elite = Elite)
summary(Elite)
## No Yes
## 699 78
plot(college[["Elite"]], college[["Outstate"]], col = "purple")