HW 1

Problem 1

library(tidyverse)

## -- Attaching packages ---------------- tidyverse 1.2.1 --

## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0

## -- Conflicts ------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Read the raw Auto data; cells containing "?" are read in as NA.
auto1 <- read.csv(file = "Auto.csv", header = TRUE, na.strings = "?")
# Keep only the rows that have no missing values in any column.
auto <- na.omit(auto1)
# Report the number of rows and columns that remain.
dim(auto)

## [1] 392   9

A.-D.

# Inspect the structure of the cleaned data: column names, types and a
# preview of the first values of each column.
str(auto)

## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : int  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : int  3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
##  - attr(*, "na.action")= 'omit' Named int  33 127 331 337 355
##   ..- attr(*, "names")= chr  "33" "127" "331" "337" ...

# Range (minimum and maximum) of each of the four variables that are also
# summarised by mean and standard deviation below.
range(auto$displacement)

## [1]  68 455

range(auto$horsepower)

## [1]  46 230

# The range of weight was missing although weight is summarised everywhere
# else; its output has not been recorded yet and appears on the next knit.
range(auto$weight)

range(auto$acceleration)

## [1]  8.0 24.8

# Mean and standard deviation of the same four variables.
mean(auto$displacement)

## [1] 194.412

sd(auto$displacement)

## [1] 104.644

mean(auto$horsepower)

## [1] 104.4694

sd(auto$horsepower)

## [1] 38.49116

mean(auto$weight)

## [1] 2977.584

sd(auto$weight)

## [1] 849.4026

mean(auto$acceleration)

## [1] 15.54133

sd(auto$acceleration)

## [1] 2.758864

# Remove the 10th through 85th observations (selected by row position) and
# recompute the range, mean and standard deviation on what is left.
subauto <- auto[-(10:85), ]

dim(subauto)

## [1] 316   9

range(subauto$displacement)

## [1]  68 455

range(subauto$horsepower)

## [1]  46 230

range(subauto$weight)

## [1] 1649 4997

range(subauto$acceleration)

## [1]  8.5 24.8

# This call previously used `auto` instead of `subauto`, so it repeated the
# full-data mean (194.412). The corrected value has not been recorded yet;
# re-knit the document to regenerate it.
mean(subauto$displacement)

sd(subauto$displacement)

## [1] 99.67837

mean(subauto$horsepower)

## [1] 100.7215

sd(subauto$horsepower)

## [1] 35.70885

mean(subauto$weight)

## [1] 2935.972

sd(subauto$weight)

## [1] 811.3002

mean(subauto$acceleration)

## [1] 15.7269

sd(subauto$acceleration)

## [1] 2.693721

# Distribution of fuel efficiency across all cars.
hist(auto$mpg)

# Scatterplot with mpg on the x-axis and horsepower on the y-axis.
# (The stray trailing comma, which passed an empty argument, is removed.)
plot(auto$mpg, auto$horsepower)

# One box of mpg values per distinct number of cylinders.
boxplot(mpg ~ cylinders, data = auto)

# F. Looking at the scatterplots, we see that cylinders, horsepower, and weight are each negatively correlated with mpg: the line of best fit slopes downward in every case.  Horse power does not show correlation on the scatter plot.

Problem 2

# Box office Star Wars (in millions!)
# Each vector holds the US figure first and the non-US figure second.
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# Vectors region and titles, used for naming
region <- c("US", "non-US")
# Every title stays on one line: a string literal broken across two lines
# would embed a newline in the name (it printed as "Return of\nthe Jedi").
titles <- c(
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi"
)

# A. Construct a matrix, where rows represent each movie. Name this matrix starWars and output it.
# byrow = TRUE fills the matrix one movie per row: column 1 = US, column 2 = non-US.
movies <- c(new_hope, empire_strikes, return_jedi)
starWars <- matrix(movies, nrow = 3, ncol = 2, byrow = TRUE)
starWars

##         [,1]  [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8

rownames(starWars) <- titles
colnames(starWars) <- region
# Lower-case alias kept so that code referring to `starwars` keeps working.
starwars <- starWars
starWars

##                              US non-US
## A New Hope              460.998  314.4
## The Empire Strikes Back 290.475  247.9
## Return of the Jedi      309.306  165.8

# Worldwide box office per movie: US plus non-US.
wwboxoffice <- rowSums(starWars)
wwboxoffice

##              A New Hope The Empire Strikes Back      Return of the Jedi 
##                 775.398                 538.375                 475.106

# Show the worldwide total as a third column (the result is printed, not stored).
cbind(starWars, wwboxoffice)

##                              US non-US wwboxoffice
## A New Hope              460.998  314.4     775.398
## The Empire Strikes Back 290.475  247.9     538.375
## Return of the Jedi      309.306  165.8     475.106

# Prequels
# As for the first trilogy, each vector is c(US, non-US).
phantom_menace <- c(474.5, 552.5)
attack_clones <- c(310.7, 338.7)
revenge_sith <- c(380.3, 468.5)
titles2 <- c(
  "The Phantom Menace",
  "Attack of the Clones",
  "Revenge of the Sith"
)
movies2 <- c(phantom_menace, attack_clones, revenge_sith)
# Build the matrix one movie per row and attach row/column names in one step.
starWars2 <- matrix(
  movies2,
  nrow = 3,
  byrow = TRUE,
  dimnames = list(titles2, region)
)
starWars2

##                         US non-US
## The Phantom Menace   474.5  552.5
## Attack of the Clones 310.7  338.7
## Revenge of the Sith  380.3  468.5

# Stack both trilogies into a single six-row matrix.
allstarwars <- rbind(starwars, starWars2)

# Total box office per region across all six movies.
colSums(allstarwars)

##       US   non-US 
## 2226.279 2087.800

Problem 3

# Download the College data. The URL is written on a single line: a string
# literal broken across two lines would embed a newline in the address.
college <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE
)
# view() is not base R; presumably it is provided by the tidyverse attached
# at the top of the file -- confirm.
view(college)

# Use the values of the first column as the row names of the data frame ...
rownames(college) <- college[, 1]
view(college)

# ... and then drop that column so it is no longer treated as data.
college <- college[, -1]
view(college)

# Per-column summary of the remaining variables.
summary(college)

##  Private        Apps           Accept          Enroll       Top10perc    
##  No :212   Min.   :   81   Min.   :   72   Min.   :  35   Min.   : 1.00  
##  Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00  
##            Median : 1558   Median : 1110   Median : 434   Median :23.00  
##            Mean   : 3002   Mean   : 2019   Mean   : 780   Mean   :27.56  
##            3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00  
##            Max.   :48094   Max.   :26330   Max.   :6392   Max.   :96.00  
##    Top25perc      F.Undergrad     P.Undergrad         Outstate    
##  Min.   :  9.0   Min.   :  139   Min.   :    1.0   Min.   : 2340  
##  1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320  
##  Median : 54.0   Median : 1707   Median :  353.0   Median : 9990  
##  Mean   : 55.8   Mean   : 3700   Mean   :  855.3   Mean   :10441  
##  3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925  
##  Max.   :100.0   Max.   :31643   Max.   :21836.0   Max.   :21700  
##    Room.Board       Books           Personal         PhD        
##  Min.   :1780   Min.   :  96.0   Min.   : 250   Min.   :  8.00  
##  1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00  
##  Median :4200   Median : 500.0   Median :1200   Median : 75.00  
##  Mean   :4358   Mean   : 549.4   Mean   :1341   Mean   : 72.66  
##  3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00  
##  Max.   :8124   Max.   :2340.0   Max.   :6800   Max.   :103.00  
##     Terminal       S.F.Ratio      perc.alumni        Expend     
##  Min.   : 24.0   Min.   : 2.50   Min.   : 0.00   Min.   : 3186  
##  1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00   1st Qu.: 6751  
##  Median : 82.0   Median :13.60   Median :21.00   Median : 8377  
##  Mean   : 79.7   Mean   :14.09   Mean   :22.74   Mean   : 9660  
##  3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00   3rd Qu.:10830  
##  Max.   :100.0   Max.   :39.80   Max.   :64.00   Max.   :56233  
##    Grad.Rate     
##  Min.   : 10.00  
##  1st Qu.: 53.00  
##  Median : 65.00  
##  Mean   : 65.46  
##  3rd Qu.: 78.00  
##  Max.   :118.00

# Scatterplot matrix of the first ten columns.
pairs(college[, 1:10])

# Side-by-side boxplots of Outstate for each level of Private.
# plot(college$Outstate, college$Private) put the two-level factor on the
# y-axis, which only draws its integer codes as two rows of points; the
# formula interface draws one box per group instead.
boxplot(Outstate ~ Private, data = college)

# Flag a college as "Yes" when its Top10perc value exceeds 50, "No" otherwise.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
# Append the new factor to the data frame as an extra column.
college <- data.frame(college, Elite)

# Number of colleges in each group.
summary(Elite)

##  No Yes 
## 699  78

# Side-by-side boxplots of Outstate for the two Elite groups.
# plot(college$Outstate, Elite) put the factor on the y-axis, which only
# draws its integer codes; the formula interface draws one box per group.
boxplot(Outstate ~ Elite, data = college)

HW 1

Olivia Chu

9/24/2019

Problem 1

Problem 2

Problem 3