Setup for Problem 1: I loaded the tidyverse and read in the Auto data set. Then I did a listwise deletion, removing every row that contains a missing value.

library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the Auto data, treating "?" as the missing-value marker, and keep only
# the complete rows (listwise deletion).
Auto <- na.omit(
  read.table(
    file = "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.data",
    header = TRUE,
    na.strings = "?"
  )
)

Problem 1: Auto Data

# Problem 1a: show the storage type of every column in Auto
Auto %>%
  str()
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : num  3504 3693 3436 3433 3449 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : chr  "chevrolet chevelle malibu" "buick skylark 320" "plymouth satellite" "amc rebel sst" ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
##   ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...
# Problem 1b: range of each numeric column, one column per call.
# range() returns c(min, max), so every result below has two rows.
# NOTE(review): newer dplyr releases reportedly deprecate multi-row
# summarise() results in favour of reframe() -- confirm before upgrading.
summarise(Auto, range(mpg))
##   range(mpg)
## 1        9.0
## 2       46.6
summarise(Auto, range(cylinders))
##   range(cylinders)
## 1                3
## 2                8
summarise(Auto, range(displacement))
##   range(displacement)
## 1                  68
## 2                 455
summarise(Auto, range(horsepower))
##   range(horsepower)
## 1                46
## 2               230
summarise(Auto, range(weight))
##   range(weight)
## 1          1613
## 2          5140
summarise(Auto, range(acceleration))
##   range(acceleration)
## 1                 8.0
## 2                24.8
summarise(Auto, range(year))
##   range(year)
## 1          70
## 2          82
summarise(Auto, range(origin))
##   range(origin)
## 1             1
## 2             3
# Problem 1c: mean and standard deviation of each numeric column,
# computed over all rows of Auto
summarise(Auto, mean(mpg), sd(mpg))
##   mean(mpg)  sd(mpg)
## 1  23.44592 7.805007
summarise(Auto, mean(cylinders), sd(cylinders))
##   mean(cylinders) sd(cylinders)
## 1        5.471939      1.705783
summarise(Auto, mean(displacement), sd(displacement))
##   mean(displacement) sd(displacement)
## 1            194.412          104.644
summarise(Auto, mean(horsepower), sd(horsepower))
##   mean(horsepower) sd(horsepower)
## 1         104.4694       38.49116
summarise(Auto, mean(weight), sd(weight))
##   mean(weight) sd(weight)
## 1     2977.584   849.4026
summarise(Auto, mean(acceleration), sd(acceleration))
##   mean(acceleration) sd(acceleration)
## 1           15.54133         2.758864
summarise(Auto, mean(year), sd(year))
##   mean(year) sd(year)
## 1   75.97959 3.683737
summarise(Auto, mean(origin), sd(origin))
##   mean(origin) sd(origin)
## 1     1.576531  0.8055182
# Problem 1d: mean and standard deviation of each numeric column after
# removing observations 10 through 85.
# NOTE(review): assumes the exercise asks to drop the 10th through 85th rows
# inclusive -- confirm against the assignment text. The previous index,
# c(1:10, 85:392), kept rows 10 and 85, so only rows 11-84 were removed.
# Negative indexing drops exactly rows 10..85 and leaves 316 of the 392 rows.
Auto_rowsremoved <- Auto[-(10:85), ]

Auto_rowsremoved %>%
  summarise(mean(mpg), sd(mpg))
##   mean(mpg)  sd(mpg)
## 1  24.40443 7.867283
Auto_rowsremoved %>%
  summarise(mean(cylinders), sd(cylinders))
##   mean(cylinders) sd(cylinders)
## 1        5.373418      1.654179
Auto_rowsremoved %>%
  summarise(mean(displacement), sd(displacement))
##   mean(displacement) sd(displacement)
## 1           187.2405         99.67837
Auto_rowsremoved %>%
  summarise(mean(horsepower), sd(horsepower))
##   mean(horsepower) sd(horsepower)
## 1         100.7215       35.70885
Auto_rowsremoved %>%
  summarise(mean(weight), sd(weight))
##   mean(weight) sd(weight)
## 1     2935.972   811.3002
Auto_rowsremoved %>%
  summarise(mean(acceleration), sd(acceleration))
##   mean(acceleration) sd(acceleration)
## 1            15.7269         2.693721
Auto_rowsremoved %>%
  summarise(mean(year), sd(year))
##   mean(year) sd(year)
## 1   77.14557 3.106217
Auto_rowsremoved %>%
  summarise(mean(origin), sd(origin))
##   mean(origin) sd(origin)
## 1     1.601266    0.81991

In problem 1e, I used scatter plots to look at relationships between variables. I first wanted to know if there was a relationship between time and MPG. The first plot looks at MPG over time and also takes weight into account (shown in color). It looks like there is a relationship between time and MPG: as time increases, MPG increases. The colors of this plot led me to my next question: is there a relationship between MPG and weight? I plotted them next and found that yes, not only is there a relationship between weight and MPG, but MPG is also related to the origin of the car (shown in color). Then, I wanted to know if horsepower has changed over time, so I plotted this relationship and found that there isn’t really any relationship here, but there is more variation in the early 70s than in the early 80s.

# Problem 1e: scatter plots exploring relationships between variables
library(ggplot2)

# mpg over model year, with points shaded by vehicle weight
ggplot(Auto, aes(x = year, y = mpg, color = weight)) +
  geom_point() +
  expand_limits(y = 0)

# mpg against weight, colored by origin. origin is stored as an integer code
# (values 1 to 3 in this data), so it is converted to a factor here: that gives
# one distinct color per origin instead of a continuous gradient.
ggplot(Auto, aes(x = weight, y = mpg, color = factor(origin))) +
  geom_point() +
  expand_limits(y = 0) +
  labs(color = "origin")

# horsepower over model year
ggplot(Auto, aes(x = year, y = horsepower)) +
  geom_point() +
  expand_limits(y = 0)

Problem 1f asks us to find any predictors that are related to mpg. I found that mpg has a positive relationship with year and a negative relationship with weight. The origin of the car is also related to mpg.

Problem 2: College Data

# Problem 2a: read the College data; the first row of the file is the header
college <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/College.csv",
  header = TRUE
)

# Problem 2b: use the first column as row names, then drop that column so
# only the remaining 18 variables are left in the data frame
# View(college)
rownames(college) <- college[[1]]
# View(college)
college <- college[-1]
college %>%
  str()
## 'data.frame':    777 obs. of  18 variables:
##  $ Private    : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Apps       : int  1660 2186 1428 417 193 587 353 1899 1038 582 ...
##  $ Accept     : int  1232 1924 1097 349 146 479 340 1720 839 498 ...
##  $ Enroll     : int  721 512 336 137 55 158 103 489 227 172 ...
##  $ Top10perc  : int  23 16 22 60 16 38 17 37 30 21 ...
##  $ Top25perc  : int  52 29 50 89 44 62 45 68 63 44 ...
##  $ F.Undergrad: int  2885 2683 1036 510 249 678 416 1594 973 799 ...
##  $ P.Undergrad: int  537 1227 99 63 869 41 230 32 306 78 ...
##  $ Outstate   : int  7440 12280 11250 12960 7560 13500 13290 13868 15595 10468 ...
##  $ Room.Board : int  3300 6450 3750 5450 4120 3335 5720 4826 4400 3380 ...
##  $ Books      : int  450 750 400 450 800 500 500 450 300 660 ...
##  $ Personal   : int  2200 1500 1165 875 1500 675 1500 850 500 1800 ...
##  $ PhD        : int  70 29 53 92 76 67 90 89 79 40 ...
##  $ Terminal   : int  78 30 66 97 72 73 93 100 84 41 ...
##  $ S.F.Ratio  : num  18.1 12.2 12.9 7.7 11.9 9.4 11.5 13.7 11.3 11.5 ...
##  $ perc.alumni: int  12 16 30 37 2 11 26 37 23 15 ...
##  $ Expend     : int  7041 10527 8735 19016 10922 9727 8861 11487 11644 8991 ...
##  $ Grad.Rate  : int  60 56 54 59 15 55 63 73 80 52 ...
# Private was read in as character (see above); store it as a factor
college$Private <- factor(college$Private)

# Problem 2c
# (a) Numerical summary of every column
college %>%
  summary()
##  Private        Apps           Accept          Enroll       Top10perc    
##  No :212   Min.   :   81   Min.   :   72   Min.   :  35   Min.   : 1.00  
##  Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00  
##            Median : 1558   Median : 1110   Median : 434   Median :23.00  
##            Mean   : 3002   Mean   : 2019   Mean   : 780   Mean   :27.56  
##            3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00  
##            Max.   :48094   Max.   :26330   Max.   :6392   Max.   :96.00  
##    Top25perc      F.Undergrad     P.Undergrad         Outstate    
##  Min.   :  9.0   Min.   :  139   Min.   :    1.0   Min.   : 2340  
##  1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320  
##  Median : 54.0   Median : 1707   Median :  353.0   Median : 9990  
##  Mean   : 55.8   Mean   : 3700   Mean   :  855.3   Mean   :10441  
##  3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925  
##  Max.   :100.0   Max.   :31643   Max.   :21836.0   Max.   :21700  
##    Room.Board       Books           Personal         PhD        
##  Min.   :1780   Min.   :  96.0   Min.   : 250   Min.   :  8.00  
##  1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00  
##  Median :4200   Median : 500.0   Median :1200   Median : 75.00  
##  Mean   :4358   Mean   : 549.4   Mean   :1341   Mean   : 72.66  
##  3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00  
##  Max.   :8124   Max.   :2340.0   Max.   :6800   Max.   :103.00  
##     Terminal       S.F.Ratio      perc.alumni        Expend     
##  Min.   : 24.0   Min.   : 2.50   Min.   : 0.00   Min.   : 3186  
##  1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00   1st Qu.: 6751  
##  Median : 82.0   Median :13.60   Median :21.00   Median : 8377  
##  Mean   : 79.7   Mean   :14.09   Mean   :22.74   Mean   : 9660  
##  3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00   3rd Qu.:10830  
##  Max.   :100.0   Max.   :39.80   Max.   :64.00   Max.   :56233  
##    Grad.Rate     
##  Min.   : 10.00  
##  1st Qu.: 53.00  
##  Median : 65.00  
##  Mean   : 65.46  
##  3rd Qu.: 78.00  
##  Max.   :118.00
# (b) Scatter plot matrix of the first ten columns
college_firstten <- college[seq_len(10)]
pairs(college_firstten)

# (c) Box plots of Outstate, one box per level of Private
ggplot(data = college, mapping = aes(y = Private, x = Outstate)) +
  geom_boxplot()

# Problem 2d: build a two-level factor, Elite, that is "Yes" for schools whose
# Top10perc exceeds 50 and "No" otherwise, then compare Outstate across it.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
# Assign the column by name instead of data.frame(college, Elite): re-running
# this chunk then overwrites Elite rather than appending a duplicate Elite.1.
college$Elite <- Elite

summary(Elite)
##  No Yes 
## 699  78
library(ggplot2)
# Box plots of Outstate, one box per level of Elite
ggplot(college, aes(x = Outstate, y = Elite)) +
  geom_boxplot()