library(readr)

Q.8

# Read the College data. The first column of the file holds the row labels,
# so it is used as row names instead of being kept as a data column.
college <- read.csv("College.csv", row.names = 1)

# NOTE: the original called fix(college) twice. fix() opens an interactive
# spreadsheet editor, which blocks (or fails) when the script is run or
# knitted non-interactively. Use View(college) by hand if you want to browse.

# Per-column summary of the remaining variables.
summary(college)
##    Private               Apps           Accept          Enroll    
##  Length:777         Min.   :   81   Min.   :   72   Min.   :  35  
##  Class :character   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242  
##  Mode  :character   Median : 1558   Median : 1110   Median : 434  
##                     Mean   : 3002   Mean   : 2019   Mean   : 780  
##                     3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902  
##                     Max.   :48094   Max.   :26330   Max.   :6392  
##    Top10perc       Top25perc      F.Undergrad     P.Undergrad     
##  Min.   : 1.00   Min.   :  9.0   Min.   :  139   Min.   :    1.0  
##  1st Qu.:15.00   1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0  
##  Median :23.00   Median : 54.0   Median : 1707   Median :  353.0  
##  Mean   :27.56   Mean   : 55.8   Mean   : 3700   Mean   :  855.3  
##  3rd Qu.:35.00   3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0  
##  Max.   :96.00   Max.   :100.0   Max.   :31643   Max.   :21836.0  
##     Outstate       Room.Board       Books           Personal   
##  Min.   : 2340   Min.   :1780   Min.   :  96.0   Min.   : 250  
##  1st Qu.: 7320   1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850  
##  Median : 9990   Median :4200   Median : 500.0   Median :1200  
##  Mean   :10441   Mean   :4358   Mean   : 549.4   Mean   :1341  
##  3rd Qu.:12925   3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700  
##  Max.   :21700   Max.   :8124   Max.   :2340.0   Max.   :6800  
##       PhD            Terminal       S.F.Ratio      perc.alumni   
##  Min.   :  8.00   Min.   : 24.0   Min.   : 2.50   Min.   : 0.00  
##  1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00  
##  Median : 75.00   Median : 82.0   Median :13.60   Median :21.00  
##  Mean   : 72.66   Mean   : 79.7   Mean   :14.09   Mean   :22.74  
##  3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00  
##  Max.   :103.00   Max.   :100.0   Max.   :39.80   Max.   :64.00  
##      Expend        Grad.Rate     
##  Min.   : 3186   Min.   : 10.00  
##  1st Qu.: 6751   1st Qu.: 53.00  
##  Median : 8377   Median : 65.00  
##  Mean   : 9660   Mean   : 65.46  
##  3rd Qu.:10830   3rd Qu.: 78.00  
##  Max.   :56233   Max.   :118.00
# Store Private as a factor so it is handled as a categorical variable.
college[["Private"]] <- as.factor(college[["Private"]])
# Scatterplot matrix of the first ten columns of the data.
pairs(college[, seq_len(10)])

# Boxplots of Outstate split by Private. Columns are resolved through
# `data =` rather than attach(), so no copy of the data frame is left on the
# search path and no stray global `Private` is created.
boxplot(Outstate ~ Private, data = college, col = "skyblue")

# Flag the colleges whose Top10perc value exceeds 50. The level is "Yes"
# (the original " Yes" carried a leading space, which produced a malformed
# level name that also sorted ahead of "No").
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
college <- data.frame(college, Elite)
summary(college$Elite)
##  No Yes
## 699  78
# Boxplots of Outstate split by the new Elite indicator.
boxplot(Outstate ~ Elite, data = college, col = "lavenderblush3")

After exploring this data, I noticed that not everyone who gets accepted enrolls in the college. Out of the 777 colleges, only 78 are elite. So the chances of getting into a college are high even for applicants who are not elite.

Q.9

# Read the Auto data; a "?" in the raw file marks a missing value.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Auto <- read.csv("Auto.csv", header = TRUE, na.strings = "?")
# Drop every row that contains at least one missing value.
Auto <- na.omit(Auto)

The quantitative predictors in this data set are mpg, cylinders, displacement, horsepower, weight, acceleration, year, and origin. The qualitative predictor is name.

# Keep the first eight columns (everything except the trailing name column)
# for the numeric summaries below. The original attach(Auto) was dropped:
# nothing afterwards refers to Auto's columns by bare name, so it only added
# a stale copy of the data to the search path.
Auto2 <- Auto[, seq_len(8)]

The range of each quantitative predictor is:

# Minimum (row 1) and maximum (row 2) of every column of Auto2.
# vapply() guarantees a 2-row numeric matrix; sapply() would silently change
# its return shape if a column were not numeric.
vapply(Auto2, range, numeric(2))
##       mpg cylinders displacement horsepower weight acceleration year origin
## [1,]  9.0         3           68         46   1613          8.0   70      1
## [2,] 46.6         8          455        230   5140         24.8   82      3

The mean of each quantitative predictor is:

# Mean of every column of Auto2, returned as a named numeric vector.
# vapply() enforces exactly one numeric value per column.
vapply(Auto2, mean, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##    23.445918     5.471939   194.411990   104.469388  2977.584184    15.541327 
##         year       origin 
##    75.979592     1.576531

The standard deviation of each quantitative predictor is:

# Standard deviation of every column of Auto2, as a named numeric vector.
# vapply() enforces exactly one numeric value per column.
vapply(Auto2, sd, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##    7.8050075    1.7057832  104.6440039   38.4911599  849.4025600    2.7588641 
##         year       origin 
##    3.6837365    0.8055182
# Auto3 is Auto2 with the 10th through 85th rows removed.
Auto3 <- Auto2[-seq(10, 85), ]

The range of each quantitative predictor of the new data is:

# Minimum (row 1) and maximum (row 2) of every column of the reduced data.
# vapply() guarantees a 2-row numeric matrix regardless of the input.
vapply(Auto3, range, numeric(2))
##       mpg cylinders displacement horsepower weight acceleration year origin
## [1,] 11.0         3           68         46   1649          8.5   70      1
## [2,] 46.6         8          455        230   4997         24.8   82      3

The mean of each quantitative predictor of the new data is:

# Mean of every column of the reduced data, as a named numeric vector.
# vapply() enforces exactly one numeric value per column.
vapply(Auto3, mean, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##    24.404430     5.373418   187.240506   100.721519  2935.971519    15.726899 
##         year       origin 
##    77.145570     1.601266

The standard deviation of each quantitative predictor of the new data is:

# Standard deviation of every column of the reduced data.
# vapply() enforces exactly one numeric value per column.
vapply(Auto3, sd, numeric(1))
##          mpg    cylinders displacement   horsepower       weight acceleration 
##     7.867283     1.654179    99.678367    35.708853   811.300208     2.693721 
##         year       origin 
##     3.106217     0.819910
# Scatterplot matrix of all columns of Auto2 against each other.
pairs(~ ., data = Auto2)

Q.10

# MASS provides the Boston data frame used throughout this question.
library(MASS)
# Print the dimensions (rows, columns) of the data. The original fix(Boston)
# opened an interactive editor, which blocks (or fails) when the script is
# run or knitted non-interactively; use View(Boston) by hand to browse.
dim(Boston)

The Boston data frame has 506 rows and 14 columns. This data frame contains the following columns:

crim = per capita crime rate by town. zn = proportion of residential land zoned for lots over 25,000 sq.ft. indus = proportion of non-retail business acres per town. chas = Charles River dummy variable (= 1 if tract bounds river; 0 otherwise). nox = nitrogen oxides concentration (parts per 10 million). rm = average number of rooms per dwelling. age = proportion of owner-occupied units built prior to 1940. dis = weighted mean of distances to five Boston employment centers. rad = index of accessibility to radial highways. tax = full-value property-tax rate per $10,000. ptratio = pupil-teacher ratio by town. black = 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town. lstat = lower status of the population (percent). medv = median value of owner-occupied homes in $1000s.

# Put Boston's columns on the search path; statements further down may refer
# to them by bare name.
attach(Boston)
# Scatterplot matrix of every column of Boston against every other column.
pairs(~ ., data = Boston)

The range of each quantitative predictor is:

# Minimum (row 1) and maximum (row 2) of every column of Boston.
# vapply() guarantees a 2-row numeric matrix; integer columns are promoted.
vapply(Boston, range, numeric(2))
##          crim  zn indus chas   nox    rm   age     dis rad tax ptratio  black
## [1,]  0.00632   0  0.46    0 0.385 3.561   2.9  1.1296   1 187    12.6   0.32
## [2,] 88.97620 100 27.74    1 0.871 8.780 100.0 12.1265  24 711    22.0 396.90
##      lstat medv
## [1,]  1.73    5
## [2,] 37.97   50

The mean of each quantitative predictor is:

# Mean of every column of Boston, as a named numeric vector.
# vapply() enforces exactly one numeric value per column.
vapply(Boston, mean, numeric(1))
##         crim           zn        indus         chas          nox           rm 
##   3.61352356  11.36363636  11.13677866   0.06916996   0.55469506   6.28463439 
##          age          dis          rad          tax      ptratio        black 
##  68.57490119   3.79504269   9.54940711 408.23715415  18.45553360 356.67403162 
##        lstat         medv 
##  12.65306324  22.53280632

The median of each quantitative predictor is:

# Median of every column of Boston, as a named numeric vector.
# vapply() enforces exactly one numeric value per column.
vapply(Boston, median, numeric(1))
##      crim        zn     indus      chas       nox        rm       age       dis 
##   0.25651   0.00000   9.69000   0.00000   0.53800   6.20850  77.50000   3.20745 
##       rad       tax   ptratio     black     lstat      medv 
##   5.00000 330.00000  19.05000 391.44000  11.36000  21.20000
# Count the rows whose Charles River dummy (chas) equals 1. The column is
# referenced through Boston$ so the result does not depend on attach().
sum(Boston$chas == 1)
## [1] 35

35 suburbs in this data set bound the Charles river.

# Median pupil-teacher ratio. The column is referenced through Boston$ so the
# result does not depend on attach().
median(Boston$ptratio)
## [1] 19.05

The median pupil-teacher ratio among the towns in this data set is 19.05.

# Per-column minimum and median of Boston, shown side by side in one table
# (one row per column of Boston). vapply() enforces one numeric value per
# column, so both vectors are guaranteed to line up.
Minimum <- vapply(Boston, min, numeric(1))
Median <- vapply(Boston, median, numeric(1))
data.frame(Minimum, Median)
##           Minimum    Median
## crim      0.00632   0.25651
## zn        0.00000   0.00000
## indus     0.46000   9.69000
## chas      0.00000   0.00000
## nox       0.38500   0.53800
## rm        3.56100   6.20850
## age       2.90000  77.50000
## dis       1.12960   3.20745
## rad       1.00000   5.00000
## tax     187.00000 330.00000
## ptratio  12.60000  19.05000
## black     0.32000 391.44000
## lstat     1.73000  11.36000
## medv      5.00000  21.20000
# Number of rows whose average rooms per dwelling (rm) exceeds 7. Boston$rm is
# spelled out: a bare `rm` only works after attach() and is easily confused
# with the base function rm().
sum(Boston$rm > 7)
## [1] 64

64 suburbs average more than seven rooms per dwelling.

# Number of rows whose average rooms per dwelling (rm) exceeds 8. Boston$rm is
# spelled out: a bare `rm` only works after attach() and is easily confused
# with the base function rm().
sum(Boston$rm > 8)
## [1] 13

13 suburbs average more than eight rooms per dwelling.