# Load the UCI "autos" (imports-85) data set.
# NOTE: the object name `cars` shadows the built-in datasets::cars data set.
theUrl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
# The file has no header row, so columns get the default names V1..V26.
# Missing values are encoded as "?"; na.strings = "?" turns them into NA so
# that numeric columns (e.g. V2) are parsed as numbers instead of text.
# stringsAsFactors = TRUE is set explicitly because R >= 4.0.0 defaults it to
# FALSE; without it the text columns stay character and summary() no longer
# reports the per-level counts shown in the output below.
cars <- read.table(
  theUrl,
  header = FALSE,
  sep = ",",
  na.strings = "?",
  stringsAsFactors = TRUE
)
# Preview the first six rows.
head(cars)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
## 1 3 NA alfa-romero gas std two convertible rwd front 88.6 168.8 64.1
## 2 3 NA alfa-romero gas std two convertible rwd front 88.6 168.8 64.1
## 3 1 NA alfa-romero gas std two hatchback rwd front 94.5 171.2 65.5
## 4 2 164 audi gas std four sedan fwd front 99.8 176.6 66.2
## 5 2 164 audi gas std four sedan 4wd front 99.4 176.6 66.4
## 6 2 NA audi gas std two sedan fwd front 99.8 177.3 66.3
## V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26
## 1 48.8 2548 dohc four 130 mpfi 3.47 2.68 9.0 111 5000 21 27 13495
## 2 48.8 2548 dohc four 130 mpfi 3.47 2.68 9.0 111 5000 21 27 16500
## 3 52.4 2823 ohcv six 152 mpfi 2.68 3.47 9.0 154 5000 19 26 16500
## 4 54.3 2337 ohc four 109 mpfi 3.19 3.40 10.0 102 5500 24 30 13950
## 5 54.3 2824 ohc five 136 mpfi 3.19 3.40 8.0 115 5500 18 22 17450
## 6 53.1 2507 ohc five 136 mpfi 3.19 3.40 8.5 110 5500 19 25 15250
# Column-by-column summary of the full raw data set.
summary(object = cars)
## V1 V2 V3 V4 V5
## Min. :-2.0000 Min. : 65 toyota : 32 diesel: 20 std :168
## 1st Qu.: 0.0000 1st Qu.: 94 nissan : 18 gas :185 turbo: 37
## Median : 1.0000 Median :115 mazda : 17
## Mean : 0.8341 Mean :122 honda : 13
## 3rd Qu.: 2.0000 3rd Qu.:150 mitsubishi: 13
## Max. : 3.0000 Max. :256 subaru : 12
## NA's :41 (Other) :100
## V6 V7 V8 V9 V10
## four:114 convertible: 6 4wd: 9 front:202 Min. : 86.60
## two : 89 hardtop : 8 fwd:120 rear : 3 1st Qu.: 94.50
## NA's: 2 hatchback :70 rwd: 76 Median : 97.00
## sedan :96 Mean : 98.76
## wagon :25 3rd Qu.:102.40
## Max. :120.90
##
## V11 V12 V13 V14
## Min. :141.1 Min. :60.30 Min. :47.80 Min. :1488
## 1st Qu.:166.3 1st Qu.:64.10 1st Qu.:52.00 1st Qu.:2145
## Median :173.2 Median :65.50 Median :54.10 Median :2414
## Mean :174.0 Mean :65.91 Mean :53.72 Mean :2556
## 3rd Qu.:183.1 3rd Qu.:66.90 3rd Qu.:55.50 3rd Qu.:2935
## Max. :208.1 Max. :72.30 Max. :59.80 Max. :4066
##
## V15 V16 V17 V18 V19
## dohc : 12 eight : 5 Min. : 61.0 mpfi :94 Min. :2.54
## dohcv: 1 five : 11 1st Qu.: 97.0 2bbl :66 1st Qu.:3.15
## l : 12 four :159 Median :120.0 idi :20 Median :3.31
## ohc :148 six : 24 Mean :126.9 1bbl :11 Mean :3.33
## ohcf : 15 three : 1 3rd Qu.:141.0 spdi : 9 3rd Qu.:3.59
## ohcv : 13 twelve: 1 Max. :326.0 4bbl : 3 Max. :3.94
## rotor: 4 two : 4 (Other): 2 NA's :4
## V20 V21 V22 V23
## Min. :2.070 Min. : 7.00 Min. : 48.0 Min. :4150
## 1st Qu.:3.110 1st Qu.: 8.60 1st Qu.: 70.0 1st Qu.:4800
## Median :3.290 Median : 9.00 Median : 95.0 Median :5200
## Mean :3.255 Mean :10.14 Mean :104.3 Mean :5125
## 3rd Qu.:3.410 3rd Qu.: 9.40 3rd Qu.:116.0 3rd Qu.:5500
## Max. :4.170 Max. :23.00 Max. :288.0 Max. :6600
## NA's :4 NA's :2 NA's :2
## V24 V25 V26
## Min. :13.00 Min. :16.00 Min. : 5118
## 1st Qu.:19.00 1st Qu.:25.00 1st Qu.: 7775
## Median :24.00 Median :30.00 Median :10295
## Mean :25.22 Mean :30.75 Mean :13207
## 3rd Qu.:30.00 3rd Qu.:34.00 3rd Qu.:16500
## Max. :49.00 Max. :54.00 Max. :45400
## NA's :4
# Keep only the first five columns (V1-V5), as the assignment requires.
cars_new <- cars[, seq_len(5L)]
# Preview the first six rows of the reduced data.
head(cars_new, n = 6L)
## V1 V2 V3 V4 V5
## 1 3 NA alfa-romero gas std
## 2 3 NA alfa-romero gas std
## 3 1 NA alfa-romero gas std
## 4 2 164 audi gas std
## 5 2 164 audi gas std
## 6 2 NA audi gas std
# cars_new is already a data.frame (read.table() returns one and column
# subsetting with `[` keeps it that way), so no conversion is needed; work on
# a copy so cars_new keeps its original V1..V5 column names.
dfcars <- cars_new
# Replace the default V1..V5 names with descriptive column names.
colnames(dfcars) <- c(
  "symboling",
  "normalized_losses",
  "Make",
  "Fuel_Type",
  "Aspiration"
)
# Preview the first six rows of the renamed data frame.
head(dfcars)
## symboling normalized_losses Make Fuel_Type Aspiration
## 1 3 NA alfa-romero gas std
## 2 3 NA alfa-romero gas std
## 3 1 NA alfa-romero gas std
## 4 2 164 audi gas std
## 5 2 164 audi gas std
## 6 2 NA audi gas std
# Column-by-column summary of the five renamed columns.
summary(object = dfcars)
## symboling normalized_losses Make Fuel_Type
## Min. :-2.0000 Min. : 65 toyota : 32 diesel: 20
## 1st Qu.: 0.0000 1st Qu.: 94 nissan : 18 gas :185
## Median : 1.0000 Median :115 mazda : 17
## Mean : 0.8341 Mean :122 honda : 13
## 3rd Qu.: 2.0000 3rd Qu.:150 mitsubishi: 13
## Max. : 3.0000 Max. :256 subaru : 12
## NA's :41 (Other) :100
## Aspiration
## std :168
## turbo: 37
##
##
##
##
##
# Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.