Using: the Auto MPG data set from the University of California, Irvine (UCI) Machine Learning Repository. URL: https://archive.ics.uci.edu/ml/datasets/Auto+MPG
## Loading required package: bitops
# Load the data set.
# Download the raw Auto MPG file as one string. getURL() is provided by the
# RCurl package, which is presumably attached earlier in the document.
auto_mpg_data <- getURL("https://raw.githubusercontent.com/jasonjgy2000/IS607/ee3ff28ee83ad2f9beecfcc56b58fec367517e06/Assignments/Week%203/auto-mpg.data")
# Parse the whitespace-delimited text into a data frame.
#  - `text =` lets read.table() open and close its own connection, rather than
#    leaving an unclosed textConnection() behind.
#  - The file marks missing horsepower values with "?"; treating that as NA
#    keeps the column numeric instead of text (output captured before this
#    change still shows the text form, e.g. "130.0" and "?").
#  - stringsAsFactors = FALSE keeps the car names as character on R < 4.0.
data_set <- read.table(
  text = auto_mpg_data,
  header = FALSE,
  na.strings = c("NA", "?"),
  stringsAsFactors = FALSE
)
# Print rows 1 through 10 of the data frame for a quick look at the raw import
data_set[seq_len(10), ]
## V1 V2 V3 V4 V5 V6 V7 V8 V9
## 1 18 8 307 130.0 3504 12.0 70 1 chevrolet chevelle malibu
## 2 15 8 350 165.0 3693 11.5 70 1 buick skylark 320
## 3 18 8 318 150.0 3436 11.0 70 1 plymouth satellite
## 4 16 8 304 150.0 3433 12.0 70 1 amc rebel sst
## 5 17 8 302 140.0 3449 10.5 70 1 ford torino
## 6 15 8 429 198.0 4341 10.0 70 1 ford galaxie 500
## 7 14 8 454 220.0 4354 9.0 70 1 chevrolet impala
## 8 14 8 440 215.0 4312 8.5 70 1 plymouth fury iii
## 9 14 8 455 225.0 4425 10.0 70 1 pontiac catalina
## 10 15 8 390 190.0 3850 8.5 70 1 amc ambassador dpl
# Replace the default V1..V9 headers with descriptive column names
colnames(data_set) <- c(
  "mpg",
  "cylinders",
  "displacement",
  "horsepower",
  "weight",
  "acceleration",
  "model year",
  "origin",
  "car name"
)
# Print rows 1 through 8 to confirm the new column names
data_set[seq_len(8), ]
## mpg cylinders displacement horsepower weight acceleration model year
## 1 18 8 307 130.0 3504 12.0 70
## 2 15 8 350 165.0 3693 11.5 70
## 3 18 8 318 150.0 3436 11.0 70
## 4 16 8 304 150.0 3433 12.0 70
## 5 17 8 302 140.0 3449 10.5 70
## 6 15 8 429 198.0 4341 10.0 70
## 7 14 8 454 220.0 4354 9.0 70
## 8 14 8 440 215.0 4312 8.5 70
## origin car name
## 1 1 chevrolet chevelle malibu
## 2 1 buick skylark 320
## 3 1 plymouth satellite
## 4 1 amc rebel sst
## 5 1 ford torino
## 6 1 ford galaxie 500
## 7 1 chevrolet impala
## 8 1 plymouth fury iii
# The "car name" column holds the car's manufacturer followed by its model.
# The next step is to separate the two and place them in their own columns.
# The manufacturer is the first whitespace-delimited token. Matching it with
# \S+ rather than \w+ keeps a hyphenated make (e.g. "mercedes-benz") in one
# piece instead of cutting it at the hyphen, and \s* tolerates leading or
# repeated blanks so neither column picks up stray spaces at the front.
rexp <- "^\\s*(\\S+)\\s*(.*)$"
data_set$manufacturer <- sub(rexp, "\\1", data_set$`car name`)
data_set$model <- sub(rexp, "\\2", data_set$`car name`)
# Print rows 1 through 3 to check the new manufacturer and model columns
data_set[seq_len(3), ]
## mpg cylinders displacement horsepower weight acceleration model year
## 1 18 8 307 130.0 3504 12.0 70
## 2 15 8 350 165.0 3693 11.5 70
## 3 18 8 318 150.0 3436 11.0 70
## origin car name manufacturer model
## 1 1 chevrolet chevelle malibu chevrolet chevelle malibu
## 2 1 buick skylark 320 buick skylark 320
## 3 1 plymouth satellite plymouth satellite
# The combined "car name" column is now redundant, so drop it from the data frame.
data_set[["car name"]] <- NULL
# Print rows 1 through 3 to confirm that "car name" has been removed
data_set[seq_len(3), ]
## mpg cylinders displacement horsepower weight acceleration model year
## 1 18 8 307 130.0 3504 12.0 70
## 2 15 8 350 165.0 3693 11.5 70
## 3 18 8 318 150.0 3436 11.0 70
## origin manufacturer model
## 1 1 chevrolet chevelle malibu
## 2 1 buick skylark 320
## 3 1 plymouth satellite
# Vehicles with origin code 1 were manufactured in North America, 2 in Europe
# and 3 in Asia. The next step is to replace the numeric codes with those
# region names.
# match() maps only the three known codes; any other value becomes NA instead
# of being silently labelled "Asia" by a catch-all else branch.
data_set$origin <- c("North America", "Europe", "Asia")[
  match(data_set$origin, c(1, 2, 3))
]
# Print rows 1 through 60 of the transformed data frame
data_set[seq_len(60), ]
## mpg cylinders displacement horsepower weight acceleration model year
## 1 18 8 307.0 130.0 3504 12.0 70
## 2 15 8 350.0 165.0 3693 11.5 70
## 3 18 8 318.0 150.0 3436 11.0 70
## 4 16 8 304.0 150.0 3433 12.0 70
## 5 17 8 302.0 140.0 3449 10.5 70
## 6 15 8 429.0 198.0 4341 10.0 70
## 7 14 8 454.0 220.0 4354 9.0 70
## 8 14 8 440.0 215.0 4312 8.5 70
## 9 14 8 455.0 225.0 4425 10.0 70
## 10 15 8 390.0 190.0 3850 8.5 70
## 11 15 8 383.0 170.0 3563 10.0 70
## 12 14 8 340.0 160.0 3609 8.0 70
## 13 15 8 400.0 150.0 3761 9.5 70
## 14 14 8 455.0 225.0 3086 10.0 70
## 15 24 4 113.0 95.00 2372 15.0 70
## 16 22 6 198.0 95.00 2833 15.5 70
## 17 18 6 199.0 97.00 2774 15.5 70
## 18 21 6 200.0 85.00 2587 16.0 70
## 19 27 4 97.0 88.00 2130 14.5 70
## 20 26 4 97.0 46.00 1835 20.5 70
## 21 25 4 110.0 87.00 2672 17.5 70
## 22 24 4 107.0 90.00 2430 14.5 70
## 23 25 4 104.0 95.00 2375 17.5 70
## 24 26 4 121.0 113.0 2234 12.5 70
## 25 21 6 199.0 90.00 2648 15.0 70
## 26 10 8 360.0 215.0 4615 14.0 70
## 27 10 8 307.0 200.0 4376 15.0 70
## 28 11 8 318.0 210.0 4382 13.5 70
## 29 9 8 304.0 193.0 4732 18.5 70
## 30 27 4 97.0 88.00 2130 14.5 71
## 31 28 4 140.0 90.00 2264 15.5 71
## 32 25 4 113.0 95.00 2228 14.0 71
## 33 25 4 98.0 ? 2046 19.0 71
## 34 19 6 232.0 100.0 2634 13.0 71
## 35 16 6 225.0 105.0 3439 15.5 71
## 36 17 6 250.0 100.0 3329 15.5 71
## 37 19 6 250.0 88.00 3302 15.5 71
## 38 18 6 232.0 100.0 3288 15.5 71
## 39 14 8 350.0 165.0 4209 12.0 71
## 40 14 8 400.0 175.0 4464 11.5 71
## 41 14 8 351.0 153.0 4154 13.5 71
## 42 14 8 318.0 150.0 4096 13.0 71
## 43 12 8 383.0 180.0 4955 11.5 71
## 44 13 8 400.0 170.0 4746 12.0 71
## 45 13 8 400.0 175.0 5140 12.0 71
## 46 18 6 258.0 110.0 2962 13.5 71
## 47 22 4 140.0 72.00 2408 19.0 71
## 48 19 6 250.0 100.0 3282 15.0 71
## 49 18 6 250.0 88.00 3139 14.5 71
## 50 23 4 122.0 86.00 2220 14.0 71
## 51 28 4 116.0 90.00 2123 14.0 71
## 52 30 4 79.0 70.00 2074 19.5 71
## 53 30 4 88.0 76.00 2065 14.5 71
## 54 31 4 71.0 65.00 1773 19.0 71
## 55 35 4 72.0 69.00 1613 18.0 71
## 56 27 4 97.0 60.00 1834 19.0 71
## 57 26 4 91.0 70.00 1955 20.5 71
## 58 24 4 113.0 95.00 2278 15.5 72
## 59 25 4 97.5 80.00 2126 17.0 72
## 60 23 4 97.0 54.00 2254 23.5 72
## origin manufacturer model
## 1 North America chevrolet chevelle malibu
## 2 North America buick skylark 320
## 3 North America plymouth satellite
## 4 North America amc rebel sst
## 5 North America ford torino
## 6 North America ford galaxie 500
## 7 North America chevrolet impala
## 8 North America plymouth fury iii
## 9 North America pontiac catalina
## 10 North America amc ambassador dpl
## 11 North America dodge challenger se
## 12 North America plymouth 'cuda 340
## 13 North America chevrolet monte carlo
## 14 North America buick estate wagon (sw)
## 15 Asia toyota corona mark ii
## 16 North America plymouth duster
## 17 North America amc hornet
## 18 North America ford maverick
## 19 Asia datsun pl510
## 20 Europe volkswagen 1131 deluxe sedan
## 21 Europe peugeot 504
## 22 Europe audi 100 ls
## 23 Europe saab 99e
## 24 Europe bmw 2002
## 25 North America amc gremlin
## 26 North America ford f250
## 27 North America chevy c20
## 28 North America dodge d200
## 29 North America hi 1200d
## 30 Asia datsun pl510
## 31 North America chevrolet vega 2300
## 32 Asia toyota corona
## 33 North America ford pinto
## 34 North America amc gremlin
## 35 North America plymouth satellite custom
## 36 North America chevrolet chevelle malibu
## 37 North America ford torino 500
## 38 North America amc matador
## 39 North America chevrolet impala
## 40 North America pontiac catalina brougham
## 41 North America ford galaxie 500
## 42 North America plymouth fury iii
## 43 North America dodge monaco (sw)
## 44 North America ford country squire (sw)
## 45 North America pontiac safari (sw)
## 46 North America amc hornet sportabout (sw)
## 47 North America chevrolet vega (sw)
## 48 North America pontiac firebird
## 49 North America ford mustang
## 50 North America mercury capri 2000
## 51 Europe opel 1900
## 52 Europe peugeot 304
## 53 Europe fiat 124b
## 54 Asia toyota corolla 1200
## 55 Asia datsun 1200
## 56 Europe volkswagen model 111
## 57 North America plymouth cricket
## 58 Asia toyota corona hardtop
## 59 North America dodge colt hardtop
## 60 Europe volkswagen type 3