Data source: the Auto MPG data set from the UCI Machine Learning Repository (University of California, Irvine). URL: https://archive.ics.uci.edu/ml/datasets/Auto+MPG

## Loading required package: bitops
# Load the data set.
# Download the raw Auto MPG file as one character string. getURL() is called
# through the RCurl namespace so this step works even when library(RCurl) has
# not been attached in the current session.
auto_mpg_data <- RCurl::getURL("https://raw.githubusercontent.com/jasonjgy2000/IS607/ee3ff28ee83ad2f9beecfcc56b58fec367517e06/Assignments/Week%203/auto-mpg.data")

# Parse the whitespace-delimited text; the file has no header row. Passing the
# string through `text =` lets read.table() open and close its own connection
# (a bare textConnection() handed to read.table() is never closed).
# NOTE(review): the horsepower column (V4) contains "?" placeholders, e.g. in
# row 33, so it is not read as numeric. Adding na.strings = "?" would fix
# that, but it changes the column type and the printed output -- confirm
# before enabling.
data_set <- read.table(text = auto_mpg_data, header = FALSE)

# View the first 10 rows of the data frame
data_set[1:10, ]
##    V1 V2  V3    V4   V5   V6 V7 V8                        V9
## 1  18  8 307 130.0 3504 12.0 70  1 chevrolet chevelle malibu
## 2  15  8 350 165.0 3693 11.5 70  1         buick skylark 320
## 3  18  8 318 150.0 3436 11.0 70  1        plymouth satellite
## 4  16  8 304 150.0 3433 12.0 70  1             amc rebel sst
## 5  17  8 302 140.0 3449 10.5 70  1               ford torino
## 6  15  8 429 198.0 4341 10.0 70  1          ford galaxie 500
## 7  14  8 454 220.0 4354  9.0 70  1          chevrolet impala
## 8  14  8 440 215.0 4312  8.5 70  1         plymouth fury iii
## 9  14  8 455 225.0 4425 10.0 70  1          pontiac catalina
## 10 15  8 390 190.0 3850  8.5 70  1        amc ambassador dpl
# Give the nine columns descriptive names.
data_set <- setNames(
  data_set,
  c(
    "mpg", "cylinders", "displacement", "horsepower", "weight",
    "acceleration", "model year", "origin", "car name"
  )
)

# Show rows 1 through 8 of the renamed data frame
data_set[seq_len(8), ]
##   mpg cylinders displacement horsepower weight acceleration model year
## 1  18         8          307      130.0   3504         12.0         70
## 2  15         8          350      165.0   3693         11.5         70
## 3  18         8          318      150.0   3436         11.0         70
## 4  16         8          304      150.0   3433         12.0         70
## 5  17         8          302      140.0   3449         10.5         70
## 6  15         8          429      198.0   4341         10.0         70
## 7  14         8          454      220.0   4354          9.0         70
## 8  14         8          440      215.0   4312          8.5         70
##   origin                  car name
## 1      1 chevrolet chevelle malibu
## 2      1         buick skylark 320
## 3      1        plymouth satellite
## 4      1             amc rebel sst
## 5      1               ford torino
## 6      1          ford galaxie 500
## 7      1          chevrolet impala
## 8      1         plymouth fury iii
# The "car name" column holds the car's manufacturer followed by its model.
# Split it into two separate columns, `manufacturer` and `model`.
#
# The manufacturer is taken as the first run of non-whitespace characters, so
# a hyphenated make such as "mercedes-benz" stays in one piece (a \\w+ match
# stops at the hyphen and pushes "-benz ..." into the model). Leading blanks
# are skipped and \\s* absorbs every separating blank, not only the first one.
rexp <- "^\\s*(\\S+)\\s*(.*)$"
data_set$manufacturer <- sub(rexp, "\\1", data_set$`car name`)
data_set$model <- sub(rexp, "\\2", data_set$`car name`)

# View the first 3 rows of the data frame
data_set[1:3, ]
##   mpg cylinders displacement horsepower weight acceleration model year
## 1  18         8          307      130.0   3504         12.0         70
## 2  15         8          350      165.0   3693         11.5         70
## 3  18         8          318      150.0   3436         11.0         70
##   origin                  car name manufacturer           model
## 1      1 chevrolet chevelle malibu    chevrolet chevelle malibu
## 2      1         buick skylark 320        buick     skylark 320
## 3      1        plymouth satellite     plymouth       satellite
# "car name" is no longer needed once manufacturer and model exist, so drop it.
data_set[["car name"]] <- NULL

# Show rows 1 through 3 of the data frame
data_set[seq_len(3), ]
##   mpg cylinders displacement horsepower weight acceleration model year
## 1  18         8          307      130.0   3504         12.0         70
## 2  15         8          350      165.0   3693         11.5         70
## 3  18         8          318      150.0   3436         11.0         70
##   origin manufacturer           model
## 1      1    chevrolet chevelle malibu
## 2      1        buick     skylark 320
## 3      1     plymouth       satellite
# Vehicles with origin 1 were manufactured in North America, 2 in Europe and
# 3 in Asia. Replace each numeric code with its region name.
#
# An explicit code-to-label mapping is used instead of nested ifelse() so that
# an unexpected code becomes NA rather than being silently labelled "Asia".
# The result is kept as a plain character vector.
data_set$origin <- as.character(
  factor(
    data_set$origin,
    levels = c(1, 2, 3),
    labels = c("North America", "Europe", "Asia")
  )
)

# View first 60 rows of the transformed data frame
data_set[1:60, ]
##    mpg cylinders displacement horsepower weight acceleration model year
## 1   18         8        307.0      130.0   3504         12.0         70
## 2   15         8        350.0      165.0   3693         11.5         70
## 3   18         8        318.0      150.0   3436         11.0         70
## 4   16         8        304.0      150.0   3433         12.0         70
## 5   17         8        302.0      140.0   3449         10.5         70
## 6   15         8        429.0      198.0   4341         10.0         70
## 7   14         8        454.0      220.0   4354          9.0         70
## 8   14         8        440.0      215.0   4312          8.5         70
## 9   14         8        455.0      225.0   4425         10.0         70
## 10  15         8        390.0      190.0   3850          8.5         70
## 11  15         8        383.0      170.0   3563         10.0         70
## 12  14         8        340.0      160.0   3609          8.0         70
## 13  15         8        400.0      150.0   3761          9.5         70
## 14  14         8        455.0      225.0   3086         10.0         70
## 15  24         4        113.0      95.00   2372         15.0         70
## 16  22         6        198.0      95.00   2833         15.5         70
## 17  18         6        199.0      97.00   2774         15.5         70
## 18  21         6        200.0      85.00   2587         16.0         70
## 19  27         4         97.0      88.00   2130         14.5         70
## 20  26         4         97.0      46.00   1835         20.5         70
## 21  25         4        110.0      87.00   2672         17.5         70
## 22  24         4        107.0      90.00   2430         14.5         70
## 23  25         4        104.0      95.00   2375         17.5         70
## 24  26         4        121.0      113.0   2234         12.5         70
## 25  21         6        199.0      90.00   2648         15.0         70
## 26  10         8        360.0      215.0   4615         14.0         70
## 27  10         8        307.0      200.0   4376         15.0         70
## 28  11         8        318.0      210.0   4382         13.5         70
## 29   9         8        304.0      193.0   4732         18.5         70
## 30  27         4         97.0      88.00   2130         14.5         71
## 31  28         4        140.0      90.00   2264         15.5         71
## 32  25         4        113.0      95.00   2228         14.0         71
## 33  25         4         98.0          ?   2046         19.0         71
## 34  19         6        232.0      100.0   2634         13.0         71
## 35  16         6        225.0      105.0   3439         15.5         71
## 36  17         6        250.0      100.0   3329         15.5         71
## 37  19         6        250.0      88.00   3302         15.5         71
## 38  18         6        232.0      100.0   3288         15.5         71
## 39  14         8        350.0      165.0   4209         12.0         71
## 40  14         8        400.0      175.0   4464         11.5         71
## 41  14         8        351.0      153.0   4154         13.5         71
## 42  14         8        318.0      150.0   4096         13.0         71
## 43  12         8        383.0      180.0   4955         11.5         71
## 44  13         8        400.0      170.0   4746         12.0         71
## 45  13         8        400.0      175.0   5140         12.0         71
## 46  18         6        258.0      110.0   2962         13.5         71
## 47  22         4        140.0      72.00   2408         19.0         71
## 48  19         6        250.0      100.0   3282         15.0         71
## 49  18         6        250.0      88.00   3139         14.5         71
## 50  23         4        122.0      86.00   2220         14.0         71
## 51  28         4        116.0      90.00   2123         14.0         71
## 52  30         4         79.0      70.00   2074         19.5         71
## 53  30         4         88.0      76.00   2065         14.5         71
## 54  31         4         71.0      65.00   1773         19.0         71
## 55  35         4         72.0      69.00   1613         18.0         71
## 56  27         4         97.0      60.00   1834         19.0         71
## 57  26         4         91.0      70.00   1955         20.5         71
## 58  24         4        113.0      95.00   2278         15.5         72
## 59  25         4         97.5      80.00   2126         17.0         72
## 60  23         4         97.0      54.00   2254         23.5         72
##           origin manufacturer                  model
## 1  North America    chevrolet        chevelle malibu
## 2  North America        buick            skylark 320
## 3  North America     plymouth              satellite
## 4  North America          amc              rebel sst
## 5  North America         ford                 torino
## 6  North America         ford            galaxie 500
## 7  North America    chevrolet                 impala
## 8  North America     plymouth               fury iii
## 9  North America      pontiac               catalina
## 10 North America          amc         ambassador dpl
## 11 North America        dodge          challenger se
## 12 North America     plymouth              'cuda 340
## 13 North America    chevrolet            monte carlo
## 14 North America        buick      estate wagon (sw)
## 15          Asia       toyota         corona mark ii
## 16 North America     plymouth                 duster
## 17 North America          amc                 hornet
## 18 North America         ford               maverick
## 19          Asia       datsun                  pl510
## 20        Europe   volkswagen      1131 deluxe sedan
## 21        Europe      peugeot                    504
## 22        Europe         audi                 100 ls
## 23        Europe         saab                    99e
## 24        Europe          bmw                   2002
## 25 North America          amc                gremlin
## 26 North America         ford                   f250
## 27 North America        chevy                    c20
## 28 North America        dodge                   d200
## 29 North America           hi                  1200d
## 30          Asia       datsun                  pl510
## 31 North America    chevrolet              vega 2300
## 32          Asia       toyota                 corona
## 33 North America         ford                  pinto
## 34 North America          amc                gremlin
## 35 North America     plymouth       satellite custom
## 36 North America    chevrolet        chevelle malibu
## 37 North America         ford             torino 500
## 38 North America          amc                matador
## 39 North America    chevrolet                 impala
## 40 North America      pontiac      catalina brougham
## 41 North America         ford            galaxie 500
## 42 North America     plymouth               fury iii
## 43 North America        dodge            monaco (sw)
## 44 North America         ford    country squire (sw)
## 45 North America      pontiac            safari (sw)
## 46 North America          amc hornet sportabout (sw)
## 47 North America    chevrolet              vega (sw)
## 48 North America      pontiac               firebird
## 49 North America         ford                mustang
## 50 North America      mercury             capri 2000
## 51        Europe         opel                   1900
## 52        Europe      peugeot                    304
## 53        Europe         fiat                   124b
## 54          Asia       toyota           corolla 1200
## 55          Asia       datsun                   1200
## 56        Europe   volkswagen              model 111
## 57 North America     plymouth                cricket
## 58          Asia       toyota         corona hardtop
## 59 North America        dodge           colt hardtop
## 60        Europe   volkswagen                 type 3