Objective

The objective of this document is to explore the specifications and features of cars that are available on the market.

Data Source

The data was obtained from Kaggle: https://www.kaggle.com/CooperUnion/cardataset

Input data

# Load the raw car dataset; the relative path is resolved against the
# current working directory.
car_data <- read.csv(file = "car_data.csv", header = TRUE)

Data Inspection

# Preview the first six rows to see the columns and some typical values.
head(x = car_data, n = 6L)
##   Make      Model Year            Engine.Fuel.Type Engine.HP Engine.Cylinders
## 1  BMW 1 Series M 2011 premium unleaded (required)       335                6
## 2  BMW   1 Series 2011 premium unleaded (required)       300                6
## 3  BMW   1 Series 2011 premium unleaded (required)       300                6
## 4  BMW   1 Series 2011 premium unleaded (required)       230                6
## 5  BMW   1 Series 2011 premium unleaded (required)       230                6
## 6  BMW   1 Series 2012 premium unleaded (required)       230                6
##   Transmission.Type    Driven_Wheels Number.of.Doors
## 1            MANUAL rear wheel drive               2
## 2            MANUAL rear wheel drive               2
## 3            MANUAL rear wheel drive               2
## 4            MANUAL rear wheel drive               2
## 5            MANUAL rear wheel drive               2
## 6            MANUAL rear wheel drive               2
##                         Market.Category Vehicle.Size Vehicle.Style highway.MPG
## 1 Factory Tuner,Luxury,High-Performance      Compact         Coupe          26
## 2                    Luxury,Performance      Compact   Convertible          28
## 3               Luxury,High-Performance      Compact         Coupe          28
## 4                    Luxury,Performance      Compact         Coupe          28
## 5                                Luxury      Compact   Convertible          28
## 6                    Luxury,Performance      Compact         Coupe          28
##   city.mpg Popularity  MSRP
## 1       19       3916 46135
## 2       19       3916 40650
## 3       20       3916 36350
## 4       18       3916 29450
## 5       18       3916 34500
## 6       18       3916 31200
# Preview the last six rows as well, to check the end of the file.
tail(x = car_data, n = 6L)
##          Make  Model Year               Engine.Fuel.Type Engine.HP
## 11909   Acura    ZDX 2011    premium unleaded (required)       300
## 11910   Acura    ZDX 2012    premium unleaded (required)       300
## 11911   Acura    ZDX 2012    premium unleaded (required)       300
## 11912   Acura    ZDX 2012    premium unleaded (required)       300
## 11913   Acura    ZDX 2013 premium unleaded (recommended)       300
## 11914 Lincoln Zephyr 2006               regular unleaded       221
##       Engine.Cylinders Transmission.Type     Driven_Wheels Number.of.Doors
## 11909                6         AUTOMATIC   all wheel drive               4
## 11910                6         AUTOMATIC   all wheel drive               4
## 11911                6         AUTOMATIC   all wheel drive               4
## 11912                6         AUTOMATIC   all wheel drive               4
## 11913                6         AUTOMATIC   all wheel drive               4
## 11914                6         AUTOMATIC front wheel drive               4
##                  Market.Category Vehicle.Size Vehicle.Style highway.MPG
## 11909 Crossover,Hatchback,Luxury      Midsize 4dr Hatchback          23
## 11910 Crossover,Hatchback,Luxury      Midsize 4dr Hatchback          23
## 11911 Crossover,Hatchback,Luxury      Midsize 4dr Hatchback          23
## 11912 Crossover,Hatchback,Luxury      Midsize 4dr Hatchback          23
## 11913 Crossover,Hatchback,Luxury      Midsize 4dr Hatchback          23
## 11914                     Luxury      Midsize         Sedan          26
##       city.mpg Popularity  MSRP
## 11909       16        204 50520
## 11910       16        204 46120
## 11911       16        204 56670
## 11912       16        204 50620
## 11913       16        204 50920
## 11914       17         61 28995
# Report the size of the dataset as (number of rows, number of columns).
c(nrow(car_data), ncol(car_data))
## [1] 11914    16

Data Cleansing

Check data types and missing values

# Show the type of every column together with its first few values.
str(object = car_data)
## 'data.frame':    11914 obs. of  16 variables:
##  $ Make             : chr  "BMW" "BMW" "BMW" "BMW" ...
##  $ Model            : chr  "1 Series M" "1 Series" "1 Series" "1 Series" ...
##  $ Year             : int  2011 2011 2011 2011 2011 2012 2012 2012 2012 2013 ...
##  $ Engine.Fuel.Type : chr  "premium unleaded (required)" "premium unleaded (required)" "premium unleaded (required)" "premium unleaded (required)" ...
##  $ Engine.HP        : int  335 300 300 230 230 230 300 300 230 230 ...
##  $ Engine.Cylinders : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ Transmission.Type: chr  "MANUAL" "MANUAL" "MANUAL" "MANUAL" ...
##  $ Driven_Wheels    : chr  "rear wheel drive" "rear wheel drive" "rear wheel drive" "rear wheel drive" ...
##  $ Number.of.Doors  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Market.Category  : chr  "Factory Tuner,Luxury,High-Performance" "Luxury,Performance" "Luxury,High-Performance" "Luxury,Performance" ...
##  $ Vehicle.Size     : chr  "Compact" "Compact" "Compact" "Compact" ...
##  $ Vehicle.Style    : chr  "Coupe" "Convertible" "Coupe" "Coupe" ...
##  $ highway.MPG      : int  26 28 28 28 28 28 26 28 28 27 ...
##  $ city.mpg         : int  19 19 20 18 18 18 17 20 18 18 ...
##  $ Popularity       : int  3916 3916 3916 3916 3916 3916 3916 3916 3916 3916 ...
##  $ MSRP             : int  46135 40650 36350 29450 34500 31200 44100 39300 36900 37200 ...
# Empty strings stand for missing entries in the character columns, so mark
# every cell that equals "" as NA.
is.na(car_data) <- car_data == ""
# Count the missing values in each column.
colSums(x = is.na(car_data))
##              Make             Model              Year  Engine.Fuel.Type 
##                 0                 0                 0                 3 
##         Engine.HP  Engine.Cylinders Transmission.Type     Driven_Wheels 
##                69                30                 0                 0 
##   Number.of.Doors   Market.Category      Vehicle.Size     Vehicle.Style 
##                 6                 0                 0                 0 
##       highway.MPG          city.mpg        Popularity              MSRP 
##                 0                 0                 0                 0

Remove rows with missing values

# Drop every row that contains at least one NA. na.omit() records the removed
# row indices in the "na.action" attribute of the result.
car_data <- na.omit(object = car_data)
# Confirm that no column has missing values left.
colSums(x = is.na(car_data))
##              Make             Model              Year  Engine.Fuel.Type 
##                 0                 0                 0                 0 
##         Engine.HP  Engine.Cylinders Transmission.Type     Driven_Wheels 
##                 0                 0                 0                 0 
##   Number.of.Doors   Market.Category      Vehicle.Size     Vehicle.Style 
##                 0                 0                 0                 0 
##       highway.MPG          city.mpg        Popularity              MSRP 
##                 0                 0                 0                 0

Change data types using base R only (no additional libraries)

# Convert the categorical columns to factors using base R only. The integer
# columns Engine.HP, Engine.Cylinders and Number.of.Doors are converted too,
# so summary() reports a count per distinct value instead of quartiles.
factor_columns <- c(
  "Make", "Model", "Engine.Fuel.Type", "Engine.HP", "Engine.Cylinders",
  "Transmission.Type", "Driven_Wheels", "Number.of.Doors", "Market.Category",
  "Vehicle.Size", "Vehicle.Style"
)
car_data[factor_columns] <- lapply(car_data[factor_columns], as.factor)

Check final data structure

# Verify the column types after the cleaning and factor conversion steps.
str(object = car_data)
## 'data.frame':    11812 obs. of  16 variables:
##  $ Make             : Factor w/ 47 levels "Acura","Alfa Romeo",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Model            : Factor w/ 904 levels "1 Series","1 Series M",..: 2 1 1 1 1 1 1 1 1 1 ...
##  $ Year             : int  2011 2011 2011 2011 2011 2012 2012 2012 2012 2013 ...
##  $ Engine.Fuel.Type : Factor w/ 9 levels "diesel","electric",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ Engine.HP        : Factor w/ 355 levels "55","62","63",..: 219 195 195 139 139 139 195 195 139 139 ...
##  $ Engine.Cylinders : Factor w/ 9 levels "0","3","4","5",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ Transmission.Type: Factor w/ 5 levels "AUTOMATED_MANUAL",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Driven_Wheels    : Factor w/ 4 levels "all wheel drive",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Number.of.Doors  : Factor w/ 3 levels "2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Market.Category  : Factor w/ 71 levels "Crossover","Crossover,Diesel",..: 38 67 64 67 63 67 67 64 63 63 ...
##  $ Vehicle.Size     : Factor w/ 3 levels "Compact","Large",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Vehicle.Style    : Factor w/ 16 levels "2dr Hatchback",..: 9 7 9 9 7 9 7 9 7 7 ...
##  $ highway.MPG      : int  26 28 28 28 28 28 26 28 28 27 ...
##  $ city.mpg         : int  19 19 20 18 18 18 17 20 18 18 ...
##  $ Popularity       : int  3916 3916 3916 3916 3916 3916 3916 3916 3916 3916 ...
##  $ MSRP             : int  46135 40650 36350 29450 34500 31200 44100 39300 36900 37200 ...
##  - attr(*, "na.action")= 'omit' Named int [1:102] 540 541 542 1984 1985 2906 2907 2908 2909 3717 ...
##   ..- attr(*, "names")= chr [1:102] "540" "541" "542" "1984" ...

Data Explanation

# Per-column overview: level counts for factors, quartiles for numeric columns.
summary(object = car_data)
##          Make                     Model            Year     
##  Chevrolet :1115   Silverado 1500    :  156   Min.   :1990  
##  Ford      : 868   Tundra            :  140   1st Qu.:2007  
##  Volkswagen: 805   F-150             :  126   Median :2015  
##  Toyota    : 743   Sierra 1500       :   90   Mean   :2010  
##  Dodge     : 626   Beetle Convertible:   89   3rd Qu.:2016  
##  Nissan    : 548   Tacoma            :   80   Max.   :2017  
##  (Other)   :7107   (Other)           :11131                 
##                                   Engine.Fuel.Type   Engine.HP   
##  regular unleaded                         :7155    200    : 454  
##  premium unleaded (required)              :1991    170    : 351  
##  premium unleaded (recommended)           :1519    210    : 320  
##  flex-fuel (unleaded/E85)                 : 899    240    : 268  
##  diesel                                   : 153    285    : 246  
##  flex-fuel (premium unleaded required/E85):  54    185    : 241  
##  (Other)                                  :  41    (Other):9932  
##  Engine.Cylinders        Transmission.Type           Driven_Wheels 
##  4      :4743     AUTOMATED_MANUAL: 625    all wheel drive  :2336  
##  6      :4470     AUTOMATIC       :8231    four wheel drive :1403  
##  8      :2031     DIRECT_DRIVE    :  15    front wheel drive:4732  
##  12     : 229     MANUAL          :2922    rear wheel drive :3341  
##  5      : 225     UNKNOWN         :  19                            
##  10     :  68                                                      
##  (Other):  46                                                      
##  Number.of.Doors           Market.Category  Vehicle.Size 
##  2:3154          N/A               :3728   Compact:4708  
##  3: 395          Crossover         :1103   Large  :2748  
##  4:8263          Flex Fuel         : 872   Midsize:4356  
##                  Luxury            : 851                 
##                  Luxury,Performance: 673                 
##                  Hatchback         : 614                 
##                  (Other)           :3971                 
##          Vehicle.Style   highway.MPG        city.mpg        Popularity  
##  Sedan          :3013   Min.   : 12.00   Min.   :  7.00   Min.   :   2  
##  4dr SUV        :2480   1st Qu.: 22.00   1st Qu.: 16.00   1st Qu.: 549  
##  Coupe          :1190   Median : 26.00   Median : 18.00   Median :1385  
##  Convertible    : 793   Mean   : 26.32   Mean   : 19.33   Mean   :1554  
##  Crew Cab Pickup: 681   3rd Qu.: 30.00   3rd Qu.: 22.00   3rd Qu.:2009  
##  4dr Hatchback  : 678   Max.   :354.00   Max.   :137.00   Max.   :5657  
##  (Other)        :2977                                                   
##       MSRP        
##  Min.   :   2000  
##  1st Qu.:  20990  
##  Median :  29970  
##  Mean   :  40560  
##  3rd Qu.:  42205  
##  Max.   :2065902  
## 

Focus on Chevrolet Data

In this section I will focus only on Chevrolet, the brand with the most entries in this dataset (1,115 cars). Note that the dataset counts car listings rather than units sold.

# Keep only the Chevrolet rows. Unused factor levels are not dropped, so the
# other makes still appear in summary() with a count of zero.
is_chevrolet <- car_data$Make == "Chevrolet"
car_data_filter <- car_data[is_chevrolet, ]

# Per-column overview of the Chevrolet subset.
summary(object = car_data_filter)
##            Make                         Model          Year     
##  Chevrolet   :1115   Silverado 1500        :156   Min.   :1990  
##  Acura       :   0   Corvette              : 60   1st Qu.:2005  
##  Alfa Romeo  :   0   C/K 1500 Series       : 56   Median :2014  
##  Aston Martin:   0   Colorado              : 54   Mean   :2010  
##  Audi        :   0   Silverado 1500 Classic: 49   3rd Qu.:2016  
##  Bentley     :   0   Sonic                 : 40   Max.   :2017  
##  (Other)     :   0   (Other)               :700                 
##                        Engine.Fuel.Type   Engine.HP   Engine.Cylinders
##  regular unleaded              :740     285    :143   6      :432     
##  flex-fuel (unleaded/E85)      :251     355    : 92   4      :349     
##  premium unleaded (recommended): 85     200    : 84   8      :321     
##  premium unleaded (required)   : 32     138    : 80   0      :  6     
##  electric                      :  6     190    : 44   5      :  5     
##  diesel                        :  1     165    : 32   3      :  2     
##  (Other)                       :  0     (Other):640   (Other):  0     
##         Transmission.Type           Driven_Wheels Number.of.Doors
##  AUTOMATED_MANUAL:  0     all wheel drive  : 51   2:311          
##  AUTOMATIC       :739     four wheel drive :259   3: 73          
##  DIRECT_DRIVE    :  8     front wheel drive:376   4:731          
##  MANUAL          :368     rear wheel drive :429                  
##  UNKNOWN         :  0                                            
##                                                                  
##                                                                  
##             Market.Category  Vehicle.Size             Vehicle.Style
##  N/A                :501    Compact:426   4dr SUV            :205  
##  Flex Fuel          :235    Large  :411   Sedan              :155  
##  Hatchback          : 67    Midsize:278   Crew Cab Pickup    :151  
##  Crossover          : 62                  Extended Cab Pickup:129  
##  High-Performance   : 62                  Regular Cab Pickup :114  
##  Crossover,Flex Fuel: 42                  Coupe              : 90  
##  (Other)            :146                  (Other)            :271  
##   highway.MPG        city.mpg        Popularity        MSRP      
##  Min.   : 15.00   Min.   : 11.00   Min.   :1385   Min.   : 2000  
##  1st Qu.: 21.00   1st Qu.: 15.00   1st Qu.:1385   1st Qu.:18203  
##  Median : 23.00   Median : 17.00   Median :1385   Median :26270  
##  Mean   : 25.67   Mean   : 18.84   Mean   :1385   Mean   :28273  
##  3rd Qu.: 29.00   3rd Qu.: 20.00   3rd Qu.:1385   3rd Qu.:36373  
##  Max.   :109.00   Max.   :128.00   Max.   :1385   Max.   :92395  
## 

Insight:

  1. The Chevrolet model with the most entries in the dataset is the Silverado 1500 (156 entries)
  2. The fuel type used by Chevrolet cars is predominantly regular unleaded (740 of 1,115 cars); electric motors are still rare, with only 6 cars
  3. The most common engine power is 285 HP, which is quite high for a car, and the most common cylinder count is 6, suggesting that Chevrolet tends to use large engines

Next, we look at the distribution of several numerical variables: fuel economy in miles per gallon (MPG) on the highway and in the city, and the Manufacturer's Suggested Retail Price (MSRP).

# Box plot of the Chevrolet prices to show the spread and the outliers.
boxplot(car_data_filter$MSRP)

# Histogram of the same prices. The argument expression is left unchanged
# because hist() uses it for the default title and x-axis label.
hist(x = car_data_filter$MSRP)

Insight

Most Chevrolet cars are priced between roughly 18,000 and 36,000 USD (the interquartile range), while the full range runs from 2,000 to about 92,000 USD

# Distribution of highway fuel economy (MPG) for the Chevrolet subset.
hist(x = car_data_filter$highway.MPG)

# Distribution of city fuel economy (MPG) for the Chevrolet subset.
hist(x = car_data_filter$city.mpg)

Insight

In terms of fuel economy, it is quite clear that driving on the highway is more efficient than driving in the city. In the city, most cars achieve between about 11 and 20 miles per gallon, whereas on the highway most cars achieve around 20-30 miles per gallon, and some reach 40-50 miles per gallon.

Some records show a fuel economy higher than 100 MPG; on closer inspection, these all belong to an electric car (the Chevrolet Spark EV). These figures are presumably an electric-equivalent rating (MPGe) rather than true gasoline MPG. I personally would not recommend recording the value this way: a fully electric car does not use any gasoline at all, so it would be better to record it as 0.

# List the Chevrolet rows whose highway fuel economy exceeds 100 MPG.
subset(car_data_filter, highway.MPG > 100)
##           Make    Model Year Engine.Fuel.Type Engine.HP Engine.Cylinders
## 9868 Chevrolet Spark EV 2014         electric       140                0
## 9869 Chevrolet Spark EV 2014         electric       140                0
## 9870 Chevrolet Spark EV 2015         electric       140                0
## 9871 Chevrolet Spark EV 2015         electric       140                0
## 9872 Chevrolet Spark EV 2016         electric       140                0
## 9873 Chevrolet Spark EV 2016         electric       140                0
##      Transmission.Type     Driven_Wheels Number.of.Doors Market.Category
## 9868      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9869      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9870      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9871      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9872      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9873      DIRECT_DRIVE front wheel drive               4       Hatchback
##      Vehicle.Size Vehicle.Style highway.MPG city.mpg Popularity  MSRP
## 9868      Compact 4dr Hatchback         109      128       1385 26685
## 9869      Compact 4dr Hatchback         109      128       1385 27010
## 9870      Compact 4dr Hatchback         109      128       1385 25170
## 9871      Compact 4dr Hatchback         109      128       1385 25560
## 9872      Compact 4dr Hatchback         109      128       1385 25510
## 9873      Compact 4dr Hatchback         109      128       1385 25120
# List the Chevrolet rows whose city fuel economy exceeds 100 MPG.
subset(car_data_filter, city.mpg > 100)
##           Make    Model Year Engine.Fuel.Type Engine.HP Engine.Cylinders
## 9868 Chevrolet Spark EV 2014         electric       140                0
## 9869 Chevrolet Spark EV 2014         electric       140                0
## 9870 Chevrolet Spark EV 2015         electric       140                0
## 9871 Chevrolet Spark EV 2015         electric       140                0
## 9872 Chevrolet Spark EV 2016         electric       140                0
## 9873 Chevrolet Spark EV 2016         electric       140                0
##      Transmission.Type     Driven_Wheels Number.of.Doors Market.Category
## 9868      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9869      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9870      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9871      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9872      DIRECT_DRIVE front wheel drive               4       Hatchback
## 9873      DIRECT_DRIVE front wheel drive               4       Hatchback
##      Vehicle.Size Vehicle.Style highway.MPG city.mpg Popularity  MSRP
## 9868      Compact 4dr Hatchback         109      128       1385 26685
## 9869      Compact 4dr Hatchback         109      128       1385 27010
## 9870      Compact 4dr Hatchback         109      128       1385 25170
## 9871      Compact 4dr Hatchback         109      128       1385 25560
## 9872      Compact 4dr Hatchback         109      128       1385 25510
## 9873      Compact 4dr Hatchback         109      128       1385 25120