607 Final Project: Are Hybrid vehicles more fuel efficient than Internal Combustion Engine vehicles?

Chi Hang(Philip) Cheung

2024-11-19

Project objectives:

library(tidyverse)
library(RMySQL)
library("RSocrata")
library(dotenv)
library(infer)

Dataset #1: MPG data extracted from the EPA website:

Dataset background: EPA in-house MPG test results on all 2025 car models from all auto manufactuers.

url<- 'https://raw.githubusercontent.com/stormwhale/data-mines/refs/heads/main/Copy%20of%202025%20FE%20Guide%20for%20DOE-release%20dates%20before%2010-17-2024-no-sales%20-10-17-2024public.csv'
df<- read.csv(url)
head(df)
##   Model.Year            Mfr.Name                 Division      Carline
## 1       2025        aston martin Aston Martin Lagonda Ltd      Vantage
## 2       2025                 BMW                      BMW      Z4 M40i
## 3       2025                 BMW                      BMW      Z4 M40i
## 4       2025                 BMW                      BMW Z4 sDrive30i
## 5       2025 Volkswagen Group of                  Bugatti      Mistral
## 6       2025      General Motors                Chevrolet     CORVETTE
##   Verify.Mfr.Cd Index..Model.Type.Index. Eng.Displ X..Cyl Transmission
## 1           ASX                        2       4.0      8     Auto(A8)
## 2           BMX                      352       3.0      6     Auto(S8)
## 3           BMX                      353       3.0      6   Manual(M6)
## 4           BMX                      350       2.0      4     Auto(S8)
## 5           VGA                       48       8.0     16  Auto(AM-S7)
## 6           GMX                       50       6.2      8     Auto(S8)
##   City.FE..Guide....Conventional.Fuel Hwy.FE..Guide....Conventional.Fuel
## 1                                  15                                 22
## 2                                  23                                 29
## 3                                  19                                 26
## 4                                  25                                 33
## 5                                   8                                 12
## 6                                  16                                 25
##   Comb.FE..Guide....Conventional.Fuel City.Unadj.FE...Conventional.Fuel
## 1                                  18                           19.0837
## 2                                  25                           29.0000
## 3                                  22                           23.5000
## 4                                  28                           32.3000
## 5                                   9                            9.5000
## 6                                  19                           19.0902
##   Hwy.Unadj.FE...Conventional.Fuel Comb.Unadj.FE...Conventional.Fuel
## 1                          31.2168                           23.1290
## 2                          42.1000                           33.7219
## 3                          37.4000                           28.2196
## 4                          47.6000                           37.7620
## 5                          16.5000                           11.7416
## 6                          35.0498                           24.0099
##   City.Unrd.Adj.FE...Conventional.Fuel Hwy.Unrd.Adj.FE...Conventional.Fuel
## 1                              15.4128                             22.3919
## 2                              22.6786                             29.4644
## 3                              18.7066                             26.4527
## 4                              24.9954                             32.9095
## 5                               7.9235                             12.2481
## 6                              15.7535                             25.0000
##   Comb.Unrd.Adj.FE...Conventional.Fuel Guzzler. Air.Aspir.Method
## 1                              17.9272                        TC
## 2                              25.3007                        TC
## 3                              21.5458                        TC
## 4                              28.0285                        TC
## 5                               9.4203        G               TC
## 6                              19.1107                      <NA>
##   Air.Aspiration.Method.Desc Trans
## 1               Turbocharged     A
## 2               Turbocharged    SA
## 3               Turbocharged     M
## 4               Turbocharged    SA
## 5               Turbocharged   AMS
## 6        Naturally Aspirated    SA
##                                                          Trans.Desc
## 1                                                         Automatic
## 2                                                    Semi-Automatic
## 3                                                            Manual
## 4                                                    Semi-Automatic
## 5 Automated Manual- Selectable (e.g. Automated Manual with paddles)
## 6                                                    Semi-Automatic
##   Trans..Other X..Gears Lockup.Torque.Converter Trans.Creeper.Gear Drive.Sys
## 1           NA        8                       Y                  N         R
## 2           NA        8                       Y                  N         R
## 3           NA        6                       N                  N         R
## 4           NA        8                       Y                  N         R
## 5           NA        7                       N                  N         A
## 6           NA        8                       Y                  N         R
##            Drive.Desc Max.Ethanol.....Gasoline Max.Biodiesel..
## 1 2-Wheel Drive, Rear                       10              NA
## 2 2-Wheel Drive, Rear                       10              NA
## 3 2-Wheel Drive, Rear                       10              NA
## 4 2-Wheel Drive, Rear                       10              NA
## 5     All Wheel Drive                       15              NA
## 6 2-Wheel Drive, Rear                       10              NA
##   Range1...Model.Type.Driving.Range...Conventional.Fuel
## 1                                                      
## 2                                                      
## 3                                                      
## 4                                                      
## 5                                                      
## 6                                                      
##   Fuel.Usage....Conventional.Fuel     Fuel.Usage.Desc...Conventional.Fuel
## 1                              GP Gasoline (Premium Unleaded Recommended)
## 2                              GP Gasoline (Premium Unleaded Recommended)
## 3                              GP Gasoline (Premium Unleaded Recommended)
## 4                              GP Gasoline (Premium Unleaded Recommended)
## 5                             GPR    Gasoline (Premium Unleaded Required)
## 6                             GPR    Gasoline (Premium Unleaded Required)
##   Fuel.Unit...Conventional.Fuel Fuel.Unit.Desc...Conventional.Fuel
## 1                           MPG                   miles per gallon
## 2                           MPG                   miles per gallon
## 3                           MPG                   miles per gallon
## 4                           MPG                   miles per gallon
## 5                           MPG                   miles per gallon
## 6                           MPG                   miles per gallon
##   Gas.Guzzler.Exempt..Where.Truck...1975.NHTSA.truck.definition.
## 1                                                              N
## 2                                                              N
## 3                                                              N
## 4                                                              N
## 5                                                              N
## 6                                                              N
##   Gas.Guzzler.Exempt.Desc..Where.Truck...1975.NHTSA.truck.definition.
## 1                                                          Not exempt
## 2                                                          Not exempt
## 3                                                          Not exempt
## 4                                                          Not exempt
## 5                                                          Not exempt
## 6                                                          Not exempt
##   X2Dr.Pass.Vol X2Dr.Lugg.Vol X4Dr.Pass.Vol X4Dr.Lugg.Vol Htchbk.Pass.Vol
## 1            NA            NA            NA            NA              NA
## 2            NA            NA            NA            NA              NA
## 3            NA            NA            NA            NA              NA
## 4            NA            NA            NA            NA              NA
## 5            NA            NA            NA            NA              NA
## 6            NA            NA            NA            NA              NA
##   Htchbk.Lugg.Vol Annual.Fuel1.Cost...Conventional.Fuel
## 1              NA                                  3550
## 2              NA                                  2550
## 3              NA                                  2900
## 4              NA                                  2300
## 5              NA                                  7100
## 6              NA                                  3350
##   EPA.Calculated.Annual.Fuel.Cost...Conventional.Fuel........Annual.fuel.cost.error..Please.revise.Verify.
## 1                                                                                                     3600
## 2                                                                                                     2600
## 3                                                                                                     2950
## 4                                                                                                     2300
## 5                                                                                                     7150
## 6                                                                                                     3400
##   City2.FE..Guide....Alternative.Fuel Hwy2.Fuel.FE..Guide....Alternative.Fuel
## 1                                  NA                                      NA
## 2                                  NA                                      NA
## 3                                  NA                                      NA
## 4                                  NA                                      NA
## 5                                  NA                                      NA
## 6                                  NA                                      NA
##   Comb2.Fuel.FE..Guide....Alternative.Fuel City2.Unadj.FE...Alternative.Fuel
## 1                                       NA                                NA
## 2                                       NA                                NA
## 3                                       NA                                NA
## 4                                       NA                                NA
## 5                                       NA                                NA
## 6                                       NA                                NA
##   Hwy2.Unadj.FE...Alternative.Fuel Comb2.Unadj.FE...Alternative.Fuel
## 1                               NA                                NA
## 2                               NA                                NA
## 3                               NA                                NA
## 4                               NA                                NA
## 5                               NA                                NA
## 6                               NA                                NA
##   City2.Unrd.Adj.FE...Alternative.Fuel Hwy2.Unrd.Adj.FE...Alternative.Fuel
## 1                                   NA                                  NA
## 2                                   NA                                  NA
## 3                                   NA                                  NA
## 4                                   NA                                  NA
## 5                                   NA                                  NA
## 6                                   NA                                  NA
##   Cmb2.Unrd.Adj.FE...Alternative.Fuel
## 1                                  NA
## 2                                  NA
## 3                                  NA
## 4                                  NA
## 5                                  NA
## 6                                  NA
##   Range2...Alt.Fuel.Model.Typ.Driving.Range...Alternative.Fuel
## 1                                                             
## 2                                                             
## 3                                                             
## 4                                                             
## 5                                                             
## 6                                                             
##   Fuel2.Usage...Alternative.Fuel Fuel2.Usage.Desc...Alternative.Fuel
## 1                                                                   
## 2                                                                   
## 3                                                                   
## 4                                                                   
## 5                                                                   
## 6                                                                   
##   Fuel2.Unit...Alternative.Fuel Fuel2.Unit.Desc...Alternative.Fuel
## 1                                                                 
## 2                                                                 
## 3                                                                 
## 4                                                                 
## 5                                                                 
## 6                                                                 
##   Fuel2.Annual.Fuel.Cost...Alternative.Fuel City.CO2.Rounded.Adjusted...Fuel2
## 1                                        NA                                NA
## 2                                        NA                                NA
## 3                                        NA                                NA
## 4                                        NA                                NA
## 5                                        NA                                NA
## 6                                        NA                                NA
##   Hwy.CO2.Rounded.Adjusted...Fuel2 Comb.CO2.Rounded.Adjusted...Fuel2
## 1                               NA                                NA
## 2                               NA                                NA
## 3                               NA                                NA
## 4                               NA                                NA
## 5                               NA                                NA
## 6                               NA                                NA
##   Fuel2.EPA.Calculated.Annual.Fuel.Cost...Alternative.Fuel
## 1                                                       NA
## 2                                                       NA
## 3                                                       NA
## 4                                                       NA
## 5                                                       NA
## 6                                                       NA
##   Descriptor...Model.Type..40.Char.or.less. Intake.Valves.Per.Cyl
## 1                                    SIDI;                      2
## 2                                    SIDI;                      2
## 3                                    SIDI;                      2
## 4                                    SIDI;                      2
## 5                                    SIDI;                      2
## 6                                    SIDI;                      1
##   Exhaust.Valves.Per.Cyl Carline.Class Carline.Class.Desc
## 1                      2             1        Two Seaters
## 2                      2             1        Two Seaters
## 3                      2             1        Two Seaters
## 4                      2             1        Two Seaters
## 5                      2             1        Two Seaters
## 6                      1             1        Two Seaters
##   Car.Truck.Category...Cash.for.Clunkers.Bill.             Calc.Approach.Desc
## 1                                          car          Derived 5-cycle label
## 2                                          car          Derived 5-cycle label
## 3                                          car          Derived 5-cycle label
## 4                                          car          Derived 5-cycle label
## 5                                          car          Derived 5-cycle label
## 6                                          car Vehicle Specific 5-cycle label
##   Release.Date EPA.FE.Label.Dataset.ID  X Unique.Label. Label.Recalc. Relabel
## 1     5/1/2024                   34754 NA             N             N        
## 2    3/22/2024                   34509 NA             Y             N        
## 3    3/22/2024                   34510 NA             N             N        
## 4     3/8/2024                   34465 NA             N             N        
## 5    9/20/2024                   35630 NA             Y             N        
## 6    8/28/2024                   35434 NA             N             N        
##   Relabel.Desc Suppressed. Police.Emerg. Comments...Mfr.Eng.Cnfg Cyl.Deact.
## 1                        N             N      Vantage Power Spec          N
## 2                        N             N                                  N
## 3                        N             N                                  N
## 4                        N             N                                  N
## 5                        N             N  Applies to DVVA var: 0          N
## 6                        N             N          ZERV Y2 Engine          Y
##           Cyl.Deact.Desc Var.Valve.Timing.
## 1                                        Y
## 2                                        Y
## 3                                        Y
## 4                                        Y
## 5                                        Y
## 6 Active Fuel Management                 Y
##                                           Var.Valve.Timing.Desc Var.Valve.Lift.
## 1                                 Inlet and exhaust cam phasing               N
## 2              variable valve timing at inlet and outlet valves               Y
## 3              variable valve timing at inlet and outlet valves               Y
## 4              variable valve timing at inlet and outlet valves               Y
## 5 INLET AND OUTLET CONTINUOUSLY VARIABLE / MECHANICAL-HYDRAULIC               N
## 6     Dual equal cam phaser, single cam, continuously variable.               N
##                        Var.Valve.Lift.Desc Energy.Storage.Device.Desc
## 1                                                                    
## 2 variable valve lift only at inlet valves                           
## 3 variable valve lift only at inlet valves                           
## 4 variable valve lift only at inlet valves                           
## 5                                                                    
## 6                                                                    
##   Energy.Storage.Device.If.Other X..Batteries Battery.Type.Desc
## 1                             NA           NA                  
## 2                             NA           NA                  
## 3                             NA           NA                  
## 4                             NA           NA                  
## 5                             NA           NA                  
## 6                             NA           NA                  
##   Battery.Type..If.Other Total.Voltage.for.Battery.Pack.s.
## 1                     NA                                NA
## 2                     NA                                NA
## 3                     NA                                NA
## 4                     NA                                NA
## 5                     NA                                NA
## 6                     NA                                NA
##   Batt.Energy.Capacity..Amp.hrs. Batt.Specific.Energy..Watt.hr.kg.
## 1                             NA                                NA
## 2                             NA                                NA
## 3                             NA                                NA
## 4                             NA                                NA
## 5                             NA                                NA
## 6                             NA                                NA
##   Batt.Charger.Type.Desc Comments X..Capacitors Regen.Braking.Type.Desc
## 1                              NA            NA                        
## 2                              NA            NA                        
## 3                              NA            NA                        
## 4                              NA            NA                        
## 5                              NA            NA                        
## 6                              NA            NA                        
##   Regen.Braking.Type..If.Other Regen.Braking.Wheels.Source..Front..Rear..Both.
## 1                           NA                                                
## 2                           NA                                                
## 3                           NA                                                
## 4                           NA                                                
## 5                           NA                                                
## 6                           NA                                                
##   Driver.Cntrl.Regen.Braking. Fuel.Cell.Desc Usable.H2.Fill.Capacity..kg.
## 1                                         NA                           NA
## 2                                         NA                           NA
## 3                                         NA                           NA
## 4                                         NA                           NA
## 5                                         NA                           NA
## 6                                         NA                           NA
##   Fuel.Cell.Onboard.H2.Capacity..kg. HEV.EV.Comments X..Drive.Motor.Gen
## 1                                 NA                                 NA
## 2                                 NA                                 NA
## 3                                 NA                                 NA
## 4                                 NA                                 NA
## 5                                 NA                                 NA
## 6                                 NA                                 NA
##   Motor.Gen.Type.Desc Motor.Gen.Type..If.Other Rated.Motor.Gen.Power..kW.
## 1                                                                        
## 2                                                                        
## 3                                                                        
## 4                                                                        
## 5                                                                        
## 6                                                                        
##   Fuel.Metering.Type.1.Desc Fuel.Metering.Type.2.Desc Fuel.Metering.Sys.Cd
## 1                        NA                        NA                  GDI
## 2                        NA                        NA                  GDI
## 3                        NA                        NA                  GDI
## 4                        NA                        NA                  GDI
## 5                        NA                        NA                  GDI
## 6                        NA                        NA                  GDI
##            Fuel.Metering.Sys.Desc Fuel.Cell.Vehicle..Y.or.N.
## 1 Spark Ignition Direct Injection                          N
## 2 Spark Ignition Direct Injection                           
## 3 Spark Ignition Direct Injection                           
## 4 Spark Ignition Direct Injection                           
## 5 Spark Ignition Direct Injection                           
## 6 Spark Ignition Direct Injection                           
##   Off.Board.Charge.Capable..Y.or.N. Camless.Valvetrain..Y.or.N.
## 1                                                             N
## 2                                                             N
## 3                                                             N
## 4                                                             N
## 5                                                             N
## 6                                                             N
##             Oil.Viscosity Stop.Start.System..Engine.Management.System..Code
## 1                    0W40                                                 N
## 2                    0W20                                                 Y
## 3                    0W20                                                 Y
## 4                    0W20                                                 Y
## 5 10W60 VW50101 / VW50500                                                 N
## 6                    0W40                                                 N
##   Stop.Start.System..Engine.Management.System...Description
## 1                                                        No
## 2                                                       Yes
## 3                                                       Yes
## 4                                                       Yes
## 5                                                        No
## 6                                                        No
##   Model.Type.Desc..MFR.entered. Charge.Depleting.Calc.Appr.Code..PHEV.only.
## 1                       Vantage                                          NA
## 2                                                                        NA
## 3                                                                        NA
## 4                                                                        NA
## 5                                                                        NA
## 6                                                                        NA
##   Charge.Depleting.Calc.Appr.Desc..PHEV.only.
## 1                                          NA
## 2                                          NA
## 3                                          NA
## 4                                          NA
## 5                                          NA
## 6                                          NA
##   Charge.Sustaining.Calc.Appr.Code..PHEV.only.
## 1                                           NA
## 2                                           NA
## 3                                           NA
## 4                                           NA
## 5                                           NA
## 6                                           NA
##   Charge.Sustaining.Calc.Appr.Desc..PHEV.only. EPA.Calculated.Annual.Fuel.Cost
## 1                                           NA                              NA
## 2                                           NA                              NA
## 3                                           NA                              NA
## 4                                           NA                              NA
## 5                                           NA                              NA
## 6                                           NA                              NA
##   EPA.Calculated.Gas.Guzzler.MPG MFR.Calculated.Gas.Guzzler.MPG
## 1                             NA                           23.3
## 2                             NA                           34.0
## 3                             NA                           28.4
## 4                             NA                           38.0
## 5                             NA                           11.8
## 6                             NA                           24.2
##   EPA.Calculated.Gas.Guzzler.Indicator..Y.or.N. X.1
## 1                                            NA  NA
## 2                                            NA  NA
## 3                                            NA  NA
## 4                                            NA  NA
## 5                                            NA  NA
## 6                                            NA  NA
##   FE.Rating..1.10.rating.on.Label. GHG.Rating..1.10.rating.on.Label.
## 1                                4                                 4
## 2                                5                                 5
## 3                                5                                 5
## 4                                6                                 6
## 5                                1                                 1
## 6                                4                                 4
##   GHG.1.10.rating.on.Ethanol..EPA.Determined. X.1.Smog.Rating.Test.Group
## 1                                          NA               SASXJ04.0AEX
## 2                                          NA               SBMXV03.0B5X
## 3                                          NA               SBMXV03.0B5X
## 4                                          NA               SBMXJ02.0B4X
## 5                                          NA               SVGAV08.0GLB
## 6                                          NA               SGMXV06.2088
##   X.1.Mfr.Smog.Rating..Mfr.Smog.1.10.Rating.on.Label.for.Test.Group.1.
## 1                                                                    5
## 2                                                                    4
## 3                                                                    4
## 4                                                                    6
## 5                                                                    1
## 6                                                                    4
##   X.1.EPA.Smog.Rating..EPA.Smog.1.10.Rating.on.Label.for.Test.Group.1.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   SmartWay1..EPA.derived.SmartWay.rating.for.Test.Group.1.
## 1                                                       NA
## 2                                                       NA
## 3                                                       NA
## 4                                                       NA
## 5                                                       NA
## 6                                                       NA
##   X.2.Smog.Rating.Test.Group
## 1                         NA
## 2                         NA
## 3                         NA
## 4                         NA
## 5                         NA
## 6                         NA
##   X.2.Mfr.Smog.Rating..Mfr.Smog.1.10.Rating.on.Label.for.Test.Group.2.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   X.2.EPA.Smog.Rating..EPA.Smog.1.10.Rating.on.Label.for.Test.Group.2.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   SmartWay2..EPA.derived.SmartWay.rating.for.Test.Group.2.
## 1                                                       NA
## 2                                                       NA
## 3                                                       NA
## 4                                                       NA
## 5                                                       NA
## 6                                                       NA
##   X.3.Smog.Rating.Test.Group
## 1                         NA
## 2                         NA
## 3                         NA
## 4                         NA
## 5                         NA
## 6                         NA
##   X.3.Mfr.Smog.Rating..Mfr.Smog.1.10.Rating.on.Label.for.Test.Group.3.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   X.3.EPA.Smog.Rating..EPA.Smog.1.10.Rating.on.Label.for.Test.Group.3.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   SmartWay3..EPA.derived.SmartWay.rating.for.Test.Group.3.
## 1                                                       NA
## 2                                                       NA
## 3                                                       NA
## 4                                                       NA
## 5                                                       NA
## 6                                                       NA
##   X.4.Smog.Rating.Test.Group
## 1                         NA
## 2                         NA
## 3                         NA
## 4                         NA
## 5                         NA
## 6                         NA
##   X.4.Mfr.Smog.Rating..Mfr.Smog.1.10.Rating.on.Label.for.Test.Group.4.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   X.4.EPA.Smog.Rating..EPA.Smog.1.10.Rating.on.Label.for.Test.Group.4.
## 1                                                                   NA
## 2                                                                   NA
## 3                                                                   NA
## 4                                                                   NA
## 5                                                                   NA
## 6                                                                   NA
##   SmartWay4..EPA.derived.SmartWay.rating.for.Test.Group.4.
## 1                                                       NA
## 2                                                       NA
## 3                                                       NA
## 4                                                       NA
## 5                                                       NA
## 6                                                       NA
##   X..You.Save.over.5.years..amount.saved.in.fuel.costs.over.5.years...on.label.
## 1                                                                            NA
## 2                                                                            NA
## 3                                                                            NA
## 4                                                                            NA
## 5                                                                            NA
## 6                                                                            NA
##   X..You.Spend.over.5.years..increased.amount.spent.in.fuel.costs.over.5.years...on.label.
## 1                                                                                     8250
## 2                                                                                     3250
## 3                                                                                     5000
## 4                                                                                     2000
## 5                                                                                    26000
## 6                                                                                     7250
##   City.CO2.Rounded.Adjusted Hwy.CO2.Rounded.Adjusted
## 1                       579                      399
## 2                       386                      297
## 3                       469                      332
## 4                       351                      266
## 5                      1121                      726
## 6                       565                      356
##   Comb.CO2.Rounded.Adjusted..as.shown.on.FE.Label.
## 1                                              498
## 2                                              346
## 3                                              407
## 4                                              312
## 5                                              943
## 6                                              471
##   CO2.PHEV.Composite.Rounded.Adjusted.Combined.CO2
## 1                                               NA
## 2                                               NA
## 3                                               NA
## 4                                               NA
## 5                                               NA
## 6                                               NA
##   X240V.Charge.Time.at.240.volts..hours. X120V.Charge.time.at.120.Volts..hours.
## 1                                     NA                                     NA
## 2                                     NA                                     NA
## 3                                     NA                                     NA
## 4                                     NA                                     NA
## 5                                     NA                                     NA
## 6                                     NA                                     NA
##   PHEV.Total.Driving.Range..rounded.to.nearest.10.miles.DISTANCE
## 1                                                             NA
## 2                                                             NA
## 3                                                             NA
## 4                                                             NA
## 5                                                             NA
## 6                                                             NA
##   City.PHEV.Composite.MPGe Hwy.PHEV.Composite.MPGe Comb.PHEV.Composite.MPGe X.2
## 1                       NA                      NA                       NA  NA
## 2                       NA                      NA                       NA  NA
## 3                       NA                      NA                       NA  NA
## 4                       NA                      NA                       NA  NA
## 5                       NA                      NA                       NA  NA
## 6                       NA                      NA                       NA  NA
##   X.3 X.4
## 1  NA  NA
## 2  NA  NA
## 3  NA  NA
## 4  NA  NA
## 5  NA  NA
## 6  NA  NA

Data transformation and cleaning:

#selecting only relevant columns and eliminate NA values:
df_mpg_og<- df %>% 
  subset(select = c(Mfr.Name, Carline, X..Cyl, Comb.Unrd.Adj.FE...Conventional.Fuel, Descriptor...Model.Type..40.Char.or.less., Battery.Type.Desc)) %>% 
  filter(!is.na(X..Cyl))

#Rename columns to meaningful names:
names(df_mpg_og)=c('Manufacture', 'Car_model', 'Cylinder_num', 'Combined_MPG','Model_type', 'Battery_type')

#Create helper columns to sort the vehicle engine type and class based on "Model_type" column:
df_mpg_og<-df_mpg_og %>% 
  mutate('Engine_type'=case_when(Battery_type=="" ~"ICE",
                                 grepl("\\bMild Hybrid\\b", Model_type) ~ "mild hybrid",
                                 grepl("\\bHybrid\\b", Model_type) ~ "full hybrid")) %>% 
  mutate('Engine_class'=case_when(Engine_type=='mild hybrid'~'hybrid',
                                   Engine_type=='full hybrid'~'hybrid',
                                   Engine_type=='ICE'~'ICE'),
         Battery_type=ifelse(Battery_type=="","No Battery", Battery_type)) %>% 
  select(-"Model_type")

#Create a variable for all types of engine models:
full_hybrid<- df_mpg_og %>% 
  filter(Engine_type == 'full hybrid')

mild_hybrid<- df_mpg_og %>% 
  filter(Engine_type == 'mild hybrid')

ICE<- df_mpg_og %>% 
  filter(Engine_type=='ICE')

head(df_mpg_og)
##           Manufacture    Car_model Cylinder_num Combined_MPG Battery_type
## 1        aston martin      Vantage            8      17.9272   No Battery
## 2                 BMW      Z4 M40i            6      25.3007   No Battery
## 3                 BMW      Z4 M40i            6      21.5458   No Battery
## 4                 BMW Z4 sDrive30i            4      28.0285   No Battery
## 5 Volkswagen Group of      Mistral           16       9.4203   No Battery
## 6      General Motors     CORVETTE            8      19.1107   No Battery
##   Engine_type Engine_class
## 1         ICE          ICE
## 2         ICE          ICE
## 3         ICE          ICE
## 4         ICE          ICE
## 5         ICE          ICE
## 6         ICE          ICE

Data Exploratory on dataset #1:

This density plot shows the overview of the MPG distribution for different engine types

ggplot(df_mpg_og, aes(x=Combined_MPG, fill= Engine_type)) +
  geom_density(alpha = 0.5) +
  labs(title="MPG distribution among different types of engine design",
       subtitle = "Not separated for number of cylinders")

Explore the relationship between number of engine cylinders and MPGs:

ggplot(df_mpg_og, aes(x=Combined_MPG, y=Cylinder_num)) +
  geom_point(aes(color = Engine_type), alpha=0.5) +
  geom_smooth(se=FALSE) +
  labs(title = 'Engine cylinder vs MPG',
       y='Number of engine cylinder',
       x='Combined MPGs')

Explore the mean MPGs for each of the engine cylinder groups:

#Calculating the average MPG for each cylinder group and plotted them:
cyl_mpg<- df_mpg_og %>% 
  group_by(Engine_type, Cylinder_num) %>% 
  summarise("average_mpg_cylinder"=mean(Combined_MPG))

cyl_mpg
## # A tibble: 13 × 3
## # Groups:   Engine_type [3]
##    Engine_type Cylinder_num average_mpg_cylinder
##    <chr>              <int>                <dbl>
##  1 ICE                    3                29.2 
##  2 ICE                    4                26.1 
##  3 ICE                    6                20.5 
##  4 ICE                    8                16.5 
##  5 ICE                   12                13.9 
##  6 ICE                   16                 9.42
##  7 full hybrid            4                42.3 
##  8 full hybrid            6                20.7 
##  9 full hybrid            8                18.5 
## 10 full hybrid           12                11.8 
## 11 mild hybrid            4                26.9 
## 12 mild hybrid            6                22.4 
## 13 mild hybrid            8                17.2
ggplot(cyl_mpg, aes(x=Engine_type, y=average_mpg_cylinder, fill = Cylinder_num)) +
  geom_bar(position='dodge2', stat='identity') +
  geom_text(aes(label = Cylinder_num),
            position = position_dodge2(width = 0.8),
            vjust=-0.5, hjust = 0.1) +
  labs(title = 'Average mpg vs engine cylinder number') +
  theme(legend.position = 'none')

df_mpg_og %>% 
  group_by(Engine_type, Cylinder_num) %>% 
  count(Cylinder_num) %>% 
  group_by(Engine_type) %>% 
  mutate(percentage = paste(round((n/sum(n)*100), 2), "%"))
## # A tibble: 13 × 4
## # Groups:   Engine_type [3]
##    Engine_type Cylinder_num     n percentage
##    <chr>              <int> <int> <chr>     
##  1 ICE                    3    19 4.15 %    
##  2 ICE                    4   213 46.51 %   
##  3 ICE                    6   136 29.69 %   
##  4 ICE                    8    83 18.12 %   
##  5 ICE                   12     6 1.31 %    
##  6 ICE                   16     1 0.22 %    
##  7 full hybrid            4    39 84.78 %   
##  8 full hybrid            6     5 10.87 %   
##  9 full hybrid            8     1 2.17 %    
## 10 full hybrid           12     1 2.17 %    
## 11 mild hybrid            4    63 45 %      
## 12 mild hybrid            6    51 36.43 %   
## 13 mild hybrid            8    26 18.57 %

Statistical tests:

Welch Anova and pairwise tests:

set.seed(321)
#Remove engine cylinders great than 8.
df_mpg_8<- df_mpg_og %>% 
  filter(Cylinder_num<9)

#Calculate the variance:
bartlett.test(Combined_MPG ~ Engine_type, data = df_mpg_8)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Combined_MPG by Engine_type
## Bartlett's K-squared = 54.861, df = 2, p-value = 1.222e-12
#p-value < 0.05, indicating that the rough equal variance assumption is invalid, and Welch's Anova test is needed for the analysis.

# Welch anova:
wel<- oneway.test(Combined_MPG ~ Engine_type, data=df_mpg_8, var.equal = FALSE)
print(wel)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  Combined_MPG and Engine_type
## F = 65.286, num df = 2, denom df = 104, p-value < 2.2e-16
# To compare each group individually:
pair<- pairwise.t.test(df_mpg_8$Combined_MPG , df_mpg_8$Engine_type, var.equal = FALSE)

print(pair)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  df_mpg_8$Combined_MPG and df_mpg_8$Engine_type 
## 
##             full hybrid ICE 
## ICE         <2e-16      -   
## mild hybrid <2e-16      0.22
## 
## P value adjustment method: holm

Dataset #2: Real world MPG measurements in hybrid and non-hybrid vehicles

Data acquisition through OpenData.gov API

api_df<- read.socrata(
  "https://data.cityofnewyork.us/resource/mn2p-34if.json",
  app_token = Sys.getenv("nyc_app_token"),
  email     = Sys.getenv("nyc_data_user"),
  password  = Sys.getenv("nyc_data_pw"))

head(api_df)
##   year      make             model hybrid_non_hybrid standard_type
## 1 2008     HONDA          CIVIC GX        Non-Hybrid         SEDAN
## 2 2008      FORD         CROWN VIC        Non-Hybrid         SEDAN
## 3 2008 CHEVROLET           EXPRESS        Non-Hybrid           VAN
## 4 2008      FORD             FOCUS        Non-Hybrid         SEDAN
## 5 2008      FORD            FUSION        Non-Hybrid         SEDAN
## 6 2008    TOYOTA HIGHLANDER HYBRID            Hybrid           SUV
##   epa_rating_city vehicle_count total_actual_miles total_actual_fuel
## 1              24             9               4459               221
## 2              11             7              17162              1771
## 3              13             1                413                65
## 4              24            49              13074               793
## 5              18             5              29482              1813
## 6              27            15              36095              1667
##   epa_expected_fuel actual_fuel_economy_geotab percent_difference_actual
## 1            185.79                      20.18                     -0.16
## 2           1560.18                       9.69                     -0.12
## 3             31.77                       6.35                     -0.51
## 4            544.75                      16.49                     -0.31
## 5           1637.89                      16.26                      -0.1
## 6           1336.85                      21.65                      -0.2
##   estimated_fuel_costs_per fuel_costs_per_mile
## 1               $   552.50            $   0.12
## 2             $   4,427.50            $   0.26
## 3               $   162.50            $   0.39
## 4             $   1,982.50            $   0.15
## 5             $   4,532.50            $   0.15
## 6             $   4,167.50            $   0.12

Data cleaning and transformation:

# remove $ sign and "," in the cost column
api_df[, 13:14]<- sapply(api_df[,13:14], function(x) as.numeric(gsub("[$,]", "", x)))

#only compare sedan and SUV:
api_clean<- api_df %>% 
  filter((standard_type =="SEDAN" |standard_type == "SUV"))

# convert columns to numerical values:
api_clean[,6:14]<- sapply(api_clean[, 6:14], function(x) as.numeric(x))

#checking data distribution and found an outlier as an error in the dataset. NYC gov accidentally put a Prius as a "non-hybrid" car but all Pruii are made as hybrid. 
ggplot(api_clean, aes(x=actual_fuel_economy_geotab)) +
  geom_histogram()+
  facet_wrap(~hybrid_non_hybrid)

Outlier examination:

#Selecting the outlier with the two conditions:
api_clean %>% 
  filter((hybrid_non_hybrid=='Non-Hybrid' & actual_fuel_economy_geotab >=40))
##   year   make model hybrid_non_hybrid standard_type epa_rating_city
## 1 2015 TOYOTA PRIUS        Non-Hybrid         SEDAN              48
##   vehicle_count total_actual_miles total_actual_fuel epa_expected_fuel
## 1           327            2144639             44874          44679.98
##   actual_fuel_economy_geotab percent_difference_actual estimated_fuel_costs_per
## 1                      47.79                         0                   112185
##   fuel_costs_per_mile
## 1                0.05

Making correction to the dataset

#The type exists in row index 45 and it is selected through the code below:
api_clean[45,] <-api_clean[45,] %>% 
  mutate(hybrid_non_hybrid = gsub("Non-Hybrid", "Hybrid", hybrid_non_hybrid))

#checking:
api_clean[45,]
##    year   make model hybrid_non_hybrid standard_type epa_rating_city
## 45 2015 TOYOTA PRIUS            Hybrid         SEDAN              48
##    vehicle_count total_actual_miles total_actual_fuel epa_expected_fuel
## 45           327            2144639             44874          44679.98
##    actual_fuel_economy_geotab percent_difference_actual
## 45                      47.79                         0
##    estimated_fuel_costs_per fuel_costs_per_mile
## 45                   112185                0.05

Checking the histograms:

ggplot(api_clean, aes(x=actual_fuel_economy_geotab)) +
  geom_histogram()+
  facet_wrap(~hybrid_non_hybrid)

Uploading the cleaned and corrected dataset to Azure SQL database:

#Forming connection to Azure database:
dbcon<- dbConnect(MySQL(), 
                  user = Sys.getenv('az_user'),
                  password = Sys.getenv('az_pw'),
                  host = 'cunydata607sql.mysql.database.azure.com',
                  dbname= Sys.getenv('az_user'),
                  port = 3306)
#To upload the dataframe to Azure SQL database:
dbWriteTable(dbcon, "real_mpg", api_clean, overwrite=TRUE, row.names=FALSE)
## [1] TRUE
#To check table integrity:
dbGetQuery(dbcon, "select * from real_mpg limit 10")
##    year   make             model hybrid_non_hybrid standard_type
## 1  2008  HONDA          CIVIC GX        Non-Hybrid         SEDAN
## 2  2008   FORD         CROWN VIC        Non-Hybrid         SEDAN
## 3  2008   FORD             FOCUS        Non-Hybrid         SEDAN
## 4  2008   FORD            FUSION        Non-Hybrid         SEDAN
## 5  2008 TOYOTA HIGHLANDER HYBRID            Hybrid           SUV
## 6  2008 TOYOTA             PRIUS            Hybrid         SEDAN
## 7  2009 NISSAN     ALTIMA HYBRID            Hybrid         SEDAN
## 8  2009   FORD         CROWN VIC        Non-Hybrid         SEDAN
## 9  2009   FORD            ESCAPE        Non-Hybrid           SUV
## 10 2009   FORD     ESCAPE HYBRID            Hybrid           SUV
##    epa_rating_city vehicle_count total_actual_miles total_actual_fuel
## 1               24             9               4459               221
## 2               11             7              17162              1771
## 3               24            49              13074               793
## 4               18             5              29482              1813
## 5               27            15              36095              1667
## 6               48           189            1034445             25453
## 7               35            11              73015              2790
## 8               16            39               3263               290
## 9               19            14               3903               430
## 10              34            32             180992              8729
##    epa_expected_fuel actual_fuel_economy_geotab percent_difference_actual
## 1             185.79                      20.18                     -0.16
## 2            1560.18                       9.69                     -0.12
## 3             544.75                      16.49                     -0.31
## 4            1637.89                      16.26                     -0.10
## 5            1336.85                      21.65                     -0.20
## 6           21550.94                      40.64                     -0.15
## 7            2086.14                      26.17                     -0.25
## 8             203.94                      11.25                     -0.30
## 9             205.42                       9.08                     -0.52
## 10           5323.29                      20.73                     -0.39
##    estimated_fuel_costs_per fuel_costs_per_mile
## 1                     552.5                0.12
## 2                    4427.5                0.26
## 3                    1982.5                0.15
## 4                    4532.5                0.15
## 5                    4167.5                0.12
## 6                   63632.5                0.06
## 7                    6975.0                0.10
## 8                     725.0                0.22
## 9                    1075.0                0.28
## 10                  21822.5                0.12

Closing the connection:

dbDisconnect(dbcon)
## [1] TRUE

Exploratory data on dataset #2:

api_clean %>% 
  count(hybrid_non_hybrid) %>% 
  arrange(desc(n))
##   hybrid_non_hybrid  n
## 1            Hybrid 34
## 2        Non-Hybrid 31
ggplot(api_clean, aes(x=hybrid_non_hybrid, y=actual_fuel_economy_geotab, color=hybrid_non_hybrid)) +
  geom_boxplot() +
  scale_color_manual(values = c('Hybrid'='blue', 'Non-Hybrid'='darkgreen')) +
  labs(title = "Average MPG of hybrid vs. non-hybrid",
       x="",
       y="Miles per Gallon (MPG)")

Statistical test:

#We will use one-tailed t-test to compare the data:
obs<- api_clean %>% 
  observe(actual_fuel_economy_geotab ~ hybrid_non_hybrid, stat='t', order = c('Hybrid', 'Non-Hybrid'))

null_dis<- api_clean %>% 
  specify(actual_fuel_economy_geotab ~ hybrid_non_hybrid) %>% 
  hypothesize(null = 'independence') %>% 
  generate(reps = 1000, type = 'permute') %>% 
  calculate(stat = 't', order = c('Hybrid', 'Non-Hybrid'))

p<- get_p_value(null_dis, obs, direction='greater')

visualise(null_dis) +
  shade_p_value(obs_stat = obs, direction = 'greater')+
  labs(subtitle = paste("P-value =", p))