library(dplyr) #data wrangling
library(lubridate) #convert date to date type
library(ggplot2) #visualization
library(plotly) #visualization
library(glue) #Create label
library(tidyr) #data wrangling
library(tidytext) #text conversion
library(scales) #data scalling
library(hrbrthemes) #additional plot theme
library(treemap) #treemap visualization

1. Introduction

For the LBB (Learning by Building) “Data Visualization” assignment, I’m using an electric car dataset from Kaggle. The Kaggle dataset was gathered from the EV Database. With this dataset, we want to find out what insights we can gather about Electric Vehicles (EV).


2. Dataset

Importing Data

Import the data and assign it to an object named “ev”, which stands for “Electric Vehicle”.

# Load the electric vehicle CSV into `ev` and preview its first rows
ev <- read.csv(file = "ElectricCarData.csv")
head(ev, n = 6L)

Checking and Clean Dataset

# Inspect the column names, their types and a sample of values
ev %>% glimpse()
## Rows: 103
## Columns: 14
## $ Brand           <chr> "Tesla ", "Volkswagen ", "Polestar ", "BMW ", "Honda "~
## $ Model           <chr> "Model 3 Long Range Dual Motor", "ID.3 Pure", "2", "iX~
## $ AccelSec        <dbl> 4.6, 10.0, 4.7, 6.8, 9.5, 2.8, 9.6, 8.1, 5.6, 6.3, 5.1~
## $ TopSpeed_KmH    <int> 233, 160, 210, 180, 145, 250, 150, 150, 225, 180, 180,~
## $ Range_Km        <int> 450, 270, 400, 360, 170, 610, 190, 275, 310, 400, 370,~
## $ Efficiency_WhKm <int> 161, 167, 181, 206, 168, 180, 168, 164, 153, 193, 216,~
## $ FastCharge_KmH  <chr> "940", "250", "620", "560", "190", "620", "220", "420"~
## $ RapidCharge     <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"~
## $ PowerTrain      <chr> "AWD", "RWD", "AWD", "RWD", "RWD", "AWD", "FWD", "FWD"~
## $ PlugType        <chr> "Type 2 CCS", "Type 2 CCS", "Type 2 CCS", "Type 2 CCS"~
## $ BodyStyle       <chr> "Sedan", "Hatchback", "Liftback", "SUV", "Hatchback", ~
## $ Segment         <chr> "D", "C", "D", "D", "B", "F", "C", "B", "D", "D", "D",~
## $ Seats           <int> 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, ~
## $ PriceEuro       <int> 55480, 30000, 56440, 68040, 32997, 105000, 31900, 2968~

Brand : Manufacturer of the vehicle

Model : Vehicle model and type

AccelSec : Time to accelerate from 0 to 100 km/h, in seconds

TopSpeed : The top speed in km/h

Range : Range in km

Efficiency : Efficiency Wh/km

FastCharge : Fast-charging speed in km/h (km of range added per hour of charging)

RapidCharge : Yes / No

PowerTrain : Front, rear, or all wheel drive

PlugType : Plug type

BodyStyle : Basic size or style

Segment : Market segment

Seats : Number of seats

PriceEuro : Price in Germany before tax incentives

check unique value in each column

# Count the distinct values in every column.
# vapply() guarantees exactly one integer per column, whereas the return type
# of sapply() depends on what the function happens to produce.
vapply(ev, n_distinct, integer(1))
##           Brand           Model        AccelSec    TopSpeed_KmH        Range_Km 
##              33             102              55              25              50 
## Efficiency_WhKm  FastCharge_KmH     RapidCharge      PowerTrain        PlugType 
##              54              51               2               3               4 
##       BodyStyle         Segment           Seats       PriceEuro 
##               9               8               5              87

Convert data type

# Columns that hold categories and should be stored as factors
ev_col <- c("RapidCharge", "PowerTrain", "PlugType", "BodyStyle", "Segment", "Seats")

# across() + all_of() replaces the deprecated mutate_each_() / funs() pair.
# FastCharge_KmH is read as character; any entry that is not a number
# becomes NA when coerced to integer.
ev <- ev %>%
  mutate(across(all_of(ev_col), factor)) %>%
  mutate(FastCharge_KmH = as.integer(FastCharge_KmH))

# Count missing values per column
colSums(is.na(ev))
##           Brand           Model        AccelSec    TopSpeed_KmH        Range_Km 
##               0               0               0               0               0 
## Efficiency_WhKm  FastCharge_KmH     RapidCharge      PowerTrain        PlugType 
##               0               5               0               0               0 
##       BodyStyle         Segment           Seats       PriceEuro 
##               0               0               0               0

3. Plot and Analysis

Price

Price by Brands

# Mean price per brand with a formatted tooltip label, sorted by mean price
price_brand <- ev %>%
  group_by(Brand) %>%
  summarise(mean_price = mean(PriceEuro)) %>%
  mutate(label = glue("Average Price = {comma(round(mean_price,0))}")) %>%
  arrange(mean_price)

# Lollipop chart: a grey segment from 0 to the mean price, capped by a point.
# Brands are ordered by mean price through reorder() in the segment layer.
price_brand_plot <- price_brand %>%
  ggplot(aes(x = Brand, y = mean_price, text = label)) +
  geom_segment(
    aes(x = reorder(Brand, mean_price), xend = Brand, y = 0, yend = mean_price),
    color = "grey"
  ) +
  geom_point(size = 3, color = "steelblue") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Average Price by Brand",
    x = "Brands",
    y = "Average Price (Euro)",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )

# Interactive version; the tooltip shows only the `label` column
price_brand_plot %>% ggplotly(tooltip = "label")

Insight

  • Lightyear has the highest average Electric Vehicle price by brand. Lightyear is a tech company located in the Netherlands, founded in September 2016. Its high price derives from its solar panel technology, which other Electric Vehicles may not have.
  • The second and third highest average car prices belong to Porsche and Lucid. Porsche is a well-known car company selling luxury sports cars, and Lucid is considered a luxury Electric Vehicle maker located in the US.

Price by Car Type

# Add a per-vehicle tooltip label (brand, model and formatted price)
price_type <- ev %>% 
  mutate(label = glue("Brand = {Brand}
                      Model = {Model}
                      Price = {comma(round(PriceEuro,0))}"))

# One jittered point per vehicle, grouped and coloured by body style.
# The y axis shows individual prices, so it is labelled "Price", not "Average".
price_type_plot <- ggplot(
  price_type,
  aes(x = BodyStyle, y = PriceEuro, color = BodyStyle, text = label)
) +
  # FALSE rather than F: `F` is an ordinary variable that can be reassigned
  geom_jitter(show.legend = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Price by Car Type") +
  labs(x = "Car Type",
       y = "Price (Euro)", fill = "") +
  theme_minimal() +
  theme(axis.text = element_text(colour = "navyblue"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none")

# Interactive version; the tooltip shows only the `label` column
ggplotly(price_type_plot, tooltip = "label")

Insight:

  • Based on the BodyStyle category, Cabrio (Tesla - Roadster) is the most expensive car type, followed by Sedan (Porsche - Taycan Turbo S) and Station (Porsche - Taycan Cross Turismo).
  • This figure also shows that electric cars are dominated by the Hatchback and SUV car types, with prices ranging from EUR 20,000 up to EUR 100,000.
  • The Tesla - Roadster is priced at EUR 215,000. It is the most expensive electric car in the dataset; Tesla claims that the Roadster is the quickest car in the world, with record-setting acceleration, range and performance.

Price by Battery Pack

# Estimate each vehicle's battery capacity in kWh from range (km) times
# consumption (Wh/km), then plot it against price.
# This is a row-wise calculation, so mutate() is used; group_by() + summarise()
# is meant for aggregation and only worked here while every group held one row.
ev_batt <- ev %>%
  mutate(battery = round((Range_Km * Efficiency_WhKm) / 1000, 0)) %>%
  mutate(label = glue("Brand = {Brand}
                            Model = {Model}")) %>%
  ggplot(aes(x = PriceEuro, y = battery, text = label)) +
  geom_point(colour = "dodgerblue1") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Price by Battery Pack") +
  labs(x = "Price in Euro",
       y = "Battery Pack Kwh", fill = "") +
  theme_minimal() +
  theme(axis.text = element_text(colour = "navyblue"),
        plot.title = element_text(hjust = 0.5))

# Interactive version; the tooltip shows only the `label` column
ggplotly(ev_batt, tooltip = "label")

Insight

  • The bigger the battery capacity of an EV, the more expensive it becomes.
  • One outlier, the Tesla - Cybertruck Tri Motor, has the same 200 kWh battery capacity as the Tesla - Roadster at a much cheaper price.

Market Segment

Price

# Price distribution per market segment: a boxplot with every vehicle
# overlaid as a jittered black point
ev_segment <- ggplot(ev, aes(x = Segment, y = PriceEuro, fill = Segment)) +
  geom_boxplot(alpha = 0.5) +
  geom_jitter(color = "black", size = 1) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Price per Segment",
    x = "Market Segment",
    y = "Price in Euro",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Interactive version with the default tooltip
ev_segment %>% ggplotly()

Insight

  • A is the cheapest EV market Segment, with PriceEuro ranging from EUR 20,129 to EUR 24,790.
  • Expensive EV are in F market Segment, with PriceEuro ranging from EUR 79,990 to EUR 180,781.
  • The most expensive EV Tesla - Roadster is in S market Segment, with PriceEuro amounting to EUR 215,000.

Let’s see which EV BodyStyle dominates each Segment.

EV Body Type

# Number of vehicles per (Segment, BodyStyle) pair, drawn as stacked bars
ev_seg_type <- ev %>%
  count(Segment, BodyStyle) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(aes(x = Segment, y = n, fill = BodyStyle, text = label)) +
  geom_col(position = "stack") +
  labs(
    title = "Body Type per Segment",
    x = "Market Segment",
    y = "",
    fill = ""
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version; the tooltip shows only the `label` column
ev_seg_type %>% ggplotly(tooltip = "label")

Insight

  • The SUV BodyStyle exists in almost all market Segments.
  • The Hatchback BodyStyle is considered an affordable EV in terms of PriceEuro.
  • The Sedan BodyStyle is mostly found in the “F” market Segment, which is considered the expensive EV segment.
  • This chart also shows that the “N” Segment is specifically for the SPV (Special Purpose Vehicle) and Pickup BodyStyle.

Car Performance

Top Speed Efficiency by Power Train

AWD

# AWD vehicles only: top speed against energy consumption, with point size
# scaled by price. Rows are filtered first; the label is computed per row,
# so the order of filter() and mutate() does not change the result.
ev_awd <- ev %>%
  filter(PowerTrain == "AWD") %>%
  mutate(label = glue("Brand = {Brand}
                                     Model = {Model}
                                     Price = {comma(round(PriceEuro))}")) %>%
  ggplot(aes(x = TopSpeed_KmH, y = Efficiency_WhKm, text = label)) +
  geom_point(aes(size = PriceEuro), colour = "SteelBlue") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    title = "Top Speed to Efficiency",
    x = "Top Speed (Kmh)",
    y = "Efficiency Wh/km",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )

# Interactive version; the tooltip shows only the `label` column
ev_awd %>% ggplotly(tooltip = "label")

RWD

# RWD vehicles only: top speed against energy consumption, with point size
# scaled by price. Rows are filtered first; the label is computed per row,
# so the order of filter() and mutate() does not change the result.
ev_rwd <- ev %>%
  filter(PowerTrain == "RWD") %>%
  mutate(label = glue("Brand = {Brand}
                                     Model = {Model}
                                     Price = {comma(round(PriceEuro))}")) %>%
  ggplot(aes(x = TopSpeed_KmH, y = Efficiency_WhKm, text = label)) +
  geom_point(aes(size = PriceEuro), colour = "SpringGreen") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    title = "Top Speed to Efficiency",
    x = "Top Speed (Kmh)",
    y = "Efficiency Wh/km",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )

# Interactive version; the tooltip shows only the `label` column
ev_rwd %>% ggplotly(tooltip = "label")

FWD

# FWD vehicles only: top speed against energy consumption, with point size
# scaled by price. Rows are filtered first; the label is computed per row,
# so the order of filter() and mutate() does not change the result.
ev_fwd <- ev %>%
  filter(PowerTrain == "FWD") %>%
  mutate(label = glue("Brand = {Brand}
                                     Model = {Model}
                                     Price = {comma(round(PriceEuro))}")) %>%
  ggplot(aes(x = TopSpeed_KmH, y = Efficiency_WhKm, text = label)) +
  geom_point(aes(size = PriceEuro), colour = "GoldenRod") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    title = "Top Speed to Efficiency",
    x = "Top Speed (Kmh)",
    y = "Efficiency Wh/km",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )

# Interactive version; the tooltip shows only the `label` column
ev_fwd %>% ggplotly(tooltip = "label")

Insight

  • The faster the car, the more likely it is to consume more energy (less efficient).
  • Most EVs use an All Wheel Drive (AWD) PowerTrain. An All Wheel Drive vehicle is one with a powertrain capable of providing power to all its wheels.
  • On the AWD plot, we can spot 2 outliers: Tesla - Roadster, the fastest EV with around 200 Wh/km energy consumption, and Lightyear - One, the slowest AWD EV with the most efficient energy consumption.
  • On the RWD plot, we can spot 3 outliers: Tesla - Cybertruck tends to consume more energy than the others but only has a 180 km/h top speed, while BMW - i4 and Tesla - Model 3 can reach up to 200 km/h with only 150 - 175 Wh/km energy consumption.
  • On the FWD plot, the only outlier, Mercedes - EQV, is the most expensive EV, with high energy consumption and a top speed of only up to 140 km/h.

Range per Battery Pack

# Estimate each vehicle's battery capacity in kWh from range (km) times
# consumption (Wh/km), then plot it against range, coloured by RapidCharge.
# This is a row-wise calculation, so mutate() is used; group_by() + summarise()
# is meant for aggregation and only worked here while every group held one row.
ev_range <- ev %>%
  mutate(battery = round((Range_Km * Efficiency_WhKm) / 1000, 0)) %>%
  mutate(label = glue("Brand = {Brand}
                            Model = {Model}")) %>%
  ggplot(aes(x = Range_Km, y = battery, text = label)) +
  geom_point(aes(colour = RapidCharge)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Range by Battery Pack") +
  labs(x = "Range in KM",
       y = "Battery Pack Kwh", fill = "") +
  theme_minimal() +
  theme(axis.text = element_text(colour = "navyblue"),
        plot.title = element_text(hjust = 0.5))

# Interactive version; the tooltip shows only the `label` column
ggplotly(ev_range, tooltip = "label")

Insight

  • The bigger the battery capacity, the longer the EV’s travel distance.
  • Most of the EVs have a rapid charging feature.

4. Kaggle Tasks

EV datasets obtained from kaggle provide several tasks / questions to be completed as below:

  1. Which car has the fastest 0-100 acceleration?
  2. Which car has the highest efficiency?
  3. Does a difference in power train affect the range, top speed, efficiency?
  4. Which manufacturer has the most number of vehicles?
  5. How does price relate to rapid charging?

Let’s try to make visualization to answer those questions.

Answer No. 1

# The ten vehicles with the lowest AccelSec, as horizontal bars filled by
# brand; reorder() with desc() controls the order of the models on the axis
ev %>%
  arrange(AccelSec) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(Model, desc(AccelSec)), y = AccelSec)) +
  geom_col(aes(fill = Brand)) +
  coord_flip()

Answer : Tesla Roadster.

Answer No. 2

# The ten vehicles with the lowest Efficiency_WhKm (least energy per km), as
# horizontal bars filled by brand; reorder() with desc() controls axis order
ev %>%
  arrange(Efficiency_WhKm) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(Model, desc(Efficiency_WhKm)), y = Efficiency_WhKm)) +
  geom_col(aes(fill = Brand)) +
  coord_flip()

Answer : Lightyear - One.

Answer No. 3

   # Energy consumption by power train: colour encodes range and point size
   # encodes top speed, so all three measures appear in a single chart
   ev %>%
     ggplot(aes(x = PowerTrain, y = Efficiency_WhKm, color = Range_Km)) +
     geom_jitter(aes(size = TopSpeed_KmH)) +
     scale_y_continuous(labels = scales::comma) +
     labs(
       title = "Power Train",
       x = "Power Train",
       y = "Efficiency Wh/Km",
       fill = ""
     ) +
     theme_minimal() +
     theme(
       plot.title = element_text(hjust = 0.5),
       axis.text = element_text(colour = "navyblue")
     )

Answer :

  • In terms of Efficiency, AWD is considered to be less efficient than FWD and RWD.
  • EVs with an AWD PowerTrain tend to be faster than those with FWD and RWD.
  • The EV with the longest Range uses an AWD PowerTrain.
  • Overall, the AWD PowerTrain offers better EV performance than FWD and RWD, but consumes more energy (less efficient).

Answer No.4

# Number of vehicles per brand as horizontal bars, ordered and shaded by count
plot_4 <- ev %>%
  count(Brand) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(aes(x = reorder(Brand, n), y = n, text = label)) +
  geom_col(aes(fill = n)) +
  coord_flip() +
  labs(
    title = "Brand",
    x = "Brand",
    y = "Number of Electric Vehicle",
    fill = ""
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version; the tooltip shows only the `label` column
plot_4 %>% ggplotly(tooltip = "label")

Answer : Tesla

Answer 5

# Compare the average price of vehicles with and without rapid charging.
# Sizing tiles by the raw PriceEuro column adds up the prices in each group,
# so the group with more vehicles dominates regardless of how expensive its
# vehicles are. Averaging first makes tile size reflect the price level.
ev %>%
  group_by(RapidCharge) %>%
  summarise(mean_price = mean(PriceEuro)) %>%
  # hand treemap() a plain data.frame rather than a tibble
  as.data.frame() %>%
  treemap(index = "RapidCharge",
          vSize = "mean_price",
          type = "index",
          title = "Average Price (Euro) by RapidCharge")

Answer :

  • EVs with RapidCharge technology tend to be more expensive.
  • This chart also suggests that in the future there will be more EVs with RapidCharge technology, since that technology is considered a “must have” feature in all electronics that rely on battery energy.