library(dplyr) #data wrangling
library(lubridate) #convert date to date type
library(ggplot2) #visualization
library(plotly) #visualization
library(glue) #Create label
library(tidyr) #data wrangling
library(tidytext) #text conversion
library(scales) #data scalling
library(hrbrthemes) #additional plot theme
library(treemap) #treemap visualization
For the LBB (Learning by Building) “Data Visualization” assignment, I’m using the electric car dataset from Kaggle. The Kaggle dataset was gathered from the EV Database. With this dataset, we want to find out what insights we can gather about electric vehicles.
Import the data and assign it to an object named “ev”, which stands for “Electric Vehicle”.
# Import the electric vehicle data into `ev` and preview the first rows
ev <- read.csv("ElectricCarData.csv")
head(ev)
# Inspect the column types and a sample of values for every column
ev %>%
  glimpse()
## Rows: 103
## Columns: 14
## $ Brand <chr> "Tesla ", "Volkswagen ", "Polestar ", "BMW ", "Honda "~
## $ Model <chr> "Model 3 Long Range Dual Motor", "ID.3 Pure", "2", "iX~
## $ AccelSec <dbl> 4.6, 10.0, 4.7, 6.8, 9.5, 2.8, 9.6, 8.1, 5.6, 6.3, 5.1~
## $ TopSpeed_KmH <int> 233, 160, 210, 180, 145, 250, 150, 150, 225, 180, 180,~
## $ Range_Km <int> 450, 270, 400, 360, 170, 610, 190, 275, 310, 400, 370,~
## $ Efficiency_WhKm <int> 161, 167, 181, 206, 168, 180, 168, 164, 153, 193, 216,~
## $ FastCharge_KmH <chr> "940", "250", "620", "560", "190", "620", "220", "420"~
## $ RapidCharge <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"~
## $ PowerTrain <chr> "AWD", "RWD", "AWD", "RWD", "RWD", "AWD", "FWD", "FWD"~
## $ PlugType <chr> "Type 2 CCS", "Type 2 CCS", "Type 2 CCS", "Type 2 CCS"~
## $ BodyStyle <chr> "Sedan", "Hatchback", "Liftback", "SUV", "Hatchback", ~
## $ Segment <chr> "D", "C", "D", "D", "B", "F", "C", "B", "D", "D", "D",~
## $ Seats <int> 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, ~
## $ PriceEuro <int> 55480, 30000, 56440, 68040, 32997, 105000, 31900, 2968~
Brand
: Manufacturer of the vehicle
Model
: Vehicle model and type
AccelSec
: Acceleration as 0-100 km/h
TopSpeed
: The top speed in km/h
Range
: Range in km
Efficiency
: Efficiency Wh/km
FastCharge
: Charge km/h
RapidCharge
: Yes / No
PowerTrain
: Front, rear, or all wheel drive
PlugType
: Plug type
BodyStyle
: Basic size or style
Segment
: Market segment
Seats
: Number of seats
PriceEuro
: Price in Germany before tax incentives
check unique value in each column
# Count the distinct values in each column.
# vapply() pins the result to one integer per column, so the return type
# cannot change with the input the way sapply()'s can.
vapply(ev, n_distinct, integer(1))
## Brand Model AccelSec TopSpeed_KmH Range_Km
## 33 102 55 25 50
## Efficiency_WhKm FastCharge_KmH RapidCharge PowerTrain PlugType
## 54 51 2 3 4
## BodyStyle Segment Seats PriceEuro
## 9 8 5 87
Convert data type
# Convert the categorical columns to factors and the fast-charge rate to integer
ev_col <- c(
  "RapidCharge", "PowerTrain", "PlugType", "BodyStyle", "Segment", "Seats"
)

# across(all_of()) replaces the deprecated mutate_each_()/funs() pair.
# FastCharge_KmH is read in as character; as.integer() turns any entry that
# is not a number into NA (with a coercion warning).
ev <- ev %>%
  mutate(across(all_of(ev_col), factor)) %>%
  mutate(FastCharge_KmH = as.integer(FastCharge_KmH))
# Count the missing values in each column
ev %>%
  is.na() %>%
  colSums()
## Brand Model AccelSec TopSpeed_KmH Range_Km
## 0 0 0 0 0
## Efficiency_WhKm FastCharge_KmH RapidCharge PowerTrain PlugType
## 0 5 0 0 0
## BodyStyle Segment Seats PriceEuro
## 0 0 0 0
# Average price per brand, sorted ascending, with a tooltip label
price_brand <- ev %>%
  group_by(Brand) %>%
  summarise(mean_price = mean(PriceEuro)) %>%
  arrange(mean_price) %>%
  mutate(label = glue("Average Price = {comma(round(mean_price,0))}"))

# Lollipop chart: a grey segment from 0 to the brand's mean price, capped by
# a point; brands are ordered by mean price and the axes are flipped so the
# brand names read horizontally.
price_brand_plot <- ggplot(
  price_brand,
  aes(x = Brand, y = mean_price, text = label)
) +
  geom_segment(
    aes(x = reorder(Brand, mean_price), xend = Brand, y = 0, yend = mean_price),
    color = "grey"
  ) +
  geom_point(size = 3, color = "steelblue") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Average Price by Brand") +
  labs(x = "Brands", y = "Average Price (Euro)", fill = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart
ggplotly(price_brand_plot, tooltip = "label")
Insight
Brand
is Lightyear. Lightyear is tech Company located in Netherlands, founded in September 2016. Expensive price derived from its solar panel technology, where other Electric Vehicle may not have this kind technology.
# Add a tooltip label (brand, model, price) to every vehicle
price_type <- ev %>%
  mutate(label = glue(
    "Brand = {Brand}\nModel = {Model}\nPrice = {comma(round(PriceEuro,0))}"
  ))

# Jittered points of each vehicle's price, grouped and coloured by body style.
# The y axis shows individual prices, so it is labelled "Price", not "Average".
price_type_plot <- ggplot(
  price_type,
  aes(x = BodyStyle, y = PriceEuro, color = BodyStyle, text = label)
) +
  geom_jitter(show.legend = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Price by Car Type") +
  labs(x = "Car Type", y = "Price (Euro)", fill = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Interactive version of the chart
ggplotly(price_type_plot, tooltip = "label")
Insight:
BodyType
category, Cabrio (Tesla - Roadster) is car type with the most expensive one, followed by Sedan (Porsche - Taycan Turbo S) and Station (Porsche - Taycan Cross Turismo).
# Scatter plot of price against battery size.
# battery = Range_Km * Efficiency_WhKm / 1000, rounded to a whole number.
# This is a row-wise calculation, so mutate() is used rather than
# group_by() + summarise(): every vehicle keeps exactly one row.
ev_batt <- ev %>%
  mutate(
    battery = round((Range_Km * Efficiency_WhKm) / 1000, 0),
    label = glue("Brand = {Brand}\nModel = {Model}")
  ) %>%
  ggplot(aes(x = PriceEuro, y = battery, text = label)) +
  geom_point(colour = "dodgerblue1") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Price by Battery Pack") +
  labs(x = "Price in Euro", y = "Battery Pack Kwh", fill = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart
ggplotly(ev_batt, tooltip = "label")
Insight
# Box plot of price per market segment with each vehicle overlaid as a
# jittered point
ev_segment <- ev %>%
  ggplot(aes(x = Segment, y = PriceEuro, fill = Segment)) +
  geom_boxplot(alpha = 0.5) +
  geom_jitter(color = "black", size = 1) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Price per Segment") +
  labs(x = "Market Segment", y = "Price in Euro", fill = "") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart
ggplotly(ev_segment)
Insight
Segment
, with PriceEuro
ranging from EUR 24,790 to EUR 20,129.Segment
, with PriceEuro
ranging from EUR 79,990 to EUR 180,781.Segment
, with PriceEuro
amounting to EUR 215,000.
Let’s see what kind of EV BodyType
dominates each Segment
.
# Stacked bar chart of the number of vehicles per market segment, split by
# body style. count() does the group-and-tally in one step and returns an
# ungrouped result with the count in column `n`.
ev_seg_type <- ev %>%
  count(Segment, BodyStyle) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(aes(x = Segment, y = n, fill = BodyStyle, text = label)) +
  geom_col(position = "stack") +
  ggtitle("Body Type per Segment") +
  labs(x = "Market Segment", y = "", fill = "") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version of the chart
ggplotly(ev_seg_type, tooltip = "label")
Insight
BodyType
mostly exist in almost all Market Segment
.BodyType
is considered as affordable EV in term of PriceEuro
.BodyType
mostly in “F” market Segment
which considered as expensive EV.Segment
is specifically for SPV (Special Purpose Vehicle) and Pickup BodyType
.
# Build the "Top Speed to Efficiency" bubble chart for one power train.
#
# data         : data frame with the columns Brand, Model, PriceEuro,
#                PowerTrain, TopSpeed_KmH and Efficiency_WhKm
# power_train  : value of PowerTrain to keep (e.g. "AWD")
# point_colour : colour used for the points
#
# Returns a ggplot object; point size is mapped to PriceEuro and the `text`
# aesthetic carries a brand/model/price label for the tooltip.
plot_speed_efficiency <- function(data, power_train, point_colour) {
  data %>%
    mutate(label = glue(
      "Brand = {Brand}\nModel = {Model}\nPrice = {comma(round(PriceEuro))}"
    )) %>%
    filter(PowerTrain == power_train) %>%
    ggplot(aes(x = TopSpeed_KmH, y = Efficiency_WhKm, text = label)) +
    geom_point(aes(size = PriceEuro), colour = point_colour) +
    scale_y_continuous(labels = scales::comma) +
    scale_x_continuous(labels = scales::comma) +
    ggtitle("Top Speed to Efficiency") +
    labs(x = "Top Speed (Kmh)", y = "Efficiency Wh/km", fill = "") +
    theme_minimal() +
    theme(
      axis.text = element_text(colour = "navyblue"),
      plot.title = element_text(hjust = 0.5)
    )
}

# All wheel drive
ev_awd <- plot_speed_efficiency(ev, "AWD", "SteelBlue")
ggplotly(ev_awd, tooltip = "label")

# Rear wheel drive
ev_rwd <- plot_speed_efficiency(ev, "RWD", "SpringGreen")
ggplotly(ev_rwd, tooltip = "label")

# Front wheel drive
ev_fwd <- plot_speed_efficiency(ev, "FWD", "GoldenRod")
ggplotly(ev_fwd, tooltip = "label")
Insight
PowerTrain
. An All Wheel Drive vehicle is one with a powertrain capable of providing power to all its wheels.
# Scatter plot of range against battery size, coloured by rapid-charge
# support. battery = Range_Km * Efficiency_WhKm / 1000, rounded to a whole
# number. This is a row-wise calculation, so mutate() is used rather than
# group_by() + summarise(): every vehicle keeps exactly one row.
ev_range <- ev %>%
  mutate(
    battery = round((Range_Km * Efficiency_WhKm) / 1000, 0),
    label = glue("Brand = {Brand}\nModel = {Model}")
  ) %>%
  ggplot(aes(x = Range_Km, y = battery, text = label)) +
  geom_point(aes(colour = RapidCharge)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Range by Battery Pack") +
  labs(x = "Range in KM", y = "Battery Pack Kwh", fill = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart
ggplotly(ev_range, tooltip = "label")
Insight
The EV dataset obtained from Kaggle comes with several tasks / questions to be answered, listed below:
Let’s make some visualizations to answer those questions.
# Ten vehicles with the lowest AccelSec values, drawn as horizontal bars
# coloured by brand; the smallest value ends up at the top after coord_flip()
ev %>%
  arrange(AccelSec) %>%
  slice_head(n = 10) %>%
  ggplot(aes(x = reorder(Model, desc(AccelSec)), y = AccelSec)) +
  geom_col(aes(fill = Brand)) +
  coord_flip()
Answer : Tesla Roadster.
# Ten vehicles with the lowest Efficiency_WhKm values, drawn as horizontal
# bars coloured by brand; the smallest value ends up at the top after
# coord_flip()
ev %>%
  arrange(Efficiency_WhKm) %>%
  slice_head(n = 10) %>%
  ggplot(aes(x = reorder(Model, desc(Efficiency_WhKm)), y = Efficiency_WhKm)) +
  geom_col(aes(fill = Brand)) +
  coord_flip()
Answer : Lightyear - One.
# Efficiency per power train as jittered points: colour encodes range and
# point size encodes top speed
ev %>%
  ggplot(aes(x = PowerTrain, y = Efficiency_WhKm, colour = Range_Km)) +
  geom_jitter(aes(size = TopSpeed_KmH)) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Power Train",
    x = "Power Train",
    y = "Efficiency Wh/Km",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )
Answer :
Efficiency
, AWD is considered to be less efficient than FWD and RWD.PowerTrain
tend to be faster than FWD and RWD.Range
is using AWD PowerTrain
.PowerTrain
have better EV performance than FWD and RWD, but consume more energy (less efficient).
# Number of vehicles per brand as horizontal bars, ordered and shaded by
# count. count() does the group-and-tally in one step and returns an
# ungrouped result with the count in column `n`.
plot_4 <- ev %>%
  count(Brand) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(aes(x = reorder(Brand, n), y = n, text = label)) +
  geom_col(aes(fill = n)) +
  coord_flip() +
  ggtitle("Brand") +
  labs(x = "Brand", y = "Number of Electric Vehicle", fill = "") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version of the chart
ggplotly(plot_4, tooltip = "label")
Answer : Tesla
# Treemap with one tile per RapidCharge level, sized by PriceEuro
ev %>%
  treemap(
    index = "RapidCharge",
    vSize = "PriceEuro",
    type = "index"
  )
Answer :
RapidCharge
technology tend to be more expensive.RapidCharge
technology, since those technology is considered as a “must have” feature in all electronic that rely on battery energy.