library(dplyr) #data wrangling
library(lubridate) #convert date to date type
library(ggplot2) #visualization
library(plotly) #visualization
library(glue) #Create label
library(tidyr) #data wrangling
library(tidytext) #text conversion
library(scales) #data scalling
library(hrbrthemes) #additional plot theme
library(treemap) #treemap visualization
For the LBB (Learning by Building) “Data Visualization” assignment, I am using the electric car dataset from Kaggle, which was gathered from the EV Database. With this dataset, we want to find out what insights can be gathered about electric vehicles.
Import the data and assign it to an object named “ev”, which stands for “Electric Vehicle”.
# Load the electric-vehicle data set from the CSV file.
ev <- read.csv(file = "ElectricCarData.csv")

# Preview the first rows of the data.
ev %>% head()

# Inspect the column names and data types.
ev %>% glimpse()
## Rows: 103
## Columns: 14
## $ Brand <chr> "Tesla ", "Volkswagen ", "Polestar ", "BMW ", "Honda "~
## $ Model <chr> "Model 3 Long Range Dual Motor", "ID.3 Pure", "2", "iX~
## $ AccelSec <dbl> 4.6, 10.0, 4.7, 6.8, 9.5, 2.8, 9.6, 8.1, 5.6, 6.3, 5.1~
## $ TopSpeed_KmH <int> 233, 160, 210, 180, 145, 250, 150, 150, 225, 180, 180,~
## $ Range_Km <int> 450, 270, 400, 360, 170, 610, 190, 275, 310, 400, 370,~
## $ Efficiency_WhKm <int> 161, 167, 181, 206, 168, 180, 168, 164, 153, 193, 216,~
## $ FastCharge_KmH <chr> "940", "250", "620", "560", "190", "620", "220", "420"~
## $ RapidCharge <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"~
## $ PowerTrain <chr> "AWD", "RWD", "AWD", "RWD", "RWD", "AWD", "FWD", "FWD"~
## $ PlugType <chr> "Type 2 CCS", "Type 2 CCS", "Type 2 CCS", "Type 2 CCS"~
## $ BodyStyle <chr> "Sedan", "Hatchback", "Liftback", "SUV", "Hatchback", ~
## $ Segment <chr> "D", "C", "D", "D", "B", "F", "C", "B", "D", "D", "D",~
## $ Seats <int> 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, ~
## $ PriceEuro <int> 55480, 30000, 56440, 68040, 32997, 105000, 31900, 2968~
Brand : Manufacturer of the vehicle
Model : Vehicle model and type
AccelSec : Time to accelerate from 0 to 100 km/h, in seconds
TopSpeed_KmH : Top speed in km/h
Range_Km : Range in km
Efficiency_WhKm : Efficiency in Wh/km
FastCharge_KmH : Fast-charge speed in km/h
RapidCharge : Yes / No
PowerTrain : Front, rear, or all wheel drive
PlugType : Plug type
BodyStyle : Basic size or style
Segment : Market segment
Seats : Number of seats
PriceEuro : Price in Germany before tax incentives
Check the number of unique values in each column.
# Count the distinct values in every column of `ev`.
# vapply() pins the result to one integer per column, so the return type
# cannot silently change the way sapply()'s can.
vapply(ev, n_distinct, integer(1))
## Brand Model AccelSec TopSpeed_KmH Range_Km
## 33 102 55 25 50
## Efficiency_WhKm FastCharge_KmH RapidCharge PowerTrain PlugType
## 54 51 2 3 4
## BodyStyle Segment Seats PriceEuro
## 9 8 5 87
Convert data types
# Convert data types.
# The low-cardinality descriptive columns become factors, FastCharge_KmH is
# parsed as an integer, and the trailing blanks visible in Brand
# (e.g. "Tesla ") are stripped so labels and groupings use clean names.
ev_col <- c("RapidCharge", "PowerTrain", "PlugType", "BodyStyle", "Segment", "Seats")
ev <- ev %>%
  mutate(
    # across() + all_of() replaces the deprecated mutate_each_() / funs().
    across(all_of(ev_col), factor),
    # Text that is not a number becomes NA here (with a coercion warning);
    # the missing-value check right after this step counts those entries.
    FastCharge_KmH = as.integer(FastCharge_KmH),
    Brand = trimws(Brand)
  )

# checking missing value
colSums(is.na(ev))
## Brand Model AccelSec TopSpeed_KmH Range_Km
## 0 0 0 0 0
## Efficiency_WhKm FastCharge_KmH RapidCharge PowerTrain PlugType
## 0 5 0 0 0
## BodyStyle Segment Seats PriceEuro
## 0 0 0 0
# Average price per brand, plus the tooltip text shown for each brand.
price_brand <- ev %>%
  group_by(Brand) %>%
  summarise(mean_price = mean(PriceEuro)) %>%
  arrange(mean_price) %>%
  mutate(label = glue("Average Price = {comma(round(mean_price,0))}"))

# Lollipop chart of the average price by brand.
# Every layer maps the same price-ordered Brand factor, so the axis order no
# longer depends on which layer happens to train the discrete scale first.
price_brand_plot <- ggplot(
  price_brand,
  aes(x = reorder(Brand, mean_price), y = mean_price, text = label)
) +
  geom_segment(
    aes(xend = reorder(Brand, mean_price), y = 0, yend = mean_price),
    color = "grey"
  ) +
  geom_point(size = 3, color = "steelblue") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Average Price by Brand") +
  labs(
    x = "Brands",
    y = "Average Price (Euro)"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart.
ggplotly(price_brand_plot, tooltip = "label")
# Insight
# NOTE(review): the opening of the first sentence is missing from the source text.
# [...] Brand is Lightyear. Lightyear is a tech company located in the
# Netherlands, founded in September 2016. Its high price derives from its
# solar panel technology, which other electric vehicles may not have.

# One row per vehicle, with the tooltip text (brand, model, price) attached.
price_type <- ev %>%
  mutate(
    label = glue(
      "Brand = {Brand}\nModel = {Model}\nPrice = {comma(round(PriceEuro,0))}"
    )
  )

# Jittered scatter of each vehicle's price, one column per body style.
# The y axis shows individual prices, so it is labelled "Price", not "Average".
price_type_plot <- ggplot(
  price_type,
  aes(x = BodyStyle, y = PriceEuro, color = BodyStyle, text = label)
) +
  geom_jitter(show.legend = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Price by Car Type") +
  labs(
    x = "Car Type",
    y = "Price (Euro)"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Interactive version of the chart.
ggplotly(price_type_plot, tooltip = "label")
# Insight:
# NOTE(review): the opening of this sentence is missing from the source text.
# [...] BodyType category, Cabrio (Tesla - Roadster) is the most expensive car
# type, followed by Sedan (Porsche - Taycan Turbo S) and Station
# (Porsche - Taycan Cross Turismo).

# Scatter plot of price against battery pack size.
# battery is computed per vehicle as Range_Km * Efficiency_WhKm / 1000,
# rounded to a whole number. mutate() is used because this is a row-wise
# calculation; group_by() + summarise() is meant for one result per group.
ev_batt <- ev %>%
  mutate(
    battery = round((Range_Km * Efficiency_WhKm) / 1000, 0),
    label = glue("Brand = {Brand}\nModel = {Model}")
  ) %>%
  ggplot(aes(x = PriceEuro, y = battery, text = label)) +
  geom_point(colour = "dodgerblue1") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Price by Battery Pack") +
  labs(
    x = "Price in Euro",
    y = "Battery Pack Kwh"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart.
ggplotly(ev_batt, tooltip = "label")
# Insight
# Price distribution per market segment: a semi-transparent box plot with the
# individual vehicles drawn on top as jittered points.
ev_segment <- ggplot(ev, aes(x = Segment, y = PriceEuro, fill = Segment)) +
  geom_boxplot(alpha = 0.5) +
  geom_jitter(size = 1, color = "black") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Price per Segment",
    x = "Market Segment",
    y = "Price in Euro",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Interactive version of the chart.
ggplotly(ev_segment)
# Insight
- [...] Segment, with PriceEuro ranging from EUR 24,790 to EUR 20,129.
- [...] Segment, with PriceEuro ranging from EUR 79,990 to EUR 180,781.
- [...] Segment, with PriceEuro amounting to EUR 215,000.
Let's see which EV BodyType dominates each Segment.
# Stacked bar chart of the number of vehicles per market segment, split by
# body style. count() tallies each Segment/BodyStyle pair and returns an
# ungrouped result, so no grouping leaks into the later steps.
ev_seg_type <- ev %>%
  count(Segment, BodyStyle) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(aes(x = Segment, y = n, fill = BodyStyle, text = label)) +
  geom_col(position = "stack") +
  ggtitle("Body Type per Segment") +
  labs(
    x = "Market Segment",
    y = "",
    fill = ""
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version of the chart.
ggplotly(ev_seg_type, tooltip = "label")
# Insight
# NOTE(review): the opening of each bullet is missing from the source text.
# - [...] BodyType mostly exists in almost all market Segments.
# - [...] BodyType is considered an affordable EV in terms of PriceEuro.
# - [...] BodyType is mostly in the “F” market Segment, which is considered
#   an expensive EV.
# - [...] Segment is specifically for SPV (Special Purpose Vehicle) and
#   Pickup BodyType.

# Build the top-speed vs. efficiency bubble chart for one power train.
#
# data         : data frame holding the Brand, Model, PriceEuro, PowerTrain,
#                TopSpeed_KmH and Efficiency_WhKm columns.
# power_train  : PowerTrain value whose vehicles are kept.
# point_colour : colour used for the points.
#
# Returns a ggplot object; point size is mapped to PriceEuro and the `text`
# aesthetic carries the brand / model / price label.
plot_speed_efficiency <- function(data, power_train, point_colour) {
  data %>%
    # `!!` forces the function argument, so it cannot be shadowed by a column.
    filter(PowerTrain == !!power_train) %>%
    mutate(
      label = glue(
        "Brand = {Brand}\nModel = {Model}\nPrice = {comma(round(PriceEuro))}"
      )
    ) %>%
    ggplot(aes(x = TopSpeed_KmH, y = Efficiency_WhKm, text = label)) +
    geom_point(aes(size = PriceEuro), colour = point_colour) +
    scale_y_continuous(labels = scales::comma) +
    scale_x_continuous(labels = scales::comma) +
    ggtitle("Top Speed to Efficiency") +
    labs(
      x = "Top Speed (Kmh)",
      y = "Efficiency Wh/km"
    ) +
    theme_minimal() +
    theme(
      axis.text = element_text(colour = "navyblue"),
      plot.title = element_text(hjust = 0.5)
    )
}

# All-wheel-drive vehicles.
ev_awd <- plot_speed_efficiency(ev, "AWD", "SteelBlue")
ggplotly(ev_awd, tooltip = "label")

# Rear-wheel-drive vehicles.
ev_rwd <- plot_speed_efficiency(ev, "RWD", "SpringGreen")
ggplotly(ev_rwd, tooltip = "label")

# Front-wheel-drive vehicles.
ev_fwd <- plot_speed_efficiency(ev, "FWD", "GoldenRod")
ggplotly(ev_fwd, tooltip = "label")
# Insight
# NOTE(review): the opening of the first sentence is missing from the source text.
# [...] PowerTrain. An All Wheel Drive vehicle is one with a powertrain
# capable of providing power to all its wheels.

# Scatter plot of range against battery pack size, coloured by RapidCharge.
# battery is computed per vehicle as Range_Km * Efficiency_WhKm / 1000,
# rounded to a whole number. mutate() is used because this is a row-wise
# calculation; group_by() + summarise() is meant for one result per group.
ev_range <- ev %>%
  mutate(
    battery = round((Range_Km * Efficiency_WhKm) / 1000, 0),
    label = glue("Brand = {Brand}\nModel = {Model}")
  ) %>%
  ggplot(aes(x = Range_Km, y = battery, text = label)) +
  geom_point(aes(colour = RapidCharge)) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Range by Battery Pack") +
  labs(
    x = "Range in KM",
    y = "Battery Pack Kwh"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(colour = "navyblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Interactive version of the chart.
ggplotly(ev_range, tooltip = "label")
# Insight
The EV dataset obtained from Kaggle comes with several tasks / questions to be completed, as listed below:
Let’s try to make visualizations to answer those questions.
# Horizontal bar chart of the ten vehicles with the lowest AccelSec values,
# one bar per model, filled by brand.
ev %>%
  arrange(AccelSec) %>%
  head(n = 10) %>%
  ggplot(
    mapping = aes(x = reorder(Model, desc(AccelSec)), y = AccelSec, fill = Brand)
  ) +
  geom_col() +
  coord_flip()
# Answer : Tesla Roadster.
# Horizontal bar chart of the ten vehicles with the lowest Efficiency_WhKm
# values, one bar per model, filled by brand.
ev %>%
  arrange(Efficiency_WhKm) %>%
  head(n = 10) %>%
  ggplot(
    mapping = aes(
      x = reorder(Model, desc(Efficiency_WhKm)),
      y = Efficiency_WhKm,
      fill = Brand
    )
  ) +
  geom_col() +
  coord_flip()
# Answer : Lightyear - One.
# Jittered scatter of efficiency per power train; point colour follows
# Range_Km and point size follows TopSpeed_KmH.
ggplot(
  data = ev,
  mapping = aes(x = PowerTrain, y = Efficiency_WhKm, color = Range_Km)
) +
  geom_jitter(mapping = aes(size = TopSpeed_KmH)) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Power Train",
    x = "Power Train",
    y = "Efficiency Wh/Km",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(colour = "navyblue")
  )
# Answer :
# NOTE(review): the opening of each bullet is missing from the source text.
# - [...] Efficiency, AWD is considered to be less efficient than FWD and RWD.
# - [...] PowerTrain tends to be faster than FWD and RWD.
# - [...] Range is using AWD PowerTrain.
# - [...] PowerTrain has better EV performance than FWD and RWD, but consumes
#   more energy (less efficient).

# Horizontal bar chart of the number of vehicles per brand, ordered by count
# and shaded by the same count.
plot_4 <- count(ev, Brand) %>%
  mutate(label = glue("Count = {n}")) %>%
  ggplot(mapping = aes(x = reorder(Brand, n), y = n, text = label)) +
  geom_col(mapping = aes(fill = n)) +
  coord_flip() +
  labs(
    title = "Brand",
    x = "Brand",
    y = "Number of Electric Vehicle",
    fill = ""
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version of the chart.
ggplotly(plot_4, tooltip = "label")
# Answer : Tesla
# Treemap with one tile per RapidCharge level, sized by PriceEuro.
treemap(
  ev,
  index = "RapidCharge",
  vSize = "PriceEuro",
  type = "index"
)
# Answer :
- [...] RapidCharge technology tends to be more expensive.
- [...] RapidCharge technology, since that technology is considered a “must have” feature in all electronics that rely on battery energy.