library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
ecars <- read_csv("EVDatabase.csv")
## Rows: 180 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Subtitle, Acceleration, TopSpeed, Range, Efficiency, FastCha...
## dbl (1): NumberofSeats
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(ecars)
## # A tibble: 6 × 11
## Name Subtitle Acceleration TopSpeed Range Efficiency FastChargeSpeed Drive
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Opel A… Battery … 7.3 sec 150 km/h 335 … 173 Wh/km 210 km/h Fron…
## 2 Renaul… Battery … 22.4 sec 130 km/h 160 … 194 Wh/km - Fron…
## 3 Nissan… Battery … 7.9 sec 144 km/h 220 … 164 Wh/km 230 km/h Fron…
## 4 Audi e… Battery … 5.7 sec 200 km/h 375 … 231 Wh/km 600 km/h All …
## 5 Porsch… Battery … 2.8 sec 260 km/h 390 … 215 Wh/km 860 km/h All …
## 6 Nissan… Battery … 14.0 sec 123 km/h 165 … 218 Wh/km 170 km/h Fron…
## # … with 3 more variables: NumberofSeats <dbl>, PriceinGermany <chr>,
## # PriceinUK <chr>
ecars <- ecars %>%
separate(Name, into = "Manufacturer", sep = ' ', extra = 'drop', remove = FALSE)
glimpse(ecars)
## Rows: 180
## Columns: 12
## $ Name <chr> "Opel Ampera-e", "Renault Kangoo Maxi ZE 33", "Nissan …
## $ Manufacturer <chr> "Opel", "Renault", "Nissan", "Audi", "Porsche", "Nissa…
## $ Subtitle <chr> "Battery Electric Vehicle | 58 kWh", "Battery El…
## $ Acceleration <chr> "7.3 sec", "22.4 sec", "7.9 sec", "5.7 sec", "2.8 sec"…
## $ TopSpeed <chr> "150 km/h", "130 km/h", "144 km/h", "200 km/h", "260 k…
## $ Range <chr> "335 km", "160 km", "220 km", "375 km", "390 km", "165…
## $ Efficiency <chr> "173 Wh/km", "194 Wh/km", "164 Wh/km", "231 Wh/km", "2…
## $ FastChargeSpeed <chr> "210 km/h", "-", "230 km/h", "600 km/h", "860 km/h", "…
## $ Drive <chr> "Front Wheel Drive", "Front Wheel Drive", "Front Wheel…
## $ NumberofSeats <dbl> 5, 5, 5, 5, 4, 7, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 5, 5, …
## $ PriceinGermany <chr> "€42,990", "N/A", "€29,990", "N/A", "€186,336", "€43,4…
## $ PriceinUK <chr> "N/A", "£31,680", "£25,995", "£79,900", "£138,830", "£…
ecars <- ecars %>%
separate(Acceleration, c("Acceleration"), sep = ' ', extra = 'drop') %>%
mutate(Acceleration = as.double(Acceleration)) %>%
separate(TopSpeed, c("TopSpeed"), sep = ' ', extra = 'drop') %>%
mutate(TopSpeed = as.double(TopSpeed)) %>%
separate(Range, c("Range"), sep = ' ', extra = 'drop') %>%
mutate(Range = as.double(Range)) %>%
separate(Efficiency, c("Efficiency"), sep = ' ', extra = 'drop') %>%
mutate(Efficiency = as.double(Efficiency)) %>%
separate(FastChargeSpeed, c("FastChargeSpeed"), sep = ' ', extra = 'drop') %>%
mutate(FastChargeSpeed = as.double(FastChargeSpeed))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
glimpse(ecars)
## Rows: 180
## Columns: 12
## $ Name <chr> "Opel Ampera-e", "Renault Kangoo Maxi ZE 33", "Nissan …
## $ Manufacturer <chr> "Opel", "Renault", "Nissan", "Audi", "Porsche", "Nissa…
## $ Subtitle <chr> "Battery Electric Vehicle | 58 kWh", "Battery El…
## $ Acceleration <dbl> 7.3, 22.4, 7.9, 5.7, 2.8, 14.0, 8.9, 6.8, 7.3, 7.3, 8.…
## $ TopSpeed <dbl> 150, 130, 144, 200, 260, 123, 160, 180, 157, 150, 160,…
## $ Range <dbl> 335, 160, 220, 375, 390, 165, 275, 385, 325, 235, 355,…
## $ Efficiency <dbl> 173, 194, 164, 231, 215, 218, 164, 192, 172, 161, 187,…
## $ FastChargeSpeed <dbl> 210, NA, 230, 600, 860, 170, 260, 520, 390, 270, 420, …
## $ Drive <chr> "Front Wheel Drive", "Front Wheel Drive", "Front Wheel…
## $ NumberofSeats <dbl> 5, 5, 5, 5, 4, 7, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 5, 5, …
## $ PriceinGermany <chr> "€42,990", "N/A", "€29,990", "N/A", "€186,336", "€43,4…
## $ PriceinUK <chr> "N/A", "£31,680", "£25,995", "£79,900", "£138,830", "£…
ecars <- ecars %>%
mutate(PriceinGermany = ifelse(PriceinGermany == "N/A", "€0", PriceinGermany))
ecars$PriceinGermany <- gsub("€", "", as.character(ecars$PriceinGermany))
ecars$PriceinGermany <- gsub(",", "", as.character(ecars$PriceinGermany))
ecars <- ecars %>%
mutate(PriceinGermany = as.numeric(PriceinGermany))
ecars <- ecars %>%
mutate(PriceinUK = ifelse(PriceinUK == "N/A", "£0", PriceinUK))
ecars$PriceinUK <- gsub("£", "", as.character(ecars$PriceinUK))
ecars$PriceinUK <- gsub(",", "", as.character(ecars$PriceinUK))
ecars <- ecars %>%
mutate(PriceinUK = as.numeric(PriceinUK))
glimpse(ecars)
## Rows: 180
## Columns: 12
## $ Name <chr> "Opel Ampera-e", "Renault Kangoo Maxi ZE 33", "Nissan …
## $ Manufacturer <chr> "Opel", "Renault", "Nissan", "Audi", "Porsche", "Nissa…
## $ Subtitle <chr> "Battery Electric Vehicle | 58 kWh", "Battery El…
## $ Acceleration <dbl> 7.3, 22.4, 7.9, 5.7, 2.8, 14.0, 8.9, 6.8, 7.3, 7.3, 8.…
## $ TopSpeed <dbl> 150, 130, 144, 200, 260, 123, 160, 180, 157, 150, 160,…
## $ Range <dbl> 335, 160, 220, 375, 390, 165, 275, 385, 325, 235, 355,…
## $ Efficiency <dbl> 173, 194, 164, 231, 215, 218, 164, 192, 172, 161, 187,…
## $ FastChargeSpeed <dbl> 210, NA, 230, 600, 860, 170, 260, 520, 390, 270, 420, …
## $ Drive <chr> "Front Wheel Drive", "Front Wheel Drive", "Front Wheel…
## $ NumberofSeats <dbl> 5, 5, 5, 5, 4, 7, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 5, 5, …
## $ PriceinGermany <dbl> 42990, 0, 29990, 0, 186336, 43433, 31960, 66300, 38350…
## $ PriceinUK <dbl> 0, 31680, 25995, 79900, 138830, 30255, 27120, 58850, 3…
ecars[is.na(ecars)] <- 0
ecars$Subtitle <- gsub("Battery Electric Vehicle | ", "", as.character(ecars$Subtitle))
# To remove the hardcoded value kWh
ecars$Subtitle <- gsub("kWh", "", as.character(ecars$Subtitle))
# To remove the first hardcoded value in the column.
ecars$Subtitle <- gsub("^.", "", as.character(ecars$Subtitle))
# Convert the column Subtitle to numeric and rename it to KWH
ecars <- ecars %>%
mutate(Subtitle = as.numeric(Subtitle)) %>%
rename(KWH = Subtitle)
glimpse(ecars)
## Rows: 180
## Columns: 12
## $ Name <chr> "Opel Ampera-e", "Renault Kangoo Maxi ZE 33", "Nissan …
## $ Manufacturer <chr> "Opel", "Renault", "Nissan", "Audi", "Porsche", "Nissa…
## $ KWH <dbl> 58.0, 31.0, 36.0, 86.5, 83.7, 36.0, 45.0, 74.0, 56.0, …
## $ Acceleration <dbl> 7.3, 22.4, 7.9, 5.7, 2.8, 14.0, 8.9, 6.8, 7.3, 7.3, 8.…
## $ TopSpeed <dbl> 150, 130, 144, 200, 260, 123, 160, 180, 157, 150, 160,…
## $ Range <dbl> 335, 160, 220, 375, 390, 165, 275, 385, 325, 235, 355,…
## $ Efficiency <dbl> 173, 194, 164, 231, 215, 218, 164, 192, 172, 161, 187,…
## $ FastChargeSpeed <dbl> 210, 0, 230, 600, 860, 170, 260, 520, 390, 270, 420, 3…
## $ Drive <chr> "Front Wheel Drive", "Front Wheel Drive", "Front Wheel…
## $ NumberofSeats <dbl> 5, 5, 5, 5, 4, 7, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 5, 5, …
## $ PriceinGermany <dbl> 42990, 0, 29990, 0, 186336, 43433, 31960, 66300, 38350…
## $ PriceinUK <dbl> 0, 31680, 25995, 79900, 138830, 30255, 27120, 58850, 3…
ecars_only_numeric_fields <- ecars %>%
select(-c(Name, Manufacturer, Drive))
ecars_only_numeric_fields.cor = cor(ecars_only_numeric_fields)
ecars_only_numeric_fields.cor
## KWH Acceleration TopSpeed Range Efficiency
## KWH 1.0000000 -0.6229854 0.69141548 0.89009245 0.35642612
## Acceleration -0.6229854 1.0000000 -0.82104106 -0.71207706 0.14098402
## TopSpeed 0.6914155 -0.8210411 1.00000000 0.76031899 -0.04738347
## Range 0.8900925 -0.7120771 0.76031899 1.00000000 -0.07227251
## Efficiency 0.3564261 0.1409840 -0.04738347 -0.07227251 1.00000000
## FastChargeSpeed 0.6597431 -0.7416703 0.77318976 0.76757435 -0.05991384
## NumberofSeats 0.1344473 0.3183015 -0.24256727 -0.06885113 0.60246852
## PriceinGermany 0.5980835 -0.5015186 0.67886530 0.56903699 0.23922128
## PriceinUK 0.6212530 -0.6742728 0.79266892 0.62075705 0.08452729
## FastChargeSpeed NumberofSeats PriceinGermany PriceinUK
## KWH 0.65974314 0.13444725 0.59808354 0.62125296
## Acceleration -0.74167034 0.31830154 -0.50151859 -0.67427279
## TopSpeed 0.77318976 -0.24256727 0.67886530 0.79266892
## Range 0.76757435 -0.06885113 0.56903699 0.62075705
## Efficiency -0.05991384 0.60246852 0.23922128 0.08452729
## FastChargeSpeed 1.00000000 -0.11964428 0.59560913 0.65085116
## NumberofSeats -0.11964428 1.00000000 -0.01245501 -0.21198513
## PriceinGermany 0.59560913 -0.01245501 1.00000000 0.69875323
## PriceinUK 0.65085116 -0.21198513 0.69875323 1.00000000
palette = colorRampPalette(c("green", "white", "red")) (20)
heatmap(x = ecars_only_numeric_fields.cor, col = palette, symm = TRUE)
#### Let us check each type of drive
ecars %>%
ggplot(aes(x = Drive)) +
geom_bar() +
theme_classic()
ecars %>%
group_by(Drive) %>%
summarize(Total_count = n()) %>%
arrange(-Total_count)
## # A tibble: 3 × 2
## Drive Total_count
## <chr> <int>
## 1 Front Wheel Drive 71
## 2 All Wheel Drive 64
## 3 Rear Wheel Drive 45
ecars %>%
ggplot(aes(x = NumberofSeats)) +
geom_bar() +
labs(x = "Number of Seats") +
theme_classic()
barplot(table(ecars$Drive, ecars$NumberofSeats),
main="Car Distribution considering Number of Seats and Drive type",
xlab="NumberofSeats", col=c("darkblue","red"),
legend = rownames(table(ecars$Drive, ecars$NumberofSeats)), beside = TRUE)
ecars %>%
ggplot(aes(x = Manufacturer)) +
geom_bar() +
labs(x = "Manufacturer") +
coord_flip() +
theme_classic()
ecars %>%
ggplot(aes(x = KWH, y = Acceleration, color = Drive)) +
geom_point() +
theme_classic()
ecars %>%
ggplot(aes(x = KWH, y = Acceleration, size = NumberofSeats)) +
geom_point() +
theme_classic()
ecars %>%
ggplot(aes(x = TopSpeed, y = Range, color = Drive)) +
geom_point() +
theme_classic()