Prepare the libraries
library (tidyverse)## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.2
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(knitr)## Warning: package 'knitr' was built under R version 4.0.2
library(data.table)##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(ggrepel)## Warning: package 'ggrepel' was built under R version 4.0.2
Let's use the USA car sales data, which I took from: https://www.kaggle.com/gagandeep16/car-sales/data
Convert the Manufacturer, Model and Vehicle Type columns to factors.
# Load the raw car-sales table from the CSV file (relative path).
car <- read.csv(file = "datasets_3483_5614_Car_sales.csv")

# Store the manufacturer and model labels as factors.
car[["Manufacturer"]] <- as.factor(car[["Manufacturer"]])
car[["Model"]] <- as.factor(car[["Model"]])
car$Vehicle_type <- as.factor(car$Vehicle_type)
Description:
knitr::include_graphics("image_catgorisation_2020.jpg")
Alright, we have read the data and know what each column means. Now let's explore it!
First, let's check the best-selling car.
# Row(s) of `car` holding the highest Sales_in_thousands value; ties are kept.
# Missing sales values are skipped explicitly: without na.rm = TRUE a single NA
# makes max() return NA, and the logical subset would then return only all-NA
# rows. which() additionally drops NA comparison results from the index.
most_sold <- car[
  which(car$Sales_in_thousands == max(car$Sales_in_thousands, na.rm = TRUE)),
]
# Keep only the first three columns for display.
most_sold <- most_sold[, 1:3]
most_sold
No wonder the US market likes a pick-up model: some buyers are still influenced by the old culture of carrying things to and from their farm in the truck bed. It is also equipped with a massive engine displacement, and fuel efficiency is absolutely not a big issue for them :)
Let's check the top sales by quantity.
# Sum Sales_in_thousands per manufacturer/model pair, keep the 15 largest
# totals, and plot them as labelled points ordered by descending total.
# NOTE(review): the summed column is named Sales_in_thousands, yet the labels
# and the y axis say "Units" -- confirm which unit is intended.
top <- aggregate(
  x = list(Amount_of_Car_Sold = car$Sales_in_thousands),
  by = list(Manufacturer = car$Manufacturer, Model = car$Model),
  FUN = sum
)
# Use the full column name: `top$Amount` only worked through `$` partial
# matching, which breaks silently on tibbles or if a second column starting
# with "Amount" is ever added.
top <- top[order(top$Amount_of_Car_Sold, decreasing = TRUE), ]
top <- head(top, 15)
# Label text shown next to each point: "<Model> <total> Units".
top <- top %>%
  mutate(text = paste(Model, Amount_of_Car_Sold, "Units"))
# From here on `top` holds the ggplot object, not the data frame.
top <- ggplot(
  top,
  aes(x = reorder(Manufacturer, -Amount_of_Car_Sold), y = Amount_of_Car_Sold)
) +
  geom_point(color = "red", alpha = 0.4) +
  geom_label_repel(
    aes(label = text),
    hjust = 0, size = 3, fill = "#2471A3", alpha = 0.4
  ) +
  labs(x = "Manufacturer", y = "Quantity (Units)") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
top
In the USA, Ford is leading the sales, because a strong sense of nationalism has been part of the culture since the beginning and Ford is an American brand. It also wins customers' hearts because the manufacturer really knows its customers: it absorbs what they truly need in terms of design, price and specification.
And the top sales by revenue:
*Revenue = Sales_in_thousands × Price_in_thousands
# Revenue per manufacturer/model pair (sales figure times price), reduced to
# the 15 largest totals and plotted as labelled points.
# NOTE(review): both inputs are named "*_in_thousands", so the product is
# presumably in millions of US$ and revenue / 1000 in billions -- confirm that
# the "mill" label suffix and the "Revenue (US$)" axis title state the
# intended units.
# Plain assignment is enough here: `car` is a data.frame, so adding a column
# to `tr` does not modify `car`, and data.table::copy() is not needed.
tr <- car
tr$revenue <- tr$Sales_in_thousands * tr$Price_in_thousands
tr <- aggregate(revenue ~ Manufacturer + Model, data = tr, FUN = sum)
tr <- tr[order(tr$revenue, decreasing = TRUE), ]
tr <- head(tr, 15)
# Label text shown next to each point: "<Model> <revenue / 1000> mill".
tr <- tr %>%
  mutate(textr = paste(Model, round(revenue / 1000, 2), "mill"))
# From here on `tr` holds the ggplot object, not the data frame.
tr <- ggplot(tr, aes(x = reorder(Manufacturer, -revenue), y = revenue)) +
  geom_point(color = "red", alpha = 0.4) +
  geom_label_repel(
    aes(label = textr),
    hjust = 0, size = 3, fill = "#2471A3", alpha = 0.4
  ) +
  labs(x = "Manufacturer", y = "Revenue (US$)") +
  # scale_y_continuous(labels = comma)+
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
tr
8 out of the top 10 belong to American manufacturers! What a successful move they have made!
Top Sales by Group
# Total revenue per manufacturer. `tg` keeps every manufacturer, sorted by
# descending revenue; `pg` is a bar chart of the 15 largest.
# NOTE(review): revenue is Sales_in_thousands * Price_in_thousands, so it is
# presumably already in millions of US$ (matching the axis title), while the
# bar labels show revenue / 1000 -- confirm the labels use the intended unit.
# The row-wise product needs no grouping, so the former group_by(), which was
# never ungrouped, is dropped; copy() is likewise unnecessary on a data.frame.
tg <- car
tg$revenue <- tg$Sales_in_thousands * tg$Price_in_thousands
# na.rm = TRUE is forwarded to sum(), so rows with a missing revenue are
# ignored instead of turning a manufacturer's total into NA.
tg <- aggregate(
  x = list(revenue = tg$revenue),
  by = list(Manufacturer = tg$Manufacturer),
  FUN = sum,
  na.rm = TRUE
)
tg <- tg[order(tg$revenue, decreasing = TRUE), ]
pg <- head(tg, 15)
# Numeric label printed on each bar.
pg <- pg %>%
  mutate(textg = round(revenue / 1000, 2))
# From here on `pg` holds the ggplot object, not the data frame.
pg <- ggplot(pg, aes(x = reorder(Manufacturer, -revenue), y = revenue)) +
  geom_col(width = 0.8, alpha = 0.5, fill = "#2471A3") +
  geom_label(aes(label = textg), size = 3, fill = "white") +
  labs(x = "Manufacturer", y = "Revenue US$ million") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
pg
Ford leads the US car market, followed by Dodge, Toyota and Honda.
Let's see the market share!
# Share of total revenue per manufacturer, in percent with two decimals.
tg$percentage <- round(tg$revenue / sum(tg$revenue) * 100, 2)
# Keep manufacturers with a share of at least 2.79 %.
# The threshold must be numeric: comparing against the string "2.79" coerces
# `percentage` to character and compares lexically, so a share such as 10.5
# would be dropped because "10.5" sorts before "2.79".
# mutate() with paste() is vectorised, so no grouping is required.
ms <- tg %>%
  filter(percentage >= 2.79) %>%
  mutate(textm = paste(percentage, "%"))
# Pie-style chart: stacked columns wrapped into polar coordinates.
# From here on `ms` holds the ggplot object, not the data frame.
ms <- ggplot(
  data = ms,
  aes(x = percentage, y = Manufacturer, fill = Manufacturer)
) +
  geom_col(width = 1) +
  geom_label_repel(aes(label = textm), hjust = 0, size = 3, fill = "white") +
  coord_polar(theta = "y") +
  theme(legend.position = "none")
ms
Ford leads with 25.43% of the market share in the US car market.