# Set every locale category to Traditional Chinese (UTF-8); the script below
# uses Chinese strings for plot labels and category values.
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") 
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"
# Load all packages in one call through pacman.
pacman::p_load(dplyr,tidyverse,pROC,caTools,ISLR,showtext)
# Restore the saved workspaces; the code below reads `UL_6sale` and `UL_sale`,
# presumably provided by these two files (verify their contents).
load("UL_6sale.rdata")
load("UL_sale.rdata")

Todo list:

# Add a `year` column (four-digit year as a character string) derived from
# the sale date.
# str(UL_sale)
# Convert in place first so that `UL_sale` itself also carries a Date column.
UL_sale$sale_date <- as.Date(UL_sale$sale_date)
# Inside mutate() refer to the column by its bare name so the value is taken
# from the data being piped in, not looked up again in the global `UL_sale`
# (which would silently misalign if the pipeline ever filtered or reordered).
UL_sale_new <- UL_sale %>%
  mutate(year = format(sale_date, format = "%Y"))

不同經銷商在三年內(2018~2020)賣國產/進口車的台數

# Stacked bar chart of units sold per dealer, split by `from`
# (domestic vs. imported); dealers are ordered by descending count.
count(UL_sale_new, dealercode, from) %>%
  ggplot(aes(x = reorder(dealercode, -n), y = n, fill = from)) +
  geom_col(position = "stack") +
  geom_text(aes(label = n), position = "stack") +
  labs(x = "經銷商代碼", y = "數量") +
  theme(text = element_text(family = "STKaiti")) # text comes out garbled on mac

  • 國產最多:YA;最少:YF
  • 進口最多:ES;最少:KT

不同經銷商在三年內(2018~2020)賣轎車/休旅車/跑車的台數

# Stacked bar chart of units sold per dealer for the two vehicle types
# "轎車" (sedan) and "休旅車" (SUV).
#
# `%in%` is required for the filter: `type == c(a, b)` recycles the
# two-element vector along the rows, so each row is compared with only one of
# the two values and roughly half of the matching rows are silently dropped.
UL_sale_new %>%
  filter(type %in% c("轎車", "休旅車")) %>%
  group_by(dealercode, type) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  # vjust accepts a number or a keyword such as "top"/"bottom"/"middle";
  # the previous "top-right" is not one of them.
  geom_text(aes(label = n), position = position_stack(0.1), vjust = "top") +
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

  • 休旅車最多:YA>ES
  • 轎車最多:YA>YK
# Closer look at electric vehicles ("電動車") and sports cars ("跑車"):
# stacked bar chart of units sold per dealer for these two types.
#
# `%in%` is required for the filter: `type == c(a, b)` recycles the
# two-element vector along the rows and silently drops about half of the
# matching rows.
UL_sale_new %>%
  filter(type %in% c("電動車", "跑車")) %>%
  group_by(dealercode, type) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  # Stack the labels as well so each one sits on its own bar segment instead
  # of at the raw count height.
  geom_text(aes(label = n), position = "stack") +
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

  • 電動車最多:ES>YK
  • 跑車最多:ES
  • 銷售車種排名:休旅車>轎車>電動車>跑車

不同年份賣轎車/休旅車/跑車的台數

# Print the number of units sold for every (year, type) combination.
# The result stays grouped by year and type.
count(group_by(UL_sale_new, year, type))
## # A tibble: 11 x 3
## # Groups:   year, type [11]
##    year  type       n
##    <chr> <chr>  <int>
##  1 2018  轎車   16062
##  2 2018  跑車      12
##  3 2018  休旅車 18057
##  4 2019  電動車    50
##  5 2019  轎車    8719
##  6 2019  跑車       9
##  7 2019  休旅車 26872
##  8 2020  電動車    89
##  9 2020  轎車   10425
## 10 2020  跑車       5
## 11 2020  休旅車 24532
# Stacked bar chart of units sold per year, split by vehicle type.
UL_sale_new %>%
  group_by(year, type) %>%
  count() %>%
  ggplot(aes(year, n, fill = type)) +
  geom_col(position = "stack") +
  # vjust accepts a number or a keyword such as "top"/"bottom"/"middle";
  # the previous "top-right" is not one of them.
  geom_text(aes(label = n), position = position_stack(0.3), vjust = "top") +
  xlab("年份") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

  • 跑車都不高,逐年降低:12->9->5
  • 休旅車在2019最多,轎車在2018最多

不同年份賣國產/進口車的台數

# Stacked bar chart of units sold per year, split by `from`
# (domestic vs. imported).
UL_sale_new %>%
  group_by(year, from) %>%
  count() %>%
  ggplot(aes(year, n, fill = from)) +
  geom_col(position = "stack") +
  # The bars are stacked, so the labels must be stacked too; with the default
  # identity position every label is drawn at its raw count height and the
  # label of the upper segment lands in the wrong place.
  geom_text(aes(label = n), position = "stack") +
  xlab("年份") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

  • 進口車逐年增加,國產車2019最多

不同年份中各經銷商賣國產/進口車的台數

# One panel per year: stacked bars of units sold per dealer, split by `from`
# (domestic vs. imported).
UL_sale_new %>%
  group_by(from, year, dealercode) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = from)) +
  geom_col(position = "stack") +
  facet_wrap(year ~ ., ncol = 1) +
  geom_text(aes(label = n), position = position_stack(0.9)) +
  # The x axis shows dealer codes (the year is the facet), so label it as
  # dealer code rather than year.
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

不同年份中各經銷商賣轎車/休旅車的台數

# One panel per year: stacked bars of units sold per dealer for the two
# vehicle types "轎車" (sedan) and "休旅車" (SUV).
#
# NOTE(review): par(mfrow = ...) sets the base-graphics panel layout; ggplot2
# draws through grid, so this presumably has no effect on the plot below.
# It is kept so the global graphics state seen by later code is unchanged.
par(mfrow = c(4, 2))
# `%in%` is required for the filter: `type == c(a, b)` recycles the
# two-element vector along the rows and silently drops about half of the
# matching rows.
UL_sale_new %>%
  filter(type %in% c("轎車", "休旅車")) %>%
  group_by(type, year, dealercode) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  facet_wrap(year ~ ., ncol = 1) +
  geom_text(aes(label = n), position = position_stack(0.9)) +
  # The x axis shows dealer codes (the year is the facet), so label it as
  # dealer code rather than year.
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))

銷售員的學歷分佈

# Pie chart of the percentage share of each `school` value among the rows of
# `UL_6sale`; each slice is labelled with its share rounded to two decimals.
count(UL_6sale, school, sort = TRUE) %>%
  mutate(prop = n / sum(n) * 100) %>%
  ggplot(aes(x = "", y = prop, fill = school)) +
  geom_col(width = 1, color = "white") +
  geom_text(
    aes(label = round(prop, digits = 2)),
    position = position_stack(vjust = 0.5)
  ) +
  # Polar coordinates on y turn the single stacked bar into a pie.
  coord_polar(theta = "y", start = 0) +
  theme(text = element_text(family = "STKaiti"))

⁃ 男女sale的married分佈

# Bar chart of the number of rows in `UL_6sale` per `married` value,
# stacked and coloured by `sex`.
count(UL_6sale, sex, married) %>%
  ggplot(aes(x = married, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual("sex", values = c("男" = "#5167d2", "女" = "#FF6666")) +
  theme(text = element_text(family = "STKaiti"))

⁃ 男女sale的age分佈

# Bin `age` into bands stored as character labels in `UL_6sale$age_n`.
#
# case_when() uses the first matching condition, so each line only needs the
# upper bound of its band:
#   below20: age < 20      20-25: 20 <= age < 26   26-30: 26 <= age < 31
#   31-35 ... 56-60: five-year bands up to age < 61
#   60up:    age >= 61     (a missing age stays NA)
#
# Fixes over the previous version:
#   * age == 20 matched no band and leaked through as the literal "20";
#     it now falls into "20-25" (formerly labelled "21-25").
#   * the bands labelled "25-30" and "35-40" actually covered 26-30 and
#     36-40; the labels now match the ranges.
UL_6sale$age_n <- with(UL_6sale, case_when(
  age < 20 ~ "below20",
  age < 26 ~ "20-25",
  age < 31 ~ "26-30",
  age < 36 ~ "31-35",
  age < 41 ~ "36-40",
  age < 46 ~ "41-45",
  age < 51 ~ "46-50",
  age < 56 ~ "51-55",
  age < 61 ~ "56-60",
  age >= 61 ~ "60up",
  TRUE ~ NA_character_
))
UL_6sale$age_n %>% table
# NOTE: the output below was recorded before the band labels were corrected;
# "21-25", "25-30" and "35-40" now print as "20-25", "26-30" and "36-40".
## .
##   21-25   25-30   31-35   35-40   41-45   46-50   51-55   56-60    60up below20 
##     381     717     464     247     190     131     139      95      31       3
# Bar chart of the number of rows in `UL_6sale` per age band (`age_n`),
# stacked and coloured by `sex`; bands are ordered by descending count.
count(UL_6sale, sex, age_n) %>%
  ggplot(aes(x = reorder(age_n, -n), y = n, fill = sex)) +
  geom_col() +
  geom_text(aes(label = n), position = "stack") +
  labs(x = "年齡", y = "數量") +
  scale_fill_manual("sex", values = c("男" = "#5167d2", "女" = "#FF6666")) +
  theme(text = element_text(family = "STKaiti"))