# Switch every locale category to Traditional Chinese (UTF-8).
Sys.setlocale("LC_ALL", "zh_TW.UTF-8")
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"

# Attach the packages used by this script through pacman.
pacman::p_load(dplyr, tidyverse, pROC, caTools, ISLR, showtext)

# Restore the saved workspaces; UL_6sale and UL_sale, used below, are
# presumably provided by these files (they are not created anywhere else here).
load("UL_6sale.rdata")
load("UL_sale.rdata")

# Add a `year` column.
# str(UL_sale)  # uncomment to inspect the structure of the raw data
UL_sale$sale_date <- as.Date(UL_sale$sale_date)
# `year` is the four-digit year of sale_date, stored as a character string.
UL_sale_new <- mutate(UL_sale, year = format(sale_date, format = "%Y"))
# Stacked bar chart of row counts per dealercode, one segment per `from`.
# The x axis is ordered with reorder(dealercode, -n), i.e. by the mean of -n
# over each dealer's rows. (Unordered alternative: aes(dealercode, n, fill = from).)
UL_sale_new %>%
  group_by(dealercode, from) %>%
  count() %>%
  ggplot(mapping = aes(x = reorder(dealercode, -n), y = n, fill = from)) +
  geom_col(position = position_stack()) +
  # Print each count at the top of its stacked segment.
  geom_text(mapping = aes(label = n), position = position_stack()) +
  labs(x = "經銷商代碼", y = "數量") +
  # Original author's note: this font setting renders garbled text on macOS.
  theme(text = element_text(family = "STKaiti"))
# Stacked bar chart of row counts per dealercode for the two vehicle types
# "轎車" and "休旅車", one segment per type.
UL_sale_new %>%
  # Use %in% for set membership. `type == c(a, b)` recycles the two values
  # across the rows (row 1 vs a, row 2 vs b, ...) and silently drops rows
  # that do match one of the two types.
  filter(type %in% c("轎車", "休旅車")) %>%
  group_by(dealercode, type) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  # Labels are anchored 10% of the way up each stacked segment.
  # NOTE(review): "top-right" is not one of ggplot2's documented vjust
  # keywords ("bottom", "middle", "top", "inward", "outward"); confirm the
  # intended label placement.
  geom_text(
    aes(label = n),
    position = position_stack(0.1),
    vjust = "top-right"
  ) +
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))
# Closer look at the two remaining vehicle types, "電動車" and "跑車":
# stacked bar chart of row counts per dealercode, one segment per type.
UL_sale_new %>%
  # Use %in% for set membership. `type == c(a, b)` recycles the two values
  # across the rows and silently drops rows that do match one of the types.
  filter(type %in% c("電動車", "跑車")) %>%
  group_by(dealercode, type) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  # Stack the labels the same way as the columns so that each count sits on
  # its own segment instead of at the raw value of n.
  geom_text(aes(label = n), position = "stack") +
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))
# Tabulate the number of rows for every year/type combination. The result
# stays grouped by year and type, as the recorded output below shows.
count(group_by(UL_sale_new, year, type))
## # A tibble: 11 x 3
## # Groups: year, type [11]
## year type n
## <chr> <chr> <int>
## 1 2018 轎車 16062
## 2 2018 跑車 12
## 3 2018 休旅車 18057
## 4 2019 電動車 50
## 5 2019 轎車 8719
## 6 2019 跑車 9
## 7 2019 休旅車 26872
## 8 2020 電動車 89
## 9 2020 轎車 10425
## 10 2020 跑車 5
## 11 2020 休旅車 24532
# Stacked bar chart of row counts per year, one segment per `type`.
UL_sale_new %>%
  group_by(year, type) %>%
  count() %>%
  ggplot(mapping = aes(x = year, y = n, fill = type)) +
  geom_col(position = position_stack()) +
  # Labels are anchored 30% of the way up each stacked segment.
  # NOTE(review): "top-right" is not one of ggplot2's documented vjust
  # keywords; confirm the intended label placement.
  geom_text(
    mapping = aes(label = n),
    position = position_stack(vjust = 0.3),
    vjust = "top-right"
  ) +
  labs(x = "年份", y = "數量") +
  theme(text = element_text(family = "STKaiti"))
# Stacked bar chart of row counts per year, one segment per `from`.
UL_sale_new %>%
  group_by(year, from) %>%
  count() %>%
  ggplot(aes(year, n, fill = from)) +
  geom_col(position = "stack") +
  # Stack the labels the same way as the columns; without a stacking
  # position every label is drawn at the raw value of n, not on its segment.
  geom_text(aes(label = n), position = "stack") +
  xlab("年份") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))
# Row counts per dealercode split by `from`, drawn as stacked bars with one
# facet row per year.
UL_sale_new %>%
  group_by(from, year, dealercode) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = from)) +
  geom_col(position = "stack") +
  facet_wrap(year ~ ., ncol = 1) +
  # Labels are anchored 90% of the way up each stacked segment.
  geom_text(aes(label = n), position = position_stack(0.9)) +
  # The x axis shows dealer codes (year is the facet variable), so it is
  # labelled as in the other per-dealer charts rather than "年份".
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))
# NOTE(review): par(mfrow = ...) configures base-graphics layouts; ggplot2
# draws with grid, so this presumably has no effect on the plot below.
# Confirm whether it is still needed.
par(mfrow = c(4, 2))
# Row counts per dealercode for the types "轎車" and "休旅車", drawn as stacked
# bars with one facet row per year.
UL_sale_new %>%
  # Use %in% for set membership. `type == c(a, b)` recycles the two values
  # across the rows and silently drops rows that do match one of the types.
  filter(type %in% c("轎車", "休旅車")) %>%
  group_by(type, year, dealercode) %>%
  count() %>%
  ggplot(aes(reorder(dealercode, -n), n, fill = type)) +
  geom_col(position = "stack") +
  facet_wrap(year ~ ., ncol = 1) +
  # Labels are anchored 90% of the way up each stacked segment.
  geom_text(aes(label = n), position = position_stack(0.9)) +
  # The x axis shows dealer codes (year is the facet variable), so it is
  # labelled as in the other per-dealer charts rather than "年份".
  xlab("經銷商代碼") + ylab("數量") +
  theme(text = element_text(family = "STKaiti"))
# Pie chart of the percentage of UL_6sale rows per `school` value.
UL_6sale %>%
  count(school) %>%
  arrange(desc(n)) %>%
  # prop is each school's share of all rows, in percent.
  mutate(prop = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = "", y = prop, fill = school)) +
  # A single full-width stacked bar; the polar transform below turns it
  # into a pie.
  geom_col(width = 1, color = "white") +
  # Percentage (two decimals) centred in each slice.
  geom_text(
    mapping = aes(label = round(prop, digits = 2)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y", start = 0) +
  theme(text = element_text(family = "STKaiti"))
# 男女sale的married分佈
# Bar chart of row counts per `married` value, stacked and coloured by `sex`.
UL_6sale %>%
  group_by(sex, married) %>%
  count() %>%
  ggplot(mapping = aes(x = married, y = n, fill = sex)) +
  geom_col() +
  # Fixed colours for the two sex values; the legend is titled "sex".
  scale_fill_manual("sex", values = c("男" = "#5167d2", "女" = "#FF6666")) +
  theme(text = element_text(family = "STKaiti"))
# 男女sale的age分佈
# Bucket UL_6sale$age into five-year bands, stored as character labels in
# UL_6sale$age_n.
#
# Fixes over the previous case_when() version:
#   * an age of exactly 20 matched no band and ended up as its own "20"
#     category; it now falls into the lowest band ("under21");
#   * the bands covering 26-30 and 36-40 were labelled "25-30" and "35-40".
#
# Bands are left-closed: [21, 26) -> "21-25", ..., [56, 61) -> "56-60",
# 61 and above -> "60up". NA ages stay NA.
UL_6sale$age_n <- as.character(
  cut(
    UL_6sale$age,
    breaks = c(-Inf, seq(21, 61, by = 5), Inf),
    labels = c(
      "under21", "21-25", "26-30", "31-35", "36-40",
      "41-45", "46-50", "51-55", "56-60", "60up"
    ),
    right = FALSE
  )
)
# Recorded output with the corrected band labels. The earlier run showed no
# "20" category, so the counts are unchanged for whole-number ages.
UL_6sale$age_n %>% table
## .
## 21-25 26-30 31-35 36-40 41-45 46-50 51-55 56-60 60up under21
## 381 717 464 247 190 131 139 95 31 3
# Stacked bar chart of row counts per age band (age_n), one segment per sex.
# Bands are ordered with reorder(age_n, -n), i.e. by the mean of -n over each
# band's rows, not by age.
UL_6sale %>%
  group_by(sex, age_n) %>%
  count() %>%
  ggplot(mapping = aes(x = reorder(age_n, -n), y = n, fill = sex)) +
  geom_col() +
  # Print each count at the top of its stacked segment.
  geom_text(mapping = aes(label = n), position = position_stack()) +
  labs(x = "年齡", y = "數量") +
  # Fixed colours for the two sex values; the legend is titled "sex".
  scale_fill_manual("sex", values = c("男" = "#5167d2", "女" = "#FF6666")) +
  theme(text = element_text(family = "STKaiti"))