# Set every locale category (LC_ALL) to zh_TW.UTF-8.
Sys.setlocale("LC_ALL", "zh_TW.UTF-8")
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"
# Load the packages used by this analysis.
pacman::p_load(dplyr, tidyverse, pROC, caTools, ISLR, showtext)
# Build the four-way segmentation (`class`) used by the plots below.
# `N_20.rdata` is expected to provide the data frame `N_20`; its `not_off`
# column is thresholded into a 0/1 flag and combined with `N_next_year`.
load("N_20.rdata")

leave_cutoff <- 0.31  # not_off above this value is flagged as 1
sales_cutoff <- 34    # N_next_year above this value counts as high sales

newN_20 <- N_20
newN_20$not_off <- ifelse(N_20$not_off > leave_cutoff, 1, 0)

# The low-sales branches use `<=` so a value exactly equal to the cutoff is
# classified as low sales instead of silently becoming NA (the original
# `> 34` / `< 34` pair left that boundary uncovered).
newN_20$class <- case_when(
  newN_20$not_off == 1 & newN_20$N_next_year >  sales_cutoff ~ 'A:離職高銷售',
  newN_20$not_off == 1 & newN_20$N_next_year <= sales_cutoff ~ 'B:離職低銷售',
  newN_20$not_off == 0 & newN_20$N_next_year >  sales_cutoff ~ 'C:在職高銷售',
  newN_20$not_off == 0 & newN_20$N_next_year <= sales_cutoff ~ 'D:在職低銷售'
)

# Class sizes (recorded console output follows).
newN_20$class %>% table
## .
## A:離職高銷售 B:離職低銷售 C:在職高銷售 D:在職低銷售
## 15 275 480 217

# Persist the classified data for later use.
save(newN_20, file = "N_20class.rdata")
#34
# Scatter plot of the raw `not_off` value against `N_next_year`, coloured by
# class. `W` copies the classified data and restores `not_off` to the original
# (un-thresholded) values from `N_20` so it can be used as a continuous x axis.
W <- newN_20
W$not_off <- N_20$not_off
table(W$class)
##
## A:離職高銷售 B:離職低銷售 C:在職高銷售 D:在職低銷售
## 15 275 480 217
ggplot(W, aes(x = not_off, y = N_next_year, colour = class)) +
  # alpha must lie in [0, 1]; the original value 2 was out of range.
  # 1 keeps the points fully opaque.
  geom_point(size = 1.5, alpha = 1) +
  labs(x = "離職機率", y = "預測銷售台數") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS

Todo list:
- 主要特徵:
  - 新產品訓練時數 newproduct_t
  - 年資 seniority
  - ratio_last_all 最後一年銷售量/平均銷售比率
  - n_all 總銷售量
  - SSI 分數 avg_ssi
# Prepare column types for the plots below:
#   * every character column becomes a factor,
#   * the 0/1 `not_off` flag becomes a factor,
#   * age, seniority and N_next_year are rounded to whole numbers.
# `across(where(...))` replaces the superseded `mutate_if()`.
newN_20 <- newN_20 %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(
    not_off = as.factor(not_off),
    across(c(age, seniority, N_next_year), round)
  )
# Disabled in the original: newN_20$newproduct_t = as.factor(newN_20$newproduct_t)

# Inspect the resulting structure (recorded console output follows).
str(newN_20)
## 'data.frame': 987 obs. of 27 variables:
## $ V1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ sex : Factor w/ 2 levels "男","女": 2 1 1 1 2 1 1 1 2 2 ...
## $ position : Factor w/ 5 levels "銷售副理","銷售高級專員",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ distributor : Factor w/ 9 levels "EM","ES","HL",..: 6 4 4 4 4 2 3 2 2 2 ...
## $ age : num 28 29 37 37 37 37 28 37 37 37 ...
## $ school : Factor w/ 6 levels "大學","高中(含)以下",..: 3 1 6 6 6 6 5 6 6 6 ...
## $ married : Factor w/ 3 levels "N","Not_Provide",..: 1 2 2 2 2 2 3 2 2 2 ...
## $ seniority : num 2 0 0 0 0 7 0 5 0 0 ...
## $ past_job : Factor w/ 7 levels "服務業","技師/技術員/作業員/工程師/維修員",..: 4 7 7 7 7 7 7 7 7 7 ...
## $ past_job_data : int 1 0 0 0 0 0 0 0 0 0 ...
## $ sellcar : int 1 1 0 0 0 0 0 0 0 0 ...
## $ newproduct : int 23 0 0 0 0 0 0 0 0 0 ...
## $ fresh : int 28 0 0 0 0 0 0 0 0 0 ...
## $ reward : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_last : int 29 0 0 0 0 0 0 0 0 0 ...
## $ n_all : int 66 0 0 0 0 46 0 2 0 0 ...
## $ avg_n_all : num 28.7 0 0 0 0 ...
## $ ratio_last_all: num 1.01 0 0 0 0 ...
## $ newproduct_t : int 7 0 0 0 0 0 0 0 0 0 ...
## $ fresh_t : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Reward_t : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ssi_last : num 100 99.8 99.8 99.8 99.8 99.8 99.8 99.8 99.8 99.8 ...
## $ ssi_t : num 99.9 97.5 97.5 97.5 97.5 ...
## $ avg_ssi : num 99.9 98.7 98.7 98.7 98.7 ...
## $ not_off : Factor w/ 2 levels "0","1": 1 2 2 2 2 1 2 1 2 2 ...
## $ N_next_year : num 34 4 3 3 3 1 4 2 3 3 ...
## $ class : Factor w/ 4 levels "A:離職高銷售",..: 3 2 2 2 2 4 2 4 2 2 ...
離職特徵:前一年新產品訓練時數 newproduct_t
# Box plot of newproduct_t for each level of the not_off flag.
ggplot(newN_20, aes(x = not_off, y = newproduct_t, colour = not_off)) +
  geom_boxplot() +
  labs(x = "是否離職", y = "前一年的新產品訓練時數") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS
# Head count per newproduct_t value, one facet per not_off level, bars filled
# by class.
# `class` is kept in the count: the original summarised by (not_off,
# newproduct_t2) only, which dropped `class` before it was mapped to `fill`.
newN_20 %>%
  count(not_off, class, newproduct_t2 = as.factor(newproduct_t)) %>%
  ggplot(aes(x = newproduct_t2, y = n, fill = class)) +
  geom_col(position = "dodge") +
  # Dodge the labels with the same width as the bars so each count sits above
  # its own bar instead of stacking at the centre of the x position.
  geom_text(
    aes(label = n),
    position = position_dodge(width = 0.9),
    vjust = -0.3
  ) +
  facet_wrap(not_off ~ ., ncol = 1) +
  # The fill shows the class, so the legend title names it as such.
  labs(x = "前一年的新產品訓練時數", y = "人數", fill = "類別") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS
# Head count per newproduct_t value, one facet (and fill colour) per not_off
# level.
newN_20 %>%
  group_by(not_off, newproduct_t2 = as.factor(newproduct_t)) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = newproduct_t2, y = n, fill = not_off)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = n)) +
  facet_wrap(not_off ~ ., ncol = 1) +
  labs(x = "前一年的新產品訓練時數", y = "人數", fill = "是否離職") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS
離職特徵:前一年新產品訓練時數 newproduct_t
# Head count per newproduct_t value, one facet (and fill colour) per class.
newN_20 %>%
  group_by(newproduct_t2 = as.factor(newproduct_t), class) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  ggplot(aes(x = newproduct_t2, y = n, fill = class)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = n)) +
  facet_wrap(class ~ ., ncol = 1) +
  # The fill shows the four-way class, not the binary resignation flag, so the
  # legend title is "類別" as in the other class-based plots.
  labs(x = "前一年的新產品訓練時數", y = "人數", fill = "類別") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS
## `summarise()` has grouped output by 'newproduct_t2'. You can override using the `.groups` argument.

# Box plot of newproduct_t for each class.
ggplot(newN_20, aes(x = class, y = newproduct_t, colour = class)) +
  geom_boxplot() +
  # The x axis shows the four-way class, so it is labelled "類別" rather than
  # "是否離職" (which describes the binary not_off flag).
  labs(x = "類別", y = "前一年的新產品訓練時數") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS

離職特徵:新產品訓練時數 newproduct
# Box plot of newproduct for each class.
ggplot(newN_20, aes(x = class, y = newproduct, colour = class)) +
  geom_boxplot() +
  # The x axis shows the four-way class, so it is labelled "類別" rather than
  # "是否離職" (which describes the binary not_off flag).
  labs(x = "類別", y = "新產品訓練時數") +
  theme(text = element_text(family = "STKaiti"))  # original note: garbled text on macOS

離職特徵:年資 seniority
# Head count at each (rounded) seniority value, one line and facet per class.
seniority_counts <- count(newN_20, class, seniority)
ggplot(seniority_counts, aes(x = seniority, y = n, colour = class)) +
  geom_line() +
  xlab("年資") +
  ylab("人數") +
  facet_wrap(class ~ .) +
  theme(text = element_text(family = "STKaiti"))

離職特徵:ratio_last_all 最後一年銷售量/平均銷售比率
# Box plot of ratio_last_all for each class.
ggplot(newN_20, aes(x = class, y = ratio_last_all, colour = class)) +
  geom_boxplot() +
  labs(x = "類別", y = "最後一年銷售量/平均銷售比率") +
  theme(text = element_text(family = "STKaiti"))

離職特徵:n_all 總銷售量
# Box plot of n_all for each class.
ggplot(newN_20, aes(x = class, y = n_all, colour = class)) +
  geom_boxplot() +
  labs(x = "類別", y = "總銷售量") +
  theme(text = element_text(family = "STKaiti"))

離職特徵:SSI 分數
# Box plot of avg_ssi for each class.
ggplot(newN_20, aes(x = class, y = avg_ssi, colour = class)) +
  geom_boxplot() +
  labs(x = "類別", y = "SSI分數") +
  theme(text = element_text(family = "STKaiti"))

離職特徵:年齡 age
# Head count at each (rounded) age, one line and facet per class.
age_counts <- count(newN_20, class, age)
ggplot(age_counts, aes(x = age, y = n, colour = class)) +
  geom_line() +
  xlab("年齡") +
  ylab("人數") +
  facet_wrap(class ~ .) +
  theme(text = element_text(family = "STKaiti"))

# Share of each age within its class (original note: the differences are small).
newN_20 %>%
  group_by(class, age) %>%
  summarise(n = n()) %>%
  # summarise() leaves the result grouped by class, so the share is computed
  # within each class.
  mutate(rate = prop.table(n)) %>%
  ungroup() %>%
  ggplot(aes(x = age, y = rate, colour = class)) +
  geom_line() +
  xlab("年齡") +
  ylab("比率") +
  facet_wrap(class ~ .) +
  theme(text = element_text(family = "STKaiti"))
## `summarise()` has grouped output by 'class'. You can override using the `.groups` argument.

離職特徵:婚姻狀況 married
# Head count per marital-status value, one facet per class; bars are ordered
# by descending count via reorder(married, -n).
newN_20 %>%
  count(class, married) %>%
  ggplot(aes(x = reorder(married, -n), y = n)) +
  geom_col() +
  # The x axis shows `married`; the original label "年齡" (age) was a
  # copy-paste leftover from the age plots.
  labs(x = "婚姻狀況", y = "人數") +
  facet_wrap(class ~ .) +
  theme(text = element_text(family = "STKaiti"))
