Data Preparation
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
library(readxl)
Adjust Variable Type
data <- read_excel("~/Downloads/商分/同意试用客户43家251223.xlsx")
str(data)
## tibble [43 × 27] (S3: tbl_df/tbl/data.frame)
## $ *企业名称 : chr [1:43] "深圳市腾彩洗护用品有限公司" "陆令数码科技有限公司" "星石(深圳)品牌管理有限公司" "学而思" ...
## $ *负责人 : chr [1:43] NA NA NA NA ...
## $ 现+前负责人 : chr [1:43] "黄水武" "黄宗继" "邓莉" "刘海明" ...
## $ outlier : chr [1:43] NA NA NA NA ...
## $ outlier原因 : chr [1:43] NA NA NA NA ...
## $ *客户状态 : chr [1:43] "同意试用" "试用中" "试用中" "试用中" ...
## $ *客户来源 : chr [1:43] "渠道转介绍" "BOSS搜索" "陌拜" "BOSS搜索" ...
## $ *详细地址(必填)-省 : chr [1:43] "广东省" "广东省" "广东省" "浙江省" ...
## $ 商品类目 : chr [1:43] "日用百货_个人护理" "3C数码" "美妆护肤_美容工具/仪器" "图书教育" ...
## $ 抖音是否为主要销售渠道 : chr [1:43] NA NA "否" "否" ...
## $ 抖音占比 : num [1:43] NA NA NA NA NA NA 40 NA NA NA ...
## $ 客服情况 : chr [1:43] "国内自有客服" "运营兼带回复" "国内自有客服,国内外包客服" "国内自有客服" ...
## $ 客服人数 : chr [1:43] "12" NA "2" "50" ...
## $ 全平台客服人数 : chr [1:43] NA NA NA "125" ...
## $ pitch对象 : chr [1:43] "电商负责人" NA "平台负责人(如抖音负责人)" NA ...
## $ pitch对象(其他) : chr [1:43] NA NA NA "IT/信息安全负责人" ...
## $ 日均单量 : chr [1:43] "20000" NA "3000" NA ...
## $ 全平台日均单量 : chr [1:43] NA NA NA NA ...
## $ 日咨询量 : chr [1:43] NA "40" "1500" "5000" ...
## $ 全平台日咨询量 : chr [1:43] NA NA NA "10000" ...
## $ 客服人均接待量(每日) : chr [1:43] NA NA "300" "100" ...
## $ 客服月工资 : chr [1:43] NA NA NA "6000" ...
## $ 客服月休几天 : chr [1:43] NA NA NA "4" ...
## $ 外包客服成本(元/接待): chr [1:43] NA NA NA NA ...
## $ 当前客服工具 : chr [1:43] "飞鸽" NA "未使用" "飞鸽" ...
## $ 客户信息已核实 : chr [1:43] "是" "是" "是" "是" ...
## $ 备注(商分) : chr [1:43] NA NA "自有客服团队1人负责售前,1人负责售后,其余外包" "客服人数在100-150,高峰200;客服团队base在江苏" ...
data$客服人数 <- as.numeric(data$客服人数)
data$全平台客服人数 <- as.numeric(data$全平台客服人数)
data$日均单量 <- as.numeric(data$日均单量)
## Warning: NAs introduced by coercion
data$全平台日均单量 <- as.numeric(data$全平台日均单量)
data$日咨询量 <- as.numeric(data$日咨询量)
data$全平台日咨询量 <- as.numeric(data$全平台日咨询量)
data$`客服人均接待量(每日)` <- as.numeric(data$`客服人均接待量(每日)`)
data$客服月工资 <- as.numeric(data$客服月工资)
data$客服月休几天 <- as.numeric(data$客服月休几天)
data$`外包客服成本(元/接待)` <- as.numeric(data$`外包客服成本(元/接待)`)
Filter Relevant Observations
nrow(data)
## [1] 43
data <- data %>% filter(客户信息已核实 == "是", is.na(outlier))
nrow(data)
## [1] 36
Create New Variables
# 1. 一级类目
data$'一级类目' <- substr(data$商品类目, 1, 4)
# 2. 人工客服接待成本(元/接待)
impute_yuexiu <- !is.na(data$客服月工资) & is.na(data$客服月休几天)
sum(impute_yuexiu)
## [1] 10
data[impute_yuexiu, '客服月休几天'] <- 4
data$'人工客服接待成本(元/接待)' <- NA
idx <- !is.na(data$'客服人均接待量(每日)') & !is.na(data$'客服月工资')
data[idx, '人工客服接待成本(元/接待)'] <- round(data[idx, '客服月工资'] / (30 - data[idx, '客服月休几天']) / data[idx, '客服人均接待量(每日)'], 2)
sum(!is.na(data$'人工客服接待成本(元/接待)'))
## [1] 25
# 3. 月客服成本
data$月客服成本 <- NA
idx <- !is.na(data$客服人数) & !is.na(data$客服月工资)
data[idx, '月客服成本'] <- data[idx, '客服人数'] * data[idx, '客服月工资']
Input Parameter Options
商品类目options <- c("服饰内衣_女装", "服饰内衣_男装", "服饰内衣_童装/婴儿装", "服饰内衣_内衣/袜子/家居服", "服饰内衣_服饰配件/皮带/帽子", "美妆护肤_护肤品", "美妆护肤_彩妆/香水", "美妆护肤_美容工具/仪器", "鞋靴箱包_女鞋", "鞋靴箱包_男鞋", "鞋靴箱包_箱包", "鞋靴箱包_童鞋/婴儿鞋", "日用百货_个人护理", "日用百货_生活日用", "日用百货_家庭清洁", "日用百货_收纳整理", "3C数码", "运动户外", "食品饮料", "珠宝饰品", "母婴用品", "厨卫家电", "汽配摩托", "医药保健", "生鲜蔬果", "礼品文创", "玩具乐器", "索具建材", "图书教育", "二手商品", "钟表配饰", "原料包装", "家居家纺", "宠物用品", "鲜花绿植", "本地生活", "奢侈品")
客服情况options <- c("国内自有客服", "国内外包客服", "运营兼带回复", "其他(请补充)", "国外客服")
pitch对象options <- c("老板", "客服负责人", "运营负责人", "电商负责人", "平台负责人(如抖音负责人)", "其他(请补充)")
当前客服工具options <- c("未使用", "自研", "多客", "乐聊", "乐言", "晓多", "飞鸽", "探域", "快麦-智库", "福客", "其他(请补充到备注)")
Data Analysis
showtext_auto()
shangpinleimu <- data.frame(options = 商品类目options, count = 0, percentage = 0)
for(i in 1:nrow(shangpinleimu)){
for(j in 1:nrow(data)){
if(grepl(商品类目options[i], data[j, '商品类目'])){
shangpinleimu[i, 'count'] <- shangpinleimu[i, 'count'] + 1
}
}
}
shangpinleimu$percentage <- round(shangpinleimu$count / sum(shangpinleimu$count) * 100, 0)
shangpinleimu <- shangpinleimu %>% arrange(desc(count))
labels <- paste0(
shangpinleimu$options, "\n",
shangpinleimu$count, "(", shangpinleimu$percentage, "%)"
)
shangpinleimu
## options count percentage
## 1 3C数码 6 14
## 2 服饰内衣_女装 5 12
## 3 服饰内衣_男装 4 9
## 4 美妆护肤_护肤品 4 9
## 5 厨卫家电 4 9
## 6 美妆护肤_彩妆/香水 3 7
## 7 日用百货_个人护理 3 7
## 8 服饰内衣_内衣/袜子/家居服 2 5
## 9 食品饮料 2 5
## 10 母婴用品 2 5
## 11 玩具乐器 2 5
## 12 服饰内衣_童装/婴儿装 1 2
## 13 美妆护肤_美容工具/仪器 1 2
## 14 运动户外 1 2
## 15 珠宝饰品 1 2
## 16 医药保健 1 2
## 17 图书教育 1 2
## 18 服饰内衣_服饰配件/皮带/帽子 0 0
## 19 鞋靴箱包_女鞋 0 0
## 20 鞋靴箱包_男鞋 0 0
## 21 鞋靴箱包_箱包 0 0
## 22 鞋靴箱包_童鞋/婴儿鞋 0 0
## 23 日用百货_生活日用 0 0
## 24 日用百货_家庭清洁 0 0
## 25 日用百货_收纳整理 0 0
## 26 汽配摩托 0 0
## 27 生鲜蔬果 0 0
## 28 礼品文创 0 0
## 29 索具建材 0 0
## 30 二手商品 0 0
## 31 钟表配饰 0 0
## 32 原料包装 0 0
## 33 家居家纺 0 0
## 34 宠物用品 0 0
## 35 鲜花绿植 0 0
## 36 本地生活 0 0
## 37 奢侈品 0 0
shangpinleimu <- shangpinleimu[shangpinleimu$count != 0, ]
pie(shangpinleimu$count,
labels = labels,
main = "商品类目分布",
col = hcl.colors(length(shangpinleimu$count), palette = "Set2"),
cex = 0.5)

cat_count <- sort(table(data$抖音是否为主要销售渠道, useNA = "no"), decreasing = TRUE)
pct <- round(cat_count / sum(cat_count) * 100, 0)
labels <- paste0(
names(cat_count), "\n",
cat_count, "(", pct, "%)"
)
douyinzhanbi <- as.data.frame(cat_count)
douyinzhanbi
## Var1 Freq
## 1 否 14
## 2 是 13
pie(cat_count,
labels = labels,
main = "抖音是否为主要销售渠道",
col = hcl.colors(length(cat_count), palette = "Set2"),
cex = 0.5)

kefuqingkuang <- data.frame(options = 客服情况options, count = 0, percentage = 0)
for(i in 1:nrow(kefuqingkuang)){
for(j in 1:nrow(data)){
if(grepl(客服情况options[i], data[j, '客服情况'])){
kefuqingkuang[i, 'count'] <- kefuqingkuang[i, 'count'] + 1
}
}
}
kefuqingkuang$percentage <- round(kefuqingkuang$count / sum(kefuqingkuang$count) * 100, 0)
kefuqingkuang <- kefuqingkuang %>% arrange(desc(count))
labels <- paste0(
kefuqingkuang$options, "\n",
kefuqingkuang$count, "(", kefuqingkuang$percentage, "%)"
)
kefuqingkuang
## options count percentage
## 1 国内自有客服 31 84
## 2 国内外包客服 4 11
## 3 运营兼带回复 2 5
## 4 其他(请补充) 0 0
## 5 国外客服 0 0
kefuqingkuang <- kefuqingkuang[kefuqingkuang$count != 0, ]
pie(kefuqingkuang$count,
labels = labels,
main = "客服情况分布",
col = hcl.colors(length(kefuqingkuang$count), palette = "Set2"),
cex = 0.5)

pitchduixiang <- data.frame(options = pitch对象options, count = 0, percentage = 0)
for(i in 1:nrow(pitchduixiang)){
for(j in 1:nrow(data)){
if(grepl(pitch对象options[i], data[j, 'pitch对象'])){
pitchduixiang[i, 'count'] <- pitchduixiang[i, 'count'] + 1
}
}
}
pitchduixiang$percentage <- round(pitchduixiang$count / sum(pitchduixiang$count) * 100, 0)
pitchduixiang <- pitchduixiang %>% arrange(desc(count))
labels <- paste0(
pitchduixiang$options, "\n",
pitchduixiang$count, "(", pitchduixiang$percentage, "%)"
)
pitchduixiang
## options count percentage
## 1 客服负责人 12 32
## 2 老板 11 29
## 3 电商负责人 7 18
## 4 运营负责人 4 11
## 5 平台负责人(如抖音负责人) 4 11
## 6 其他(请补充) 0 0
pitchduixiang <- pitchduixiang[pitchduixiang$count != 0, ]
pie(pitchduixiang$count,
labels = labels,
main = "pitch对象分布",
col = hcl.colors(length(pitchduixiang$count), palette = "Set2"),
cex = 0.5)

cat("客服人数分布\n\n")
## 客服人数分布
summary(data$客服人数)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 6.000 7.613 7.500 50.000 5
hist(data$'客服人数',
breaks = seq(0, max(data$客服人数, na.rm = TRUE), by = 1))

cat("日咨询量分布\n\n")
## 日咨询量分布
summary(data$日咨询量)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 40 350 1000 1793 2750 7000 5
hist(data$'日咨询量',
breaks = seq(0, max(data$日咨询量, na.rm = TRUE), by = 100))

cat("月客服成本分布\n\n")
## 月客服成本分布
summary(data$月客服成本)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5000 15375 28000 46708 39375 300000 12
hist(data$月客服成本,
breaks = seq(0, max(data$月客服成本, na.rm = TRUE), by = 5000))

cat("客服人均日接待量分布\n\n")
## 客服人均日接待量分布
summary(data$'客服人均接待量(每日)')
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 50.0 150.0 200.0 295.6 300.0 1500.0 2
hist(data$'客服人均接待量(每日)',
breaks = seq(0, max(data$'客服人均接待量(每日)', na.rm = TRUE), by = 50))

cat("客服月工资分布\n\n")
## 客服月工资分布
summary(data$客服月工资)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3000 5000 5500 5340 6000 8000 11
hist(data$'客服月工资',
breaks = seq(0, max(data$'客服月工资', na.rm = TRUE), by = 100))

cat("人工客服接待成本(元/接待)\n\n")
## 人工客服接待成本(元/接待)
summary(data$'人工客服接待成本(元/接待)')
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.150 0.480 0.960 1.209 1.540 3.750 11
hist(data$'人工客服接待成本(元/接待)',
breaks = seq(0, max(data$'人工客服接待成本(元/接待)', na.rm = TRUE), by = 0.05))

kefugongju <- data.frame(options = 当前客服工具options, count = 0, percentage = 0)
for(i in 1:nrow(kefugongju)){
for(j in 1:nrow(data)){
if(grepl(当前客服工具options[i], data[j, '当前客服工具'])){
kefugongju[i, 'count'] <- kefugongju[i, 'count'] + 1
}
}
}
kefugongju$percentage <- round(kefugongju$count / sum(kefugongju$count) * 100, 0)
kefugongju <- kefugongju %>% arrange(desc(count))
labels <- paste0(
kefugongju$options, "\n",
kefugongju$count, "(", kefugongju$percentage, "%)"
)
kefugongju
## options count percentage
## 1 飞鸽 19 54
## 2 未使用 10 29
## 3 乐言 4 11
## 4 晓多 1 3
## 5 快麦-智库 1 3
## 6 自研 0 0
## 7 多客 0 0
## 8 乐聊 0 0
## 9 探域 0 0
## 10 福客 0 0
## 11 其他(请补充到备注) 0 0
kefugongju <- kefugongju[kefugongju$count != 0, ]
pie(kefugongju$count,
labels = labels,
main = "当前客服工具分布",
col = hcl.colors(length(kefugongju$count), palette = "Set2"),
cex = 0.5)
