Data Preparation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
library(readxl)

Adjust Variable Type

data <- read_excel("~/Downloads/商分/同意试用客户43家251223.xlsx")
str(data)
## tibble [43 × 27] (S3: tbl_df/tbl/data.frame)
##  $ *企业名称              : chr [1:43] "深圳市腾彩洗护用品有限公司" "陆令数码科技有限公司" "星石(深圳)品牌管理有限公司" "学而思" ...
##  $ *负责人                : chr [1:43] NA NA NA NA ...
##  $ 现+前负责人            : chr [1:43] "黄水武" "黄宗继" "邓莉" "刘海明" ...
##  $ outlier                : chr [1:43] NA NA NA NA ...
##  $ outlier原因            : chr [1:43] NA NA NA NA ...
##  $ *客户状态              : chr [1:43] "同意试用" "试用中" "试用中" "试用中" ...
##  $ *客户来源              : chr [1:43] "渠道转介绍" "BOSS搜索" "陌拜" "BOSS搜索" ...
##  $ *详细地址(必填)-省   : chr [1:43] "广东省" "广东省" "广东省" "浙江省" ...
##  $ 商品类目               : chr [1:43] "日用百货_个人护理" "3C数码" "美妆护肤_美容工具/仪器" "图书教育" ...
##  $ 抖音是否为主要销售渠道 : chr [1:43] NA NA "否" "否" ...
##  $ 抖音占比               : num [1:43] NA NA NA NA NA NA 40 NA NA NA ...
##  $ 客服情况               : chr [1:43] "国内自有客服" "运营兼带回复" "国内自有客服,国内外包客服" "国内自有客服" ...
##  $ 客服人数               : chr [1:43] "12" NA "2" "50" ...
##  $ 全平台客服人数         : chr [1:43] NA NA NA "125" ...
##  $ pitch对象              : chr [1:43] "电商负责人" NA "平台负责人(如抖音负责人)" NA ...
##  $ pitch对象(其他)      : chr [1:43] NA NA NA "IT/信息安全负责人" ...
##  $ 日均单量               : chr [1:43] "20000" NA "3000" NA ...
##  $ 全平台日均单量         : chr [1:43] NA NA NA NA ...
##  $ 日咨询量               : chr [1:43] NA "40" "1500" "5000" ...
##  $ 全平台日咨询量         : chr [1:43] NA NA NA "10000" ...
##  $ 客服人均接待量(每日) : chr [1:43] NA NA "300" "100" ...
##  $ 客服月工资             : chr [1:43] NA NA NA "6000" ...
##  $ 客服月休几天           : chr [1:43] NA NA NA "4" ...
##  $ 外包客服成本(元/接待): chr [1:43] NA NA NA NA ...
##  $ 当前客服工具           : chr [1:43] "飞鸽" NA "未使用" "飞鸽" ...
##  $ 客户信息已核实         : chr [1:43] "是" "是" "是" "是" ...
##  $ 备注(商分)           : chr [1:43] NA NA "自有客服团队1人负责售前,1人负责售后,其余外包" "客服人数在100-150,高峰200;客服团队base在江苏" ...
data$客服人数 <- as.numeric(data$客服人数)
data$全平台客服人数 <- as.numeric(data$全平台客服人数)
data$日均单量 <- as.numeric(data$日均单量)
## Warning: NAs introduced by coercion
data$全平台日均单量 <- as.numeric(data$全平台日均单量)
data$日咨询量 <- as.numeric(data$日咨询量)
data$全平台日咨询量 <- as.numeric(data$全平台日咨询量)
data$`客服人均接待量(每日)` <- as.numeric(data$`客服人均接待量(每日)`)
data$客服月工资 <- as.numeric(data$客服月工资)
data$客服月休几天 <- as.numeric(data$客服月休几天)
data$`外包客服成本(元/接待)` <- as.numeric(data$`外包客服成本(元/接待)`)

Filter Relevant Observations

nrow(data)
## [1] 43
data <- data %>% filter(客户信息已核实 == "是", is.na(outlier))
nrow(data)
## [1] 36

Create New Variables

# 1. 一级类目
data$'一级类目' <- substr(data$商品类目, 1, 4)

# 2. 人工客服接待成本(元/接待)
impute_yuexiu <- !is.na(data$客服月工资) & is.na(data$客服月休几天)
sum(impute_yuexiu)
## [1] 10
data[impute_yuexiu, '客服月休几天'] <- 4
data$'人工客服接待成本(元/接待)' <- NA
idx <- !is.na(data$'客服人均接待量(每日)') & !is.na(data$'客服月工资')
data[idx, '人工客服接待成本(元/接待)'] <- round(data[idx, '客服月工资'] / (30 - data[idx, '客服月休几天']) / data[idx, '客服人均接待量(每日)'], 2)
sum(!is.na(data$'人工客服接待成本(元/接待)'))
## [1] 25
# 3. 月客服成本
data$月客服成本 <- NA
idx <- !is.na(data$客服人数) & !is.na(data$客服月工资)
data[idx, '月客服成本'] <- data[idx, '客服人数'] * data[idx, '客服月工资']

Input Parameter Options

商品类目options <- c("服饰内衣_女装", "服饰内衣_男装", "服饰内衣_童装/婴儿装", "服饰内衣_内衣/袜子/家居服", "服饰内衣_服饰配件/皮带/帽子", "美妆护肤_护肤品", "美妆护肤_彩妆/香水", "美妆护肤_美容工具/仪器", "鞋靴箱包_女鞋", "鞋靴箱包_男鞋", "鞋靴箱包_箱包", "鞋靴箱包_童鞋/婴儿鞋", "日用百货_个人护理", "日用百货_生活日用", "日用百货_家庭清洁", "日用百货_收纳整理", "3C数码", "运动户外", "食品饮料", "珠宝饰品", "母婴用品", "厨卫家电", "汽配摩托", "医药保健", "生鲜蔬果", "礼品文创", "玩具乐器", "索具建材", "图书教育", "二手商品", "钟表配饰", "原料包装", "家居家纺", "宠物用品", "鲜花绿植", "本地生活", "奢侈品")
客服情况options <- c("国内自有客服", "国内外包客服", "运营兼带回复", "其他(请补充)", "国外客服")
pitch对象options <- c("老板", "客服负责人", "运营负责人", "电商负责人", "平台负责人(如抖音负责人)", "其他(请补充)")
当前客服工具options <- c("未使用", "自研", "多客", "乐聊", "乐言", "晓多", "飞鸽", "探域", "快麦-智库", "福客", "其他(请补充到备注)")

Data Analysis

showtext_auto()
shangpinleimu <- data.frame(options = 商品类目options, count = 0, percentage = 0)
for(i in 1:nrow(shangpinleimu)){
  for(j in 1:nrow(data)){
    if(grepl(商品类目options[i], data[j, '商品类目'])){
      shangpinleimu[i, 'count'] <- shangpinleimu[i, 'count'] + 1
    }
  }
}
shangpinleimu$percentage <- round(shangpinleimu$count / sum(shangpinleimu$count) * 100, 0)
shangpinleimu <- shangpinleimu %>% arrange(desc(count))
labels <- paste0(
  shangpinleimu$options, "\n",
  shangpinleimu$count, "(", shangpinleimu$percentage, "%)"
)
shangpinleimu
##                        options count percentage
## 1                       3C数码     6         14
## 2                服饰内衣_女装     5         12
## 3                服饰内衣_男装     4          9
## 4              美妆护肤_护肤品     4          9
## 5                     厨卫家电     4          9
## 6           美妆护肤_彩妆/香水     3          7
## 7            日用百货_个人护理     3          7
## 8    服饰内衣_内衣/袜子/家居服     2          5
## 9                     食品饮料     2          5
## 10                    母婴用品     2          5
## 11                    玩具乐器     2          5
## 12        服饰内衣_童装/婴儿装     1          2
## 13      美妆护肤_美容工具/仪器     1          2
## 14                    运动户外     1          2
## 15                    珠宝饰品     1          2
## 16                    医药保健     1          2
## 17                    图书教育     1          2
## 18 服饰内衣_服饰配件/皮带/帽子     0          0
## 19               鞋靴箱包_女鞋     0          0
## 20               鞋靴箱包_男鞋     0          0
## 21               鞋靴箱包_箱包     0          0
## 22        鞋靴箱包_童鞋/婴儿鞋     0          0
## 23           日用百货_生活日用     0          0
## 24           日用百货_家庭清洁     0          0
## 25           日用百货_收纳整理     0          0
## 26                    汽配摩托     0          0
## 27                    生鲜蔬果     0          0
## 28                    礼品文创     0          0
## 29                    索具建材     0          0
## 30                    二手商品     0          0
## 31                    钟表配饰     0          0
## 32                    原料包装     0          0
## 33                    家居家纺     0          0
## 34                    宠物用品     0          0
## 35                    鲜花绿植     0          0
## 36                    本地生活     0          0
## 37                      奢侈品     0          0
shangpinleimu <- shangpinleimu[shangpinleimu$count != 0, ]
pie(shangpinleimu$count,
    labels = labels,
    main = "商品类目分布",
    col = hcl.colors(length(shangpinleimu$count), palette = "Set2"),
    cex = 0.5)

cat_count <- sort(table(data$抖音是否为主要销售渠道, useNA = "no"), decreasing = TRUE)
pct <- round(cat_count / sum(cat_count) * 100, 0)
labels <- paste0(
  names(cat_count), "\n",
  cat_count, "(", pct, "%)"
)
douyinzhanbi <- as.data.frame(cat_count)
douyinzhanbi
##   Var1 Freq
## 1   否   14
## 2   是   13
pie(cat_count,
    labels = labels,
    main = "抖音是否为主要销售渠道",
    col = hcl.colors(length(cat_count), palette = "Set2"),
    cex = 0.5)

kefuqingkuang <- data.frame(options = 客服情况options, count = 0, percentage = 0)
for(i in 1:nrow(kefuqingkuang)){
  for(j in 1:nrow(data)){
    if(grepl(客服情况options[i], data[j, '客服情况'])){
      kefuqingkuang[i, 'count'] <- kefuqingkuang[i, 'count'] + 1
    }
  }
}
kefuqingkuang$percentage <- round(kefuqingkuang$count / sum(kefuqingkuang$count) * 100, 0)
kefuqingkuang <- kefuqingkuang %>% arrange(desc(count))
labels <- paste0(
  kefuqingkuang$options, "\n",
  kefuqingkuang$count, "(", kefuqingkuang$percentage, "%)"
)
kefuqingkuang
##          options count percentage
## 1   国内自有客服    31         84
## 2   国内外包客服     4         11
## 3   运营兼带回复     2          5
## 4 其他(请补充)     0          0
## 5       国外客服     0          0
kefuqingkuang <- kefuqingkuang[kefuqingkuang$count != 0, ]
pie(kefuqingkuang$count,
    labels = labels,
    main = "客服情况分布",
    col = hcl.colors(length(kefuqingkuang$count), palette = "Set2"),
    cex = 0.5)

pitchduixiang <- data.frame(options = pitch对象options, count = 0, percentage = 0)
for(i in 1:nrow(pitchduixiang)){
  for(j in 1:nrow(data)){
    if(grepl(pitch对象options[i], data[j, 'pitch对象'])){
      pitchduixiang[i, 'count'] <- pitchduixiang[i, 'count'] + 1
    }
  }
}
pitchduixiang$percentage <- round(pitchduixiang$count / sum(pitchduixiang$count) * 100, 0)
pitchduixiang <- pitchduixiang %>% arrange(desc(count))
labels <- paste0(
  pitchduixiang$options, "\n",
  pitchduixiang$count, "(", pitchduixiang$percentage, "%)"
)
pitchduixiang
##                      options count percentage
## 1                 客服负责人    12         32
## 2                       老板    11         29
## 3                 电商负责人     7         18
## 4                 运营负责人     4         11
## 5 平台负责人(如抖音负责人)     4         11
## 6             其他(请补充)     0          0
pitchduixiang <- pitchduixiang[pitchduixiang$count != 0, ]
pie(pitchduixiang$count,
    labels = labels,
    main = "pitch对象分布",
    col = hcl.colors(length(pitchduixiang$count), palette = "Set2"),
    cex = 0.5)

cat("客服人数分布\n\n")
## 客服人数分布
summary(data$客服人数)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   2.000   6.000   7.613   7.500  50.000       5
hist(data$'客服人数', 
     breaks = seq(0, max(data$客服人数, na.rm = TRUE), by = 1))

cat("日咨询量分布\n\n")
## 日咨询量分布
summary(data$日咨询量)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      40     350    1000    1793    2750    7000       5
hist(data$'日咨询量', 
     breaks = seq(0, max(data$日咨询量, na.rm = TRUE), by = 100))

cat("月客服成本分布\n\n")
## 月客服成本分布
summary(data$月客服成本)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    5000   15375   28000   46708   39375  300000      12
hist(data$月客服成本, 
     breaks = seq(0, max(data$月客服成本, na.rm = TRUE), by = 5000))

cat("客服人均日接待量分布\n\n")
## 客服人均日接待量分布
summary(data$'客服人均接待量(每日)')
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    50.0   150.0   200.0   295.6   300.0  1500.0       2
hist(data$'客服人均接待量(每日)', 
     breaks = seq(0, max(data$'客服人均接待量(每日)', na.rm = TRUE), by = 50))

cat("客服月工资分布\n\n")
## 客服月工资分布
summary(data$客服月工资)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    3000    5000    5500    5340    6000    8000      11
hist(data$'客服月工资', 
     breaks = seq(0, max(data$'客服月工资', na.rm = TRUE), by = 100))

cat("人工客服接待成本(元/接待)\n\n")
## 人工客服接待成本(元/接待)
summary(data$'人工客服接待成本(元/接待)')
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.150   0.480   0.960   1.209   1.540   3.750      11
hist(data$'人工客服接待成本(元/接待)', 
     breaks = seq(0, max(data$'人工客服接待成本(元/接待)', na.rm = TRUE), by = 0.05))

kefugongju <- data.frame(options = 当前客服工具options, count = 0, percentage = 0)
for(i in 1:nrow(kefugongju)){
  for(j in 1:nrow(data)){
    if(grepl(当前客服工具options[i], data[j, '当前客服工具'])){
      kefugongju[i, 'count'] <- kefugongju[i, 'count'] + 1
    }
  }
}
kefugongju$percentage <- round(kefugongju$count / sum(kefugongju$count) * 100, 0)
kefugongju <- kefugongju %>% arrange(desc(count))
labels <- paste0(
  kefugongju$options, "\n",
  kefugongju$count, "(", kefugongju$percentage, "%)"
)
kefugongju
##                 options count percentage
## 1                  飞鸽    19         54
## 2                未使用    10         29
## 3                  乐言     4         11
## 4                  晓多     1          3
## 5             快麦-智库     1          3
## 6                  自研     0          0
## 7                  多客     0          0
## 8                  乐聊     0          0
## 9                  探域     0          0
## 10                 福客     0          0
## 11 其他(请补充到备注)     0          0
kefugongju <- kefugongju[kefugongju$count != 0, ]
pie(kefugongju$count,
    labels = labels,
    main = "当前客服工具分布",
    col = hcl.colors(length(kefugongju$count), palette = "Set2"),
    cex = 0.5)