紅山展社課

##################################資料前處理
## 0127F000000yMarQAE 展設
## 0127F000000yMawQAE 裝修
## 0127F000000yTPMQA2 出租
## 0127F000000yTPWQA2 系統
###更換類型碼至名稱
for (i in 1:nrow(A)) {
  if (A[i,16] == "0127F000000yMarQAE")
  {A[i,16] <- "展設"}  
  else if (A[i,16] == "0127F000000yMawQAE")
  {A[i,16] <- "裝修"}
  else if (A[i,16] == "0127F000000yTPMQA2")
  {A[i,16] <- "出租"}
  else if (A[i,16] == "0127F000000yTPWQA2")
  {A[i,16] <- "系統"}
} 

table(A$RecordTypeId)
## 
## 出租 系統 展設 裝修 
## 1677   65  235  437
####自extract1合併公司名稱
names(A)[17] <- "oppotunityID"

for(i in 1:nrow(A)){
  if(A$AccountId[i] %in% C1$ID){
    a <- which(C1$ID %in% A$AccountId[i])
    A$firm[i] <- C1$NAME[a]
  }else{
    A$firm[i] <- NA
  }
}
## Warning: Unknown or uninitialised column: 'firm'.
A <- cbind(A[,c(1)],A[,c(18)],A[,c(2:17)])
## 展設
Dc <- filter(A,StageName=="簽約" & Amount != 0 & RecordTypeId=="展設") ##只有簽約的且有金額的展設
Dc$CreatedDate = as.Date(Dc$CreatedDate)

##NDc <- A %>% filter(StageName != "簽約" &  RecordTypeId == "展設")
library(readxl)
extract <- read_excel("~/Downloads/extract.xlsx")
extract$CREATEDDATE <- as.Date(extract$CREATEDDATE)
extract$CLOSEDATE <- as.Date(extract$CLOSEDATE)

extract$Time <- as.Date("2019-04-17") - extract$CLOSEDATE
NDc <- extract %>% filter(STAGENAME != "關閉" & STAGENAME != "簽約")
stage <- NDc %>%  arrange(OPPORTUNITYID, CREATEDDATE, CLOSEDATE) %>% group_by(OPPORTUNITYID) %>% summarize(last(STAGENAME))
names(A)[1] <- "OPPORTUNITYID"
stage <- left_join(stage, A[,c(1,2,5,11,16,17)], by = "OPPORTUNITYID")
names(stage)[2] <- "STAGENAME"
stage[,c(2,5,7)] <- lapply(stage[,c(2,5,7)], factor)

table(stage$STAGENAME) %>% barplot(las=2,family="FZLTXHB--B51-0")

table(stage$product_type__c) %>% barplot(las=2,family="FZLTXHB--B51-0")

table(stage$STAGENAME, stage$product_type__c) %>%  as.data.frame.matrix %>% 
  d3heatmap(F,F,col=colorRamp(c('black','seagreen','yellow','orange','red')))
summary(Dc)    
##       Id                firm            AccountId          Probability 
##  Length:78          Length:78          Length:78          Min.   :100  
##  Class :character   Class :character   Class :character   1st Qu.:100  
##  Mode  :character   Mode  :character   Mode  :character   Median :100  
##                                                           Mean   :100  
##                                                           3rd Qu.:100  
##                                                           Max.   :100  
##      Amount        Loss_Reason__c      Field5__c        
##  Min.   :    630   Length:78          Length:78         
##  1st Qu.:  79062   Class :character   Class :character  
##  Median : 132500   Mode  :character   Mode  :character  
##  Mean   : 191520                                        
##  3rd Qu.: 205188                                        
##  Max.   :1100000                                        
##  Sys_Principal__c    LeadSource         Field46__c       
##  Length:78          Length:78          Length:78         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  product_type__c      CloseDate           StageName        
##  Length:78          Min.   :2018-01-26   Length:78         
##  Class :character   1st Qu.:2018-07-16   Class :character  
##  Mode  :character   Median :2018-09-21   Mode  :character  
##                     Mean   :2018-10-01                     
##                     3rd Qu.:2018-12-22                     
##                     Max.   :2019-04-03                     
##   Budget__c             Type            CreatedDate        
##  Length:78          Length:78          Min.   :2018-05-18  
##  Class :character   Class :character   1st Qu.:2018-06-17  
##  Mode  :character   Mode  :character   Median :2018-08-25  
##                                        Mean   :2018-09-17  
##                                        3rd Qu.:2018-12-17  
##                                        Max.   :2019-04-02  
##  RecordTypeId       oppotunityID      
##  Length:78          Length:78         
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
n_distinct(Dc$AccountId)   ###總共78筆 64   
## [1] 64
n_distinct(Dc$firm)        ###總共78筆 裡不重複的顧客有64顧客
## [1] 64
##### AcountId firm 
##產業分析
In = Dc %>%
  group_by(product_type__c) %>%
  summarise(
    number=n(),
    money=sum(Amount),
    mean=mean(Amount),
    max=max(Amount),
    min=min(Amount))  %>% as.data.frame()

unique(Dc[,c(2,10)])
RFM = Dc %>% 
  mutate(days = as.integer(as.Date("2019-04-03") - CreatedDate)) %>% 
  group_by(firm) %>% summarise(
    recent = min(days),     # 最近購買距今天數
    freq = n(),             # 購買次數
    money = mean(Amount),   # 平均購買金額
    senior = max(days),     # 第一次購買距今天數
    since = min(CreatedDate)       # 第一次購買日期
  ) %>% data.frame

summary(RFM)
##      firm               recent         freq          money        
##  Length:64          Min.   :  1   Min.   :1.00   Min.   :  15750  
##  Class :character   1st Qu.:107   1st Qu.:1.00   1st Qu.:  78750  
##  Mode  :character   Median :210   Median :1.00   Median : 140000  
##                     Mean   :198   Mean   :1.22   Mean   : 196316  
##                     3rd Qu.:294   3rd Qu.:1.00   3rd Qu.: 218500  
##                     Max.   :320   Max.   :3.00   Max.   :1006125  
##      senior        since           
##  Min.   : 23   Min.   :2018-05-18  
##  1st Qu.:145   1st Qu.:2018-06-11  
##  Median :240   Median :2018-08-06  
##  Mean   :216   Mean   :2018-08-29  
##  3rd Qu.:295   3rd Qu.:2018-11-09  
##  Max.   :320   Max.   :2019-03-11
# 回購顧客的平均購買週期
K = as.integer(sum(RFM$senior[RFM$freq>1]) / sum(RFM$freq[RFM$freq>1]))  ###102

##資料繪圖檢視
p0 = par(cex=0.8, mfrow=c(2,2))
hist(RFM$recent)
hist(RFM$freq)
hist(log(RFM$money,10))
hist(RFM$senior)

par(p0)
set.seed(111)

##分群
set.seed(111)
RFM$grp = kmeans(scale(RFM[,2:4]),4)$cluster
table(RFM$grp)  # 族群大小
## 
##  1  2  3  4 
## 15 12 33  4
####
RFM %>% group_by(grp) %>% summarize(r = mean(recent), f = mean(freq), m = mean(money), n = n())
RFM %>% ggplot(aes(x=freq, y=money, col=as.factor(grp))) + geom_point(size=5, alpha=0.3)

RFM %>% ggplot(aes(x=recent, y=money, col=as.factor(grp))) + geom_point(size=5, alpha=0.3)

####第一群產業特性 
##1群
table(RFM$grp)
## 
##  1  2  3  4 
## 15 12 33  4
RFM$firm[RFM$grp == 1] -> c  
Dc[which(Dc$firm %in% c),] -> c
table(c$product_type__c) #### 機械/科技 12 拍賣1 消費生活餐飲2 
## 
##      機械/科技           拍賣 消費/生活/餐飲 
##             12              1              2
##2群
table(RFM$grp)
## 
##  1  2  3  4 
## 15 12 33  4
RFM$firm[RFM$grp == 2] -> c1  
Dc[which(Dc$firm %in% c1),] -> c1
c1[(!duplicated(c1$firm)),] -> c1
table(c1$product_type__c) ####  機械/科技3 美妝1  拍賣1 消費/生活/餐飲 2 醫藥/生技4 珠寶/精品1 
## 
##      機械/科技           美妝           拍賣 消費/生活/餐飲      醫藥/生技 
##              3              1              1              2              4 
##      珠寶/精品 
##              1
##3群
table(RFM$grp)
## 
##  1  2  3  4 
## 15 12 33  4
RFM$firm[RFM$grp == 3] -> c2  
Dc[which(Dc$firm %in% c2),] -> c2
c2[(!duplicated(c2$firm)),] -> c2
table(c2$product_type__c)
## 
##      機械/科技           拍賣           其他 消費/生活/餐飲      醫藥/生技 
##              9              1              1             13              5 
##      珠寶/精品 
##              4
###機械/科技9 消費/生活/餐飲13  醫藥/生技 5 珠寶/精品4
##4群
table(RFM$grp)
## 
##  1  2  3  4 
## 15 12 33  4
RFM$firm[RFM$grp == 4] -> c3  
Dc[which(Dc$firm %in% c3),] -> c3
c3[(!duplicated(c3$firm)),] -> c3
table(c3$product_type__c)
## 
##      機械/科技           其他 消費/生活/餐飲 
##              2              1              1
###機械科技2
           

###############################################################規則分群
table(RFM$recent < 204)
## 
## FALSE  TRUE 
##    34    30
STS = c("N1","N2","N3","N4")
Status = function(rx,fx,mx,sx,K) {factor(
  ifelse(rx < 150,
         ifelse(fx*mx > 250000,"N1","N2"),
         ifelse(rx < 250 ,"N3", "N4")), STS)}


Y1 = list()              # 建立一個空的LIST
for(y in 2018) {   # 每年年底將顧客資料彙整成一個資料框
  D = c(as.Date("2018-12-31"),as.Date("2017-12-31")) # 當期、前期的期末日期 
  Y1[[paste0("Y",y)]] = Dc %>%        # 從交易資料做起
    filter(CreatedDate <= D[1]) %>%        # 將資料切齊到期末日期
    mutate(days = 1 + as.integer(D[1] - CreatedDate)) %>%   # 交易距期末天數
    group_by(AccountId) %>% summarise(    # 依顧客彙總 ...
      recent = min(days),           #   最後一次購買距期末天數   
      freq = n(),                   #   購買次數 (至期末為止)   
      money = mean(Amount),         #   平均購買金額 (至期末為止)
      senior = max(days),           #   第一次購買距期末天數
      status = Status(recent,freq,money,senior,K),  # 期末狀態
      since = min(CreatedDate),                      # 第一次購買日期
      y_freq = sum(CreatedDate > D[2]),              # 當期購買次數
      y_revenue = sum(Amount[CreatedDate > D[2]])    # 當期購買金額
    ) %>% data.frame }



Y2 = list()              # 建立一個空的LIST
for(y in 2019) {   # 每年年底將顧客資料彙整成一個資料框
  D = c(as.Date("2019-04-03"),as.Date("2018-12-31")) # 當期、前期的期末日期 
  Y2[[paste0("Y",y)]] = Dc %>%        # 從交易資料做起
    filter(CreatedDate <= D[1]) %>%        # 將資料切齊到期末日期
    mutate(days = 1 + as.integer(D[1] - CreatedDate)) %>%   # 交易距期末天數
    group_by(AccountId) %>% summarise(    # 依顧客彙總 ...
      recent = min(days),           #   最後一次購買距期末天數   
      freq = n(),                   #   購買次數 (至期末為止)   
      money = mean(Amount),         #   平均購買金額 (至期末為止)
      senior = max(days),           #   第一次購買距期末天數
      status = Status(recent,freq,money,senior,K),  # 期末狀態
      since = min(CreatedDate),                      # 第一次購買日期
      y_freq = sum(CreatedDate > D[2]),              # 當期購買次數
      y_revenue = sum(Amount[CreatedDate > D[2]])    # 當期購買金額
    ) %>% data.frame }

Y = c(Y1,Y2)

sapply(Y,nrow)
## Y2018 Y2019 
##    56    64
table(Y$Y2018$status)
## 
## N1 N2 N3 N4 
## 10 18 28  0
table(Y$Y2019$status)
## 
## N1 N2 N3 N4 
##  4 16 17 27
cols = c("gold","orange","blue","green") #指定每個族群的顏色
sapply(Y, function(df) table(df$status))  %>%  barplot(col=cols)
legend("topleft",rev(STS),fill=rev(cols))  

####族群動態屬性
CustSegments = do.call(rbind, lapply(Y, function(d) {
  group_by(d, status) %>% summarise(
    average_frequency = mean(freq),
    average_amount = mean(money),
    total_revenue = sum(y_revenue),
    total_no_orders = sum(y_freq),
    average_recency = mean(recent),
    average_seniority = mean(senior),
    group_size = n()
  )})) %>% ungroup %>% 
  mutate(year=c(2018,2018,2018,2019,2019,2019,2019)) %>% data.frame
head(CustSegments)
# CustSegments
# 
# library(googleVis)
# op <- options(gvis.plot.tag='chart')
# m1 = gvisMotionChart(
#   CustSegments, "status", "year",
#   options=list(width=720, height=480) )
# plot(m1)

library(googleVis)
op <- options(gvis.plot.tag='chart')
plot( gvisMotionChart(
  CustSegments, "status", "year",
  options=list(width=720, height=480) ) )
## <!-- MotionChart generated in R 3.5.2 by googleVis 0.6.3 package -->
## <!-- Thu Apr 18 14:28:45 2019 -->
## 
## 
## <!-- jsHeader -->
## <script type="text/javascript">
##  
## // jsData 
## function gvisDataMotionChartID2ee716b3649c () {
## var data = new google.visualization.DataTable();
## var datajson =
## [
##  [
## "N1",
## 2018,
## 1.6,
## 356901.75,
## 5317305,
## 16,
## 67.5,
## 111,
## 10
## ],
## [
## "N2",
## 2018,
## 1.111111111,
## 103003.3333,
## 2078825,
## 20,
## 77.05555556,
## 87,
## 18
## ],
## [
## "N3",
## 2018,
## 1.035714286,
## 219661.1786,
## 6312612,
## 29,
## 198.2142857,
## 199.9285714,
## 28
## ],
## [
## "N1",
## 2019,
## 1.5,
## 488750,
## 625000,
## 1,
## 100.75,
## 132.75,
## 4
## ],
## [
## "N2",
## 2019,
## 1.375,
## 66993.95833,
## 604825,
## 12,
## 68.6875,
## 111.4375,
## 16
## ],
## [
## "N3",
## 2019,
## 1.294117647,
## 203857.5,
## 0,
## 0,
## 196.4117647,
## 216.7647059,
## 17
## ],
## [
## "N4",
## 2019,
## 1.037037037,
## 224880.1111,
## 0,
## 0,
## 291.1111111,
## 292.8888889,
## 27
## ] 
## ];
## data.addColumn('string','status');
## data.addColumn('number','year');
## data.addColumn('number','average_frequency');
## data.addColumn('number','average_amount');
## data.addColumn('number','total_revenue');
## data.addColumn('number','total_no_orders');
## data.addColumn('number','average_recency');
## data.addColumn('number','average_seniority');
## data.addColumn('number','group_size');
## data.addRows(datajson);
## return(data);
## }
##  
## // jsDrawChart
## function drawChartMotionChartID2ee716b3649c() {
## var data = gvisDataMotionChartID2ee716b3649c();
## var options = {};
## options["width"] = 720;
## options["height"] = 480;
## options["state"] = "";
## 
## 
##     var chart = new google.visualization.MotionChart(
##     document.getElementById('MotionChartID2ee716b3649c')
##     );
##     chart.draw(data,options);
##     
## 
## }
##   
##  
## // jsDisplayChart
## (function() {
## var pkgs = window.__gvisPackages = window.__gvisPackages || [];
## var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
## var chartid = "motionchart";
##   
## // Manually see if chartid is in pkgs (not all browsers support Array.indexOf)
## var i, newPackage = true;
## for (i = 0; newPackage && i < pkgs.length; i++) {
## if (pkgs[i] === chartid)
## newPackage = false;
## }
## if (newPackage)
##   pkgs.push(chartid);
##   
## // Add the drawChart function to the global list of callbacks
## callbacks.push(drawChartMotionChartID2ee716b3649c);
## })();
## function displayChartMotionChartID2ee716b3649c() {
##   var pkgs = window.__gvisPackages = window.__gvisPackages || [];
##   var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
##   window.clearTimeout(window.__gvisLoad);
##   // The timeout is set to 100 because otherwise the container div we are
##   // targeting might not be part of the document yet
##   window.__gvisLoad = setTimeout(function() {
##   var pkgCount = pkgs.length;
##   google.load("visualization", "1", { packages:pkgs, callback: function() {
##   if (pkgCount != pkgs.length) {
##   // Race condition where another setTimeout call snuck in after us; if
##   // that call added a package, we must not shift its callback
##   return;
## }
## while (callbacks.length > 0)
## callbacks.shift()();
## } });
## }, 100);
## }
##  
## // jsFooter
## </script>
##  
## <!-- jsChart -->  
## <script type="text/javascript" src="https://www.google.com/jsapi?callback=displayChartMotionChartID2ee716b3649c"></script>
##  
## <!-- divChart -->
##   
## <div id="MotionChartID2ee716b3649c" 
##   style="width: 720; height: 480;">
## </div>
# library(googleVis)
# op <- options(gvis.plot.tag='chart')
# plot( gvisMotionChart(
#   CustSegments, "status", "year",
#   options=list(width=900, height=600) ) )

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.