紅山展社課
##################################資料前處理
## 0127F000000yMarQAE 展設
## 0127F000000yMawQAE 裝修
## 0127F000000yTPMQA2 出租
## 0127F000000yTPWQA2 系統
###更換類型碼至名稱
for (i in 1:nrow(A)) {
if (A[i,16] == "0127F000000yMarQAE")
{A[i,16] <- "展設"}
else if (A[i,16] == "0127F000000yMawQAE")
{A[i,16] <- "裝修"}
else if (A[i,16] == "0127F000000yTPMQA2")
{A[i,16] <- "出租"}
else if (A[i,16] == "0127F000000yTPWQA2")
{A[i,16] <- "系統"}
}
table(A$RecordTypeId)
##
## 出租 系統 展設 裝修
## 1677 65 235 437
####自extract1合併公司名稱
names(A)[17] <- "oppotunityID"
for(i in 1:nrow(A)){
if(A$AccountId[i] %in% C1$ID){
a <- which(C1$ID %in% A$AccountId[i])
A$firm[i] <- C1$NAME[a]
}else{
A$firm[i] <- NA
}
}
## Warning: Unknown or uninitialised column: 'firm'.
A <- cbind(A[,c(1)],A[,c(18)],A[,c(2:17)])
## 展設
Dc <- filter(A,StageName=="簽約" & Amount != 0 & RecordTypeId=="展設") ##只有簽約的且有金額的展設
Dc$CreatedDate = as.Date(Dc$CreatedDate)
##NDc <- A %>% filter(StageName != "簽約" & RecordTypeId == "展設")
library(readxl)
extract <- read_excel("~/Downloads/extract.xlsx")
extract$CREATEDDATE <- as.Date(extract$CREATEDDATE)
extract$CLOSEDATE <- as.Date(extract$CLOSEDATE)
extract$Time <- as.Date("2019-04-17") - extract$CLOSEDATE
NDc <- extract %>% filter(STAGENAME != "關閉" & STAGENAME != "簽約")
stage <- NDc %>% arrange(OPPORTUNITYID, CREATEDDATE, CLOSEDATE) %>% group_by(OPPORTUNITYID) %>% summarize(last(STAGENAME))
names(A)[1] <- "OPPORTUNITYID"
stage <- left_join(stage, A[,c(1,2,5,11,16,17)], by = "OPPORTUNITYID")
names(stage)[2] <- "STAGENAME"
stage[,c(2,5,7)] <- lapply(stage[,c(2,5,7)], factor)
table(stage$STAGENAME) %>% barplot(las=2,family="FZLTXHB--B51-0")
table(stage$product_type__c) %>% barplot(las=2,family="FZLTXHB--B51-0")
table(stage$STAGENAME, stage$product_type__c) %>% as.data.frame.matrix %>%
d3heatmap(F,F,col=colorRamp(c('black','seagreen','yellow','orange','red')))
summary(Dc)
## Id firm AccountId Probability
## Length:78 Length:78 Length:78 Min. :100
## Class :character Class :character Class :character 1st Qu.:100
## Mode :character Mode :character Mode :character Median :100
## Mean :100
## 3rd Qu.:100
## Max. :100
## Amount Loss_Reason__c Field5__c
## Min. : 630 Length:78 Length:78
## 1st Qu.: 79062 Class :character Class :character
## Median : 132500 Mode :character Mode :character
## Mean : 191520
## 3rd Qu.: 205188
## Max. :1100000
## Sys_Principal__c LeadSource Field46__c
## Length:78 Length:78 Length:78
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## product_type__c CloseDate StageName
## Length:78 Min. :2018-01-26 Length:78
## Class :character 1st Qu.:2018-07-16 Class :character
## Mode :character Median :2018-09-21 Mode :character
## Mean :2018-10-01
## 3rd Qu.:2018-12-22
## Max. :2019-04-03
## Budget__c Type CreatedDate
## Length:78 Length:78 Min. :2018-05-18
## Class :character Class :character 1st Qu.:2018-06-17
## Mode :character Mode :character Median :2018-08-25
## Mean :2018-09-17
## 3rd Qu.:2018-12-17
## Max. :2019-04-02
## RecordTypeId oppotunityID
## Length:78 Length:78
## Class :character Class :character
## Mode :character Mode :character
##
##
##
n_distinct(Dc$AccountId) ###總共78筆 64
## [1] 64
n_distinct(Dc$firm) ###總共78筆 裡不重複的顧客有64顧客
## [1] 64
##### AcountId firm
##產業分析
In = Dc %>%
group_by(product_type__c) %>%
summarise(
number=n(),
money=sum(Amount),
mean=mean(Amount),
max=max(Amount),
min=min(Amount)) %>% as.data.frame()
unique(Dc[,c(2,10)])
RFM = Dc %>%
mutate(days = as.integer(as.Date("2019-04-03") - CreatedDate)) %>%
group_by(firm) %>% summarise(
recent = min(days), # 最近購買距今天數
freq = n(), # 購買次數
money = mean(Amount), # 平均購買金額
senior = max(days), # 第一次購買距今天數
since = min(CreatedDate) # 第一次購買日期
) %>% data.frame
summary(RFM)
## firm recent freq money
## Length:64 Min. : 1 Min. :1.00 Min. : 15750
## Class :character 1st Qu.:107 1st Qu.:1.00 1st Qu.: 78750
## Mode :character Median :210 Median :1.00 Median : 140000
## Mean :198 Mean :1.22 Mean : 196316
## 3rd Qu.:294 3rd Qu.:1.00 3rd Qu.: 218500
## Max. :320 Max. :3.00 Max. :1006125
## senior since
## Min. : 23 Min. :2018-05-18
## 1st Qu.:145 1st Qu.:2018-06-11
## Median :240 Median :2018-08-06
## Mean :216 Mean :2018-08-29
## 3rd Qu.:295 3rd Qu.:2018-11-09
## Max. :320 Max. :2019-03-11
# 回購顧客的平均購買週期
K = as.integer(sum(RFM$senior[RFM$freq>1]) / sum(RFM$freq[RFM$freq>1])) ###102
##資料繪圖檢視
p0 = par(cex=0.8, mfrow=c(2,2))
hist(RFM$recent)
hist(RFM$freq)
hist(log(RFM$money,10))
hist(RFM$senior)
par(p0)
set.seed(111)
##分群
set.seed(111)
RFM$grp = kmeans(scale(RFM[,2:4]),4)$cluster
table(RFM$grp) # 族群大小
##
## 1 2 3 4
## 15 12 33 4
####
RFM %>% group_by(grp) %>% summarize(r = mean(recent), f = mean(freq), m = mean(money), n = n())
RFM %>% ggplot(aes(x=freq, y=money, col=as.factor(grp))) + geom_point(size=5, alpha=0.3)
RFM %>% ggplot(aes(x=recent, y=money, col=as.factor(grp))) + geom_point(size=5, alpha=0.3)
####第一群產業特性
##1群
table(RFM$grp)
##
## 1 2 3 4
## 15 12 33 4
RFM$firm[RFM$grp == 1] -> c
Dc[which(Dc$firm %in% c),] -> c
table(c$product_type__c) #### 機械/科技 12 拍賣1 消費生活餐飲2
##
## 機械/科技 拍賣 消費/生活/餐飲
## 12 1 2
##2群
table(RFM$grp)
##
## 1 2 3 4
## 15 12 33 4
RFM$firm[RFM$grp == 2] -> c1
Dc[which(Dc$firm %in% c1),] -> c1
c1[(!duplicated(c1$firm)),] -> c1
table(c1$product_type__c) #### 機械/科技3 美妝1 拍賣1 消費/生活/餐飲 2 醫藥/生技4 珠寶/精品1
##
## 機械/科技 美妝 拍賣 消費/生活/餐飲 醫藥/生技
## 3 1 1 2 4
## 珠寶/精品
## 1
##3群
table(RFM$grp)
##
## 1 2 3 4
## 15 12 33 4
RFM$firm[RFM$grp == 3] -> c2
Dc[which(Dc$firm %in% c2),] -> c2
c2[(!duplicated(c2$firm)),] -> c2
table(c2$product_type__c)
##
## 機械/科技 拍賣 其他 消費/生活/餐飲 醫藥/生技
## 9 1 1 13 5
## 珠寶/精品
## 4
###機械/科技9 消費/生活/餐飲13 醫藥/生技 5 珠寶/精品4
##4群
table(RFM$grp)
##
## 1 2 3 4
## 15 12 33 4
RFM$firm[RFM$grp == 4] -> c3
Dc[which(Dc$firm %in% c3),] -> c3
c3[(!duplicated(c3$firm)),] -> c3
table(c3$product_type__c)
##
## 機械/科技 其他 消費/生活/餐飲
## 2 1 1
###機械科技2
###############################################################規則分群
table(RFM$recent < 204)
##
## FALSE TRUE
## 34 30
STS = c("N1","N2","N3","N4")
Status = function(rx,fx,mx,sx,K) {factor(
ifelse(rx < 150,
ifelse(fx*mx > 250000,"N1","N2"),
ifelse(rx < 250 ,"N3", "N4")), STS)}
Y1 = list() # 建立一個空的LIST
for(y in 2018) { # 每年年底將顧客資料彙整成一個資料框
D = c(as.Date("2018-12-31"),as.Date("2017-12-31")) # 當期、前期的期末日期
Y1[[paste0("Y",y)]] = Dc %>% # 從交易資料做起
filter(CreatedDate <= D[1]) %>% # 將資料切齊到期末日期
mutate(days = 1 + as.integer(D[1] - CreatedDate)) %>% # 交易距期末天數
group_by(AccountId) %>% summarise( # 依顧客彙總 ...
recent = min(days), # 最後一次購買距期末天數
freq = n(), # 購買次數 (至期末為止)
money = mean(Amount), # 平均購買金額 (至期末為止)
senior = max(days), # 第一次購買距期末天數
status = Status(recent,freq,money,senior,K), # 期末狀態
since = min(CreatedDate), # 第一次購買日期
y_freq = sum(CreatedDate > D[2]), # 當期購買次數
y_revenue = sum(Amount[CreatedDate > D[2]]) # 當期購買金額
) %>% data.frame }
Y2 = list() # 建立一個空的LIST
for(y in 2019) { # 每年年底將顧客資料彙整成一個資料框
D = c(as.Date("2019-04-03"),as.Date("2018-12-31")) # 當期、前期的期末日期
Y2[[paste0("Y",y)]] = Dc %>% # 從交易資料做起
filter(CreatedDate <= D[1]) %>% # 將資料切齊到期末日期
mutate(days = 1 + as.integer(D[1] - CreatedDate)) %>% # 交易距期末天數
group_by(AccountId) %>% summarise( # 依顧客彙總 ...
recent = min(days), # 最後一次購買距期末天數
freq = n(), # 購買次數 (至期末為止)
money = mean(Amount), # 平均購買金額 (至期末為止)
senior = max(days), # 第一次購買距期末天數
status = Status(recent,freq,money,senior,K), # 期末狀態
since = min(CreatedDate), # 第一次購買日期
y_freq = sum(CreatedDate > D[2]), # 當期購買次數
y_revenue = sum(Amount[CreatedDate > D[2]]) # 當期購買金額
) %>% data.frame }
Y = c(Y1,Y2)
sapply(Y,nrow)
## Y2018 Y2019
## 56 64
table(Y$Y2018$status)
##
## N1 N2 N3 N4
## 10 18 28 0
table(Y$Y2019$status)
##
## N1 N2 N3 N4
## 4 16 17 27
cols = c("gold","orange","blue","green") #指定每個族群的顏色
sapply(Y, function(df) table(df$status)) %>% barplot(col=cols)
legend("topleft",rev(STS),fill=rev(cols))
####族群動態屬性
CustSegments = do.call(rbind, lapply(Y, function(d) {
group_by(d, status) %>% summarise(
average_frequency = mean(freq),
average_amount = mean(money),
total_revenue = sum(y_revenue),
total_no_orders = sum(y_freq),
average_recency = mean(recent),
average_seniority = mean(senior),
group_size = n()
)})) %>% ungroup %>%
mutate(year=c(2018,2018,2018,2019,2019,2019,2019)) %>% data.frame
head(CustSegments)
# CustSegments
#
# library(googleVis)
# op <- options(gvis.plot.tag='chart')
# m1 = gvisMotionChart(
# CustSegments, "status", "year",
# options=list(width=720, height=480) )
# plot(m1)
library(googleVis)
op <- options(gvis.plot.tag='chart')
plot( gvisMotionChart(
CustSegments, "status", "year",
options=list(width=720, height=480) ) )
## <!-- MotionChart generated in R 3.5.2 by googleVis 0.6.3 package -->
## <!-- Thu Apr 18 14:28:45 2019 -->
##
##
## <!-- jsHeader -->
## <script type="text/javascript">
##
## // jsData
## function gvisDataMotionChartID2ee716b3649c () {
## var data = new google.visualization.DataTable();
## var datajson =
## [
## [
## "N1",
## 2018,
## 1.6,
## 356901.75,
## 5317305,
## 16,
## 67.5,
## 111,
## 10
## ],
## [
## "N2",
## 2018,
## 1.111111111,
## 103003.3333,
## 2078825,
## 20,
## 77.05555556,
## 87,
## 18
## ],
## [
## "N3",
## 2018,
## 1.035714286,
## 219661.1786,
## 6312612,
## 29,
## 198.2142857,
## 199.9285714,
## 28
## ],
## [
## "N1",
## 2019,
## 1.5,
## 488750,
## 625000,
## 1,
## 100.75,
## 132.75,
## 4
## ],
## [
## "N2",
## 2019,
## 1.375,
## 66993.95833,
## 604825,
## 12,
## 68.6875,
## 111.4375,
## 16
## ],
## [
## "N3",
## 2019,
## 1.294117647,
## 203857.5,
## 0,
## 0,
## 196.4117647,
## 216.7647059,
## 17
## ],
## [
## "N4",
## 2019,
## 1.037037037,
## 224880.1111,
## 0,
## 0,
## 291.1111111,
## 292.8888889,
## 27
## ]
## ];
## data.addColumn('string','status');
## data.addColumn('number','year');
## data.addColumn('number','average_frequency');
## data.addColumn('number','average_amount');
## data.addColumn('number','total_revenue');
## data.addColumn('number','total_no_orders');
## data.addColumn('number','average_recency');
## data.addColumn('number','average_seniority');
## data.addColumn('number','group_size');
## data.addRows(datajson);
## return(data);
## }
##
## // jsDrawChart
## function drawChartMotionChartID2ee716b3649c() {
## var data = gvisDataMotionChartID2ee716b3649c();
## var options = {};
## options["width"] = 720;
## options["height"] = 480;
## options["state"] = "";
##
##
## var chart = new google.visualization.MotionChart(
## document.getElementById('MotionChartID2ee716b3649c')
## );
## chart.draw(data,options);
##
##
## }
##
##
## // jsDisplayChart
## (function() {
## var pkgs = window.__gvisPackages = window.__gvisPackages || [];
## var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
## var chartid = "motionchart";
##
## // Manually see if chartid is in pkgs (not all browsers support Array.indexOf)
## var i, newPackage = true;
## for (i = 0; newPackage && i < pkgs.length; i++) {
## if (pkgs[i] === chartid)
## newPackage = false;
## }
## if (newPackage)
## pkgs.push(chartid);
##
## // Add the drawChart function to the global list of callbacks
## callbacks.push(drawChartMotionChartID2ee716b3649c);
## })();
## function displayChartMotionChartID2ee716b3649c() {
## var pkgs = window.__gvisPackages = window.__gvisPackages || [];
## var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
## window.clearTimeout(window.__gvisLoad);
## // The timeout is set to 100 because otherwise the container div we are
## // targeting might not be part of the document yet
## window.__gvisLoad = setTimeout(function() {
## var pkgCount = pkgs.length;
## google.load("visualization", "1", { packages:pkgs, callback: function() {
## if (pkgCount != pkgs.length) {
## // Race condition where another setTimeout call snuck in after us; if
## // that call added a package, we must not shift its callback
## return;
## }
## while (callbacks.length > 0)
## callbacks.shift()();
## } });
## }, 100);
## }
##
## // jsFooter
## </script>
##
## <!-- jsChart -->
## <script type="text/javascript" src="https://www.google.com/jsapi?callback=displayChartMotionChartID2ee716b3649c"></script>
##
## <!-- divChart -->
##
## <div id="MotionChartID2ee716b3649c"
## style="width: 720; height: 480;">
## </div>
# library(googleVis)
# op <- options(gvis.plot.tag='chart')
# plot( gvisMotionChart(
# CustSegments, "status", "year",
# options=list(width=900, height=600) ) )
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.