We estimate Sales ~ Gamma(k, k * lambda) with Gamma priors on k and lambda.
In order to simply use Pareto/GGG code for this purpose, we convert transaction amounts to time-differences.
library(data.table)
library(BTYD)
## Loading required package: hypergeo
library(BTYDplus)
# prepare cdnow elog
cdnowElog <- read.csv(system.file("data/cdnowElog.csv", package = "BTYD"),
stringsAsFactors = FALSE,
col.names = c("cust", "sampleid", "date", "cds", "sales"))
cdnowElog$date <- as.Date(as.character(cdnowElog$date), format = "%Y%m%d")
elog <- as.data.table(cdnowElog)
elog <- elog[, date := as.POSIXct(date, ' 00:00') + runif(1:nrow(elog), min = 0, max = 3600 * 24)]
# convert sales to itt's
elog_s <- rbind(elog[, .(cust, date, sales)], elog[, .(sales = 0, date = min(elog$date) - 1), by = cust])
setkey(elog_s, cust, date)
elog_s[, date := as.POSIXct('1990-01-01') + 3600 * 24 * cumsum(sales), by = cust]
cbs_s <- elog2cbs(elog_s, units = 'day')
# estimate Pareto/GGG without death process
cbs_s[, T.cal := t.x] # ignore dropout
ggg_s <- pggg.mcmc.DrawParameters(cbs_s,
chains = 4, mcmc = 20, burnin = 10, thin = 1,
param_init = list(t = 1, gamma = 1, r = 1, alpha = 1, s = 1000, beta = 1000 * max(cbs_s$T.cal)))
## running in parallel on 4 cores
est_ggg <- t(sapply(ggg_s$level_1, function(el) apply(as.matrix(el[, c('k', 'lambda')]), 2, mean)))
est_ggg_2 <- apply(as.matrix(ggg_s$level_2), 2, mean)
round(apply(est_ggg, 2, mean), 3)
## k lambda
## 3.735 0.039
round(apply(est_ggg, 2, sd), 3)
## k lambda
## 0.402 0.012
see http://www.brucehardie.com/notes/025/gamma_gamma.pdf
library(BTYD)
data(cdnowSummary)
ave.spend <- cdnowSummary$m.x
tot.trans <- cdnowSummary$cbs[,"x"]
ave.spend <- ave.spend[which(tot.trans > 0)]
tot.trans <- tot.trans[which(tot.trans > 0)]
round(est_gg <- spend.EstimateParameters(ave.spend, tot.trans), 2)
## [1] 6.25 3.74 15.44
n <- nrow(elog)
plot(density(elog[sales>0, sales, by = cust]$sales), log='x', main='Distribution of Sales')
## Warning in xy.coords(x, y, xlabel, ylabel, log): 8 x values <= 0 omitted
## from logarithmic plot
lines(density(rgamma(n, est_ggg[, 'k'], est_ggg[, 'k'] * est_ggg[, 'lambda'])), col='red')
lines(density(rgamma(n, est_gg[1], rgamma(n, est_gg[2], est_gg[3]))), col='blue')
legend(x = 1, y = 0.035, legend = c('actual', 'GGG', 'GG'), col = c('black', 'red', 'blue'), lty=1)