load("intuit_online.RData")
# Create new variables on FULL dataset before splitting
intuit <- intuit %>%
mutate(
has_orders = ifelse(numords > 0, 1, 0),
dollars_per_order = ifelse(numords > 0, dollars / numords, 0),
sex_male = ifelse(sex == 1, 1, 0),
sex_female = ifelse(sex == 2, 1, 0)
)
# Same mutations on wave2
intuit.wave2 <- intuit.wave2 %>%
mutate(
has_orders = ifelse(numords > 0, 1, 0),
dollars_per_order = ifelse(numords > 0, dollars / numords, 0),
sex_male = ifelse(sex == 1, 1, 0),
sex_female = ifelse(sex == 2, 1, 0)
)
# Split AFTER mutations
intuit.train <- intuit %>% filter(training == 1)
intuit.test <- intuit %>% filter(training == 0)
# Make res a factor
intuit.train <- intuit.train %>% mutate(res = as.factor(res))
intuit.test <- intuit.test %>% mutate(res = as.factor(res))
Type your answer text here, intervowen with blocks of R code
# LOGISTIC REGRESSION
logit_model <- glm(res ~ speeddown + speedup + last + numords + dollars +
sincepurch + version2013 + upgraded + payroll +
bizflag + income + medhvalue + sex_male + sex_female +
has_orders + dollars_per_order,
data = intuit.train,
family = binomial(link = "logit"))
# Score test set
intuit.test <- intuit.test %>%
mutate(logit_score = predict(logit_model, newdata = intuit.test, type = "response"))
# NEURAL NETWORK
set.seed(1234)
nn_model <- nnet(res ~ speeddown + speedup + last + numords + dollars +
sincepurch + version2013 + upgraded + payroll +
bizflag + income + medhvalue + sex_male + sex_female +
has_orders + dollars_per_order,
data = intuit.train,
size = 5,
decay = 0.01,
maxit = 500)
# weights: 91
initial value 27656.002491
iter 10 value 5729.813308
iter 20 value 5540.332826
iter 30 value 5446.734078
iter 40 value 5316.409534
iter 50 value 5187.616413
iter 60 value 5106.272062
iter 70 value 5020.821044
iter 80 value 4911.480330
iter 90 value 4857.000186
iter 100 value 4801.781516
iter 110 value 4769.192332
iter 120 value 4748.104794
iter 130 value 4739.896958
iter 140 value 4736.335078
iter 150 value 4733.674338
iter 160 value 4732.263168
iter 170 value 4730.708654
iter 180 value 4727.935918
iter 190 value 4726.188365
iter 200 value 4725.141060
iter 210 value 4724.531912
iter 220 value 4723.555278
iter 230 value 4722.891555
iter 240 value 4722.842140
iter 250 value 4722.816144
iter 260 value 4722.748847
iter 270 value 4722.651608
iter 280 value 4722.347087
iter 290 value 4722.289794
iter 300 value 4722.286867
iter 300 value 4722.286832
iter 300 value 4722.286832
final value 4722.286832
converged
# Score test set
intuit.test <- intuit.test %>%
mutate(nn_score = predict(nn_model, newdata = intuit.test, type = "raw")[,1])
# GAINS CURVE comparing both models on test set
gainsplot(intuit.test$logit_score, intuit.test$nn_score,
outcome.var = intuit.test$res)
Score AUGC
intuit.test$logit_score 0.745
intuit.test$nn_score 0.812
# VARIABLE IMPORTANCE for both models on test set
varimpplot(logit_model, target = "res", data = intuit.test)
varimpplot(nn_model, target = "res", data = intuit.test)
Type your answer text here, intervowen with blocks of R code
# Targeting threshold
# Need: P(respond) x $180 - $1.60 > $5.60
# So: P(respond) > $7.20 / $180 = 0.04
threshold <- (5.60 + 1.60) / 180
cat("Targeting threshold:", round(threshold, 4), "\n")
Targeting threshold: 0.04
# Score wave2 with best model (swap logit_model for nn_model if nn won)
intuit.wave2 <- intuit.wave2 %>%
mutate(
best.score = predict(logit_model, newdata = intuit.wave2, type = "response"),
target.wave2 = ifelse(best.score > threshold, 1, 0)
)
table(intuit.wave2$target.wave2)
0 1
16657 8343
cat("Targeting", sum(intuit.wave2$target.wave2), "out of 25,000 customers\n")
Targeting 8343 out of 25,000 customers
Type your answer text here, intervowen with blocks of R code
n_targeted <- sum(intuit.wave2$target.wave2)
expected_conv <- sum(intuit.wave2$best.score[intuit.wave2$target.wave2 == 1])
expected_profit <- (expected_conv * 180) - (n_targeted * 1.60)
profit_per_cust <- expected_profit / n_targeted
cat("Customers targeted: ", n_targeted, "\n")
Customers targeted: 8343
cat("Expected conversions: ", round(expected_conv, 0), "\n")
Expected conversions: 654
cat("Expected total profit: $", round(expected_profit, 0), "\n")
Expected total profit: $ 104328
cat("Expected profit per customer: $", round(profit_per_cust, 2), "\n")
Expected profit per customer: $ 12.5
Type your answer text here, intervowen with blocks of R code
# Significant predictors from logistic regression
coef(summary(logit_model)) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
arrange(`Pr(>|z|)`) %>%
filter(`Pr(>|z|)` < 0.05)
# Variable importance plots on test set
varimpplot(logit_model, target = "res", data = intuit.test)
varimpplot(nn_model, target = "res", data = intuit.test)
Type your answer text here, intervowen with blocks of R code
intuit.wave2.submit <- intuit.wave2 %>%
select(id, best.score, target.wave2) %>%
mutate(
group = "Kathryn_GG_Emma", # your actual first names
section = 1 # your actual section number
)
saveRDS(intuit.wave2.submit, "Kathryn_GG_Emma.RDS")