#' ---
#' title: "Rhapsody Churn Analysis – Logistic Regression Analysis"
#' subtitle: "Refactored into Data Prep | Analysis | Extended Analysis (Added Features) Group 1"
#' output:
#'   html_document: default
#'   word_document: default
#' ---
# Widen console output to 100 characters so wide summaries wrap less
options(width = 100)
# Continuation of logistic regression analysis script for Rhapsody Churn Modeling
# Includes additional model development
# Includes additional data visualization
# Load required libraries
library(ggplot2) # for improved plots -- qplot and scatterplot matrix
library(GGally) # for scatterplot matrix
library(gridExtra)
library(RcmdrMisc) # for numSummary, stepwise
library(ROCR) # for producing ROC and other prediction performance plots
library(caret) # for confusion matrix analysis
library(plyr) # for mapvalues recoding function
library(dplyr) # select function for subsetting data frames
# Date format used when parsing date strings, and default ggplot2 theme
DateFormat <- "%Y-%m-%d"
theme_set(theme_light(base_size = 12))
# Suggested usage:
#   1. Confirm the required data file, ChurnFullPrepped.RData
#      (created via RhapsodyChurnPrep.Rmd), is present in the project folder.
#   2. Point the working directory (ChurnDir below) at that project folder.
#   3. Run the entire script as a block.
# File names, resolved relative to the project folder.
# The two analysis CSV names are only declared here; they are not used
# in this part of the script.
ChurnPrepped <- "ChurnFullPrepped.RData"
ChurnTrainAnalysis <- "ChurnTrainAnalysis.csv"
ChurnTestAnalysis <- "ChurnTestAnalysis.csv"
# Project folder; made the working directory so the names above resolve
ChurnDir <- "D:/MBA/SecondTerm/Data/exam"
setwd(ChurnDir)
# DATA ENTRY AND SUMMARIZATION
# Read the fully prepped (.RData) data set created via RhapsodyChurnPrep.Rmd;
# it supplies the ChurnFull data frame used throughout the script.
# Then record the row count and list the structure of every column.
load(file = ChurnPrepped)
AllRows <- dim(ChurnFull)[1L]
str(object = ChurnFull, list.len = 999)
## 'data.frame': 529430 obs. of 127 variables:
## $ UserPartyId : num 120392 141193 146503 149471 150298 ...
## $ UserOrigSignupDt : Factor w/ 4161 levels "2001-12-03","2002-01-25",..: 2507 504 2894 3513 762 2375 361 2176 1139 2841 ...
## $ SubscriberServiceId : num 1.41e+08 8.80e+07 1.77e+08 3.02e+08 4.00e+08 ...
## $ FirstBillDt : Factor w/ 3890 levels "2001-12-03","2002-01-25",..: 2212 202 2584 3203 460 2065 97 1896 844 2531 ...
## $ LastPlayDt : Factor w/ 2234 levels "","2003-05-09",..: 2205 2205 1976 2230 1 2232 2234 2234 1974 2187 ...
## $ SignUpDt : Factor w/ 3200 levels "2002-12-22","2003-01-04",..: 1546 1018 1933 2552 2908 2296 2908 1245 2525 2480 ...
## $ StopRequestDt : Factor w/ 924 levels "2007-11-26","2007-12-19",..: 924 924 908 924 924 924 924 924 924 924 ...
## $ CancelDt : Factor w/ 87 levels "2007-12-16","2008-01-03",..: 87 87 87 87 87 87 87 87 87 87 ...
## $ CancelType : Factor w/ 4 levels "Current Sub",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CancelReason : Factor w/ 6 levels "","MIGRATION_PAID_TO_PAID",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ChannelGroupName : Factor w/ 15 levels "Cable Partners",..: 14 14 5 12 3 14 1 8 14 2 ...
## $ ChannelName : Factor w/ 117 levels "Affiliate","Android Mobile",..: 106 106 84 70 30 106 69 91 106 92 ...
## $ CobrandCode : num 40134 1 40134 40134 40135 ...
## $ CobrandName : Factor w/ 36 levels "","ATT","AudioGalaxy",..: 28 19 28 28 15 28 26 28 19 28 ...
## $ OriginCode : Factor w/ 493 levels "","12_19Bpage",..: 191 187 327 341 161 332 280 191 187 191 ...
## $ pcode : Factor w/ 445 levels ".","0","12_19Bpage",..: 1 1 1 305 1 1 1 345 1 385 ...
## $ cpath : Factor w/ 351 levels ".","<musicnew>",..: 1 1 223 272 1 1 1 176 1 251 ...
## $ rsrc : Factor w/ 5161 levels "-","$10_300x250",..: 4 4 1604 4755 4 4 4 4958 4 2171 ...
## $ opage : Factor w/ 455 levels "","-","0","404__404",..: 1 1 262 441 1 1 1 1 1 31 ...
## $ ServiceTier : Factor w/ 6 levels "FREE","PREM",..: 5 5 2 2 2 2 2 5 2 5 ...
## $ MonthsPerBill : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BillingZipCode : num 44333 94127 18015 19426 96737 ...
## $ BillingState : Factor w/ 171 levels "","- AICH","- MIYA",..: 118 30 126 126 65 103 117 117 126 53 ...
## $ EcommOrderId : num 3.68e+08 3.67e+08 3.67e+08 3.68e+08 3.67e+08 ...
## $ OrderDate : Factor w/ 30 levels "2013-11-01","2013-11-02",..: 16 1 2 11 8 1 26 14 15 1 ...
## $ PaymentTypeName : Factor w/ 2 levels "Credit","Flexib": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentTypeCode : Factor w/ 2 levels "CC","FP": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentSubTypeCode : Factor w/ 6 levels "AMZ_FPS","AX",..: 6 6 6 2 4 6 3 2 6 6 ...
## $ CPTCardBrand : Factor w/ 3 levels "MC","None","VI": 3 3 3 2 1 3 2 2 3 3 ...
## $ CPTCardType : Factor w/ 7 levels "0 - None","1 - Credit",..: 2 2 4 1 2 2 1 1 4 4 ...
## $ CPTBIN : num 408161 414729 482857 NA 512107 ...
## $ CPTTransactionDivision : num 193099 193099 193099 NA 193099 ...
## $ PaymentTxTypeCode : Factor w/ 1 level "BC": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentTxStatus : Factor w/ 3 levels "HRDDECLINE","SFTDECLINE",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ PmtStatType : Factor w/ 1 level "COMPLETE": 1 1 1 1 1 1 1 1 1 1 ...
## $ PT_status : Factor w/ 33 levels "00","01","04",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ AVS_status : Factor w/ 16 levels "2","3","6","7",..: 8 10 10 6 2 8 8 6 7 8 ...
## $ SEC_status : Factor w/ 4 levels "M","P","S","UNK": 4 4 4 4 4 4 3 4 4 4 ...
## $ RiskCode : Factor w/ 1 level "UNK": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentProcessorCode : Factor w/ 4 levels "AMZ","PPL","PT",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ PaymentProcessorDesc : Factor w/ 4 levels "","Amazon FPS",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ PaymentDivisionCode : Factor w/ 4 levels "193099","203217",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentDivisionDesc : Factor w/ 5 levels "","Amazon FPS non-recurring default",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ PaymentDivisionUseType : Factor w/ 3 levels "","N","R": 3 3 3 3 3 3 3 3 3 3 ...
## $ PaymentDivisionRiskType: Factor w/ 2 levels "","NORMALRISK": 2 2 2 2 2 2 2 2 2 2 ...
## $ Country : Factor w/ 1 level "US": 1 1 1 1 1 1 1 1 1 1 ...
## $ Currency : Factor w/ 1 level "USD": 1 1 1 1 1 1 1 1 1 1 ...
## $ Amount : num 14.99 14.99 9.99 9.99 10.35 ...
## $ OrderStatus : Factor w/ 3 levels "HRDDECLINE","SFTDECLINE",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Churn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OrdSucces : num 1 1 1 1 1 1 1 1 1 1 ...
## $ OrdSftDec : num 1 1 1 1 1 1 1 1 1 1 ...
## $ OrdHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PayAttempts : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PaySuccess : num 1 1 1 1 1 1 1 1 1 1 ...
## $ PaySftDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PayHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat89 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat05 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat56 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat52 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PTStat04 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubOrderCount : num 26 25 25 16 8 25 8 27 17 19 ...
## $ SubPayAttempts : num 34 25 25 16 8 25 8 27 17 19 ...
## $ SubAmount : num 389.7 374.8 249.8 159.8 82.8 ...
## $ SubFirstOrderDate : Factor w/ 954 levels "2011-04-21","2011-04-22",..: 6 11 43 326 660 72 678 24 301 256 ...
## $ SubLastOrderDate : Factor w/ 408 levels "2011-10-23","2011-10-29",..: 364 319 320 359 356 349 344 362 363 349 ...
## $ SubChurns : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubOrdSucces : num 26 25 25 16 8 25 8 27 17 19 ...
## $ SubOrdSftDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubOrdHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPaySuccess : num 26 25 25 16 8 25 8 27 17 19 ...
## $ SubPaySftDec : num 2 0 0 0 0 0 0 0 0 0 ...
## $ SubPayHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat89 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat05 : num 2 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat56 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat52 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubPTStat04 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SubCount : num 1 1 1 1 2 2 2 1 2 2 ...
## $ UserOrderCount : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserPayAttempts : num 34 25 25 16 26 26 28 27 18 25 ...
## $ UserAmount : num 390 375 250 160 269 ...
## $ UserFirstOrderDate : Factor w/ 955 levels "2011-04-21","2011-04-22",..: 6 11 43 326 18 11 6 24 129 24 ...
## $ UserLastOrderDate : Factor w/ 878 levels "2011-04-22","2011-04-23",..: 833 788 789 828 825 818 813 831 832 818 ...
## $ UserChurns : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserOrdSucces : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserOrdSftDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserOrdHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPaySuccess : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserPaySftDec : num 2 0 0 0 0 0 0 0 0 0 ...
## $ UserPayHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat89 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat05 : num 2 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat56 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat52 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat04 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CardBrand : Factor w/ 6 levels "VI","AMZ_FPS",..: 1 1 1 3 5 1 4 3 1 1 ...
## $ CardType : Factor w/ 5 levels "Credit","None",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ UserOrigSignupDate : Date, format: "2009-05-10" "2003-11-15" "2010-06-01" ...
## $ FirstBillDate : Date, format: "2009-05-25" "2003-11-23" "2010-06-01" ...
## $ LastPlayDate : Date, format: "2013-12-24" "2013-12-24" "2013-05-09" ...
## $ SignUpDate : Date, format: "2009-05-10" "2007-11-29" "2010-06-01" ...
## $ StopRequestDate : Date, format: "2999-01-01" "2999-01-01" "2014-01-07" ...
## $ CancelDate : Date, format: "2999-01-01" "2999-01-01" "2999-01-01" ...
## $ OrdDate : Date, format: "2013-11-16" "2013-11-01" "2013-11-02" ...
## $ UserFirstOrdDate : Date, format: "2011-04-26" "2011-05-01" "2011-06-02" ...
## $ UserLastOrdDate : Date, format: "2013-10-16" "2013-09-01" "2013-09-02" ...
## $ UserTenure : num 55 121 42 22 112 59 126 66 100 44 ...
## $ UserWindow : num 31 29 28 20 30 30 30 30 27 30 ...
## $ USuccessDensity : num 0.839 0.862 0.893 0.8 0.867 ...
## $ USDDensity : num 0.0645 0 0 0 0 ...
## $ UHDDensity : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UChurnDensity : num 0 0 0 0 0 0 0 0 0 0 ...
## $ TrainTest : chr "Test" "Train" "Train" "Test" ...
# Show the first three rows of the full data set
head(x = ChurnFull, n = 3L)
## UserPartyId UserOrigSignupDt SubscriberServiceId FirstBillDt LastPlayDt SignUpDt StopRequestDt
## 1 120392 2009-05-10 141005905 2009-05-25 2013-12-24 2009-05-10 2999-01-01
## 2 141193 2003-11-15 87975825 2003-11-23 2013-12-24 2007-11-29 2999-01-01
## 3 146503 2010-06-01 176614654 2010-06-01 2013-05-09 2010-06-01 2014-01-07
## CancelDt CancelType CancelReason ChannelGroupName ChannelName CobrandCode
## 1 2999-01-01 Current Sub Unmapped Unknown 40134
## 2 2999-01-01 Current Sub Unmapped Unknown 1
## 3 2999-01-01 Current Sub Miscellaneous RealPlayer Feature Trigger SP 40134
## CobrandName OriginCode pcode cpath rsrc opage ServiceTier MonthsPerBill
## 1 RealRhapsody LogInViaRN . . . RTG 1
## 2 Listen.com listen . . . RTG 1
## 3 RealRhapsody rcom . myaccount edit rc_rp_trigger_dvdplayback PREM 1
## BillingZipCode BillingState EcommOrderId OrderDate PaymentTypeName PaymentTypeCode
## 1 44333 OH 367726684 2013-11-16 Credit CC
## 2 94127 CA 367201564 2013-11-01 Credit CC
## 3 18015 PA 367275211 2013-11-02 Credit CC
## PaymentSubTypeCode CPTCardBrand CPTCardType CPTBIN CPTTransactionDivision PaymentTxTypeCode
## 1 VI VI 1 - Credit 408161 193099 BC
## 2 VI VI 1 - Credit 414729 193099 BC
## 3 VI VI 3 - Check 482857 193099 BC
## PaymentTxStatus PmtStatType PT_status AVS_status SEC_status RiskCode PaymentProcessorCode
## 1 SUCCESS COMPLETE 00 C UNK UNK PT
## 2 SUCCESS COMPLETE 00 G UNK UNK PT
## 3 SUCCESS COMPLETE 00 G UNK UNK PT
## PaymentProcessorDesc PaymentDivisionCode PaymentDivisionDesc PaymentDivisionUseType
## 1 PaymentTech 193099 Paymentech Recurring for Rhapsody R
## 2 PaymentTech 193099 Paymentech Recurring for Rhapsody R
## 3 PaymentTech 193099 Paymentech Recurring for Rhapsody R
## PaymentDivisionRiskType Country Currency Amount OrderStatus Churn OrdSucces OrdSftDec OrdHrdDec
## 1 NORMALRISK US USD 14.99 SUCCESS 0 1 1 0
## 2 NORMALRISK US USD 14.99 SUCCESS 0 1 1 0
## 3 NORMALRISK US USD 9.99 SUCCESS 0 1 1 0
## PayAttempts PaySuccess PaySftDec PayHrdDec PTStat89 PTStat05 PTStat14 PTStat56 PTStat52 PTStat12
## 1 0 1 0 0 0 0 0 0 0 0
## 2 0 1 0 0 0 0 0 0 0 0
## 3 0 1 0 0 0 0 0 0 0 0
## PTStat33 PTStat41 PTStat04 SubOrderCount SubPayAttempts SubAmount SubFirstOrderDate
## 1 0 0 0 26 34 389.74 2011-04-26
## 2 0 0 0 25 25 374.75 2011-05-01
## 3 0 0 0 25 25 249.75 2011-06-02
## SubLastOrderDate SubChurns SubOrdSucces SubOrdSftDec SubOrdHrdDec SubPaySuccess SubPaySftDec
## 1 2013-10-16 0 26 0 0 26 2
## 2 2013-09-01 0 25 0 0 25 0
## 3 2013-09-02 0 25 0 0 25 0
## SubPayHrdDec SubPTStat89 SubPTStat05 SubPTStat14 SubPTStat56 SubPTStat52 SubPTStat12 SubPTStat33
## 1 0 0 2 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## SubPTStat41 SubPTStat04 SubCount UserOrderCount UserPayAttempts UserAmount UserFirstOrderDate
## 1 0 0 1 26 34 389.74 2011-04-26
## 2 0 0 1 25 25 374.75 2011-05-01
## 3 0 0 1 25 25 249.75 2011-06-02
## UserLastOrderDate UserChurns UserOrdSucces UserOrdSftDec UserOrdHrdDec UserPaySuccess
## 1 2013-10-16 0 26 0 0 26
## 2 2013-09-01 0 25 0 0 25
## 3 2013-09-02 0 25 0 0 25
## UserPaySftDec UserPayHrdDec UserPTStat89 UserPTStat05 UserPTStat14 UserPTStat56 UserPTStat52
## 1 2 0 0 2 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## UserPTStat12 UserPTStat33 UserPTStat41 UserPTStat04 CardBrand CardType UserOrigSignupDate
## 1 0 0 0 0 VI Credit 2009-05-10
## 2 0 0 0 0 VI Credit 2003-11-15
## 3 0 0 0 0 VI Debit 2010-06-01
## FirstBillDate LastPlayDate SignUpDate StopRequestDate CancelDate OrdDate UserFirstOrdDate
## 1 2009-05-25 2013-12-24 2009-05-10 2999-01-01 2999-01-01 2013-11-16 2011-04-26
## 2 2003-11-23 2013-12-24 2007-11-29 2999-01-01 2999-01-01 2013-11-01 2011-05-01
## 3 2010-06-01 2013-05-09 2010-06-01 2014-01-07 2999-01-01 2013-11-02 2011-06-02
## UserLastOrdDate UserTenure UserWindow USuccessDensity USDDensity UHDDensity UChurnDensity
## 1 2013-10-16 55 31 0.8387097 0.06451613 0 0
## 2 2013-09-01 121 29 0.8620690 0.00000000 0 0
## 3 2013-09-02 42 28 0.8928571 0.00000000 0 0
## TrainTest
## 1 Test
## 2 Train
## 3 Train
############ ADDITIONAL DATA PREPARATION FOR ADVANCED MODELS
# Billing period (months per bill) as a categorical feature
ChurnFull$MoPerBill <- as.factor(ChurnFull$MonthsPerBill)
# Set reference levels for ServiceTier, CardBrand, CardType and MoPerBill.
# Sets RHAP, VISA, Credit and monthly pay as the omitted factor levels in models;
# estimates are interpreted relative to these reference levels.
ChurnFull$ServiceTier <- relevel(ChurnFull$ServiceTier, ref = "RHAP")
ChurnFull$CardBrand <- relevel(ChurnFull$CardBrand, ref = "VI")
ChurnFull$CardType <- relevel(ChurnFull$CardType, ref = "Credit")
# Reference the level by its label "1" (monthly billing). A numeric `ref`
# is treated as a position, which would silently pick whatever level
# happens to sort first rather than the monthly level.
ChurnFull$MoPerBill <- relevel(ChurnFull$MoPerBill, ref = "1")
# Add user history metrics (via dplyr mutate):
#   UAttemptsPerOrder -- payment attempts per order
#   UserTenure        -- months (rounded, plus one) from original signup to order date
#   SinceLastPlay     -- days from last play to order date, floored at zero
# Rows with a missing LastPlayDate keep NA for SinceLastPlay.
ChurnFull <- ChurnFull %>%
  mutate(
    UAttemptsPerOrder = UserPayAttempts / UserOrderCount,
    UserTenure = round(
      as.numeric(OrdDate - UserOrigSignupDate) / 365.25 * 12 + 1
    ),
    SinceLastPlay = pmax(round(as.numeric(OrdDate - LastPlayDate)), 0)
  )
# Density measures -- success, soft/hard declines, churn and payment-status counts,
# each scaled by the billing period and divided by the user's order window.
# UserWindow is the number of months (rounded, plus one) between the user's
# first and last order dates. Every density is capped at 1.
ChurnFull <- ChurnFull %>%
  mutate(
    UserWindow = round(
      as.numeric(UserLastOrdDate - UserFirstOrdDate) / 365.25 * 12 + 1
    ),
    USuccessDensity = pmin(MonthsPerBill * UserPaySuccess / UserWindow, 1),
    USDDensity      = pmin(MonthsPerBill * UserPaySftDec / UserWindow, 1),
    UHDDensity      = pmin(MonthsPerBill * UserPayHrdDec / UserWindow, 1),
    UChurnDensity   = pmin(MonthsPerBill * UserChurns / UserWindow, 1),
    UPTS89Den       = pmin(MonthsPerBill * UserPTStat89 / UserWindow, 1),
    UPTS05Den       = pmin(MonthsPerBill * UserPTStat05 / UserWindow, 1),
    UPTS14Den       = pmin(MonthsPerBill * UserPTStat14 / UserWindow, 1),
    UPTS56Den       = pmin(MonthsPerBill * UserPTStat56 / UserWindow, 1),
    UPTS52Den       = pmin(MonthsPerBill * UserPTStat52 / UserWindow, 1),
    UPTS12Den       = pmin(MonthsPerBill * UserPTStat12 / UserWindow, 1),
    UPTS33Den       = pmin(MonthsPerBill * UserPTStat33 / UserWindow, 1),
    UPTS41Den       = pmin(MonthsPerBill * UserPTStat41 / UserWindow, 1),
    UPTS04Den       = pmin(MonthsPerBill * UserPTStat04 / UserWindow, 1)
  )
# Sanity check: numeric summaries of the raw counts, the billing period,
# the order window and a selection of the computed densities
ChurnFull %>%
  select(
    UserPaySuccess, UserPaySftDec, UserPayHrdDec, UserChurns, UserPTStat89,
    MonthsPerBill, UserWindow,
    USuccessDensity, USDDensity, UHDDensity, UChurnDensity, UPTS89Den
  ) %>%
  numSummary()
## mean sd IQR 0% 25% 50% 75% 100%
## UserPaySuccess 17.046043858 8.99938775 16.00000000 0 9.0000000 19.000000 25.00000000 46
## UserPaySftDec 0.741771339 1.52370589 1.00000000 0 0.0000000 0.000000 1.00000000 21
## UserPayHrdDec 0.100855637 0.37964982 0.00000000 0 0.0000000 0.000000 0.00000000 15
## UserChurns 0.236911395 0.62762746 0.00000000 0 0.0000000 0.000000 0.00000000 17
## UserPTStat89 0.371592090 1.23394854 0.00000000 0 0.0000000 0.000000 0.00000000 21
## MonthsPerBill 1.061078896 0.77044073 0.00000000 1 1.0000000 1.000000 1.00000000 12
## UserWindow 20.695196721 10.30212505 18.00000000 1 12.0000000 23.000000 30.00000000 31
## USuccessDensity 0.806077194 0.21325574 0.11742424 0 0.7916667 0.862069 0.90909091 1
## USDDensity 0.045662185 0.11444134 0.03846154 0 0.0000000 0.000000 0.03846154 1
## UHDDensity 0.005286566 0.03149747 0.00000000 0 0.0000000 0.000000 0.00000000 1
## UChurnDensity 0.013838580 0.05497953 0.00000000 0 0.0000000 0.000000 0.00000000 1
## UPTS89Den 0.025650940 0.09489304 0.00000000 0 0.0000000 0.000000 0.00000000 1
## n
## UserPaySuccess 529430
## UserPaySftDec 529430
## UserPayHrdDec 529430
## UserChurns 529430
## UserPTStat89 529430
## MonthsPerBill 529430
## UserWindow 529430
## USuccessDensity 529430
## USDDensity 529430
## UHDDensity 529430
## UChurnDensity 529430
## UPTS89Den 529430
# Customer lifetime projections -- success, failure, payment friction.
# Each projection is the matching density multiplied by the user's tenure.
ChurnFull <- ChurnFull %>%
  mutate(
    USuccessCL = UserTenure * USuccessDensity,
    USDCL      = UserTenure * USDDensity,
    UHDCL      = UserTenure * UHDDensity,
    UChurnCL   = UserTenure * UChurnDensity,
    UPTS89CL   = UserTenure * UPTS89Den,
    UPTS05CL   = UserTenure * UPTS05Den,
    UPTS14CL   = UserTenure * UPTS14Den,
    UPTS56CL   = UserTenure * UPTS56Den,
    UPTS52CL   = UserTenure * UPTS52Den,
    UPTS12CL   = UserTenure * UPTS12Den,
    UPTS33CL   = UserTenure * UPTS33Den,
    UPTS41CL   = UserTenure * UPTS41Den,
    UPTS04CL   = UserTenure * UPTS04Den
  )
# Set up SinceLastPlay, LastPlay, RecentPlay information
# Clean LastPlayDate row by row. An `if ()` on a whole column only tests
# the first row (and is an error in R >= 4.2), which previously replaced
# the entire column; which() selects just the matching rows and skips NAs.
#   - placeholder date 2999-01-01 -> the user's original signup date
#   - last play later than the order date -> the order date
NoPlayRows <- which(
  ChurnFull$LastPlayDate == as.Date("2999-01-01", format = DateFormat)
)
ChurnFull$LastPlayDate[NoPlayRows] <- ChurnFull$UserOrigSignupDate[NoPlayRows]
LatePlayRows <- which(ChurnFull$LastPlayDate > ChurnFull$OrdDate)
ChurnFull$LastPlayDate[LatePlayRows] <- ChurnFull$OrdDate[LatePlayRows]
# NOTE(review): SinceLastPlay is not recomputed from the cleaned dates here;
# the buckets below use the value already stored in ChurnFull -- confirm intended.
# LastPlay: recency bucket of the last play; rows with a missing SinceLastPlay
# stay in the default "Over one year" bucket, which is also the reference level
ChurnFull$LastPlay <- rep("Over one year", nrow(ChurnFull))
ChurnFull$LastPlay[ChurnFull$SinceLastPlay <= 365] <- "Past year"
ChurnFull$LastPlay[ChurnFull$SinceLastPlay <= 90] <- "Past 90 days"
ChurnFull$LastPlay[ChurnFull$SinceLastPlay <= 30] <- "Past 30 days"
ChurnFull$LastPlay <- as.factor(ChurnFull$LastPlay)
ChurnFull$LastPlay <- relevel(ChurnFull$LastPlay, ref = "Over one year")
# RecentPlay: "Yes" when the last play was within 30 days, otherwise "No"
# (the reference level)
ChurnFull$RecentPlay <- rep("No", nrow(ChurnFull))
ChurnFull$RecentPlay[ChurnFull$SinceLastPlay <= 30] <- "Yes"
ChurnFull$RecentPlay <- as.factor(ChurnFull$RecentPlay)
ChurnFull$RecentPlay <- relevel(ChurnFull$RecentPlay, ref = "No")
# Log-scale versions of features whose originals show significant right skewness.
# One is added before taking the log so that zero values map to zero.
ChurnFull <- ChurnFull %>%
  mutate(
    logUserTenure     = log(UserTenure + 1),
    logSinceLastPlay  = log(SinceLastPlay + 1),
    logUserOrderCount = log(UserOrderCount + 1)
  )
# Histograms for selected features, arranged on a four-column grid
# Tenure, days since last play and order count: raw and log scale
UT <- qplot(x = UserTenure, data = ChurnFull,
            geom = "histogram", ylab = "Frequency")
LUT <- qplot(x = logUserTenure, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
LP <- qplot(x = SinceLastPlay, data = ChurnFull,
            geom = "histogram", ylab = "Frequency")
LLP <- qplot(x = logSinceLastPlay, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
OC <- qplot(x = UserOrderCount, data = ChurnFull,
            geom = "histogram", ylab = "Frequency")
LOC <- qplot(x = logUserOrderCount, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
# Payment attempts per order
APO <- qplot(x = UAttemptsPerOrder, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
# Density measures
US <- qplot(x = USuccessDensity, data = ChurnFull,
            geom = "histogram", ylab = "Frequency")
USD <- qplot(x = USDDensity, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
UHD <- qplot(x = UHDDensity, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
U89 <- qplot(x = UPTS89Den, data = ChurnFull,
             geom = "histogram", ylab = "Frequency")
UC <- qplot(x = UChurnDensity, data = ChurnFull,
            geom = "histogram", ylab = "Frequency")
# Customer lifetime projections
USCL <- qplot(x = USuccessCL, data = ChurnFull,
              geom = "histogram", ylab = "Frequency")
USDC <- qplot(x = USDCL, data = ChurnFull,
              geom = "histogram", ylab = "Frequency")
UHDC <- qplot(x = UHDCL, data = ChurnFull,
              geom = "histogram", ylab = "Frequency")
U89C <- qplot(x = UPTS89CL, data = ChurnFull,
              geom = "histogram", ylab = "Frequency")
grid.arrange(
  UT, LUT, LP, LLP,
  OC, LOC, APO, UC,
  US, USD, UHD, U89,
  USCL, USDC, UHDC, U89C,
  ncol = 4
)
# Build the analysis data set (using dplyr): drop FREE-tier rows, then keep
# the candidate model variables/features in the order listed below
ChurnAnalysis <- ChurnFull %>%
  filter(ServiceTier != "FREE") %>%
  select(
    # Sample flag and response
    TrainTest, Churn,
    # Subscription dates and cancellation details
    UserOrigSignupDate, FirstBillDate, LastPlayDate,
    SignUpDate, StopRequestDate, CancelDate,
    CancelType, CancelReason,
    # Order and payment descriptors
    ServiceTier:CPTCardType,
    CardBrand, CardType,
    Amount,
    # User-level history counts
    SubCount:UserPTStat04,
    # Additional engineered features
    MoPerBill,
    UAttemptsPerOrder, UserTenure,
    SinceLastPlay, LastPlay, RecentPlay,
    logUserTenure, logSinceLastPlay, logUserOrderCount,
    UChurnDensity, USuccessDensity, USDDensity, UHDDensity,
    UPTS89Den, UPTS05Den, UPTS14Den, UPTS56Den, UPTS52Den,
    UPTS12Den, UPTS33Den, UPTS41Den, UPTS04Den,
    UChurnCL, USuccessCL, USDCL, UHDCL,
    UPTS89CL, UPTS05CL, UPTS14CL, UPTS56CL, UPTS52CL,
    UPTS12CL, UPTS33CL, UPTS41CL, UPTS04CL
  )
# List the structure of every retained column
str(object = ChurnAnalysis, list.len = 999)
## 'data.frame': 529252 obs. of 81 variables:
## $ TrainTest : chr "Test" "Train" "Train" "Test" ...
## $ Churn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserOrigSignupDate: Date, format: "2009-05-10" "2003-11-15" "2010-06-01" ...
## $ FirstBillDate : Date, format: "2009-05-25" "2003-11-23" "2010-06-01" ...
## $ LastPlayDate : Date, format: "2013-11-16" "2013-11-01" "2013-11-02" ...
## $ SignUpDate : Date, format: "2009-05-10" "2007-11-29" "2010-06-01" ...
## $ StopRequestDate : Date, format: "2999-01-01" "2999-01-01" "2014-01-07" ...
## $ CancelDate : Date, format: "2999-01-01" "2999-01-01" "2999-01-01" ...
## $ CancelType : Factor w/ 4 levels "Current Sub",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CancelReason : Factor w/ 6 levels "","MIGRATION_PAID_TO_PAID",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ServiceTier : Factor w/ 6 levels "RHAP","FREE",..: 5 5 3 3 3 3 3 5 3 5 ...
## $ MonthsPerBill : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BillingZipCode : num 44333 94127 18015 19426 96737 ...
## $ BillingState : Factor w/ 171 levels "","- AICH","- MIYA",..: 118 30 126 126 65 103 117 117 126 53 ...
## $ EcommOrderId : num 3.68e+08 3.67e+08 3.67e+08 3.68e+08 3.67e+08 ...
## $ OrderDate : Factor w/ 30 levels "2013-11-01","2013-11-02",..: 16 1 2 11 8 1 26 14 15 1 ...
## $ PaymentTypeName : Factor w/ 2 levels "Credit","Flexib": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentTypeCode : Factor w/ 2 levels "CC","FP": 1 1 1 1 1 1 1 1 1 1 ...
## $ PaymentSubTypeCode: Factor w/ 6 levels "AMZ_FPS","AX",..: 6 6 6 2 4 6 3 2 6 6 ...
## $ CPTCardBrand : Factor w/ 3 levels "MC","None","VI": 3 3 3 2 1 3 2 2 3 3 ...
## $ CPTCardType : Factor w/ 7 levels "0 - None","1 - Credit",..: 2 2 4 1 2 2 1 1 4 4 ...
## $ CardBrand : Factor w/ 6 levels "VI","AMZ_FPS",..: 1 1 1 3 5 1 4 3 1 1 ...
## $ CardType : Factor w/ 5 levels "Credit","None",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ Amount : num 14.99 14.99 9.99 9.99 10.35 ...
## $ SubCount : num 1 1 1 1 2 2 2 1 2 2 ...
## $ UserOrderCount : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserPayAttempts : num 34 25 25 16 26 26 28 27 18 25 ...
## $ UserAmount : num 390 375 250 160 269 ...
## $ UserFirstOrderDate: Factor w/ 955 levels "2011-04-21","2011-04-22",..: 6 11 43 326 18 11 6 24 129 24 ...
## $ UserLastOrderDate : Factor w/ 878 levels "2011-04-22","2011-04-23",..: 833 788 789 828 825 818 813 831 832 818 ...
## $ UserChurns : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserOrdSucces : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserOrdSftDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserOrdHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPaySuccess : num 26 25 25 16 26 26 28 27 18 25 ...
## $ UserPaySftDec : num 2 0 0 0 0 0 0 0 0 0 ...
## $ UserPayHrdDec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat89 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat05 : num 2 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat56 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat52 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UserPTStat04 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ MoPerBill : Factor w/ 3 levels "1","3","12": 1 1 1 1 1 1 1 1 1 1 ...
## $ UAttemptsPerOrder : num 1.31 1 1 1 1 ...
## $ UserTenure : num 55 121 42 22 112 59 126 66 100 44 ...
## $ SinceLastPlay : num 0 0 177 0 NA 0 0 0 192 0 ...
## $ LastPlay : Factor w/ 4 levels "Over one year",..: 2 2 4 2 1 2 2 2 4 2 ...
## $ RecentPlay : Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 2 2 1 2 ...
## $ logUserTenure : num 4.03 4.8 3.76 3.14 4.73 ...
## $ logSinceLastPlay : num 0 0 5.18 0 NA ...
## $ logUserOrderCount : num 3.3 3.26 3.26 2.83 3.3 ...
## $ UChurnDensity : num 0 0 0 0 0 0 0 0 0 0 ...
## $ USuccessDensity : num 0.839 0.862 0.893 0.8 0.867 ...
## $ USDDensity : num 0.0645 0 0 0 0 ...
## $ UHDDensity : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS89Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS05Den : num 0.0645 0 0 0 0 ...
## $ UPTS14Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS56Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS52Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS12Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS33Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS41Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS04Den : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UChurnCL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ USuccessCL : num 46.1 104.3 37.5 17.6 97.1 ...
## $ USDCL : num 3.55 0 0 0 0 ...
## $ UHDCL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS89CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS05CL : num 3.55 0 0 0 0 ...
## $ UPTS14CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS56CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS52CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS12CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS33CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS41CL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UPTS04CL : num 0 0 0 0 0 0 0 0 0 0 ...
# Show the first three rows of the analysis data set
head(x = ChurnAnalysis, n = 3L)
## TrainTest Churn UserOrigSignupDate FirstBillDate LastPlayDate SignUpDate StopRequestDate
## 1 Test 0 2009-05-10 2009-05-25 2013-11-16 2009-05-10 2999-01-01
## 2 Train 0 2003-11-15 2003-11-23 2013-11-01 2007-11-29 2999-01-01
## 3 Train 0 2010-06-01 2010-06-01 2013-11-02 2010-06-01 2014-01-07
## CancelDate CancelType CancelReason ServiceTier MonthsPerBill BillingZipCode BillingState
## 1 2999-01-01 Current Sub RTG 1 44333 OH
## 2 2999-01-01 Current Sub RTG 1 94127 CA
## 3 2999-01-01 Current Sub PREM 1 18015 PA
## EcommOrderId OrderDate PaymentTypeName PaymentTypeCode PaymentSubTypeCode CPTCardBrand
## 1 367726684 2013-11-16 Credit CC VI VI
## 2 367201564 2013-11-01 Credit CC VI VI
## 3 367275211 2013-11-02 Credit CC VI VI
## CPTCardType CardBrand CardType Amount SubCount UserOrderCount UserPayAttempts UserAmount
## 1 1 - Credit VI Credit 14.99 1 26 34 389.74
## 2 1 - Credit VI Credit 14.99 1 25 25 374.75
## 3 3 - Check VI Debit 9.99 1 25 25 249.75
## UserFirstOrderDate UserLastOrderDate UserChurns UserOrdSucces UserOrdSftDec UserOrdHrdDec
## 1 2011-04-26 2013-10-16 0 26 0 0
## 2 2011-05-01 2013-09-01 0 25 0 0
## 3 2011-06-02 2013-09-02 0 25 0 0
## UserPaySuccess UserPaySftDec UserPayHrdDec UserPTStat89 UserPTStat05 UserPTStat14 UserPTStat56
## 1 26 2 0 0 2 0 0
## 2 25 0 0 0 0 0 0
## 3 25 0 0 0 0 0 0
## UserPTStat52 UserPTStat12 UserPTStat33 UserPTStat41 UserPTStat04 MoPerBill UAttemptsPerOrder
## 1 0 0 0 0 0 1 1.307692
## 2 0 0 0 0 0 1 1.000000
## 3 0 0 0 0 0 1 1.000000
## UserTenure SinceLastPlay LastPlay RecentPlay logUserTenure logSinceLastPlay logUserOrderCount
## 1 55 0 Past 30 days Yes 4.025352 0.000000 3.295837
## 2 121 0 Past 30 days Yes 4.804021 0.000000 3.258097
## 3 42 177 Past year No 3.761200 5.181784 3.258097
## UChurnDensity USuccessDensity USDDensity UHDDensity UPTS89Den UPTS05Den UPTS14Den UPTS56Den
## 1 0 0.8387097 0.06451613 0 0 0.06451613 0 0
## 2 0 0.8620690 0.00000000 0 0 0.00000000 0 0
## 3 0 0.8928571 0.00000000 0 0 0.00000000 0 0
## UPTS52Den UPTS12Den UPTS33Den UPTS41Den UPTS04Den UChurnCL USuccessCL USDCL UHDCL UPTS89CL
## 1 0 0 0 0 0 0 46.12903 3.548387 0 0
## 2 0 0 0 0 0 0 104.31034 0.000000 0 0
## 3 0 0 0 0 0 0 37.50000 0.000000 0 0
## UPTS05CL UPTS14CL UPTS56CL UPTS52CL UPTS12CL UPTS33CL UPTS41CL UPTS04CL
## 1 3.548387 0 0 0 0 0 0 0
## 2 0.000000 0 0 0 0 0 0 0
## 3 0.000000 0 0 0 0 0 0 0
# Partition the analysis data into training and test samples using the
# TrainTest flag, then report the number of rows in each sample
ChurnTrain <- ChurnAnalysis[ChurnAnalysis[["TrainTest"]] == "Train", ]
ChurnTest <- ChurnAnalysis[ChurnAnalysis[["TrainTest"]] == "Test", ]
TrainRows <- dim(ChurnTrain)[1L]
TestRows <- dim(ChurnTest)[1L]
TrainRows
## [1] 423399
TestRows
## [1] 105853
############ END OF DATA PREPARATION
############ BEGIN ANALYSIS
# ANALYSIS AND MODELING OF TRAINING DATA
# First model: logistic regression of churn on card brand alone,
# fitted to the training sample
BrandModel <- glm(
  Churn ~ CardBrand,
  family = binomial(logit),
  data = ChurnTrain
)
summary(BrandModel)
##
## Call:
## glm(formula = Churn ~ CardBrand, family = binomial(logit), data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5557 -0.3122 -0.3122 -0.3122 3.0173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.996890 0.009288 -322.678 <2e-16 ***
## CardBrandAMZ_FPS 1.206924 0.112402 10.738 <2e-16 ***
## CardBrandAX -1.544488 0.048292 -31.982 <2e-16 ***
## CardBrandDI -1.221986 0.079445 -15.382 <2e-16 ***
## CardBrandMC 0.006311 0.017484 0.361 0.7181
## CardBrandPAYPAL -0.078022 0.043800 -1.781 0.0749 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150370 on 423398 degrees of freedom
## Residual deviance: 148242 on 423393 degrees of freedom
## AIC: 148254
##
## Number of Fisher Scoring iterations: 7
# Deviance R2 for the card-brand model:
# share of the null deviance removed by the fitted model
NullDev <- BrandModel[["null.deviance"]]
ResDev <- BrandModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.01415554
# Second model: churn on card brand plus card type, training sample
BrandTypeModel <- glm(
  Churn ~ CardBrand + CardType,
  family = binomial(logit),
  data = ChurnTrain
)
summary(BrandTypeModel)
##
## Call:
## glm(formula = Churn ~ CardBrand + CardType, family = binomial(logit),
## data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9850 -0.2933 -0.2933 -0.1930 3.0173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.97397 0.02270 -175.075 < 2e-16 ***
## CardBrandAMZ_FPS -1.25725 0.12234 -10.277 < 2e-16 ***
## CardBrandAX -0.56741 0.05255 -10.798 < 2e-16 ***
## CardBrandDI -0.24491 0.08210 -2.983 0.002854 **
## CardBrandMC 0.06165 0.01845 3.342 0.000831 ***
## CardBrandPAYPAL 0.89905 0.04845 18.556 < 2e-16 ***
## CardTypeNone 3.44126 0.05337 64.473 < 2e-16 ***
## CardTypeDebit 0.84908 0.02411 35.218 < 2e-16 ***
## CardTypePrepaid 3.01708 0.02757 109.443 < 2e-16 ***
## CardType6 - Charge Card -4.59180 43.95396 -0.104 0.916798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150370 on 423398 degrees of freedom
## Residual deviance: 133655 on 423389 degrees of freedom
## AIC: 133675
##
## Number of Fisher Scoring iterations: 7
# Deviance R2 for the card brand + card type model:
# share of the null deviance removed by the fitted model
NullDev <- BrandTypeModel[["null.deviance"]]
ResDev <- BrandTypeModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.1111637
# Stepwise feature selection, starting from the card brand + card type model.
# The penalty per parameter, k, selects the criterion:
#   AIC -- k = 2
#   BIC -- k = log(n) = log(TrainRows)
# BIC is used here; the trace still labels the criterion column "AIC".
BrandTypeRedModel <- step(object = BrandTypeModel, k = log(TrainRows))
## Start: AIC=133784.1
## Churn ~ CardBrand + CardType
##
## Df Deviance AIC
## <none> 133655 133784
## - CardBrand 5 134323 134387
## - CardType 4 148242 148319
# Coefficient table and fit statistics for the model kept by stepwise selection
summary(object = BrandTypeRedModel)
##
## Call:
## glm(formula = Churn ~ CardBrand + CardType, family = binomial(logit),
## data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9850 -0.2933 -0.2933 -0.1930 3.0173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.97397 0.02270 -175.075 < 2e-16 ***
## CardBrandAMZ_FPS -1.25725 0.12234 -10.277 < 2e-16 ***
## CardBrandAX -0.56741 0.05255 -10.798 < 2e-16 ***
## CardBrandDI -0.24491 0.08210 -2.983 0.002854 **
## CardBrandMC 0.06165 0.01845 3.342 0.000831 ***
## CardBrandPAYPAL 0.89905 0.04845 18.556 < 2e-16 ***
## CardTypeNone 3.44126 0.05337 64.473 < 2e-16 ***
## CardTypeDebit 0.84908 0.02411 35.218 < 2e-16 ***
## CardTypePrepaid 3.01708 0.02757 109.443 < 2e-16 ***
## CardType6 - Charge Card -4.59180 43.95396 -0.104 0.916798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150370 on 423398 degrees of freedom
## Residual deviance: 133655 on 423389 degrees of freedom
## AIC: 133675
##
## Number of Fisher Scoring iterations: 7
# Deviance R2 for the reduced (stepwise-selected) model:
# share of the null deviance removed by the fitted model
NullDev <- BrandTypeRedModel[["null.deviance"]]
ResDev <- BrandTypeRedModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.1111637
# Compute and examine training-sample predictions for the reduced logistic model:
#   LogisticPred -- linear predictor (predict() default scale)
#   LogisticProb -- predicted probability (type = "response")
LogisticPred <- predict(BrandTypeRedModel, newdata = ChurnTrain)
LogisticProb <- predict(BrandTypeRedModel, newdata = ChurnTrain,
                        type = "response")
# Classification analysis, training sample:
# predict churn (1) when the predicted probability exceeds the threshold
Threshold <- 0.5
LogisticClass <- rep(0, TrainRows)
LogisticClass[LogisticProb > Threshold] <- 1
# Tabulate predicted class (rows) against observed churn (columns).
# The predicted class is given explicit 0/1 levels so the table keeps both
# rows even when no observation is classified as churn; otherwise the
# predicted-1 row silently disappears and the table is no longer 2 x 2.
Confusion <- table(
  factor(LogisticClass, levels = c(0, 1)),
  ChurnTrain$Churn,
  dnn = c("LogisticClass", "")
)
Confusion
##
## LogisticClass 0 1
## 0 405164 18235
# Overall training accuracy: share of rows whose predicted class equals Churn.
# In the recorded run no row is predicted positive, so this is simply the
# share of non-churners (405164 / 423399).
mean(LogisticClass == ChurnTrain$Churn)
## [1] 0.9569319
# Lift chart -- true positive rate versus rate of positive prediction
# (for changing cutoffs)
# Using prediction/performance functions from the ROCR package
plotpred <- prediction(LogisticProb, ChurnTrain$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "rpp")
rpp <- plotroc@x.values[[1]] # Extract "rpp" (the x.measure) for plotting
tpr <- plotroc@y.values[[1]] # Extract "tpr" (the measure) for plotting
Cutoff <- plotroc@alpha.values[[1]] # Extract cutoffs for color scale
# ggplot() is used in place of the deprecated qplot(); same line geometry,
# color mapping, title and axis labels
ggplot(data.frame(rpp = rpp, tpr = tpr, Cutoff = Cutoff),
       aes(x = rpp, y = tpr, color = Cutoff)) +
  geom_line() +
  labs(title = "Lift Chart -- Train",
       x = "Rate of Positive Prediction",
       y = "True Positive Rate") +
  theme(legend.position = c(0.90, 0.23))
# Capture lift information for comparative lift Chart
ScoreFracBTR <- rpp
ChurnFracBTR <- tpr
ModelBTR <- rep("Card Brand/Type", length(rpp))
# Estimate card brand, card type and User effects
# Logistic regression of Churn on the card brand/type factors plus
# UserTenure and the four user-level density columns, fit on ChurnTrain
OrderUserModel <- glm(Churn ~ CardBrand + CardType + UserTenure + USuccessDensity +
USDDensity + UHDDensity + UChurnDensity,
family = binomial(logit), data = ChurnTrain)
# Print coefficient estimates, deviances and AIC for the fitted model
summary(OrderUserModel)
##
## Call:
## glm(formula = Churn ~ CardBrand + CardType + UserTenure + USuccessDensity +
## USDDensity + UHDDensity + UChurnDensity, family = binomial(logit),
## data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8304 -0.2742 -0.2251 -0.1645 3.5486
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3170530 0.0304348 -76.132 < 2e-16 ***
## CardBrandAMZ_FPS -1.4576451 0.1279197 -11.395 < 2e-16 ***
## CardBrandAX -0.4900796 0.0527762 -9.286 < 2e-16 ***
## CardBrandDI -0.1938661 0.0825090 -2.350 0.0188 *
## CardBrandMC 0.0315202 0.0187690 1.679 0.0931 .
## CardBrandPAYPAL 0.3701546 0.0495726 7.467 8.21e-14 ***
## CardTypeNone 3.0092227 0.0570594 52.738 < 2e-16 ***
## CardTypeDebit 0.5853468 0.0245622 23.831 < 2e-16 ***
## CardTypePrepaid 1.9600398 0.0306089 64.035 < 2e-16 ***
## CardType6 - Charge Card -4.6113098 43.9539591 -0.105 0.9164
## UserTenure -0.0164609 0.0005169 -31.844 < 2e-16 ***
## USuccessDensity -1.5057171 0.0280066 -53.763 < 2e-16 ***
## USDDensity 1.9590601 0.0503108 38.939 < 2e-16 ***
## UHDDensity 0.6558940 0.1639283 4.001 6.30e-05 ***
## UChurnDensity -1.1818971 0.0948576 -12.460 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150370 on 423398 degrees of freedom
## Residual deviance: 126845 on 423384 degrees of freedom
## AIC: 126875
##
## Number of Fisher Scoring iterations: 7
# Pseudo (deviance) R2 for the order/user model:
# share of the null deviance removed by the predictors
NullDev <- OrderUserModel[["null.deviance"]]
ResDev <- OrderUserModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.1564482
# Stepwise feature selection with step(); the penalty per parameter is k
# AIC -- k = 2
# BIC -- k = log(n), n = number of observations actually used in the fit
# nobs() reads n from the fitted model itself, so the penalty stays correct
# even if glm() dropped rows with missing values (it equals log(TrainRows)
# whenever every training row was used)
OrderUserRedModel <- step(OrderUserModel, k = log(nobs(OrderUserModel))) # based on BIC
## Start: AIC=127039.4
## Churn ~ CardBrand + CardType + UserTenure + USuccessDensity +
## USDDensity + UHDDensity + UChurnDensity
##
## Df Deviance AIC
## <none> 126845 127039
## - UHDDensity 1 126860 127041
## - UChurnDensity 1 127002 127183
## - CardBrand 5 127197 127327
## - UserTenure 1 128059 128240
## - USDDensity 1 128162 128344
## - USuccessDensity 1 129578 129760
## - CardType 4 132862 133005
# Coefficient table for the model returned by step(); in the recorded trace
# "<none>" has the lowest criterion value, so every term was retained
summary(OrderUserRedModel)
##
## Call:
## glm(formula = Churn ~ CardBrand + CardType + UserTenure + USuccessDensity +
## USDDensity + UHDDensity + UChurnDensity, family = binomial(logit),
## data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8304 -0.2742 -0.2251 -0.1645 3.5486
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3170530 0.0304348 -76.132 < 2e-16 ***
## CardBrandAMZ_FPS -1.4576451 0.1279197 -11.395 < 2e-16 ***
## CardBrandAX -0.4900796 0.0527762 -9.286 < 2e-16 ***
## CardBrandDI -0.1938661 0.0825090 -2.350 0.0188 *
## CardBrandMC 0.0315202 0.0187690 1.679 0.0931 .
## CardBrandPAYPAL 0.3701546 0.0495726 7.467 8.21e-14 ***
## CardTypeNone 3.0092227 0.0570594 52.738 < 2e-16 ***
## CardTypeDebit 0.5853468 0.0245622 23.831 < 2e-16 ***
## CardTypePrepaid 1.9600398 0.0306089 64.035 < 2e-16 ***
## CardType6 - Charge Card -4.6113098 43.9539591 -0.105 0.9164
## UserTenure -0.0164609 0.0005169 -31.844 < 2e-16 ***
## USuccessDensity -1.5057171 0.0280066 -53.763 < 2e-16 ***
## USDDensity 1.9590601 0.0503108 38.939 < 2e-16 ***
## UHDDensity 0.6558940 0.1639283 4.001 6.30e-05 ***
## UChurnDensity -1.1818971 0.0948576 -12.460 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150370 on 423398 degrees of freedom
## Residual deviance: 126845 on 423384 degrees of freedom
## AIC: 126875
##
## Number of Fisher Scoring iterations: 7
# Pseudo (deviance) R2 for the reduced order/user model:
# share of the null deviance removed by the predictors
NullDev <- OrderUserRedModel[["null.deviance"]]
ResDev <- OrderUserRedModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.1564482
# Pseudo (Deviance) R2 is (Null Deviance - Residual Deviance) / (Null Deviance)
# Exponentiated logistic coefficients are read as the multiplicative change
# in the odds of churn per unit change in the predictor
cat("\n")
# Coefficient matrix from summary(): Estimate, Std. Error, z value, Pr(>|z|)
LogisticCoef <- coef(summary(OrderUserRedModel))
ExpCoef <- exp(LogisticCoef[, "Estimate"])
# Append the odds multipliers as an extra column and round for display
NeatCoef <- round(cbind(LogisticCoef, ExpCoef), 6)
print(NeatCoef)
## Estimate Std. Error z value Pr(>|z|) ExpCoef
## (Intercept) -2.317053 0.030435 -76.131764 0.000000 0.098564
## CardBrandAMZ_FPS -1.457645 0.127920 -11.395003 0.000000 0.232784
## CardBrandAX -0.490080 0.052776 -9.286002 0.000000 0.612578
## CardBrandDI -0.193866 0.082509 -2.349635 0.018792 0.823768
## CardBrandMC 0.031520 0.018769 1.679375 0.093079 1.032022
## CardBrandPAYPAL 0.370155 0.049573 7.466918 0.000000 1.447958
## CardTypeNone 3.009223 0.057059 52.738385 0.000000 20.271637
## CardTypeDebit 0.585347 0.024562 23.831167 0.000000 1.795614
## CardTypePrepaid 1.960040 0.030609 64.035057 0.000000 7.099609
## CardType6 - Charge Card -4.611310 43.953959 -0.104912 0.916445 0.009939
## UserTenure -0.016461 0.000517 -31.843948 0.000000 0.983674
## USuccessDensity -1.505717 0.028007 -53.763030 0.000000 0.221858
## USDDensity 1.959060 0.050311 38.939183 0.000000 7.092658
## UHDDensity 0.655894 0.163928 4.001102 0.000063 1.926864
## UChurnDensity -1.181897 0.094858 -12.459702 0.000000 0.306696
# Blank line, then re-print the most recently computed DevianceR2 so it
# appears directly beneath the coefficient table
cat("\n")
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.1564482
# Training-sample predictions for the reduced order/user model
#   LogisticPred -- linear predictor (predict() default, type = "link")
#   LogisticProb -- fitted churn probability (type = "response")
LogisticPred <- predict(OrderUserRedModel, newdata = ChurnTrain)
LogisticProb <- predict(OrderUserRedModel, newdata = ChurnTrain,
                        type = "response")
# Classification analysis, training sample:
# predict churn (1) when the fitted probability exceeds Threshold
Threshold <- 0.5
LogisticClass <- rep(0, TrainRows)
LogisticClass[LogisticProb > Threshold] <- 1
# Tabulate the predicted class against fixed levels 0/1 so the confusion
# table is always 2 x 2 (rows = predicted, columns = actual), even if a
# different model or threshold yields no positive predictions
Confusion <- table(LogisticClass = factor(LogisticClass, levels = c(0, 1)),
                   ChurnTrain$Churn)
Confusion
##
## LogisticClass 0 1
## 0 404243 17884
## 1 921 351
# Overall training accuracy: share of rows whose predicted class equals Churn
# (diagonal of the confusion table divided by the number of rows)
mean(LogisticClass == ChurnTrain$Churn)
## [1] 0.9555856
# Lift chart -- true positive rate versus rate of positive prediction
# (for changing cutoffs)
# Using prediction/performance functions from the ROCR package
plotpred <- prediction(LogisticProb, ChurnTrain$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "rpp")
rpp <- plotroc@x.values[[1]] # Extract "rpp" (the x.measure) for plotting
tpr <- plotroc@y.values[[1]] # Extract "tpr" (the measure) for plotting
Cutoff <- plotroc@alpha.values[[1]] # Extract cutoffs for color scale
# ggplot() is used in place of the deprecated qplot(); same line geometry,
# color mapping, title and axis labels
ggplot(data.frame(rpp = rpp, tpr = tpr, Cutoff = Cutoff),
       aes(x = rpp, y = tpr, color = Cutoff)) +
  geom_line() +
  labs(title = "Lift Chart -- Train",
       x = "Rate of Positive Prediction",
       y = "True Positive Rate") +
  theme(legend.position = c(0.90, 0.23))
# Capture lift information for comparative lift Chart
ScoreFracOUR <- rpp
ChurnFracOUR <- tpr
ModelOUR <- rep("Add Order/User", length(rpp))
# Extended model -- explore additional predictors before fitting
# Pairs-plot table: churn flag plus a handful of candidate predictors
PairTab <- as.data.frame(
  select(ChurnTrain, Churn, logUserTenure, logSinceLastPlay, CardBrand, CardType)
)
# Recode the 0/1 churn indicator to "N"/"Y" (a character column in PairTab)
ChurnYN <- replace(rep("N", TrainRows), which(PairTab$Churn == 1), "Y")
PairTab$Churn <- ChurnYN
# Pairs plot: densities/bars on the diagonal, smooths and box plots in the
# upper triangle, lower triangle left blank
ggpairs(
  data = PairTab,
  title = "Extended Model -- Churn versus Selected Predictors",
  diag = list(continuous = "density", discrete = "bar"),
  upper = list(
    continuous = wrap("smooth", alpha = 0.01, size = 0.2),
    combo = "box"
  ),
  # axisLabels="none",
  lower = "blank"
)
# Columns considered for the extended model, pulled out for a quick summary
ModelVars <- select(
  ChurnTrain,
  # response, plan and payment attributes
  Churn, ServiceTier, CardBrand, CardType, MoPerBill,
  # tenure and listening recency
  UserTenure, logUserTenure,
  SinceLastPlay, logSinceLastPlay, LastPlay, RecentPlay,
  # subscription and order counts
  SubCount, UserOrderCount, logUserOrderCount, UAttemptsPerOrder,
  # *Density / *Den columns
  UChurnDensity, USuccessDensity, USDDensity, UHDDensity,
  UPTS89Den, UPTS05Den, UPTS14Den, UPTS56Den, UPTS52Den,
  UPTS12Den, UPTS33Den, UPTS41Den, UPTS04Den,
  # *CL columns
  UChurnCL, USuccessCL, USDCL, UHDCL,
  UPTS89CL, UPTS05CL, UPTS14CL, UPTS56CL, UPTS52CL,
  UPTS12CL, UPTS33CL, UPTS41CL, UPTS04CL
)
summary(ModelVars)
## Churn ServiceTier CardBrand CardType MoPerBill
## Min. :0.00000 RHAP: 9052 VI :255894 Credit :184095 1 :419569
## 1st Qu.:0.00000 FREE: 0 AMZ_FPS: 650 None : 2440 3 : 1759
## Median :0.00000 PREM:270114 AX : 42669 Debit :220244 12: 2071
## Mean :0.04307 RR : 1016 DI : 11240 Prepaid : 16619
## 3rd Qu.:0.00000 RTG :143081 MC :100014 6 - Charge Card: 1
## Max. :1.00000 RU : 136 PAYPAL : 12932
##
## UserTenure logUserTenure SinceLastPlay logSinceLastPlay LastPlay
## Min. : 1.00 Min. :0.6931 Min. : 0.00 Min. :0.00 Over one year: 61627
## 1st Qu.: 11.00 1st Qu.:2.4849 1st Qu.: 0.00 1st Qu.:0.00 Past 30 days :312917
## Median : 24.00 Median :3.2189 Median : 0.00 Median :0.00 Past 90 days : 18703
## Mean : 30.09 Mean :3.0703 Mean : 66.02 Mean :1.14 Past year : 30152
## 3rd Qu.: 40.00 3rd Qu.:3.7136 3rd Qu.: 0.00 3rd Qu.:0.00
## Max. :144.00 Max. :4.9767 Max. :3849.00 Max. :8.26
## NA's :36219 NA's :36219
## RecentPlay SubCount UserOrderCount logUserOrderCount UAttemptsPerOrder
## No :110482 Min. : 0.000 Min. : 0.00 Min. :0.000 Min. : 1.000
## Yes:312917 1st Qu.: 1.000 1st Qu.: 9.00 1st Qu.:2.303 1st Qu.: 1.000
## Median : 1.000 Median :20.00 Median :3.045 Median : 1.000
## Mean : 1.371 Mean :17.27 Mean :2.662 Mean : 1.229
## 3rd Qu.: 2.000 3rd Qu.:25.00 3rd Qu.:3.258 3rd Qu.: 1.125
## Max. :17.000 Max. :46.00 Max. :3.850 Max. :18.000
## NA's :17325
## UChurnDensity USuccessDensity USDDensity UHDDensity UPTS89Den
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.7917 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.8621 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01387 Mean :0.8061 Mean :0.04571 Mean :0.005288 Mean :0.02572
## 3rd Qu.:0.00000 3rd Qu.:0.9091 3rd Qu.:0.03846 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.000000 Max. :1.00000
##
## UPTS05Den UPTS14Den UPTS56Den UPTS52Den UPTS12Den
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.0000000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.000000 Median :0.0000000
## Mean :0.01319 Mean :0.003407 Mean :0.00176 Mean :0.001216 Mean :0.0008748
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.0000000
## Max. :1.00000 Max. :1.000000 Max. :1.00000 Max. :1.000000 Max. :1.0000000
##
## UPTS33Den UPTS41Den UPTS04Den UChurnCL USuccessCL
## Min. :0.0000000 Min. :0.000000 Min. :0.000000 Min. : 0.0000 Min. : 0.00
## 1st Qu.:0.0000000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.: 0.0000 1st Qu.: 8.75
## Median :0.0000000 Median :0.000000 Median :0.000000 Median : 0.0000 Median : 20.45
## Mean :0.0008269 Mean :0.003222 Mean :0.001022 Mean : 0.2048 Mean : 25.46
## 3rd Qu.:0.0000000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.: 0.0000 3rd Qu.: 34.00
## Max. :1.0000000 Max. :1.000000 Max. :1.000000 Max. :98.7692 Max. :143.00
##
## USDCL UHDCL UPTS89CL UPTS05CL UPTS14CL
## Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 0.00000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00000
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 0.00000
## Mean : 0.8451 Mean : 0.1166 Mean : 0.3746 Mean : 0.3137 Mean : 0.07416
## 3rd Qu.: 1.1177 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.00000
## Max. :122.7692 Max. :97.0000 Max. :85.0000 Max. :122.7692 Max. :98.00000
##
## UPTS56CL UPTS52CL UPTS12CL UPTS33CL UPTS41CL
## Min. : 0.00000 Min. : 0.00000 Min. : 0.00000 Min. : 0.0000 Min. : 0.00000
## 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 0.00000
## Median : 0.00000 Median : 0.00000 Median : 0.00000 Median : 0.0000 Median : 0.00000
## Mean : 0.03909 Mean : 0.03433 Mean : 0.02077 Mean : 0.0209 Mean : 0.05875
## 3rd Qu.: 0.00000 3rd Qu.: 0.00000 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 0.00000
## Max. :89.53846 Max. :89.53846 Max. :98.00000 Max. :84.0000 Max. :78.46154
##
## UPTS04CL
## Min. : 0.00000
## 1st Qu.: 0.00000
## Median : 0.00000
## Mean : 0.02406
## 3rd Qu.: 0.00000
## Max. :97.00000
##
# Extended logistic regression of Churn on the full candidate predictor set
# (plan, card, tenure, recency, order counts, density and CL columns),
# fit on ChurnTrain
# NOTE(review): the recorded summary reports RecentPlayYes as NA ("1 not
# defined because of singularities"); RecentPlay looks redundant once
# LastPlay is in the model -- consider dropping it from the formula
# NOTE(review): the recorded summary also reports 50686 observations deleted
# due to missingness, so this model is fit on fewer rows than the earlier
# models and its deviances are not directly comparable with theirs
ExtendedModel <- glm(Churn ~ ServiceTier + CardBrand + CardType + MoPerBill +
UserTenure + logUserTenure +
SinceLastPlay + logSinceLastPlay + LastPlay + RecentPlay +
SubCount + UserOrderCount + logUserOrderCount +
UAttemptsPerOrder +
UChurnDensity + USuccessDensity + USDDensity + UHDDensity +
UPTS89Den + UPTS05Den + UPTS14Den + UPTS56Den + UPTS52Den +
UPTS12Den + UPTS33Den + UPTS41Den + UPTS04Den +
UChurnCL + USuccessCL + USDCL + UHDCL +
UPTS89CL + UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL +
UPTS12CL + UPTS33CL + UPTS41CL + UPTS04CL,
family = binomial(logit), data = ChurnTrain)
# Print coefficient estimates, deviances and AIC for the fitted model
summary(ExtendedModel)
##
## Call:
## glm(formula = Churn ~ ServiceTier + CardBrand + CardType + MoPerBill +
## UserTenure + logUserTenure + SinceLastPlay + logSinceLastPlay +
## LastPlay + RecentPlay + SubCount + UserOrderCount + logUserOrderCount +
## UAttemptsPerOrder + UChurnDensity + USuccessDensity + USDDensity +
## UHDDensity + UPTS89Den + UPTS05Den + UPTS14Den + UPTS56Den +
## UPTS52Den + UPTS12Den + UPTS33Den + UPTS41Den + UPTS04Den +
## UChurnCL + USuccessCL + USDCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS33CL + UPTS41CL +
## UPTS04CL, family = binomial(logit), data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.3702 -0.2292 -0.1639 -0.1179 3.9899
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.2431572 0.1849738 -39.158 < 2e-16 ***
## ServiceTierPREM 0.2370009 0.0741663 3.196 0.001396 **
## ServiceTierRR 0.6095221 0.8706641 0.700 0.483886
## ServiceTierRTG 0.0710088 0.0753619 0.942 0.346071
## ServiceTierRU 0.1490950 0.4967812 0.300 0.764084
## CardBrandAMZ_FPS -3.4025001 0.2073048 -16.413 < 2e-16 ***
## CardBrandAX -0.7325921 0.0690226 -10.614 < 2e-16 ***
## CardBrandDI -0.4332736 0.1063210 -4.075 4.60e-05 ***
## CardBrandMC -0.0052360 0.0231008 -0.227 0.820690
## CardBrandPAYPAL 0.1846635 0.0682197 2.707 0.006792 **
## CardTypeNone 4.4299644 0.0702075 63.098 < 2e-16 ***
## CardTypeDebit 0.6514186 0.0294484 22.121 < 2e-16 ***
## CardTypePrepaid 1.4480550 0.0419931 34.483 < 2e-16 ***
## CardType6 - Charge Card -4.1142778 43.9539665 -0.094 0.925424
## MoPerBill3 0.7324124 0.1518552 4.823 1.41e-06 ***
## MoPerBill12 2.0328721 0.1136184 17.892 < 2e-16 ***
## UserTenure -0.0337960 0.0040232 -8.400 < 2e-16 ***
## logUserTenure -0.3483088 0.0366555 -9.502 < 2e-16 ***
## SinceLastPlay -0.0019069 0.0001259 -15.144 < 2e-16 ***
## logSinceLastPlay 0.9195961 0.0103242 89.072 < 2e-16 ***
## LastPlayPast 30 days 3.3809142 0.0935690 36.133 < 2e-16 ***
## LastPlayPast 90 days 1.2262614 0.0830922 14.758 < 2e-16 ***
## LastPlayPast year 0.1875247 0.0658581 2.847 0.004408 **
## RecentPlayYes NA NA NA NA
## SubCount 0.1042504 0.0130455 7.991 1.33e-15 ***
## UserOrderCount -0.0111914 0.0042739 -2.619 0.008830 **
## logUserOrderCount 0.0837204 0.0497924 1.681 0.092687 .
## UAttemptsPerOrder 0.1112715 0.0122622 9.074 < 2e-16 ***
## UChurnDensity 0.0192178 0.1515762 0.127 0.899109
## USuccessDensity -0.4075729 0.0995770 -4.093 4.26e-05 ***
## USDDensity 1.1715813 0.2094996 5.592 2.24e-08 ***
## UHDDensity 0.0472876 0.5660570 0.084 0.933423
## UPTS89Den -0.1072644 0.2046095 -0.524 0.600112
## UPTS05Den 0.1561634 0.2169172 0.720 0.471573
## UPTS14Den -0.3192043 0.3092912 -1.032 0.302048
## UPTS56Den -0.3905099 0.3073387 -1.271 0.203865
## UPTS52Den 1.3653120 0.3565215 3.830 0.000128 ***
## UPTS12Den 0.6972033 0.4439069 1.571 0.116274
## UPTS33Den -0.2079464 0.5663203 -0.367 0.713478
## UPTS41Den 1.4377468 0.6027601 2.385 0.017066 *
## UPTS04Den 0.6757543 0.8118433 0.832 0.405200
## UChurnCL 0.0046984 0.0092942 0.506 0.613198
## USuccessCL 0.0382151 0.0043400 8.805 < 2e-16 ***
## USDCL 0.0152729 0.0189048 0.808 0.419157
## UHDCL 0.0272423 0.0198526 1.372 0.169992
## UPTS89CL 0.0776018 0.0188325 4.121 3.78e-05 ***
## UPTS05CL -0.0253374 0.0190614 -1.329 0.183765
## UPTS14CL -0.0400318 0.0214964 -1.862 0.062567 .
## UPTS56CL -0.0341326 0.0231291 -1.476 0.140012
## UPTS52CL -0.0503562 0.0228859 -2.200 0.027784 *
## UPTS12CL -0.0268814 0.0229966 -1.169 0.242431
## UPTS33CL -0.0075449 0.0268240 -0.281 0.778499
## UPTS41CL -0.0276667 0.0253715 -1.090 0.275508
## UPTS04CL -0.0286894 0.0287737 -0.997 0.318731
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 109130 on 372712 degrees of freedom
## Residual deviance: 86093 on 372660 degrees of freedom
## (50686 observations deleted due to missingness)
## AIC: 86199
##
## Number of Fisher Scoring iterations: 7
# Pseudo (deviance) R2 for the extended model:
# share of the null deviance removed by the predictors
NullDev <- ExtendedModel[["null.deviance"]]
ResDev <- ExtendedModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.2110935
# Stepwise feature selection with step(); the penalty per parameter is k
# AIC -- k = 2
# BIC -- k = log(n), n = number of observations actually used in the fit
# ExtendedModel was fit on fewer rows than TrainRows (its summary reports
# observations deleted due to missingness), so log(TrainRows) would overstate
# the BIC penalty; nobs() returns the row count the fit really used.
# NOTE: the recorded trace that follows was produced with k = log(TrainRows);
# its criterion values shift slightly under the corrected penalty.
ExtendedRedModel <- step(ExtendedModel, k = log(nobs(ExtendedModel))) # based on BIC
## Start: AIC=86780.17
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## RecentPlay + SubCount + UserOrderCount + logUserOrderCount +
## UAttemptsPerOrder + UChurnDensity + USuccessDensity + USDDensity +
## UHDDensity + UPTS89Den + UPTS05Den + UPTS14Den + UPTS56Den +
## UPTS52Den + UPTS12Den + UPTS33Den + UPTS41Den + UPTS04Den +
## UChurnCL + USuccessCL + USDCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS33CL + UPTS41CL +
## UPTS04CL
##
##
## Step: AIC=86780.17
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## UChurnDensity + USuccessDensity + USDDensity + UHDDensity +
## UPTS89Den + UPTS05Den + UPTS14Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS33Den + UPTS41Den + UPTS04Den + UChurnCL +
## USuccessCL + USDCL + UHDCL + UPTS89CL + UPTS05CL + UPTS14CL +
## UPTS56CL + UPTS52CL + UPTS12CL + UPTS33CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UHDDensity 1 86094 86767
## - UChurnDensity 1 86094 86767
## - UPTS33CL 1 86094 86767
## - UPTS33Den 1 86094 86767
## - UChurnCL 1 86094 86767
## - UPTS89Den 1 86094 86767
## - UPTS05Den 1 86094 86768
## - USDCL 1 86094 86768
## - UPTS04Den 1 86094 86768
## - UPTS04CL 1 86095 86768
## - UPTS14Den 1 86095 86768
## - UPTS41CL 1 86095 86768
## - UPTS12CL 1 86095 86769
## - UHDCL 1 86095 86769
## - UPTS56Den 1 86095 86769
## - UPTS05CL 1 86095 86769
## - UPTS12Den 1 86096 86770
## - UPTS56CL 1 86096 86770
## - logUserOrderCount 1 86096 86770
## - UPTS14CL 1 86097 86770
## - UPTS52CL 1 86098 86772
## - UPTS41Den 1 86100 86773
## - UserOrderCount 1 86100 86774
## <none> 86093 86780
## - UPTS52Den 1 86108 86781
## - USuccessDensity 1 86110 86784
## - ServiceTier 4 86150 86785
## - UPTS89CL 1 86112 86785
## - USDDensity 1 86124 86797
## - SubCount 1 86156 86829
## - UserTenure 1 86173 86847
## - UAttemptsPerOrder 1 86176 86850
## - USuccessCL 1 86179 86853
## - logUserTenure 1 86180 86853
## - MoPerBill 2 86364 87025
## - SinceLastPlay 1 86412 87086
## - CardBrand 5 86735 87357
## - LastPlay 3 89177 89824
## - CardType 4 90025 90660
## - logSinceLastPlay 1 92309 92983
##
## Step: AIC=86767.22
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## UChurnDensity + USuccessDensity + USDDensity + UPTS89Den +
## UPTS05Den + UPTS14Den + UPTS56Den + UPTS52Den + UPTS12Den +
## UPTS33Den + UPTS41Den + UPTS04Den + UChurnCL + USuccessCL +
## USDCL + UHDCL + UPTS89CL + UPTS05CL + UPTS14CL + UPTS56CL +
## UPTS52CL + UPTS12CL + UPTS33CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UChurnDensity 1 86094 86754
## - UPTS33CL 1 86094 86754
## - UPTS33Den 1 86094 86754
## - UChurnCL 1 86094 86755
## - UPTS89Den 1 86094 86755
## - UPTS05Den 1 86094 86755
## - USDCL 1 86094 86755
## - UPTS14Den 1 86095 86755
## - UPTS04CL 1 86095 86755
## - UPTS12CL 1 86095 86756
## - UPTS04Den 1 86095 86756
## - UPTS41CL 1 86095 86756
## - UPTS56Den 1 86095 86756
## - UPTS05CL 1 86095 86756
## - UHDCL 1 86096 86756
## - UPTS12Den 1 86096 86757
## - UPTS56CL 1 86096 86757
## - logUserOrderCount 1 86096 86757
## - UPTS14CL 1 86097 86758
## - UPTS52CL 1 86098 86759
## - UserOrderCount 1 86100 86761
## <none> 86094 86767
## - UPTS52Den 1 86108 86768
## - USuccessDensity 1 86110 86771
## - ServiceTier 4 86150 86772
## - UPTS89CL 1 86112 86773
## - USDDensity 1 86124 86784
## - UPTS41Den 1 86137 86797
## - SubCount 1 86156 86816
## - UserTenure 1 86173 86834
## - UAttemptsPerOrder 1 86176 86837
## - USuccessCL 1 86179 86840
## - logUserTenure 1 86180 86840
## - MoPerBill 2 86364 87012
## - SinceLastPlay 1 86412 87073
## - CardBrand 5 86735 87344
## - LastPlay 3 89177 89811
## - CardType 4 90026 90648
## - logSinceLastPlay 1 92309 92970
##
## Step: AIC=86754.28
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS89Den + UPTS05Den + UPTS14Den +
## UPTS56Den + UPTS52Den + UPTS12Den + UPTS33Den + UPTS41Den +
## UPTS04Den + UChurnCL + USuccessCL + USDCL + UHDCL + UPTS89CL +
## UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS33CL +
## UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UPTS33CL 1 86094 86741
## - UPTS33Den 1 86094 86741
## - UPTS89Den 1 86094 86742
## - UChurnCL 1 86094 86742
## - UPTS05Den 1 86094 86742
## - USDCL 1 86094 86742
## - UPTS14Den 1 86095 86742
## - UPTS04CL 1 86095 86743
## - UPTS12CL 1 86095 86743
## - UPTS04Den 1 86095 86743
## - UPTS41CL 1 86095 86743
## - UPTS56Den 1 86095 86743
## - UPTS05CL 1 86095 86743
## - UHDCL 1 86096 86744
## - UPTS56CL 1 86096 86744
## - UPTS12Den 1 86096 86744
## - logUserOrderCount 1 86096 86744
## - UPTS14CL 1 86097 86745
## - UPTS52CL 1 86098 86746
## - UserOrderCount 1 86100 86748
## <none> 86094 86754
## - UPTS52Den 1 86108 86755
## - ServiceTier 4 86150 86759
## - UPTS89CL 1 86112 86760
## - USuccessDensity 1 86118 86766
## - USDDensity 1 86124 86772
## - UPTS41Den 1 86138 86786
## - SubCount 1 86157 86804
## - UAttemptsPerOrder 1 86177 86824
## - UserTenure 1 86179 86826
## - logUserTenure 1 86180 86827
## - USuccessCL 1 86186 86834
## - MoPerBill 2 86364 86999
## - SinceLastPlay 1 86413 87060
## - CardBrand 5 86735 87331
## - LastPlay 3 89177 89798
## - CardType 4 90027 90636
## - logSinceLastPlay 1 92309 92957
##
## Step: AIC=86741.41
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS89Den + UPTS05Den + UPTS14Den +
## UPTS56Den + UPTS52Den + UPTS12Den + UPTS33Den + UPTS41Den +
## UPTS04Den + UChurnCL + USuccessCL + USDCL + UHDCL + UPTS89CL +
## UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS41CL +
## UPTS04CL
##
## Df Deviance AIC
## - UPTS33Den 1 86094 86729
## - UPTS89Den 1 86094 86729
## - UChurnCL 1 86094 86729
## - UPTS05Den 1 86094 86729
## - USDCL 1 86094 86729
## - UPTS14Den 1 86095 86730
## - UPTS04CL 1 86095 86730
## - UPTS12CL 1 86095 86730
## - UPTS04Den 1 86095 86730
## - UPTS41CL 1 86095 86730
## - UPTS05CL 1 86095 86730
## - UPTS56Den 1 86095 86730
## - UPTS56CL 1 86096 86731
## - UPTS12Den 1 86096 86731
## - logUserOrderCount 1 86096 86731
## - UHDCL 1 86097 86731
## - UPTS14CL 1 86098 86733
## - UPTS52CL 1 86099 86733
## - UserOrderCount 1 86101 86735
## <none> 86094 86741
## - UPTS52Den 1 86108 86743
## - ServiceTier 4 86150 86746
## - UPTS89CL 1 86114 86749
## - USuccessDensity 1 86118 86753
## - USDDensity 1 86126 86761
## - UPTS41Den 1 86138 86773
## - SubCount 1 86157 86792
## - UAttemptsPerOrder 1 86177 86811
## - UserTenure 1 86179 86813
## - logUserTenure 1 86180 86815
## - USuccessCL 1 86186 86821
## - MoPerBill 2 86364 86986
## - SinceLastPlay 1 86413 87047
## - CardBrand 5 86735 87318
## - LastPlay 3 89177 89785
## - CardType 4 90027 90623
## - logSinceLastPlay 1 92310 92944
##
## Step: AIC=86728.79
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS89Den + UPTS05Den + UPTS14Den +
## UPTS56Den + UPTS52Den + UPTS12Den + UPTS41Den + UPTS04Den +
## UChurnCL + USuccessCL + USDCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UPTS89Den 1 86094 86716
## - UChurnCL 1 86094 86716
## - USDCL 1 86094 86716
## - UPTS05Den 1 86095 86716
## - UPTS04CL 1 86095 86717
## - UPTS14Den 1 86095 86717
## - UPTS12CL 1 86095 86717
## - UPTS41CL 1 86095 86717
## - UPTS04Den 1 86095 86717
## - UPTS05CL 1 86095 86717
## - UPTS56Den 1 86096 86718
## - UPTS56CL 1 86096 86718
## - UPTS12Den 1 86096 86718
## - UHDCL 1 86097 86718
## - logUserOrderCount 1 86097 86719
## - UPTS14CL 1 86098 86720
## - UPTS52CL 1 86099 86721
## - UserOrderCount 1 86101 86723
## <none> 86094 86729
## - UPTS52Den 1 86108 86730
## - ServiceTier 4 86150 86733
## - UPTS89CL 1 86116 86738
## - USuccessDensity 1 86119 86741
## - USDDensity 1 86126 86748
## - UPTS41Den 1 86138 86760
## - SubCount 1 86157 86779
## - UAttemptsPerOrder 1 86177 86799
## - UserTenure 1 86179 86801
## - logUserTenure 1 86180 86802
## - USuccessCL 1 86186 86808
## - MoPerBill 2 86364 86973
## - SinceLastPlay 1 86413 87035
## - CardBrand 5 86735 87306
## - LastPlay 3 89177 89773
## - CardType 4 90027 90610
## - logSinceLastPlay 1 92310 92932
##
## Step: AIC=86716.09
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS14Den + UPTS56Den +
## UPTS52Den + UPTS12Den + UPTS41Den + UPTS04Den + UChurnCL +
## USuccessCL + USDCL + UHDCL + UPTS89CL + UPTS05CL + UPTS14CL +
## UPTS56CL + UPTS52CL + UPTS12CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UChurnCL 1 86095 86704
## - UPTS14Den 1 86095 86704
## - UPTS04CL 1 86095 86704
## - USDCL 1 86095 86704
## - UPTS04Den 1 86096 86705
## - UPTS41CL 1 86096 86705
## - UPTS56Den 1 86096 86705
## - UPTS12CL 1 86096 86705
## - logUserOrderCount 1 86097 86706
## - UPTS05Den 1 86097 86706
## - UPTS05CL 1 86097 86706
## - UHDCL 1 86097 86706
## - UPTS56CL 1 86097 86706
## - UPTS12Den 1 86098 86706
## - UPTS14CL 1 86100 86708
## - UserOrderCount 1 86101 86710
## - UPTS52CL 1 86101 86710
## <none> 86094 86716
## - ServiceTier 4 86151 86721
## - UPTS52Den 1 86115 86724
## - USuccessDensity 1 86119 86728
## - UPTS89CL 1 86123 86732
## - UPTS41Den 1 86139 86748
## - SubCount 1 86157 86766
## - UAttemptsPerOrder 1 86177 86786
## - UserTenure 1 86179 86788
## - logUserTenure 1 86180 86789
## - USuccessCL 1 86187 86796
## - USDDensity 1 86238 86847
## - MoPerBill 2 86364 86960
## - SinceLastPlay 1 86413 87022
## - CardBrand 5 86735 87293
## - LastPlay 3 89177 89760
## - CardType 4 90029 90599
## - logSinceLastPlay 1 92310 92919
##
## Step: AIC=86703.55
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS14Den + UPTS56Den +
## UPTS52Den + UPTS12Den + UPTS41Den + UPTS04Den + USuccessCL +
## USDCL + UHDCL + UPTS89CL + UPTS05CL + UPTS14CL + UPTS56CL +
## UPTS52CL + UPTS12CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UPTS14Den 1 86096 86691
## - UPTS04CL 1 86096 86692
## - UPTS04Den 1 86096 86692
## - USDCL 1 86096 86692
## - UPTS56Den 1 86096 86692
## - UPTS41CL 1 86096 86692
## - UPTS12CL 1 86097 86693
## - logUserOrderCount 1 86097 86693
## - UPTS05Den 1 86098 86693
## - UPTS05CL 1 86098 86694
## - UPTS56CL 1 86098 86694
## - UHDCL 1 86098 86694
## - UPTS12Den 1 86098 86694
## - UPTS14CL 1 86100 86696
## - UserOrderCount 1 86101 86697
## - UPTS52CL 1 86102 86698
## <none> 86095 86704
## - ServiceTier 4 86151 86708
## - UPTS52Den 1 86115 86711
## - USuccessDensity 1 86121 86717
## - UPTS89CL 1 86123 86719
## - UPTS41Den 1 86140 86736
## - SubCount 1 86158 86754
## - UAttemptsPerOrder 1 86177 86773
## - UserTenure 1 86179 86775
## - logUserTenure 1 86180 86776
## - USuccessCL 1 86187 86783
## - USDDensity 1 86239 86835
## - MoPerBill 2 86367 86950
## - SinceLastPlay 1 86414 87010
## - CardBrand 5 86736 87280
## - LastPlay 3 89177 89747
## - CardType 4 90029 90586
## - logSinceLastPlay 1 92310 92906
##
## Step: AIC=86691.5
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + UPTS04Den + USuccessCL + USDCL +
## UHDCL + UPTS89CL + UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL +
## UPTS12CL + UPTS41CL + UPTS04CL
##
## Df Deviance AIC
## - UPTS04CL 1 86097 86680
## - UPTS04Den 1 86097 86680
## - UPTS56Den 1 86097 86680
## - USDCL 1 86097 86680
## - UPTS41CL 1 86097 86680
## - UPTS12CL 1 86098 86681
## - logUserOrderCount 1 86098 86681
## - UPTS05CL 1 86098 86681
## - UPTS56CL 1 86099 86682
## - UPTS05Den 1 86099 86682
## - UPTS12Den 1 86099 86682
## - UHDCL 1 86099 86682
## - UserOrderCount 1 86102 86685
## - UPTS52CL 1 86102 86686
## - UPTS14CL 1 86104 86687
## <none> 86096 86691
## - ServiceTier 4 86152 86696
## - UPTS52Den 1 86116 86699
## - USuccessDensity 1 86121 86704
## - UPTS89CL 1 86124 86707
## - UPTS41Den 1 86140 86723
## - SubCount 1 86159 86742
## - UAttemptsPerOrder 1 86177 86760
## - UserTenure 1 86180 86763
## - logUserTenure 1 86181 86764
## - USuccessCL 1 86188 86771
## - USDDensity 1 86239 86822
## - MoPerBill 2 86368 86938
## - SinceLastPlay 1 86415 86998
## - CardBrand 5 86737 87268
## - LastPlay 3 89177 89735
## - CardType 4 90032 90576
## - logSinceLastPlay 1 92311 92894
##
## Step: AIC=86679.7
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + UPTS04Den + USuccessCL + USDCL +
## UHDCL + UPTS89CL + UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL +
## UPTS12CL + UPTS41CL
##
## Df Deviance AIC
## - UPTS04Den 1 86097 86667
## - USDCL 1 86098 86668
## - UPTS41CL 1 86098 86668
## - UPTS56Den 1 86098 86668
## - UPTS12CL 1 86098 86668
## - UPTS05CL 1 86099 86669
## - UHDCL 1 86099 86669
## - UPTS56CL 1 86099 86669
## - logUserOrderCount 1 86099 86669
## - UPTS05Den 1 86100 86670
## - UPTS12Den 1 86100 86670
## - UPTS52CL 1 86103 86673
## - UserOrderCount 1 86103 86674
## - UPTS14CL 1 86104 86674
## <none> 86097 86680
## - ServiceTier 4 86153 86685
## - UPTS52Den 1 86117 86687
## - USuccessDensity 1 86123 86693
## - UPTS89CL 1 86127 86697
## - UPTS41Den 1 86141 86711
## - SubCount 1 86160 86730
## - UAttemptsPerOrder 1 86178 86748
## - UserTenure 1 86181 86751
## - logUserTenure 1 86182 86752
## - USuccessCL 1 86189 86759
## - USDDensity 1 86241 86811
## - MoPerBill 2 86369 86926
## - SinceLastPlay 1 86416 86986
## - CardBrand 5 86737 87255
## - LastPlay 3 89179 89724
## - CardType 4 90033 90564
## - logSinceLastPlay 1 92314 92884
##
## Step: AIC=86667.25
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + USDCL + UHDCL + UPTS89CL +
## UPTS05CL + UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS41CL
##
## Df Deviance AIC
## - USDCL 1 86098 86655
## - UPTS56Den 1 86099 86656
## - UPTS41CL 1 86099 86656
## - UPTS12CL 1 86099 86656
## - UPTS05CL 1 86100 86657
## - logUserOrderCount 1 86100 86657
## - UPTS56CL 1 86100 86657
## - UPTS05Den 1 86100 86658
## - UPTS12Den 1 86101 86658
## - UHDCL 1 86101 86658
## - UPTS52CL 1 86104 86661
## - UserOrderCount 1 86104 86661
## - UPTS14CL 1 86105 86663
## <none> 86097 86667
## - ServiceTier 4 86154 86672
## - UPTS52Den 1 86118 86675
## - USuccessDensity 1 86124 86681
## - UPTS89CL 1 86127 86684
## - UPTS41Den 1 86142 86699
## - SubCount 1 86160 86718
## - UAttemptsPerOrder 1 86178 86735
## - UserTenure 1 86182 86739
## - logUserTenure 1 86183 86740
## - USuccessCL 1 86189 86747
## - USDDensity 1 86242 86799
## - MoPerBill 2 86369 86913
## - SinceLastPlay 1 86416 86973
## - CardBrand 5 86738 87243
## - LastPlay 3 89180 89711
## - CardType 4 90033 90552
## - logSinceLastPlay 1 92314 92871
##
## Step: AIC=86655.41
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL + UPTS12CL + UPTS41CL
##
## Df Deviance AIC
## - UPTS12CL 1 86099 86643
## - UPTS41CL 1 86099 86644
## - UPTS56CL 1 86100 86644
## - UPTS56Den 1 86100 86644
## - UPTS05Den 1 86101 86645
## - logUserOrderCount 1 86101 86645
## - UHDCL 1 86101 86646
## - UPTS12Den 1 86101 86646
## - UPTS05CL 1 86102 86646
## - UserOrderCount 1 86105 86649
## - UPTS52CL 1 86106 86650
## - UPTS14CL 1 86107 86651
## <none> 86098 86655
## - ServiceTier 4 86155 86660
## - UPTS52Den 1 86119 86663
## - USuccessDensity 1 86125 86669
## - UPTS41Den 1 86143 86687
## - SubCount 1 86161 86706
## - UAttemptsPerOrder 1 86179 86723
## - UserTenure 1 86183 86727
## - logUserTenure 1 86184 86728
## - USuccessCL 1 86190 86734
## - USDDensity 1 86255 86800
## - UPTS89CL 1 86321 86865
## - MoPerBill 2 86373 86904
## - SinceLastPlay 1 86417 86962
## - CardBrand 5 86738 87231
## - LastPlay 3 89181 89700
## - CardType 4 90033 90539
## - logSinceLastPlay 1 92315 92859
##
## Step: AIC=86643.17
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL + UPTS41CL
##
## Df Deviance AIC
## - UPTS41CL 1 86100 86631
## - UPTS56CL 1 86101 86632
## - UPTS56Den 1 86101 86632
## - UPTS12Den 1 86101 86633
## - logUserOrderCount 1 86102 86633
## - UPTS05Den 1 86102 86633
## - UHDCL 1 86102 86633
## - UPTS05CL 1 86103 86634
## - UserOrderCount 1 86106 86637
## - UPTS52CL 1 86107 86638
## - UPTS14CL 1 86107 86638
## <none> 86099 86643
## - ServiceTier 4 86155 86648
## - UPTS52Den 1 86119 86650
## - USuccessDensity 1 86126 86657
## - UPTS41Den 1 86144 86675
## - SubCount 1 86162 86693
## - UAttemptsPerOrder 1 86180 86712
## - UserTenure 1 86183 86714
## - logUserTenure 1 86184 86715
## - USuccessCL 1 86191 86722
## - USDDensity 1 86255 86787
## - UPTS89CL 1 86322 86853
## - MoPerBill 2 86374 86892
## - SinceLastPlay 1 86418 86949
## - CardBrand 5 86739 87218
## - LastPlay 3 89182 89687
## - CardType 4 90034 90526
## - logSinceLastPlay 1 92316 92847
##
## Step: AIC=86631.3
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS56CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS56CL 1 86102 86620
## - UPTS56Den 1 86102 86620
## - UHDCL 1 86102 86620
## - UPTS12Den 1 86103 86621
## - logUserOrderCount 1 86103 86621
## - UPTS05Den 1 86103 86621
## - UPTS05CL 1 86104 86622
## - UserOrderCount 1 86107 86625
## - UPTS14CL 1 86107 86626
## - UPTS52CL 1 86108 86626
## <none> 86100 86631
## - ServiceTier 4 86157 86636
## - UPTS52Den 1 86120 86639
## - USuccessDensity 1 86127 86645
## - UPTS41Den 1 86147 86665
## - SubCount 1 86163 86681
## - UAttemptsPerOrder 1 86181 86699
## - UserTenure 1 86184 86703
## - logUserTenure 1 86186 86704
## - USuccessCL 1 86192 86710
## - USDDensity 1 86256 86774
## - UPTS89CL 1 86324 86842
## - MoPerBill 2 86374 86879
## - SinceLastPlay 1 86419 86938
## - CardBrand 5 86740 87206
## - LastPlay 3 89184 89676
## - CardType 4 90036 90516
## - logSinceLastPlay 1 92317 92835
##
## Step: AIC=86620.05
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + UHDCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UHDCL 1 86104 86609
## - logUserOrderCount 1 86104 86610
## - UPTS12Den 1 86104 86610
## - UPTS05Den 1 86105 86610
## - UPTS05CL 1 86106 86611
## - UPTS56Den 1 86108 86613
## - UserOrderCount 1 86108 86613
## - UPTS14CL 1 86109 86614
## - UPTS52CL 1 86110 86615
## <none> 86102 86620
## - ServiceTier 4 86158 86625
## - UPTS52Den 1 86122 86627
## - USuccessDensity 1 86129 86634
## - UPTS41Den 1 86149 86654
## - SubCount 1 86164 86670
## - UAttemptsPerOrder 1 86184 86689
## - UserTenure 1 86186 86692
## - logUserTenure 1 86188 86693
## - USuccessCL 1 86193 86699
## - USDDensity 1 86257 86762
## - UPTS89CL 1 86325 86831
## - MoPerBill 2 86374 86866
## - SinceLastPlay 1 86421 86926
## - CardBrand 5 86741 87194
## - LastPlay 3 89185 89664
## - CardType 4 90039 90505
## - logSinceLastPlay 1 92318 92823
##
## Step: AIC=86609.2
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS12Den + UPTS41Den + USuccessCL + UPTS89CL + UPTS05CL +
## UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS12Den 1 86106 86599
## - logUserOrderCount 1 86106 86599
## - UPTS05Den 1 86107 86599
## - UPTS05CL 1 86108 86600
## - UPTS14CL 1 86109 86601
## - UPTS56Den 1 86110 86602
## - UserOrderCount 1 86110 86603
## - UPTS52CL 1 86112 86604
## <none> 86104 86609
## - ServiceTier 4 86161 86614
## - UPTS52Den 1 86124 86616
## - USuccessDensity 1 86131 86623
## - UPTS41Den 1 86161 86654
## - SubCount 1 86167 86659
## - UAttemptsPerOrder 1 86187 86679
## - UserTenure 1 86188 86680
## - logUserTenure 1 86190 86682
## - USuccessCL 1 86196 86688
## - USDDensity 1 86258 86750
## - UPTS89CL 1 86328 86821
## - MoPerBill 2 86382 86861
## - SinceLastPlay 1 86423 86916
## - CardBrand 5 86743 87184
## - LastPlay 3 89187 89653
## - CardType 4 90044 90497
## - logSinceLastPlay 1 92320 92813
##
## Step: AIC=86598.7
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + logUserOrderCount + UAttemptsPerOrder +
## USuccessDensity + USDDensity + UPTS05Den + UPTS56Den + UPTS52Den +
## UPTS41Den + USuccessCL + UPTS89CL + UPTS05CL + UPTS14CL +
## UPTS52CL
##
## Df Deviance AIC
## - logUserOrderCount 1 86109 86588
## - UPTS05Den 1 86109 86588
## - UPTS05CL 1 86110 86590
## - UPTS14CL 1 86112 86591
## - UPTS56Den 1 86113 86592
## - UserOrderCount 1 86113 86592
## - UPTS52CL 1 86114 86594
## <none> 86106 86599
## - ServiceTier 4 86163 86604
## - UPTS52Den 1 86126 86606
## - USuccessDensity 1 86134 86613
## - UPTS41Den 1 86164 86644
## - SubCount 1 86169 86648
## - UserTenure 1 86191 86670
## - UAttemptsPerOrder 1 86191 86670
## - logUserTenure 1 86192 86671
## - USuccessCL 1 86198 86677
## - USDDensity 1 86265 86745
## - UPTS89CL 1 86328 86808
## - MoPerBill 2 86388 86855
## - SinceLastPlay 1 86426 86905
## - CardBrand 5 86746 87174
## - LastPlay 3 89189 89643
## - CardType 4 90045 90485
## - logSinceLastPlay 1 92322 92802
##
## Step: AIC=86588.26
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + UAttemptsPerOrder + USuccessDensity +
## USDDensity + UPTS05Den + UPTS56Den + UPTS52Den + UPTS41Den +
## USuccessCL + UPTS89CL + UPTS05CL + UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS05Den 1 86111 86578
## - UPTS05CL 1 86112 86579
## - UPTS14CL 1 86114 86580
## - UserOrderCount 1 86114 86581
## - UPTS56Den 1 86115 86582
## - UPTS52CL 1 86116 86583
## <none> 86109 86588
## - ServiceTier 4 86166 86594
## - UPTS52Den 1 86129 86595
## - USuccessDensity 1 86142 86608
## - UPTS41Den 1 86165 86632
## - SubCount 1 86179 86645
## - UAttemptsPerOrder 1 86191 86658
## - logUserTenure 1 86198 86665
## - UserTenure 1 86201 86668
## - USuccessCL 1 86202 86668
## - USDDensity 1 86265 86732
## - UPTS89CL 1 86337 86803
## - MoPerBill 2 86392 86845
## - SinceLastPlay 1 86430 86897
## - CardBrand 5 86749 87163
## - LastPlay 3 89189 89630
## - CardType 4 90051 90479
## - logSinceLastPlay 1 92324 92791
##
## Step: AIC=86577.79
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + UAttemptsPerOrder + USuccessDensity +
## USDDensity + UPTS56Den + UPTS52Den + UPTS41Den + USuccessCL +
## UPTS89CL + UPTS05CL + UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS05CL 1 86113 86567
## - UserOrderCount 1 86116 86570
## - UPTS14CL 1 86117 86570
## - UPTS56Den 1 86118 86572
## - UPTS52CL 1 86119 86572
## <none> 86111 86578
## - ServiceTier 4 86169 86583
## - UPTS52Den 1 86130 86584
## - USuccessDensity 1 86145 86598
## - UPTS41Den 1 86168 86621
## - SubCount 1 86181 86635
## - UAttemptsPerOrder 1 86199 86652
## - logUserTenure 1 86200 86654
## - USuccessCL 1 86203 86657
## - UserTenure 1 86203 86657
## - USDDensity 1 86286 86740
## - UPTS89CL 1 86339 86793
## - MoPerBill 2 86394 86835
## - SinceLastPlay 1 86432 86886
## - CardBrand 5 86753 87154
## - LastPlay 3 89193 89621
## - CardType 4 90058 90472
## - logSinceLastPlay 1 92326 92779
##
## Step: AIC=86566.53
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UserOrderCount + UAttemptsPerOrder + USuccessDensity +
## USDDensity + UPTS56Den + UPTS52Den + UPTS41Den + USuccessCL +
## UPTS89CL + UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UserOrderCount 1 86118 86558
## - UPTS14CL 1 86118 86559
## - UPTS56Den 1 86120 86560
## - UPTS52CL 1 86120 86561
## <none> 86113 86567
## - ServiceTier 4 86170 86571
## - UPTS52Den 1 86132 86573
## - USuccessDensity 1 86146 86587
## - UPTS41Den 1 86169 86610
## - SubCount 1 86182 86623
## - UAttemptsPerOrder 1 86201 86642
## - logUserTenure 1 86204 86644
## - USuccessCL 1 86205 86645
## - UserTenure 1 86205 86646
## - USDDensity 1 86289 86729
## - UPTS89CL 1 86350 86791
## - MoPerBill 2 86401 86829
## - SinceLastPlay 1 86434 86874
## - CardBrand 5 86753 87142
## - LastPlay 3 89195 89610
## - CardType 4 90069 90470
## - logSinceLastPlay 1 92329 92769
##
## Step: AIC=86558.48
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UAttemptsPerOrder + USuccessDensity + USDDensity +
## UPTS56Den + UPTS52Den + UPTS41Den + USuccessCL + UPTS89CL +
## UPTS14CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS14CL 1 86123 86551
## - UPTS56Den 1 86125 86552
## - UPTS52CL 1 86125 86553
## <none> 86118 86558
## - UPTS52Den 1 86137 86565
## - ServiceTier 4 86177 86566
## - USuccessDensity 1 86149 86577
## - UPTS41Den 1 86175 86602
## - SubCount 1 86185 86612
## - USuccessCL 1 86205 86633
## - UserTenure 1 86206 86633
## - UAttemptsPerOrder 1 86207 86635
## - logUserTenure 1 86281 86709
## - USDDensity 1 86296 86724
## - UPTS89CL 1 86355 86782
## - SinceLastPlay 1 86444 86871
## - MoPerBill 2 86491 86905
## - CardBrand 5 86756 87132
## - LastPlay 3 89195 89597
## - CardType 4 90104 90493
## - logSinceLastPlay 1 92343 92771
##
## Step: AIC=86550.55
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UAttemptsPerOrder + USuccessDensity + USDDensity +
## UPTS56Den + UPTS52Den + UPTS41Den + USuccessCL + UPTS89CL +
## UPTS52CL
##
## Df Deviance AIC
## - UPTS56Den 1 86130 86544
## - UPTS52CL 1 86130 86544
## <none> 86123 86551
## - UPTS52Den 1 86142 86557
## - ServiceTier 4 86182 86557
## - USuccessDensity 1 86154 86569
## - UPTS41Den 1 86180 86594
## - SubCount 1 86189 86604
## - USuccessCL 1 86211 86625
## - UserTenure 1 86211 86626
## - UAttemptsPerOrder 1 86213 86627
## - logUserTenure 1 86288 86702
## - USDDensity 1 86298 86713
## - UPTS89CL 1 86365 86779
## - SinceLastPlay 1 86449 86863
## - MoPerBill 2 86491 86892
## - CardBrand 5 86761 87123
## - LastPlay 3 89201 89590
## - CardType 4 90105 90481
## - logSinceLastPlay 1 92350 92764
##
## Step: AIC=86544.17
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UAttemptsPerOrder + USuccessDensity + USDDensity +
## UPTS52Den + UPTS41Den + USuccessCL + UPTS89CL + UPTS52CL
##
## Df Deviance AIC
## - UPTS52CL 1 86136 86538
## <none> 86130 86544
## - UPTS52Den 1 86149 86551
## - ServiceTier 4 86189 86551
## - USuccessDensity 1 86161 86563
## - UPTS41Den 1 86186 86588
## - SubCount 1 86196 86598
## - USuccessCL 1 86217 86619
## - UAttemptsPerOrder 1 86217 86619
## - UserTenure 1 86218 86620
## - logUserTenure 1 86295 86697
## - USDDensity 1 86300 86702
## - UPTS89CL 1 86375 86777
## - SinceLastPlay 1 86456 86857
## - MoPerBill 2 86495 86884
## - CardBrand 5 86766 87116
## - LastPlay 3 89209 89585
## - CardType 4 90109 90471
## - logSinceLastPlay 1 92360 92761
##
## Step: AIC=86537.94
## Churn ~ ServiceTier + CardBrand + CardType + MoPerBill + UserTenure +
## logUserTenure + SinceLastPlay + logSinceLastPlay + LastPlay +
## SubCount + UAttemptsPerOrder + USuccessDensity + USDDensity +
## UPTS52Den + UPTS41Den + USuccessCL + UPTS89CL
##
## Df Deviance AIC
## <none> 86136 86538
## - UPTS52Den 1 86149 86538
## - ServiceTier 4 86195 86544
## - USuccessDensity 1 86168 86556
## - UPTS41Den 1 86192 86581
## - SubCount 1 86202 86591
## - USuccessCL 1 86224 86612
## - UserTenure 1 86225 86613
## - UAttemptsPerOrder 1 86226 86615
## - logUserTenure 1 86303 86692
## - USDDensity 1 86305 86693
## - UPTS89CL 1 86382 86771
## - SinceLastPlay 1 86462 86851
## - MoPerBill 2 86495 86871
## - CardBrand 5 86773 87110
## - LastPlay 3 89215 89578
## - CardType 4 90113 90462
## - logSinceLastPlay 1 92365 92754
# Print the fitted call, coefficient table, deviances and AIC
# for the reduced extended logistic model
summary(ExtendedRedModel)
##
## Call:
## glm(formula = Churn ~ ServiceTier + CardBrand + CardType + MoPerBill +
## UserTenure + logUserTenure + SinceLastPlay + logSinceLastPlay +
## LastPlay + SubCount + UAttemptsPerOrder + USuccessDensity +
## USDDensity + UPTS52Den + UPTS41Den + USuccessCL + UPTS89CL,
## family = binomial(logit), data = ChurnTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.3142 -0.2295 -0.1650 -0.1187 4.0284
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.058448 0.162392 -43.466 < 2e-16 ***
## ServiceTierPREM 0.228153 0.073747 3.094 0.001977 **
## ServiceTierRR 0.629585 0.871885 0.722 0.470235
## ServiceTierRTG 0.058470 0.074980 0.780 0.435503
## ServiceTierRU 0.121977 0.496570 0.246 0.805961
## CardBrandAMZ_FPS -3.377877 0.206181 -16.383 < 2e-16 ***
## CardBrandAX -0.730221 0.068839 -10.608 < 2e-16 ***
## CardBrandDI -0.420883 0.105963 -3.972 7.13e-05 ***
## CardBrandMC -0.005047 0.023080 -0.219 0.826897
## CardBrandPAYPAL 0.196385 0.067490 2.910 0.003616 **
## CardTypeNone 4.432021 0.070010 63.305 < 2e-16 ***
## CardTypeDebit 0.647268 0.029335 22.065 < 2e-16 ***
## CardTypePrepaid 1.443701 0.041402 34.870 < 2e-16 ***
## CardType6 - Charge Card -4.086978 43.953964 -0.093 0.925917
## MoPerBill3 0.840731 0.146187 5.751 8.87e-09 ***
## MoPerBill12 2.004570 0.093865 21.356 < 2e-16 ***
## UserTenure -0.032609 0.003722 -8.760 < 2e-16 ***
## logUserTenure -0.367106 0.027848 -13.183 < 2e-16 ***
## SinceLastPlay -0.001930 0.000126 -15.327 < 2e-16 ***
## logSinceLastPlay 0.920101 0.010315 89.203 < 2e-16 ***
## LastPlayPast 30 days 3.373614 0.093662 36.019 < 2e-16 ***
## LastPlayPast 90 days 1.217071 0.083128 14.641 < 2e-16 ***
## LastPlayPast year 0.191317 0.065974 2.900 0.003733 **
## SubCount 0.093433 0.011320 8.254 < 2e-16 ***
## UAttemptsPerOrder 0.113285 0.011979 9.457 < 2e-16 ***
## USuccessDensity -0.446915 0.079270 -5.638 1.72e-08 ***
## USDDensity 1.047592 0.078759 13.301 < 2e-16 ***
## UPTS52Den 0.961834 0.253805 3.790 0.000151 ***
## UPTS41Den 1.461507 0.182960 7.988 1.37e-15 ***
## USuccessCL 0.035696 0.004052 8.809 < 2e-16 ***
## UPTS89CL 0.092274 0.005653 16.323 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 109130 on 372712 degrees of freedom
## Residual deviance: 86136 on 372682 degrees of freedom
## (50686 observations deleted due to missingness)
## AIC: 86198
##
## Number of Fisher Scoring iterations: 7
# Pseudo (deviance) R2 = (null deviance - residual deviance) / null deviance,
# i.e. the share of the null deviance removed by the fitted model
NullDev <- ExtendedRedModel[["null.deviance"]]
ResDev <- ExtendedRedModel[["deviance"]]
DevianceR2 <- (NullDev - ResDev) / NullDev
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.2107013
# (The Deviance R2 reported just above is the pseudo R2:
#  (Null Deviance - Residual Deviance) / (Null Deviance))
# Odds-ratio view of the logistic coefficients:
# exp(Estimate) is the multiplicative change in the odds
# per unit change in the predictor
cat("\n")
LogisticCoef <- coef(summary(ExtendedRedModel))
ExpCoef <- exp(LogisticCoef[, "Estimate"])
# Append the exponentiated estimates as an extra column, rounded for display
NeatCoef <- round(cbind(LogisticCoef, ExpCoef = ExpCoef), digits = 6)
NeatCoef
## Estimate Std. Error z value Pr(>|z|) ExpCoef
## (Intercept) -7.058448 0.162392 -43.465541 0.000000 0.000860
## ServiceTierPREM 0.228153 0.073747 3.093728 0.001977 1.256278
## ServiceTierRR 0.629585 0.871885 0.722096 0.470235 1.876832
## ServiceTierRTG 0.058470 0.074980 0.779809 0.435503 1.060213
## ServiceTierRU 0.121977 0.496570 0.245640 0.805961 1.129729
## CardBrandAMZ_FPS -3.377877 0.206181 -16.383056 0.000000 0.034120
## CardBrandAX -0.730221 0.068839 -10.607656 0.000000 0.481802
## CardBrandDI -0.420883 0.105963 -3.971977 0.000071 0.656467
## CardBrandMC -0.005047 0.023080 -0.218683 0.826897 0.994965
## CardBrandPAYPAL 0.196385 0.067491 2.909812 0.003616 1.216995
## CardTypeNone 4.432021 0.070010 63.305428 0.000000 84.101233
## CardTypeDebit 0.647268 0.029335 22.064769 0.000000 1.910315
## CardTypePrepaid 1.443701 0.041402 34.870271 0.000000 4.236344
## CardType6 - Charge Card -4.086978 43.953964 -0.092983 0.925917 0.016790
## MoPerBill3 0.840731 0.146187 5.751053 0.000000 2.318060
## MoPerBill12 2.004570 0.093865 21.355874 0.000000 7.422899
## UserTenure -0.032609 0.003722 -8.760394 0.000000 0.967917
## logUserTenure -0.367106 0.027848 -13.182531 0.000000 0.692736
## SinceLastPlay -0.001930 0.000126 -15.326796 0.000000 0.998071
## logSinceLastPlay 0.920101 0.010315 89.203399 0.000000 2.509545
## LastPlayPast 30 days 3.373614 0.093662 36.018957 0.000000 29.183816
## LastPlayPast 90 days 1.217071 0.083128 14.640884 0.000000 3.377282
## LastPlayPast year 0.191317 0.065974 2.899883 0.003733 1.210843
## SubCount 0.093433 0.011320 8.253840 0.000000 1.097937
## UAttemptsPerOrder 0.113285 0.011979 9.457029 0.000000 1.119951
## USuccessDensity -0.446915 0.079270 -5.637856 0.000000 0.639598
## USDDensity 1.047592 0.078759 13.301215 0.000000 2.850779
## UPTS52Den 0.961834 0.253805 3.789660 0.000151 2.616490
## UPTS41Den 1.461507 0.182960 7.988138 0.000000 4.312453
## USuccessCL 0.035696 0.004052 8.809332 0.000000 1.036341
## UPTS89CL 0.092274 0.005653 16.322692 0.000000 1.096665
# Emit a blank line, then repeat the pseudo R2 beneath the coefficient table
cat("\n")
cat("Deviance R2 ", DevianceR2, "\n")
## Deviance R2 0.2107013
# Score the training sample with the reduced logistic model:
#   LogisticPred -- linear predictor (logit scale)
#   LogisticProb -- predicted probability (response scale)
LogisticPred <- predict(ExtendedRedModel, newdata = ChurnTrain, type = "link")
LogisticProb <- predict(ExtendedRedModel, newdata = ChurnTrain, type = "response")
# Classification analysis, training sample: class 1 when probability > Threshold
# NOTE(review): rows whose probability is NA (missing predictor values) are left
# at class 0, so they are counted as predicted non-churners in the table below
# -- confirm this is intended. Assumes TrainRows equals nrow(ChurnTrain).
Threshold <- 0.5
LogisticClass <- numeric(TrainRows)
LogisticClass[which(LogisticProb > Threshold)] <- 1
Confusion <- table(LogisticClass, ChurnTrain$Churn)
Confusion
##
## LogisticClass 0 1
## 0 404330 17335
## 1 834 900
# Overall training accuracy: share of rows whose predicted class equals Churn
mean(LogisticClass == ChurnTrain$Churn)
## [1] 0.9570878
# Detailed confusion matrix metrics using caret package.
# Current caret releases reject non-factor inputs, so both the predictions and
# the reference are converted to factors with the same explicit 0/1 levels;
# the resulting table is the same as the one older releases built internally.
confusionMatrix(factor(LogisticClass, levels = c(0, 1)),
                factor(ChurnTrain$Churn, levels = c(0, 1)),
                positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 404330 17335
## 1 834 900
##
## Accuracy : 0.9571
## 95% CI : (0.9565, 0.9577)
## No Information Rate : 0.9569
## P-Value [Acc > NIR] : 0.3103
##
## Kappa : 0.0833
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.049356
## Specificity : 0.997942
## Pos Pred Value : 0.519031
## Neg Pred Value : 0.958889
## Prevalence : 0.043068
## Detection Rate : 0.002126
## Detection Prevalence : 0.004095
## Balanced Accuracy : 0.523649
##
## 'Positive' Class : 1
##
# ROC chart (training sample) -- true positive rate against false positive
# rate as the classification cutoff changes.
# Curve coordinates come from ROCR's performance(); drawn with ggplot2.
plotpred <- prediction(LogisticProb, ChurnTrain$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "fpr")
fpr <- plotroc@x.values[[1]]        # x axis: false positive rate
tpr <- plotroc@y.values[[1]]        # y axis: true positive rate
Cutoff <- plotroc@alpha.values[[1]] # cutoff behind each point, used for colour
ggplot(data.frame(fpr = fpr, tpr = tpr, Cutoff = Cutoff),
       aes(x = fpr, y = tpr, colour = Cutoff)) +
  geom_line() +
  ggtitle("ROC Chart -- Train") +
  xlab("False Positive Rate") +
  ylab("True Positive Rate") +
  theme(legend.position = c(0.90, 0.23))
# Lift chart (training sample) -- true positive rate against rate of positive
# prediction as the classification cutoff changes.
# Curve coordinates come from ROCR's performance(); drawn with ggplot2.
plotpred <- prediction(LogisticProb, ChurnTrain$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "rpp")
rpp <- plotroc@x.values[[1]]        # x axis: rate of positive predictions ("rpp")
tpr <- plotroc@y.values[[1]]        # y axis: true positive rate ("tpr")
Cutoff <- plotroc@alpha.values[[1]] # cutoff behind each point, used for colour
# Axis label spelling corrected ("Prediction", previously "Predicition")
ggplot(data.frame(rpp = rpp, tpr = tpr, Cutoff = Cutoff),
       aes(x = rpp, y = tpr, colour = Cutoff)) +
  geom_line() +
  ggtitle("Lift Chart -- Train") +
  xlab("Rate of Positive Prediction") +
  ylab("True Positive Rate") +
  theme(legend.position = c(0.90, 0.23))
# Keep this model's lift curve (x = rpp, y = tpr) under model-specific names
# so it can be overlaid on the comparative lift chart
ModelER <- rep("Extended", length(rpp))
ScoreFracER <- rpp
ChurnFracER <- tpr
# Same curve, labelled "Train", for the Train vs Test lift chart
TrainTest <- rep("Train", length(rpp))
LiftDataTrain <- data.frame(
  TrainTest = TrainTest,
  ScoreFrac = ScoreFracER,
  ChurnFrac = ChurnFracER
)
# Comparative lift chart: stack the captured lift curves of the selected
# models into one long data frame, one coloured line per model
LiftDataModels <- data.frame(
  Model = c(ModelBTR, ModelOUR, ModelER),
  ScoreFrac = c(ScoreFracBTR, ScoreFracOUR, ScoreFracER),
  ChurnFrac = c(ChurnFracBTR, ChurnFracOUR, ChurnFracER)
)
# Explicit level order controls the order of the legend entries
LiftDataModels[["Model"]] <- factor(
  LiftDataModels[["Model"]],
  levels = c("Extended", "Add Order/User", "Card Brand/Type")
)
ggplot(LiftDataModels, aes(x = ScoreFrac, y = ChurnFrac, colour = Model)) +
  geom_line() +
  labs(x = "Fraction of Top Churn Scores", y = "Fraction of Churn") +
  theme(legend.position = c(0.83, 0.17))
# Attach the training-sample scores to the data set as three new columns
# (the augmented data frame is written to .csv further down)
ChurnTrain[["LogisticPred"]] <- LogisticPred
ChurnTrain[["LogisticProb"]] <- LogisticProb
ChurnTrain[["LogisticClass"]] <- LogisticClass
# Quick look at the first rows, including the new columns
head(ChurnTrain, n = 3)
## TrainTest Churn UserOrigSignupDate FirstBillDate LastPlayDate SignUpDate StopRequestDate
## 2 Train 0 2003-11-15 2003-11-23 2013-11-01 2007-11-29 2999-01-01
## 3 Train 0 2010-06-01 2010-06-01 2013-11-02 2010-06-01 2014-01-07
## 5 Train 0 2004-07-30 2004-08-07 2013-11-08 2013-01-31 2999-01-01
## CancelDate CancelType CancelReason ServiceTier MonthsPerBill BillingZipCode BillingState
## 2 2999-01-01 Current Sub RTG 1 94127 CA
## 3 2999-01-01 Current Sub PREM 1 18015 PA
## 5 2999-01-01 Current Sub PREM 1 96737 HI
## EcommOrderId OrderDate PaymentTypeName PaymentTypeCode PaymentSubTypeCode CPTCardBrand
## 2 367201564 2013-11-01 Credit CC VI VI
## 3 367275211 2013-11-02 Credit CC VI VI
## 5 367488848 2013-11-08 Credit CC MC MC
## CPTCardType CardBrand CardType Amount SubCount UserOrderCount UserPayAttempts UserAmount
## 2 1 - Credit VI Credit 14.99 1 25 25 374.75
## 3 3 - Check VI Debit 9.99 1 25 25 249.75
## 5 1 - Credit MC Credit 10.35 2 26 26 269.10
## UserFirstOrderDate UserLastOrderDate UserChurns UserOrdSucces UserOrdSftDec UserOrdHrdDec
## 2 2011-05-01 2013-09-01 0 25 0 0
## 3 2011-06-02 2013-09-02 0 25 0 0
## 5 2011-05-08 2013-10-08 0 26 0 0
## UserPaySuccess UserPaySftDec UserPayHrdDec UserPTStat89 UserPTStat05 UserPTStat14 UserPTStat56
## 2 25 0 0 0 0 0 0
## 3 25 0 0 0 0 0 0
## 5 26 0 0 0 0 0 0
## UserPTStat52 UserPTStat12 UserPTStat33 UserPTStat41 UserPTStat04 MoPerBill UAttemptsPerOrder
## 2 0 0 0 0 0 1 1
## 3 0 0 0 0 0 1 1
## 5 0 0 0 0 0 1 1
## UserTenure SinceLastPlay LastPlay RecentPlay logUserTenure logSinceLastPlay
## 2 121 0 Past 30 days Yes 4.804021 0.000000
## 3 42 177 Past year No 3.761200 5.181784
## 5 112 NA Over one year No 4.727388 NA
## logUserOrderCount UChurnDensity USuccessDensity USDDensity UHDDensity UPTS89Den UPTS05Den
## 2 3.258097 0 0.8620690 0 0 0 0
## 3 3.258097 0 0.8928571 0 0 0 0
## 5 3.295837 0 0.8666667 0 0 0 0
## UPTS14Den UPTS56Den UPTS52Den UPTS12Den UPTS33Den UPTS41Den UPTS04Den UChurnCL USuccessCL USDCL
## 2 0 0 0 0 0 0 0 0 104.31034 0
## 3 0 0 0 0 0 0 0 0 37.50000 0
## 5 0 0 0 0 0 0 0 0 97.06667 0
## UHDCL UPTS89CL UPTS05CL UPTS14CL UPTS56CL UPTS52CL UPTS12CL UPTS33CL UPTS41CL UPTS04CL
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## LogisticPred LogisticProb LogisticClass
## 2 -5.790641 0.003046712 0
## 3 -3.169657 0.040323680 0
## 5 NA NA 0
# Save the scored training sample to the .csv file named in ChurnTrainAnalysis
write.csv(ChurnTrain, file = ChurnTrainAnalysis)
# Boxplots of predicted churn probability against selected factors,
# one panel per factor, arranged on a 3-column grid
ST <- ggplot(ChurnTrain, aes(x = ServiceTier, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
CB <- ggplot(ChurnTrain, aes(x = CardBrand, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
CT <- ggplot(ChurnTrain, aes(x = CardType, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
LP <- ggplot(ChurnTrain, aes(x = LastPlay, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
RP <- ggplot(ChurnTrain, aes(x = RecentPlay, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
MB <- ggplot(ChurnTrain, aes(x = MoPerBill, y = LogisticProb)) +
  geom_boxplot() +
  ylab("Prob of Churn")
grid.arrange(ST, CB, CT, LP, RP, MB, ncol = 3)
# MODEL EVALUATION USING TEST DATA
# Score the test set (ChurnTest) with ExtendedRedModel:
#   LogisticPred -- predict() with its default prediction type
#   LogisticProb -- predict() with type = "response"
LogisticPred <- predict(object = ExtendedRedModel, newdata = ChurnTest)
LogisticProb <- predict(
  object = ExtendedRedModel,
  newdata = ChurnTest,
  type = "response"
)
# Classification analysis, Test sample
# Every test row starts as class 0; rows whose predicted probability exceeds
# the threshold are switched to class 1.
# NOTE(review): which() drops NA comparisons, so rows with an NA probability
# stay at class 0 (same as before) -- confirm that this is the intended
# treatment of unscored rows.
Threshold <- 0.5
LogisticClass <- numeric(TestRows)
LogisticClass[which(LogisticProb > Threshold)] <- 1
# Cross-tabulate predicted class (rows) against observed churn (columns)
Confusion <- table(LogisticClass, ChurnTest$Churn)
Confusion
##
## LogisticClass 0 1
## 0 101060 4403
## 1 203 187
# Overall accuracy: share of test rows where predicted class equals observed churn
mean(LogisticClass == ChurnTest$Churn)
## [1] 0.9564868
# Detailed confusion matrix metrics using caret package
# confusionMatrix() expects `data` and `reference` to be factors that share
# the same levels; LogisticClass is a plain numeric 0/1 vector, so both inputs
# are coerced to factors with levels 0/1 before the call. "1" (churn) is the
# positive class.
confusionMatrix(
  data = factor(LogisticClass, levels = c(0, 1)),
  reference = factor(ChurnTest$Churn, levels = c(0, 1)),
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 101060 4403
## 1 203 187
##
## Accuracy : 0.9565
## 95% CI : (0.9552, 0.9577)
## No Information Rate : 0.9566
## P-Value [Acc > NIR] : 0.5992
##
## Kappa : 0.0688
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.040741
## Specificity : 0.997995
## Pos Pred Value : 0.479487
## Neg Pred Value : 0.958251
## Prevalence : 0.043362
## Detection Rate : 0.001767
## Detection Prevalence : 0.003684
## Balanced Accuracy : 0.519368
##
## 'Positive' Class : 1
##
# ROC chart for the test sample: true positive rate against false positive
# rate as the classification cutoff changes.
# Rates come from ROCR (prediction + performance); the curve is drawn with
# qplot (ggplot2) and colored by cutoff.
plotpred <- prediction(LogisticProb, ChurnTest$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "fpr")
# Pull the first element out of each slot of the performance object
fpr <- plotroc@x.values[[1]]        # x axis: false positive rate
tpr <- plotroc@y.values[[1]]        # y axis: true positive rate
Cutoff <- plotroc@alpha.values[[1]] # cutoff values, used for the color scale
qplot(
  x = fpr, y = tpr, color = Cutoff, geom = "line",
  main = "ROC Chart -- Test",
  xlab = "False Positive Rate",
  ylab = "True Positive Rate"
) +
  theme(legend.position = c(0.90, 0.23))
# Lift chart -- True positives versus rate of positive prediction (for changing cutoffs)
# Using performance function from ROCR package
# Plot using qplot(ggplot2)
plotpred <- prediction(LogisticProb, ChurnTest$Churn)
plotroc <- performance(plotpred, measure = "tpr", x.measure = "rpp")
rpp <- plotroc@x.values[[1]]        # Extract "rpp" for qplot(ggplot2)
tpr <- plotroc@y.values[[1]]        # Extract "tpr" for qplot(ggplot2)
Cutoff <- plotroc@alpha.values[[1]] # Extract cutoffs for color scale
qplot(
  x = rpp, y = tpr, geom = "line", color = Cutoff,
  main = "Lift Chart -- Test",
  xlab = "Rate of Positive Prediction",
  ylab = "True Positive Rate"
) +
  theme(legend.position = c(0.90, 0.23))
# Keep the extended-model curve (x = rpp, y = tpr) plus a model label
# under model-specific names for use in later comparison charts
ScoreFracER <- rpp
ChurnFracER <- tpr
ModelER <- rep("Extended", length(rpp))
# Train vs Test lift chart for the extended model
# Tag every point of the test curve with "Test", stack it under the
# training-set curve (LiftDataTrain), and draw one line per sample.
TrainTest <- rep("Test", times = length(rpp))
LiftDataTest <- data.frame(
  TrainTest = TrainTest,
  ScoreFrac = ScoreFracER,
  ChurnFrac = ChurnFracER
)
LiftData <- rbind(LiftDataTrain, LiftDataTest)
qplot(
  x = ScoreFrac, y = ChurnFrac, data = LiftData,
  color = TrainTest, geom = "line",
  main = "Churn Lift Chart -- Train vs Test, Extended Model",
  xlab = "Fraction of Top Churn Scores",
  ylab = "Fraction of Churn"
) +
  theme(legend.position = c(0.88, 0.16))
# Add predictions to data set, save to .Rdata and .csv
# The three prediction vectors are attached as new columns in this order
# (LogisticPred, LogisticProb, LogisticClass), which is the order they appear
# in at the end of the head() listing below.
# NOTE(review): no .RData save is visible in this part of the script -- confirm
# that it happens elsewhere or drop the mention above.
# NOTE(review): the listing below shows a row whose LogisticPred/LogisticProb
# are NA while LogisticClass is 0, i.e. rows with NA scores are stored as class 0.
ChurnTest$LogisticPred <- LogisticPred
ChurnTest$LogisticProb <- LogisticProb
ChurnTest$LogisticClass <- LogisticClass
# Print the first three rows as a visual check of the added columns
head(ChurnTest, n = 3)
## TrainTest Churn UserOrigSignupDate FirstBillDate LastPlayDate SignUpDate StopRequestDate
## 1 Test 0 2009-05-10 2009-05-25 2013-11-16 2009-05-10 2999-01-01
## 4 Test 0 2012-02-10 2012-02-10 2013-11-11 2012-02-10 2999-01-01
## 15 Test 0 2003-09-26 2003-11-04 2013-11-07 2013-01-31 2999-01-01
## CancelDate CancelType CancelReason ServiceTier MonthsPerBill BillingZipCode BillingState
## 1 2999-01-01 Current Sub RTG 1 44333 OH
## 4 2999-01-01 Current Sub PREM 1 19426 PA
## 15 2999-01-01 Current Sub PREM 1 26241 WV
## EcommOrderId OrderDate PaymentTypeName PaymentTypeCode PaymentSubTypeCode CPTCardBrand
## 1 367726684 2013-11-16 Credit CC VI VI
## 4 367578619 2013-11-11 Credit CC AX None
## 15 367455971 2013-11-07 Credit CC MC MC
## CPTCardType CardBrand CardType Amount SubCount UserOrderCount UserPayAttempts UserAmount
## 1 1 - Credit VI Credit 14.99 1 26 34 389.74
## 4 0 - None AX Credit 9.99 1 16 16 159.84
## 15 1 - Credit MC Credit 9.99 2 29 32 349.71
## UserFirstOrderDate UserLastOrderDate UserChurns UserOrdSucces UserOrdSftDec UserOrdHrdDec
## 1 2011-04-26 2013-10-16 0 26 0 0
## 4 2012-03-11 2013-10-11 0 16 0 0
## 15 2011-05-01 2013-10-07 0 29 0 0
## UserPaySuccess UserPaySftDec UserPayHrdDec UserPTStat89 UserPTStat05 UserPTStat14 UserPTStat56
## 1 26 2 0 0 2 0 0
## 4 16 0 0 0 0 0 0
## 15 29 1 0 1 0 0 0
## UserPTStat52 UserPTStat12 UserPTStat33 UserPTStat41 UserPTStat04 MoPerBill UAttemptsPerOrder
## 1 0 0 0 0 0 1 1.307692
## 4 0 0 0 0 0 1 1.000000
## 15 0 0 0 0 0 1 1.103448
## UserTenure SinceLastPlay LastPlay RecentPlay logUserTenure logSinceLastPlay
## 1 55 0 Past 30 days Yes 4.025352 0
## 4 22 0 Past 30 days Yes 3.135494 0
## 15 122 NA Over one year No 4.812184 NA
## logUserOrderCount UChurnDensity USuccessDensity USDDensity UHDDensity UPTS89Den UPTS05Den
## 1 3.295837 0 0.8387097 0.06451613 0 0.00000000 0.06451613
## 4 2.833213 0 0.8000000 0.00000000 0 0.00000000 0.00000000
## 15 3.401197 0 0.9666667 0.03333333 0 0.03333333 0.00000000
## UPTS14Den UPTS56Den UPTS52Den UPTS12Den UPTS33Den UPTS41Den UPTS04Den UChurnCL USuccessCL
## 1 0 0 0 0 0 0 0 0 46.12903
## 4 0 0 0 0 0 0 0 0 17.60000
## 15 0 0 0 0 0 0 0 0 117.93333
## USDCL UHDCL UPTS89CL UPTS05CL UPTS14CL UPTS56CL UPTS52CL UPTS12CL UPTS33CL UPTS41CL UPTS04CL
## 1 3.548387 0 0.000000 3.548387 0 0 0 0 0 0 0
## 4 0.000000 0 0.000000 0.000000 0 0 0 0 0 0 0
## 15 4.066667 0 4.066667 0.000000 0 0 0 0 0 0 0
## LogisticPred LogisticProb LogisticClass
## 1 -5.316598 0.004885444 0
## 4 -5.577907 0.003766232 0
## 15 NA NA 0
# Save the scored test set as a comma-separated file. write.csv is used
# (rather than write.table, whose default separator is a space) so that the
# content matches the .csv file name and the format of the training-set export.
write.csv(ChurnTest, file = ChurnTestAnalysis)