options(java.parameters = "- Xmx1024m")
pacman::p_load(tidyverse, caret, corrplot, caTools, car, ROCR, IRdisplay, xlsx, ggmap, ggpubr, broom, relaimpo)
## Set the working directory
setwd("C:/Users/SK/Desktop/Customer Analytics Project/OneDrive_1_3-2-2020")
## Read the Telco_customer_churn file
telco_raw <- read.xlsx("Telco_customer_churn.xlsx", sheetName = "Telco_Churn")
dim(telco_raw)
## [1] 7043 33
colnames(telco_raw)
## [1] "CustomerID" "Count" "Country"
## [4] "State" "City" "Zip.Code"
## [7] "Lat.Long" "Latitude" "Longitude"
## [10] "Gender" "Senior.Citizen" "Partner"
## [13] "Dependents" "Tenure.Months" "Phone.Service"
## [16] "Multiple.Lines" "Internet.Service" "Online.Security"
## [19] "Online.Backup" "Device.Protection" "Tech.Support"
## [22] "Streaming.TV" "Streaming.Movies" "Contract"
## [25] "Paperless.Billing" "Payment.Method" "Monthly.Charges"
## [28] "Total.Charges" "Churn.Label" "Churn.Value"
## [31] "Churn.Score" "CLTV" "Churn.Reason"
colnames(telco_raw)[1] <- "Customer.ID"
## Read the Telco_Customer_churn_status file
telco_status <- read.xlsx("Telco_customer_churn_status.xlsx", sheetName = "Telco_Churn")
dim(telco_status)
## [1] 7043 11
colnames(telco_status)
## [1] "Customer.ID" "Count" "Quarter"
## [4] "Satisfaction.Score" "Customer.Status" "Churn.Label"
## [7] "Churn.Value" "Churn.Score" "CLTV"
## [10] "Churn.Category" "Churn.Reason"
## Select the unique variables
telco_status <- telco_status %>%
dplyr::select(Customer.ID, Satisfaction.Score, Churn.Category, Customer.Status)
colnames(telco_status)
## [1] "Customer.ID" "Satisfaction.Score" "Churn.Category"
## [4] "Customer.Status"
## Read the Telco_customer_churn_services file
telco_service <- read.xlsx("Telco_customer_churn_services.xlsx", sheetName = "Telco_Churn")
dim(telco_service)
## [1] 7043 30
colnames(telco_service)
## [1] "Customer.ID"
## [2] "Count"
## [3] "Quarter"
## [4] "Referred.a.Friend"
## [5] "Number.of.Referrals"
## [6] "Tenure.in.Months"
## [7] "Offer"
## [8] "Phone.Service"
## [9] "Avg.Monthly.Long.Distance.Charges"
## [10] "Multiple.Lines"
## [11] "Internet.Service"
## [12] "Internet.Type"
## [13] "Avg.Monthly.GB.Download"
## [14] "Online.Security"
## [15] "Online.Backup"
## [16] "Device.Protection.Plan"
## [17] "Premium.Tech.Support"
## [18] "Streaming.TV"
## [19] "Streaming.Movies"
## [20] "Streaming.Music"
## [21] "Unlimited.Data"
## [22] "Contract"
## [23] "Paperless.Billing"
## [24] "Payment.Method"
## [25] "Monthly.Charge"
## [26] "Total.Charges"
## [27] "Total.Refunds"
## [28] "Total.Extra.Data.Charges"
## [29] "Total.Long.Distance.Charges"
## [30] "Total.Revenue"
## Select the unique variables
telco_service <- telco_service %>%
dplyr::select(Customer.ID, Referred.a.Friend, Number.of.Referrals, Offer, Avg.Monthly.Long.Distance.Charges,
Avg.Monthly.GB.Download, Streaming.Music, Unlimited.Data, Total.Refunds, Total.Extra.Data.Charges,
Total.Long.Distance.Charges, Total.Revenue)
colnames(telco_service)
## [1] "Customer.ID"
## [2] "Referred.a.Friend"
## [3] "Number.of.Referrals"
## [4] "Offer"
## [5] "Avg.Monthly.Long.Distance.Charges"
## [6] "Avg.Monthly.GB.Download"
## [7] "Streaming.Music"
## [8] "Unlimited.Data"
## [9] "Total.Refunds"
## [10] "Total.Extra.Data.Charges"
## [11] "Total.Long.Distance.Charges"
## [12] "Total.Revenue"
## Read the Telco_customer_churn_demographic file
telco_demo <- read.xlsx("Telco_customer_churn_demographics.xlsx", sheetName = "Telco_Churn")
dim(telco_demo)
## [1] 7043 9
colnames(telco_demo)
## [1] "Customer.ID" "Count" "Gender"
## [4] "Age" "Under.30" "Senior.Citizen"
## [7] "Married" "Dependents" "Number.of.Dependents"
## Select the unique variables
telco_demo <- telco_demo %>%
dplyr::select(Customer.ID, Age, Under.30, Number.of.Dependents)
colnames(telco_demo)
## [1] "Customer.ID" "Age" "Under.30"
## [4] "Number.of.Dependents"
## Join 4 tables together wiht common key = Customer.ID
telco <- left_join(telco_raw, telco_demo, by = 'Customer.ID') %>%
left_join(., telco_service, by = "Customer.ID") %>%
left_join(., telco_status, by = "Customer.ID")
colnames(telco)
## [1] "Customer.ID"
## [2] "Count"
## [3] "Country"
## [4] "State"
## [5] "City"
## [6] "Zip.Code"
## [7] "Lat.Long"
## [8] "Latitude"
## [9] "Longitude"
## [10] "Gender"
## [11] "Senior.Citizen"
## [12] "Partner"
## [13] "Dependents"
## [14] "Tenure.Months"
## [15] "Phone.Service"
## [16] "Multiple.Lines"
## [17] "Internet.Service"
## [18] "Online.Security"
## [19] "Online.Backup"
## [20] "Device.Protection"
## [21] "Tech.Support"
## [22] "Streaming.TV"
## [23] "Streaming.Movies"
## [24] "Contract"
## [25] "Paperless.Billing"
## [26] "Payment.Method"
## [27] "Monthly.Charges"
## [28] "Total.Charges"
## [29] "Churn.Label"
## [30] "Churn.Value"
## [31] "Churn.Score"
## [32] "CLTV"
## [33] "Churn.Reason"
## [34] "Age"
## [35] "Under.30"
## [36] "Number.of.Dependents"
## [37] "Referred.a.Friend"
## [38] "Number.of.Referrals"
## [39] "Offer"
## [40] "Avg.Monthly.Long.Distance.Charges"
## [41] "Avg.Monthly.GB.Download"
## [42] "Streaming.Music"
## [43] "Unlimited.Data"
## [44] "Total.Refunds"
## [45] "Total.Extra.Data.Charges"
## [46] "Total.Long.Distance.Charges"
## [47] "Total.Revenue"
## [48] "Satisfaction.Score"
## [49] "Churn.Category"
## [50] "Customer.Status"
dim(telco)
## [1] 7043 50
## Exploratory Data Analysis
## Data Preparation
## Check the variables
str(telco)
## 'data.frame': 7043 obs. of 50 variables:
## $ Customer.ID : Factor w/ 7043 levels "0002-ORFBO","0003-MKNFE",..: 2565 6512 6552 5605 175 2938 6208 724 4585 6120 ...
## $ Count : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Country : Factor w/ 1 level "United States": 1 1 1 1 1 1 1 1 1 1 ...
## $ State : Factor w/ 1 level "California": 1 1 1 1 1 1 1 1 1 1 ...
## $ City : Factor w/ 1129 levels "Acampo","Acton",..: 563 563 563 563 563 563 563 563 563 563 ...
## $ Zip.Code : num 90003 90005 90006 90010 90015 ...
## $ Lat.Long : Factor w/ 1652 levels "32.555828, -117.040073",..: 328 406 394 411 386 417 368 416 448 441 ...
## $ Latitude : num 34 34.1 34 34.1 34 ...
## $ Longitude : num -118 -118 -118 -118 -118 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 1 1 1 2 1 2 2 2 2 ...
## $ Senior.Citizen : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## $ Partner : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 2 2 ...
## $ Dependents : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 1 1 1 2 1 ...
## $ Tenure.Months : num 2 2 8 28 49 10 1 1 47 1 ...
## $ Phone.Service : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 1 ...
## $ Multiple.Lines : Factor w/ 3 levels "No","No phone service",..: 1 1 3 3 3 1 2 1 3 2 ...
## $ Internet.Service : Factor w/ 3 levels "DSL","Fiber optic",..: 1 2 2 2 2 1 1 3 2 1 ...
## $ Online.Security : Factor w/ 3 levels "No","No internet service",..: 3 1 1 1 1 1 1 2 1 1 ...
## $ Online.Backup : Factor w/ 3 levels "No","No internet service",..: 3 1 1 1 3 1 1 2 3 3 ...
## $ Device.Protection : Factor w/ 3 levels "No","No internet service",..: 1 1 3 3 3 3 3 2 1 1 ...
## $ Tech.Support : Factor w/ 3 levels "No","No internet service",..: 1 1 1 3 1 3 1 2 1 1 ...
## $ Streaming.TV : Factor w/ 3 levels "No","No internet service",..: 1 1 3 3 3 1 1 2 3 1 ...
## $ Streaming.Movies : Factor w/ 3 levels "No","No internet service",..: 1 1 3 3 3 1 3 2 3 1 ...
## $ Contract : Factor w/ 3 levels "Month-to-month",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Paperless.Billing : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 1 2 1 ...
## $ Payment.Method : Factor w/ 4 levels "Bank transfer (automatic)",..: 4 3 3 3 1 2 3 4 3 3 ...
## $ Monthly.Charges : num 53.9 70.7 99.7 104.8 103.7 ...
## $ Total.Charges : Factor w/ 6531 levels " ","100.2","100.25",..: 158 926 6105 2647 4266 4417 3341 1610 4020 2593 ...
## $ Churn.Label : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Churn.Value : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Churn.Score : num 86 67 86 84 89 78 100 92 77 97 ...
## $ CLTV : num 3239 2701 5372 5003 5340 ...
## $ Churn.Reason : Factor w/ 20 levels "Attitude of service provider",..: 4 14 14 14 3 5 6 4 3 3 ...
## $ Age : num 37 19 31 23 38 21 78 29 61 27 ...
## $ Under.30 : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 2 1 2 ...
## $ Number.of.Dependents : num 0 2 2 3 1 0 0 0 1 0 ...
## $ Referred.a.Friend : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 2 2 ...
## $ Number.of.Referrals : num 0 0 0 0 0 6 0 0 1 1 ...
## $ Offer : Factor w/ 6 levels "None","Offer A",..: 1 1 1 4 1 1 1 1 1 1 ...
## $ Avg.Monthly.Long.Distance.Charges: num 10.47 9.12 12.15 4.89 44.33 ...
## $ Avg.Monthly.GB.Download : num 21 51 26 47 11 69 8 0 16 58 ...
## $ Streaming.Music : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 1 2 1 ...
## $ Unlimited.Data : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 1 2 2 ...
## $ Total.Refunds : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Total.Extra.Data.Charges : num 0 0 0 0 0 0 20 0 0 0 ...
## $ Total.Long.Distance.Charges : num 20.9 18.2 97.2 136.9 2172.2 ...
## $ Total.Revenue : num 129 170 918 3183 7208 ...
## $ Satisfaction.Score : num 1 2 3 3 1 1 3 1 1 1 ...
## $ Churn.Category : Factor w/ 5 levels "Attitude","Competitor",..: 2 4 4 4 2 2 2 2 2 2 ...
## $ Customer.Status : Factor w/ 3 levels "Churned","Joined",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(telco)
## Customer.ID Count Country State
## 0002-ORFBO: 1 Min. :1 United States:7043 California:7043
## 0003-MKNFE: 1 1st Qu.:1
## 0004-TLHLJ: 1 Median :1
## 0011-IGKFF: 1 Mean :1
## 0013-EXCHZ: 1 3rd Qu.:1
## 0013-MHZWF: 1 Max. :1
## (Other) :7037
## City Zip.Code Lat.Long
## Los Angeles : 305 Min. :90001 32.555828, -117.040073: 5
## San Diego : 150 1st Qu.:92102 32.578103, -117.012975: 5
## San Jose : 112 Median :93552 32.579134, -117.119009: 5
## Sacramento : 108 Mean :93522 32.587557, -116.636816: 5
## San Francisco: 104 3rd Qu.:95351 32.605012, -116.97595 : 5
## Fresno : 64 Max. :96161 32.607964, -117.059459: 5
## (Other) :6200 (Other) :7013
## Latitude Longitude Gender Senior.Citizen Partner
## Min. :32.56 Min. :-124.3 Female:3488 No :5901 No :3641
## 1st Qu.:34.03 1st Qu.:-121.8 Male :3555 Yes:1142 Yes:3402
## Median :36.39 Median :-119.7
## Mean :36.28 Mean :-119.8
## 3rd Qu.:38.22 3rd Qu.:-118.0
## Max. :41.96 Max. :-114.2
##
## Dependents Tenure.Months Phone.Service Multiple.Lines
## No :5416 Min. : 0.00 No : 682 No :3390
## Yes:1627 1st Qu.: 9.00 Yes:6361 No phone service: 682
## Median :29.00 Yes :2971
## Mean :32.37
## 3rd Qu.:55.00
## Max. :72.00
##
## Internet.Service Online.Security Online.Backup
## DSL :2421 No :3498 No :3088
## Fiber optic:3096 No internet service:1526 No internet service:1526
## No :1526 Yes :2019 Yes :2429
##
##
##
##
## Device.Protection Tech.Support
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## Streaming.TV Streaming.Movies
## No :2810 No :2785
## No internet service:1526 No internet service:1526
## Yes :2707 Yes :2732
##
##
##
##
## Contract Paperless.Billing Payment.Method
## Month-to-month:3875 No :2872 Bank transfer (automatic):1544
## One year :1473 Yes:4171 Credit card (automatic) :1522
## Two year :1695 Electronic check :2365
## Mailed check :1612
##
##
##
## Monthly.Charges Total.Charges Churn.Label Churn.Value
## Min. : 18.25 : 11 No :5174 Min. :0.0000
## 1st Qu.: 35.50 20.2 : 11 Yes:1869 1st Qu.:0.0000
## Median : 70.35 19.75 : 9 Median :0.0000
## Mean : 64.76 19.65 : 8 Mean :0.2654
## 3rd Qu.: 89.85 19.9 : 8 3rd Qu.:1.0000
## Max. :118.75 20.05 : 8 Max. :1.0000
## (Other):6988
## Churn.Score CLTV
## Min. : 5.0 Min. :2003
## 1st Qu.: 40.0 1st Qu.:3469
## Median : 61.0 Median :4527
## Mean : 58.7 Mean :4400
## 3rd Qu.: 75.0 3rd Qu.:5380
## Max. :100.0 Max. :6500
##
## Churn.Reason Age
## Attitude of support person : 192 Min. :19.00
## Competitor offered higher download speeds: 189 1st Qu.:32.00
## Competitor offered more data : 162 Median :46.00
## Don't know : 154 Mean :46.51
## Competitor made better offer : 140 3rd Qu.:60.00
## (Other) :1032 Max. :80.00
## NA's :5174
## Under.30 Number.of.Dependents Referred.a.Friend Number.of.Referrals
## No :5642 Min. :0.0000 No :3821 Min. : 0.000
## Yes:1401 1st Qu.:0.0000 Yes:3222 1st Qu.: 0.000
## Median :0.0000 Median : 0.000
## Mean :0.4687 Mean : 1.952
## 3rd Qu.:0.0000 3rd Qu.: 3.000
## Max. :9.0000 Max. :11.000
##
## Offer Avg.Monthly.Long.Distance.Charges Avg.Monthly.GB.Download
## None :3877 Min. : 0.00 Min. : 0.00
## Offer A: 520 1st Qu.: 9.21 1st Qu.: 3.00
## Offer B: 824 Median :22.89 Median :17.00
## Offer C: 415 Mean :22.96 Mean :20.52
## Offer D: 602 3rd Qu.:36.40 3rd Qu.:27.00
## Offer E: 805 Max. :49.99 Max. :85.00
##
## Streaming.Music Unlimited.Data Total.Refunds Total.Extra.Data.Charges
## No :4555 No :2298 Min. : 0.000 Min. : 0.000
## Yes:2488 Yes:4745 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000
## Mean : 1.962 Mean : 6.861
## 3rd Qu.: 0.000 3rd Qu.: 0.000
## Max. :49.790 Max. :150.000
##
## Total.Long.Distance.Charges Total.Revenue Satisfaction.Score
## Min. : 0.00 Min. : 21.36 Min. :1.000
## 1st Qu.: 70.55 1st Qu.: 605.61 1st Qu.:3.000
## Median : 401.44 Median : 2108.64 Median :3.000
## Mean : 749.10 Mean : 3034.38 Mean :3.245
## 3rd Qu.:1191.10 3rd Qu.: 4801.15 3rd Qu.:4.000
## Max. :3564.72 Max. :11979.34 Max. :5.000
##
## Churn.Category Customer.Status
## Attitude : 314 Churned:1869
## Competitor : 841 Joined : 454
## Dissatisfaction: 303 Stayed :4720
## Other : 200
## Price : 211
## NA's :5174
##
sapply(telco, class)
## Customer.ID Count
## "factor" "numeric"
## Country State
## "factor" "factor"
## City Zip.Code
## "factor" "numeric"
## Lat.Long Latitude
## "factor" "numeric"
## Longitude Gender
## "numeric" "factor"
## Senior.Citizen Partner
## "factor" "factor"
## Dependents Tenure.Months
## "factor" "numeric"
## Phone.Service Multiple.Lines
## "factor" "factor"
## Internet.Service Online.Security
## "factor" "factor"
## Online.Backup Device.Protection
## "factor" "factor"
## Tech.Support Streaming.TV
## "factor" "factor"
## Streaming.Movies Contract
## "factor" "factor"
## Paperless.Billing Payment.Method
## "factor" "factor"
## Monthly.Charges Total.Charges
## "numeric" "factor"
## Churn.Label Churn.Value
## "factor" "numeric"
## Churn.Score CLTV
## "numeric" "numeric"
## Churn.Reason Age
## "factor" "numeric"
## Under.30 Number.of.Dependents
## "factor" "numeric"
## Referred.a.Friend Number.of.Referrals
## "factor" "numeric"
## Offer Avg.Monthly.Long.Distance.Charges
## "factor" "numeric"
## Avg.Monthly.GB.Download Streaming.Music
## "numeric" "factor"
## Unlimited.Data Total.Refunds
## "factor" "numeric"
## Total.Extra.Data.Charges Total.Long.Distance.Charges
## "numeric" "numeric"
## Total.Revenue Satisfaction.Score
## "numeric" "numeric"
## Churn.Category Customer.Status
## "factor" "factor"
## Variables Satisfaction Scores, Number.of.Dependents, Churn.Value and Number.of.Referrals should change to class factor instead of numeric.
telco[ ,"Churn.Value"] <- as.factor(telco[,'Churn.Value'])
telco[ ,"Number.of.Referrals"] <- as.factor(telco[,'Number.of.Referrals'])
telco[ ,"Number.of.Dependents"] <- as.factor(telco[,'Number.of.Dependents'])
telco[, "Satisfaction.Score"] <- as.factor(telco[,"Satisfaction.Score"])
## Total charge change to numeric instead of factor
telco[, "Total.Charges"] <- as.numeric(telco[,"Total.Charges"])
summary(telco)
## Customer.ID Count Country State
## 0002-ORFBO: 1 Min. :1 United States:7043 California:7043
## 0003-MKNFE: 1 1st Qu.:1
## 0004-TLHLJ: 1 Median :1
## 0011-IGKFF: 1 Mean :1
## 0013-EXCHZ: 1 3rd Qu.:1
## 0013-MHZWF: 1 Max. :1
## (Other) :7037
## City Zip.Code Lat.Long
## Los Angeles : 305 Min. :90001 32.555828, -117.040073: 5
## San Diego : 150 1st Qu.:92102 32.578103, -117.012975: 5
## San Jose : 112 Median :93552 32.579134, -117.119009: 5
## Sacramento : 108 Mean :93522 32.587557, -116.636816: 5
## San Francisco: 104 3rd Qu.:95351 32.605012, -116.97595 : 5
## Fresno : 64 Max. :96161 32.607964, -117.059459: 5
## (Other) :6200 (Other) :7013
## Latitude Longitude Gender Senior.Citizen Partner
## Min. :32.56 Min. :-124.3 Female:3488 No :5901 No :3641
## 1st Qu.:34.03 1st Qu.:-121.8 Male :3555 Yes:1142 Yes:3402
## Median :36.39 Median :-119.7
## Mean :36.28 Mean :-119.8
## 3rd Qu.:38.22 3rd Qu.:-118.0
## Max. :41.96 Max. :-114.2
##
## Dependents Tenure.Months Phone.Service Multiple.Lines
## No :5416 Min. : 0.00 No : 682 No :3390
## Yes:1627 1st Qu.: 9.00 Yes:6361 No phone service: 682
## Median :29.00 Yes :2971
## Mean :32.37
## 3rd Qu.:55.00
## Max. :72.00
##
## Internet.Service Online.Security Online.Backup
## DSL :2421 No :3498 No :3088
## Fiber optic:3096 No internet service:1526 No internet service:1526
## No :1526 Yes :2019 Yes :2429
##
##
##
##
## Device.Protection Tech.Support
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## Streaming.TV Streaming.Movies
## No :2810 No :2785
## No internet service:1526 No internet service:1526
## Yes :2707 Yes :2732
##
##
##
##
## Contract Paperless.Billing Payment.Method
## Month-to-month:3875 No :2872 Bank transfer (automatic):1544
## One year :1473 Yes:4171 Credit card (automatic) :1522
## Two year :1695 Electronic check :2365
## Mailed check :1612
##
##
##
## Monthly.Charges Total.Charges Churn.Label Churn.Value Churn.Score
## Min. : 18.25 Min. : 1 No :5174 0:5174 Min. : 5.0
## 1st Qu.: 35.50 1st Qu.:1610 Yes:1869 1:1869 1st Qu.: 40.0
## Median : 70.35 Median :3250 Median : 61.0
## Mean : 64.76 Mean :3259 Mean : 58.7
## 3rd Qu.: 89.85 3rd Qu.:4902 3rd Qu.: 75.0
## Max. :118.75 Max. :6531 Max. :100.0
##
## CLTV Churn.Reason
## Min. :2003 Attitude of support person : 192
## 1st Qu.:3469 Competitor offered higher download speeds: 189
## Median :4527 Competitor offered more data : 162
## Mean :4400 Don't know : 154
## 3rd Qu.:5380 Competitor made better offer : 140
## Max. :6500 (Other) :1032
## NA's :5174
## Age Under.30 Number.of.Dependents Referred.a.Friend
## Min. :19.00 No :5642 0 :5416 No :3821
## 1st Qu.:32.00 Yes:1401 1 : 553 Yes:3222
## Median :46.00 2 : 531
## Mean :46.51 3 : 517
## 3rd Qu.:60.00 5 : 10
## Max. :80.00 4 : 9
## (Other): 7
## Number.of.Referrals Offer Avg.Monthly.Long.Distance.Charges
## 0 :3821 None :3877 Min. : 0.00
## 1 :1086 Offer A: 520 1st Qu.: 9.21
## 5 : 264 Offer B: 824 Median :22.89
## 3 : 255 Offer C: 415 Mean :22.96
## 7 : 248 Offer D: 602 3rd Qu.:36.40
## 9 : 238 Offer E: 805 Max. :49.99
## (Other):1131
## Avg.Monthly.GB.Download Streaming.Music Unlimited.Data Total.Refunds
## Min. : 0.00 No :4555 No :2298 Min. : 0.000
## 1st Qu.: 3.00 Yes:2488 Yes:4745 1st Qu.: 0.000
## Median :17.00 Median : 0.000
## Mean :20.52 Mean : 1.962
## 3rd Qu.:27.00 3rd Qu.: 0.000
## Max. :85.00 Max. :49.790
##
## Total.Extra.Data.Charges Total.Long.Distance.Charges Total.Revenue
## Min. : 0.000 Min. : 0.00 Min. : 21.36
## 1st Qu.: 0.000 1st Qu.: 70.55 1st Qu.: 605.61
## Median : 0.000 Median : 401.44 Median : 2108.64
## Mean : 6.861 Mean : 749.10 Mean : 3034.38
## 3rd Qu.: 0.000 3rd Qu.:1191.10 3rd Qu.: 4801.15
## Max. :150.000 Max. :3564.72 Max. :11979.34
##
## Satisfaction.Score Churn.Category Customer.Status
## 1: 922 Attitude : 314 Churned:1869
## 2: 518 Competitor : 841 Joined : 454
## 3:2665 Dissatisfaction: 303 Stayed :4720
## 4:1789 Other : 200
## 5:1149 Price : 211
## NA's :5174
##
## Check the churn ratio
table(telco$Churn.Label)
##
## No Yes
## 5174 1869
## Churn percentage ~ 36.12%
(1869/5174)*100
## [1] 36.12292
## Understand the churn based on each variables
telco_churn <- telco %>% filter(Churn.Label == "Yes")
## Check the location of churn
register_google(key= "AIzaSyBmXB5S5_NIqo6lAGH-_U-TbhrQjhOsplU")
summary(telco$Latitude)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.56 34.03 36.39 36.28 38.22 41.96
summary(telco$Longitude)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -124.3 -121.8 -119.7 -119.8 -118.0 -114.2
house_loc = c(-125, 33, -120, 42)
our_map=get_map(location = house_loc,maptype="roadmap", source="google")
ggmap(our_map) + geom_point(aes(telco_churn$Longitude, telco_churn$Latitude), data = telco_churn)
## Check Top_10 city contribute the highest churn
telco_churn %>%
group_by(City) %>%
summarise(total = n()) %>%
top_n(10, total) %>%
ggplot(aes(x = reorder(City, -desc(total)), y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by City") + xlab("City") + ylab("Total")
## Check the gender contribute to churn
## No much difference between male and female
table(telco_churn$Gender)
##
## Female Male
## 939 930
colnames(telco_churn)
## [1] "Customer.ID"
## [2] "Count"
## [3] "Country"
## [4] "State"
## [5] "City"
## [6] "Zip.Code"
## [7] "Lat.Long"
## [8] "Latitude"
## [9] "Longitude"
## [10] "Gender"
## [11] "Senior.Citizen"
## [12] "Partner"
## [13] "Dependents"
## [14] "Tenure.Months"
## [15] "Phone.Service"
## [16] "Multiple.Lines"
## [17] "Internet.Service"
## [18] "Online.Security"
## [19] "Online.Backup"
## [20] "Device.Protection"
## [21] "Tech.Support"
## [22] "Streaming.TV"
## [23] "Streaming.Movies"
## [24] "Contract"
## [25] "Paperless.Billing"
## [26] "Payment.Method"
## [27] "Monthly.Charges"
## [28] "Total.Charges"
## [29] "Churn.Label"
## [30] "Churn.Value"
## [31] "Churn.Score"
## [32] "CLTV"
## [33] "Churn.Reason"
## [34] "Age"
## [35] "Under.30"
## [36] "Number.of.Dependents"
## [37] "Referred.a.Friend"
## [38] "Number.of.Referrals"
## [39] "Offer"
## [40] "Avg.Monthly.Long.Distance.Charges"
## [41] "Avg.Monthly.GB.Download"
## [42] "Streaming.Music"
## [43] "Unlimited.Data"
## [44] "Total.Refunds"
## [45] "Total.Extra.Data.Charges"
## [46] "Total.Long.Distance.Charges"
## [47] "Total.Revenue"
## [48] "Satisfaction.Score"
## [49] "Churn.Category"
## [50] "Customer.Status"
## Check the Age contribute to Churn
telco_churn %>%
ggplot(aes(Age)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Age") +
xlab("Age")
## Check the Age under 30 contribute tot Churn
telco_churn %>%
group_by(Under.30) %>%
summarise(total = n()) %>%
ggplot(aes(x = Under.30, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Under 30") + xlab("Under 30") + ylab("Total")
## Check the number of dependents contribute tot Churn
telco_churn %>%
group_by(Number.of.Dependents) %>%
summarise(total = n()) %>%
ggplot(aes(x = Number.of.Dependents, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Number of Dependents") + xlab("Number of Dependents") + ylab("Total")
## Check the senior citizen contribute to Churn
telco_churn %>%
group_by(Senior.Citizen) %>%
summarise(total = n()) %>%
ggplot(aes(x = Senior.Citizen, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Senior Citizen") + xlab("Senior Citizen") + ylab("Total")
## Check the partner contribute to Churn
telco_churn %>%
group_by(Partner) %>%
summarise(total = n()) %>%
ggplot(aes(x = Partner, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Partner") + xlab("Partner") + ylab("Total")
## Check the dependent contribute to Churn
telco_churn %>%
group_by(Dependents) %>%
summarise(total = n()) %>%
ggplot(aes(x = Dependents, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Dependents") + xlab("Dependents") + ylab("Total")
## Check the tenure month of the churn customer
summary(telco_churn$Tenure.Months)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 2.00 10.00 17.98 29.00 72.00
class(telco_churn$Tenure.Months)
## [1] "numeric"
telco_churn %>%
ggplot(aes(Tenure.Months)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Tenure(Months)") +
xlab("Tenure Months")
## Check phone service contribute to churn customer
table(telco_churn$Phone.Service)
##
## No Yes
## 170 1699
## Check multtple line contribute to churn customer
## Almost same between have and do not have phone service
table(telco_churn$Multiple.Lines)
##
## No No phone service Yes
## 849 170 850
## Check Referred.a.friend contribute to churn
telco_churn %>%
group_by(Referred.a.Friend) %>%
summarise(total = n()) %>%
ggplot(aes(x = Referred.a.Friend, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Referred to a Friend") + xlab("Refer to a friend") + ylab("Total")
## Check Number of Referrals contribute to churn
telco_churn %>%
group_by(Number.of.Referrals) %>%
summarise(total = n()) %>%
ggplot(aes(x = Number.of.Referrals, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Number of Referrals") + xlab("Number of Referrals") + ylab("Total")
## Check the internet service contribute to churn customer
telco_churn %>%
group_by(Internet.Service) %>%
summarise(total = n()) %>%
ggplot(aes(x = Internet.Service, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Internet Service") + xlab("Internet Service") + ylab("Total")
## Check Onlice security contribute to churn customer
telco_churn %>%
group_by(Online.Security) %>%
summarise(total = n()) %>%
ggplot(aes(x = Online.Security, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Online Security") + xlab("Online Security") + ylab("Total")
## Check the online backup contribute to churn customer
telco_churn %>%
group_by(Online.Backup) %>%
summarise(total = n()) %>%
ggplot(aes(x = Online.Backup, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Online Backup") + xlab("Online Backup") + ylab("Total")
## Check Device Protection contribute to churn customer
telco_churn %>%
group_by(Device.Protection) %>%
summarise(total = n()) %>%
ggplot(aes(x = Device.Protection, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Device Protection") + xlab("Device Protection") + ylab("Total")
## Check the tech support contribute to churn customer
telco_churn %>%
group_by(Tech.Support) %>%
summarise(total = n()) %>%
ggplot(aes(x = Tech.Support, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Tech Support") + xlab("Tech Support") + ylab("Total")
## Check the streaming TV contribute to churn customer
telco_churn %>%
group_by(Streaming.TV) %>%
summarise(total = n()) %>%
ggplot(aes(x = Streaming.TV, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Streaming TV") + xlab("Streaming TV") + ylab("Total")
## Check the streaming movies contribute to churn customer
telco_churn %>%
group_by(Streaming.Movies) %>%
summarise(total = n()) %>%
ggplot(aes(x = Streaming.Movies, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Streaming Movies") + xlab("Streaming Movies") + ylab("Total")
## Check the streaming music contribute to churn customer
telco_churn %>%
group_by(Streaming.Music) %>%
summarise(total = n()) %>%
ggplot(aes(x = Streaming.Music, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Streaming Music") + xlab("Streaming Music") + ylab("Total")
## Check the Unlimited Data contribute to churn customer
telco_churn %>%
group_by(Unlimited.Data) %>%
summarise(total = n()) %>%
ggplot(aes(x = Unlimited.Data, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by unlimited Data") + xlab("Unlimited Data") + ylab("Total")
## Check type of contract contribute to churn customer
telco_churn %>%
group_by(Contract) %>%
summarise(total = n()) %>%
ggplot(aes(x = Contract, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Contract Type") + xlab("Contract Type") + ylab("Total")
## Check the paperless billing contribute to churn customer
telco_churn %>%
group_by(Paperless.Billing) %>%
summarise(total = n()) %>%
ggplot(aes(x = Paperless.Billing, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Paperless Billing") + xlab("Paperless Billing") + ylab("Total")
## Check Payment method contribute churn customer
telco_churn %>%
group_by(Payment.Method) %>%
summarise(total = n()) %>%
ggplot(aes(x = Payment.Method, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Payment Method") + xlab("Payment Method") + ylab("Total")
## Check the monthly charge distribution of churn customer
telco_churn %>%
ggplot(aes(Monthly.Charges)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Monthly Charges") +
xlab("Monthly Charges")
## Check the total charge distribution of churn customer
telco_churn %>%
ggplot(aes(as.numeric(Total.Charges))) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Total Charge") +
xlab("Total Charge")
## Check the CLTV distribution of churn customer
telco_churn %>%
ggplot(aes(CLTV)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Customer Lifetime Value") +
xlab("Customer Lifetime Value")
## Check the reason of churn customer
table(telco_churn$Churn.Reason)
##
## Attitude of service provider
## 135
## Attitude of support person
## 192
## Competitor had better devices
## 130
## Competitor made better offer
## 140
## Competitor offered higher download speeds
## 189
## Competitor offered more data
## 162
## Deceased
## 6
## Don't know
## 154
## Extra data charges
## 57
## Lack of affordable download/upload speed
## 44
## Lack of self-service on Website
## 88
## Limited range of services
## 44
## Long distance charges
## 44
## Moved
## 53
## Network reliability
## 103
## Poor expertise of online support
## 19
## Poor expertise of phone support
## 20
## Price too high
## 98
## Product dissatisfaction
## 102
## Service dissatisfaction
## 89
telco_churn %>%
group_by(Churn.Reason) %>%
filter(Churn.Reason != "Don't know") %>%
summarise(total = n()) %>%
ggplot(aes(x = reorder(Churn.Reason, total), y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Churn Reason") + xlab("Churn Reason") + ylab("Total")
## Check the Offer contribute to churn customer
telco_churn %>%
group_by(Offer) %>%
summarise(total = n()) %>%
ggplot(aes(x = Offer, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Offer") + xlab("Offer") + ylab("Total")
## Check the Avg monthly long distance charge contribute to customer
telco_churn %>%
ggplot(aes(Avg.Monthly.Long.Distance.Charges)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Avg.Monthly.Long.Distance.Charges") +
xlab("Avg,Monthly.Long.Distance.Charges")
## Check the Avg monthly GB Download contribute to customer
telco_churn %>%
ggplot(aes(Avg.Monthly.GB.Download)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Avg.Monthly.GB.Download") +
xlab("Avg,Monthly.GB.Download")
## Check the total refunds contribute to churn
telco_churn %>%
ggplot(aes(Total.Refunds)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Total Refund") +
xlab("Total Refund")
## Check the total extra data charges contributes to churn
telco_churn %>%
ggplot(aes(Total.Extra.Data.Charges)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Total Extra Data Charges") +
xlab("Total Extra Data Charges")
## Check the total long distance charges contributes to churn
telco_churn %>%
ggplot(aes(Total.Long.Distance.Charges)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Total Long Distance Charges") +
xlab("Total Long Distance Charges")
## Check the total revenue contributes to churn
telco_churn %>%
ggplot(aes(Total.Revenue)) +
geom_histogram(bins = 20) + ggtitle("Distribution of Churn by Total Revenue") +
xlab("Total Revenue")
## Check the Satisfaction score contributes to churn
telco_churn %>%
group_by(Satisfaction.Score) %>%
summarise(total = n()) %>%
ggplot(aes(x = Satisfaction.Score, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Total Churn by Satisfaction Score") + xlab("Satisfaction Score") + ylab("Total")
## Check the churn category
telco_churn %>%
group_by(Churn.Category) %>%
summarise(total = n()) %>%
ggplot(aes(x = Churn.Category, y = total)) + geom_bar(stat = 'identity', fill = 'pink') +
coord_flip() + ggtitle("Churn Category") + xlab("Churn Category") + ylab("Total")
###################################################################################################################################################
## Create model to predict the CLTV value
colnames(telco)
## [1] "Customer.ID"
## [2] "Count"
## [3] "Country"
## [4] "State"
## [5] "City"
## [6] "Zip.Code"
## [7] "Lat.Long"
## [8] "Latitude"
## [9] "Longitude"
## [10] "Gender"
## [11] "Senior.Citizen"
## [12] "Partner"
## [13] "Dependents"
## [14] "Tenure.Months"
## [15] "Phone.Service"
## [16] "Multiple.Lines"
## [17] "Internet.Service"
## [18] "Online.Security"
## [19] "Online.Backup"
## [20] "Device.Protection"
## [21] "Tech.Support"
## [22] "Streaming.TV"
## [23] "Streaming.Movies"
## [24] "Contract"
## [25] "Paperless.Billing"
## [26] "Payment.Method"
## [27] "Monthly.Charges"
## [28] "Total.Charges"
## [29] "Churn.Label"
## [30] "Churn.Value"
## [31] "Churn.Score"
## [32] "CLTV"
## [33] "Churn.Reason"
## [34] "Age"
## [35] "Under.30"
## [36] "Number.of.Dependents"
## [37] "Referred.a.Friend"
## [38] "Number.of.Referrals"
## [39] "Offer"
## [40] "Avg.Monthly.Long.Distance.Charges"
## [41] "Avg.Monthly.GB.Download"
## [42] "Streaming.Music"
## [43] "Unlimited.Data"
## [44] "Total.Refunds"
## [45] "Total.Extra.Data.Charges"
## [46] "Total.Long.Distance.Charges"
## [47] "Total.Revenue"
## [48] "Satisfaction.Score"
## [49] "Churn.Category"
## [50] "Customer.Status"
## select the important variables (demographic and behavior data) for predict CLTV.
telco_cltv <- dplyr::select(telco, Gender, Senior.Citizen, Partner, Dependents, Tenure.Months, Phone.Service, Multiple.Lines, Internet.Service,
Online.Security, Online.Backup, Device.Protection, Tech.Support, Streaming.TV, Streaming.Movies, Contract,
Paperless.Billing, Payment.Method, Monthly.Charges, Total.Charges, CLTV, Age, Under.30, Number.of.Dependents,
Referred.a.Friend, Number.of.Referrals, Offer, Avg.Monthly.Long.Distance.Charges, Avg.Monthly.GB.Download, Streaming.Music,
Unlimited.Data, Total.Refunds, Total.Extra.Data.Charges, Total.Long.Distance.Charges, Total.Revenue, Satisfaction.Score)
head(telco_cltv)
## Gender Senior.Citizen Partner Dependents Tenure.Months Phone.Service
## 1 Male No No No 2 Yes
## 2 Female No No Yes 2 Yes
## 3 Female No No Yes 8 Yes
## 4 Female No Yes Yes 28 Yes
## 5 Male No No Yes 49 Yes
## 6 Female No Yes No 10 Yes
## Multiple.Lines Internet.Service Online.Security Online.Backup
## 1 No DSL Yes Yes
## 2 No Fiber optic No No
## 3 Yes Fiber optic No No
## 4 Yes Fiber optic No No
## 5 Yes Fiber optic No Yes
## 6 No DSL No No
## Device.Protection Tech.Support Streaming.TV Streaming.Movies
## 1 No No No No
## 2 No No No No
## 3 Yes No Yes Yes
## 4 Yes Yes Yes Yes
## 5 Yes No Yes Yes
## 6 Yes Yes No No
## Contract Paperless.Billing Payment.Method
## 1 Month-to-month Yes Mailed check
## 2 Month-to-month Yes Electronic check
## 3 Month-to-month Yes Electronic check
## 4 Month-to-month Yes Electronic check
## 5 Month-to-month Yes Bank transfer (automatic)
## 6 Month-to-month No Credit card (automatic)
## Monthly.Charges Total.Charges CLTV Age Under.30 Number.of.Dependents
## 1 53.85 158 3239 37 No 0
## 2 70.70 926 2701 19 Yes 2
## 3 99.65 6105 5372 31 No 2
## 4 104.80 2647 5003 23 Yes 3
## 5 103.70 4266 5340 38 No 1
## 6 55.20 4417 5925 21 Yes 0
## Referred.a.Friend Number.of.Referrals Offer
## 1 No 0 None
## 2 No 0 None
## 3 No 0 None
## 4 No 0 Offer C
## 5 No 0 None
## 6 Yes 6 None
## Avg.Monthly.Long.Distance.Charges Avg.Monthly.GB.Download
## 1 10.47 21
## 2 9.12 51
## 3 12.15 26
## 4 4.89 47
## 5 44.33 11
## 6 9.41 69
## Streaming.Music Unlimited.Data Total.Refunds Total.Extra.Data.Charges
## 1 No Yes 0 0
## 2 No Yes 0 0
## 3 Yes Yes 0 0
## 4 Yes Yes 0 0
## 5 Yes Yes 0 0
## 6 No Yes 0 0
## Total.Long.Distance.Charges Total.Revenue Satisfaction.Score
## 1 20.94 129.09 1
## 2 18.24 169.89 2
## 3 97.20 917.70 3
## 4 136.92 3182.97 3
## 5 2172.17 7208.47 1
## 6 94.10 622.45 1
summary(telco_cltv)
## Gender Senior.Citizen Partner Dependents Tenure.Months
## Female:3488 No :5901 No :3641 No :5416 Min. : 0.00
## Male :3555 Yes:1142 Yes:3402 Yes:1627 1st Qu.: 9.00
## Median :29.00
## Mean :32.37
## 3rd Qu.:55.00
## Max. :72.00
##
## Phone.Service Multiple.Lines Internet.Service
## No : 682 No :3390 DSL :2421
## Yes:6361 No phone service: 682 Fiber optic:3096
## Yes :2971 No :1526
##
##
##
##
## Online.Security Online.Backup
## No :3498 No :3088
## No internet service:1526 No internet service:1526
## Yes :2019 Yes :2429
##
##
##
##
## Device.Protection Tech.Support
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## Streaming.TV Streaming.Movies
## No :2810 No :2785
## No internet service:1526 No internet service:1526
## Yes :2707 Yes :2732
##
##
##
##
## Contract Paperless.Billing Payment.Method
## Month-to-month:3875 No :2872 Bank transfer (automatic):1544
## One year :1473 Yes:4171 Credit card (automatic) :1522
## Two year :1695 Electronic check :2365
## Mailed check :1612
##
##
##
## Monthly.Charges Total.Charges CLTV Age Under.30
## Min. : 18.25 Min. : 1 Min. :2003 Min. :19.00 No :5642
## 1st Qu.: 35.50 1st Qu.:1610 1st Qu.:3469 1st Qu.:32.00 Yes:1401
## Median : 70.35 Median :3250 Median :4527 Median :46.00
## Mean : 64.76 Mean :3259 Mean :4400 Mean :46.51
## 3rd Qu.: 89.85 3rd Qu.:4902 3rd Qu.:5380 3rd Qu.:60.00
## Max. :118.75 Max. :6531 Max. :6500 Max. :80.00
##
## Number.of.Dependents Referred.a.Friend Number.of.Referrals Offer
## 0 :5416 No :3821 0 :3821 None :3877
## 1 : 553 Yes:3222 1 :1086 Offer A: 520
## 2 : 531 5 : 264 Offer B: 824
## 3 : 517 3 : 255 Offer C: 415
## 5 : 10 7 : 248 Offer D: 602
## 4 : 9 9 : 238 Offer E: 805
## (Other): 7 (Other):1131
## Avg.Monthly.Long.Distance.Charges Avg.Monthly.GB.Download Streaming.Music
## Min. : 0.00 Min. : 0.00 No :4555
## 1st Qu.: 9.21 1st Qu.: 3.00 Yes:2488
## Median :22.89 Median :17.00
## Mean :22.96 Mean :20.52
## 3rd Qu.:36.40 3rd Qu.:27.00
## Max. :49.99 Max. :85.00
##
## Unlimited.Data Total.Refunds Total.Extra.Data.Charges
## No :2298 Min. : 0.000 Min. : 0.000
## Yes:4745 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000
## Mean : 1.962 Mean : 6.861
## 3rd Qu.: 0.000 3rd Qu.: 0.000
## Max. :49.790 Max. :150.000
##
## Total.Long.Distance.Charges Total.Revenue Satisfaction.Score
## Min. : 0.00 Min. : 21.36 1: 922
## 1st Qu.: 70.55 1st Qu.: 605.61 2: 518
## Median : 401.44 Median : 2108.64 3:2665
## Mean : 749.10 Mean : 3034.38 4:1789
## 3rd Qu.:1191.10 3rd Qu.: 4801.15 5:1149
## Max. :3564.72 Max. :11979.34
##
sapply(telco_cltv, class)
## Gender Senior.Citizen
## "factor" "factor"
## Partner Dependents
## "factor" "factor"
## Tenure.Months Phone.Service
## "numeric" "factor"
## Multiple.Lines Internet.Service
## "factor" "factor"
## Online.Security Online.Backup
## "factor" "factor"
## Device.Protection Tech.Support
## "factor" "factor"
## Streaming.TV Streaming.Movies
## "factor" "factor"
## Contract Paperless.Billing
## "factor" "factor"
## Payment.Method Monthly.Charges
## "factor" "numeric"
## Total.Charges CLTV
## "numeric" "numeric"
## Age Under.30
## "numeric" "factor"
## Number.of.Dependents Referred.a.Friend
## "factor" "factor"
## Number.of.Referrals Offer
## "factor" "factor"
## Avg.Monthly.Long.Distance.Charges Avg.Monthly.GB.Download
## "numeric" "numeric"
## Streaming.Music Unlimited.Data
## "factor" "factor"
## Total.Refunds Total.Extra.Data.Charges
## "numeric" "numeric"
## Total.Long.Distance.Charges Total.Revenue
## "numeric" "numeric"
## Satisfaction.Score
## "factor"
## Split the train and test set
set.seed(123)
splitData = sample.split(telco_cltv$CLTV, SplitRatio = 0.8)
train_set = telco_cltv[splitData, ]
nrow(train_set)/nrow(telco_cltv)
## [1] 0.8604288
test_set = telco_cltv[!splitData, ]
nrow(test_set)/nrow(telco_cltv)
## [1] 0.1395712
## Check the targer variable "CLTV" distribute. no outlier
p1 = train_set%>%
ggplot(aes(y=CLTV)) + geom_boxplot()
p2 = train_set%>%
ggplot(aes(CLTV)) + geom_histogram()
p3 = train_set %>%
ggplot(aes(sqrt(CLTV))) + geom_histogram()
p4 = train_set %>%
ggplot(aes(y=sqrt(CLTV))) + geom_boxplot()
ggarrange(p1,p2,p3,p4 + rremove("x.text"),
labels = c("A", "B" ,"C" ,"D"),
ncol = 2, nrow = 2)
## Test the linearity with conitnuous independent variables
## Tenure.Months, Monthly.Charges, Total.Charges, Age, Avg,Monthly.Long.Distance.Charges, Avg.Monthly.GB.Download,
## Total.Refunds, Total.Extra.Data.Charges, Total.Long.Distance.Charges, Total.Revenue
T1 = train_set %>%
ggplot(aes(x= Tenure.Months, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
T2 = train_set %>%
ggplot(aes(x= Monthly.Charges, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
T3 = train_set %>%
ggplot(aes(x= Total.Charges, y= CLTV)) + geom_point() + geom_smooth(method = 'loess')
T4 = train_set %>%
ggplot(aes(x= Age, y= CLTV)) + geom_point() + geom_smooth(method = 'loess')
ggarrange(T1,T2, T3, T4 + rremove("x.text"),
labels = c("A", "B", "C", "D"),
ncol = 2, nrow = 2)
R1 = train_set %>%
ggplot(aes(x= Avg.Monthly.Long.Distance.Charges, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
R2 = train_set %>%
ggplot(aes(x= Avg.Monthly.GB.Download, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
R3 = train_set %>%
ggplot(aes(x= Total.Refunds, y= CLTV)) + geom_point() + geom_smooth(method = 'loess')
R4 = train_set %>%
ggplot(aes(x= Total.Extra.Data.Charges, y= CLTV)) + geom_point() + geom_smooth(method = 'loess')
ggarrange(R1,R2, R3, R4 + rremove("x.text"),
labels = c("A", "B", "C", "D"),
ncol = 2, nrow = 2)
Y1 = train_set %>%
ggplot(aes(x= Total.Long.Distance.Charges, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
Y2 = train_set %>%
ggplot(aes(x= Total.Revenue, y=CLTV)) + geom_point() + geom_smooth(method = "loess")
ggarrange(Y1,Y2 + rremove("x.text"),
labels = c("A", "B"),
ncol = 2, nrow = 1)
## Create multilinear model
model1 = lm(CLTV ~., data = train_set)
summary(model1)
##
## Call:
## lm(formula = CLTV ~ ., data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2690.20 -830.51 24.63 888.88 2399.03
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 4.313e+03 3.739e+02 11.535
## GenderMale -6.358e+00 2.803e+01 -0.227
## Senior.CitizenYes -4.824e+01 6.340e+01 -0.761
## PartnerYes 1.206e+02 9.175e+01 1.315
## DependentsYes 2.780e+02 1.092e+03 0.255
## Tenure.Months 1.581e+01 1.840e+00 8.594
## Phone.ServiceYes 4.552e+02 2.803e+02 1.624
## Multiple.LinesNo phone service NA NA NA
## Multiple.LinesYes 9.656e+01 7.645e+01 1.263
## Internet.ServiceFiber optic 4.769e+02 3.438e+02 1.387
## Internet.ServiceNo -5.341e+02 3.585e+02 -1.490
## Online.SecurityNo internet service NA NA NA
## Online.SecurityYes 1.297e+02 7.887e+01 1.645
## Online.BackupNo internet service NA NA NA
## Online.BackupYes 5.892e+01 7.656e+01 0.770
## Device.ProtectionNo internet service NA NA NA
## Device.ProtectionYes 1.688e+01 7.770e+01 0.217
## Tech.SupportNo internet service NA NA NA
## Tech.SupportYes 6.680e+01 7.854e+01 0.850
## Streaming.TVNo internet service NA NA NA
## Streaming.TVYes 1.765e+02 1.415e+02 1.248
## Streaming.MoviesNo internet service NA NA NA
## Streaming.MoviesYes 2.256e+02 1.520e+02 1.484
## ContractOne year -4.907e+01 4.421e+01 -1.110
## ContractTwo year 2.520e+01 5.410e+01 0.466
## Paperless.BillingYes 3.571e+01 3.136e+01 1.139
## Payment.MethodCredit card (automatic) -4.270e+01 4.248e+01 -1.005
## Payment.MethodElectronic check -1.122e+00 4.159e+01 -0.027
## Payment.MethodMailed check -2.864e+01 4.546e+01 -0.630
## Monthly.Charges -2.225e+01 1.372e+01 -1.621
## Total.Charges -3.560e-03 8.386e-03 -0.425
## Age 5.695e-01 1.702e+00 0.335
## Under.30Yes -4.817e+01 5.926e+01 -0.813
## Number.of.Dependents1 -2.276e+02 1.092e+03 -0.208
## Number.of.Dependents2 -2.739e+02 1.092e+03 -0.251
## Number.of.Dependents3 -2.411e+02 1.092e+03 -0.221
## Number.of.Dependents4 -4.575e+02 1.158e+03 -0.395
## Number.of.Dependents5 -6.398e+02 1.150e+03 -0.556
## Number.of.Dependents6 -7.177e+02 1.543e+03 -0.465
## Number.of.Dependents8 -9.707e+02 1.545e+03 -0.628
## Number.of.Dependents9 NA NA NA
## Referred.a.FriendYes 5.463e+02 7.768e+02 0.703
## Number.of.Referrals1 -7.464e+02 7.727e+02 -0.966
## Number.of.Referrals2 -6.471e+02 7.753e+02 -0.835
## Number.of.Referrals3 -7.468e+02 7.751e+02 -0.963
## Number.of.Referrals4 -7.540e+02 7.755e+02 -0.972
## Number.of.Referrals5 -6.712e+02 7.751e+02 -0.866
## Number.of.Referrals6 -7.871e+02 7.757e+02 -1.015
## Number.of.Referrals7 -6.621e+02 7.751e+02 -0.854
## Number.of.Referrals8 -7.699e+02 7.758e+02 -0.992
## Number.of.Referrals9 -5.674e+02 7.753e+02 -0.732
## Number.of.Referrals10 -7.861e+02 7.756e+02 -1.014
## Number.of.Referrals11 NA NA NA
## OfferOffer A 6.093e+01 6.315e+01 0.965
## OfferOffer B 3.690e+01 4.858e+01 0.760
## OfferOffer C -2.665e+02 6.117e+01 -4.356
## OfferOffer D -2.338e+01 5.298e+01 -0.441
## OfferOffer E 2.523e+02 5.093e+01 4.954
## Avg.Monthly.Long.Distance.Charges 4.523e-01 1.610e+00 0.281
## Avg.Monthly.GB.Download 1.504e+00 1.125e+00 1.337
## Streaming.MusicYes -2.017e+01 6.496e+01 -0.310
## Unlimited.DataYes -3.584e+01 7.176e+01 -0.499
## Total.Refunds -1.723e+00 1.810e+00 -0.952
## Total.Extra.Data.Charges 4.253e-01 8.953e-01 0.475
## Total.Long.Distance.Charges -6.277e-02 4.653e-02 -1.349
## Total.Revenue 8.410e-02 2.228e-02 3.774
## Satisfaction.Score2 -3.684e+01 6.535e+01 -0.564
## Satisfaction.Score3 -9.454e-01 4.857e+01 -0.019
## Satisfaction.Score4 2.073e+00 5.265e+01 0.039
## Satisfaction.Score5 -1.572e+01 5.715e+01 -0.275
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## GenderMale 0.820595
## Senior.CitizenYes 0.446772
## PartnerYes 0.188666
## DependentsYes 0.798981
## Tenure.Months < 2e-16 ***
## Phone.ServiceYes 0.104394
## Multiple.LinesNo phone service NA
## Multiple.LinesYes 0.206629
## Internet.ServiceFiber optic 0.165419
## Internet.ServiceNo 0.136309
## Online.SecurityNo internet service NA
## Online.SecurityYes 0.100123
## Online.BackupNo internet service NA
## Online.BackupYes 0.441527
## Device.ProtectionNo internet service NA
## Device.ProtectionYes 0.828037
## Tech.SupportNo internet service NA
## Tech.SupportYes 0.395100
## Streaming.TVNo internet service NA
## Streaming.TVYes 0.212084
## Streaming.MoviesNo internet service NA
## Streaming.MoviesYes 0.137752
## ContractOne year 0.267074
## ContractTwo year 0.641301
## Paperless.BillingYes 0.254933
## Payment.MethodCredit card (automatic) 0.314860
## Payment.MethodElectronic check 0.978471
## Payment.MethodMailed check 0.528704
## Monthly.Charges 0.105063
## Total.Charges 0.671211
## Age 0.737983
## Under.30Yes 0.416342
## Number.of.Dependents1 0.834982
## Number.of.Dependents2 0.802049
## Number.of.Dependents3 0.825353
## Number.of.Dependents4 0.692767
## Number.of.Dependents5 0.578122
## Number.of.Dependents6 0.641897
## Number.of.Dependents8 0.529918
## Number.of.Dependents9 NA
## Referred.a.FriendYes 0.481927
## Number.of.Referrals1 0.334127
## Number.of.Referrals2 0.403974
## Number.of.Referrals3 0.335389
## Number.of.Referrals4 0.330938
## Number.of.Referrals5 0.386517
## Number.of.Referrals6 0.310292
## Number.of.Referrals7 0.393057
## Number.of.Referrals8 0.321041
## Number.of.Referrals9 0.464334
## Number.of.Referrals10 0.310837
## Number.of.Referrals11 NA
## OfferOffer A 0.334709
## OfferOffer B 0.447475
## OfferOffer C 1.34e-05 ***
## OfferOffer D 0.658953
## OfferOffer E 7.46e-07 ***
## Avg.Monthly.Long.Distance.Charges 0.778718
## Avg.Monthly.GB.Download 0.181214
## Streaming.MusicYes 0.756198
## Unlimited.DataYes 0.617478
## Total.Refunds 0.341126
## Total.Extra.Data.Charges 0.634809
## Total.Long.Distance.Charges 0.177361
## Total.Revenue 0.000162 ***
## Satisfaction.Score2 0.572962
## Satisfaction.Score3 0.984471
## Satisfaction.Score4 0.968594
## Satisfaction.Score5 0.783296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1087 on 5999 degrees of freedom
## Multiple R-squared: 0.1872, Adjusted R-squared: 0.1791
## F-statistic: 23.03 on 60 and 5999 DF, p-value: < 2.2e-16
## Check the model. Residual plot is random.
options(repr.plot.width = 8, repr.plot.height = 6)
par(mfrow = c(2,2))
plot(model1)
## Check the residual of the model. Residual is normal distribution
model_stat = augment(model1)
head(model_stat)
## # A tibble: 6 x 43
## .rownames CLTV Gender Senior.Citizen Partner Dependents Tenure.Months
## <chr> <dbl> <fct> <fct> <fct> <fct> <dbl>
## 1 1 3239 Male No No No 2
## 2 2 2701 Female No No Yes 2
## 3 3 5372 Female No No Yes 8
## 4 4 5003 Female No Yes Yes 28
## 5 5 5340 Male No No Yes 49
## 6 6 5925 Female No Yes No 10
## # ... with 36 more variables: Phone.Service <fct>, Multiple.Lines <fct>,
## # Internet.Service <fct>, Online.Security <fct>, Online.Backup <fct>,
## # Device.Protection <fct>, Tech.Support <fct>, Streaming.TV <fct>,
## # Streaming.Movies <fct>, Contract <fct>, Paperless.Billing <fct>,
## # Payment.Method <fct>, Monthly.Charges <dbl>, Total.Charges <dbl>,
## # Age <dbl>, Under.30 <fct>, Number.of.Dependents <fct>,
## # Referred.a.Friend <fct>, Number.of.Referrals <fct>, Offer <fct>,
## # Avg.Monthly.Long.Distance.Charges <dbl>,
## # Avg.Monthly.GB.Download <dbl>, Streaming.Music <fct>,
## # Unlimited.Data <fct>, Total.Refunds <dbl>,
## # Total.Extra.Data.Charges <dbl>, Total.Long.Distance.Charges <dbl>,
## # Total.Revenue <dbl>, Satisfaction.Score <fct>, .fitted <dbl>,
## # .se.fit <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
## # .std.resid <dbl>
options(repr.plot.width = 4, repr.plot.height = 3)
model_stat %>%
ggplot(aes(.resid)) + geom_histogram()
## Check the perfect multicollinearity variables.
alias(model1)
## Model :
## CLTV ~ Gender + Senior.Citizen + Partner + Dependents + Tenure.Months +
## Phone.Service + Multiple.Lines + Internet.Service + Online.Security +
## Online.Backup + Device.Protection + Tech.Support + Streaming.TV +
## Streaming.Movies + Contract + Paperless.Billing + Payment.Method +
## Monthly.Charges + Total.Charges + Age + Under.30 + Number.of.Dependents +
## Referred.a.Friend + Number.of.Referrals + Offer + Avg.Monthly.Long.Distance.Charges +
## Avg.Monthly.GB.Download + Streaming.Music + Unlimited.Data +
## Total.Refunds + Total.Extra.Data.Charges + Total.Long.Distance.Charges +
## Total.Revenue + Satisfaction.Score
##
## Complete :
## (Intercept) GenderMale
## Multiple.LinesNo phone service 1 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Senior.CitizenYes PartnerYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## DependentsYes Tenure.Months
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 1 0
## Number.of.Referrals11 0 0
## Phone.ServiceYes Multiple.LinesYes
## Multiple.LinesNo phone service -1 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Internet.ServiceFiber optic
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Internet.ServiceNo Online.SecurityYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 1 0
## Online.BackupNo internet service 1 0
## Device.ProtectionNo internet service 1 0
## Tech.SupportNo internet service 1 0
## Streaming.TVNo internet service 1 0
## Streaming.MoviesNo internet service 1 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Online.BackupYes Device.ProtectionYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Tech.SupportYes Streaming.TVYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Streaming.MoviesYes ContractOne year
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## ContractTwo year Paperless.BillingYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Payment.MethodCredit card (automatic)
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Payment.MethodElectronic check
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Payment.MethodMailed check
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Monthly.Charges Total.Charges Age
## Multiple.LinesNo phone service 0 0 0
## Online.SecurityNo internet service 0 0 0
## Online.BackupNo internet service 0 0 0
## Device.ProtectionNo internet service 0 0 0
## Tech.SupportNo internet service 0 0 0
## Streaming.TVNo internet service 0 0 0
## Streaming.MoviesNo internet service 0 0 0
## Number.of.Dependents9 0 0 0
## Number.of.Referrals11 0 0 0
## Under.30Yes Number.of.Dependents1
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 -1
## Number.of.Referrals11 0 0
## Number.of.Dependents2
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Number.of.Dependents3
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Number.of.Dependents4
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Number.of.Dependents5
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Number.of.Dependents6
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Number.of.Dependents8
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 -1
## Number.of.Referrals11 0
## Referred.a.FriendYes
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 1
## Number.of.Referrals1
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals2
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals3
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals4
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals5
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals6
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals7
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals8
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals9
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 -1
## Number.of.Referrals10 OfferOffer A
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 -1 0
## OfferOffer B OfferOffer C
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## OfferOffer D OfferOffer E
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Avg.Monthly.Long.Distance.Charges
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Avg.Monthly.GB.Download
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Streaming.MusicYes Unlimited.DataYes
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Total.Refunds
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Total.Extra.Data.Charges
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Total.Long.Distance.Charges
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Total.Revenue Satisfaction.Score2
## Multiple.LinesNo phone service 0 0
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents9 0 0
## Number.of.Referrals11 0 0
## Satisfaction.Score3
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Satisfaction.Score4
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## Satisfaction.Score5
## Multiple.LinesNo phone service 0
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9 0
## Number.of.Referrals11 0
## create model 2 to remove the independent variables that perfect multicollinearity
colnames(train_set)
## [1] "Gender"
## [2] "Senior.Citizen"
## [3] "Partner"
## [4] "Dependents"
## [5] "Tenure.Months"
## [6] "Phone.Service"
## [7] "Multiple.Lines"
## [8] "Internet.Service"
## [9] "Online.Security"
## [10] "Online.Backup"
## [11] "Device.Protection"
## [12] "Tech.Support"
## [13] "Streaming.TV"
## [14] "Streaming.Movies"
## [15] "Contract"
## [16] "Paperless.Billing"
## [17] "Payment.Method"
## [18] "Monthly.Charges"
## [19] "Total.Charges"
## [20] "CLTV"
## [21] "Age"
## [22] "Under.30"
## [23] "Number.of.Dependents"
## [24] "Referred.a.Friend"
## [25] "Number.of.Referrals"
## [26] "Offer"
## [27] "Avg.Monthly.Long.Distance.Charges"
## [28] "Avg.Monthly.GB.Download"
## [29] "Streaming.Music"
## [30] "Unlimited.Data"
## [31] "Total.Refunds"
## [32] "Total.Extra.Data.Charges"
## [33] "Total.Long.Distance.Charges"
## [34] "Total.Revenue"
## [35] "Satisfaction.Score"
model2 = lm(CLTV ~. -Phone.Service -Dependents -Referred.a.Friend, data = train_set)
summary(model2)
##
## Call:
## lm(formula = CLTV ~ . - Phone.Service - Dependents - Referred.a.Friend,
## data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2690.20 -830.51 24.63 888.88 2399.03
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 4.768e+03 6.342e+02 7.519
## GenderMale -6.358e+00 2.803e+01 -0.227
## Senior.CitizenYes -4.824e+01 6.340e+01 -0.761
## PartnerYes 1.206e+02 9.175e+01 1.315
## Tenure.Months 1.581e+01 1.840e+00 8.594
## Multiple.LinesNo phone service -4.552e+02 2.803e+02 -1.624
## Multiple.LinesYes 9.656e+01 7.645e+01 1.263
## Internet.ServiceFiber optic 4.769e+02 3.438e+02 1.387
## Internet.ServiceNo -5.341e+02 3.585e+02 -1.490
## Online.SecurityNo internet service NA NA NA
## Online.SecurityYes 1.297e+02 7.887e+01 1.645
## Online.BackupNo internet service NA NA NA
## Online.BackupYes 5.892e+01 7.656e+01 0.770
## Device.ProtectionNo internet service NA NA NA
## Device.ProtectionYes 1.688e+01 7.770e+01 0.217
## Tech.SupportNo internet service NA NA NA
## Tech.SupportYes 6.680e+01 7.854e+01 0.850
## Streaming.TVNo internet service NA NA NA
## Streaming.TVYes 1.765e+02 1.415e+02 1.248
## Streaming.MoviesNo internet service NA NA NA
## Streaming.MoviesYes 2.256e+02 1.520e+02 1.484
## ContractOne year -4.907e+01 4.421e+01 -1.110
## ContractTwo year 2.520e+01 5.410e+01 0.466
## Paperless.BillingYes 3.571e+01 3.136e+01 1.139
## Payment.MethodCredit card (automatic) -4.270e+01 4.248e+01 -1.005
## Payment.MethodElectronic check -1.122e+00 4.159e+01 -0.027
## Payment.MethodMailed check -2.864e+01 4.546e+01 -0.630
## Monthly.Charges -2.225e+01 1.372e+01 -1.621
## Total.Charges -3.560e-03 8.386e-03 -0.425
## Age 5.695e-01 1.702e+00 0.335
## Under.30Yes -4.817e+01 5.926e+01 -0.813
## Number.of.Dependents1 5.041e+01 5.527e+01 0.912
## Number.of.Dependents2 4.123e+00 5.637e+01 0.073
## Number.of.Dependents3 3.693e+01 6.012e+01 0.614
## Number.of.Dependents4 -1.795e+02 3.871e+02 -0.464
## Number.of.Dependents5 -3.618e+02 3.652e+02 -0.991
## Number.of.Dependents6 -4.397e+02 1.093e+03 -0.402
## Number.of.Dependents8 -6.927e+02 1.095e+03 -0.633
## Number.of.Dependents9 2.780e+02 1.092e+03 0.255
## Number.of.Referrals1 -2.001e+02 9.541e+01 -2.097
## Number.of.Referrals2 -1.008e+02 1.163e+02 -0.867
## Number.of.Referrals3 -2.005e+02 1.144e+02 -1.752
## Number.of.Referrals4 -2.078e+02 1.169e+02 -1.778
## Number.of.Referrals5 -1.250e+02 1.151e+02 -1.085
## Number.of.Referrals6 -2.409e+02 1.189e+02 -2.025
## Number.of.Referrals7 -1.158e+02 1.152e+02 -1.005
## Number.of.Referrals8 -2.237e+02 1.194e+02 -1.874
## Number.of.Referrals9 -2.110e+01 1.161e+02 -0.182
## Number.of.Referrals10 -2.398e+02 1.182e+02 -2.030
## Number.of.Referrals11 5.463e+02 7.768e+02 0.703
## OfferOffer A 6.093e+01 6.315e+01 0.965
## OfferOffer B 3.690e+01 4.858e+01 0.760
## OfferOffer C -2.665e+02 6.117e+01 -4.356
## OfferOffer D -2.338e+01 5.298e+01 -0.441
## OfferOffer E 2.523e+02 5.093e+01 4.954
## Avg.Monthly.Long.Distance.Charges 4.523e-01 1.610e+00 0.281
## Avg.Monthly.GB.Download 1.504e+00 1.125e+00 1.337
## Streaming.MusicYes -2.017e+01 6.496e+01 -0.310
## Unlimited.DataYes -3.584e+01 7.176e+01 -0.499
## Total.Refunds -1.723e+00 1.810e+00 -0.952
## Total.Extra.Data.Charges 4.253e-01 8.953e-01 0.475
## Total.Long.Distance.Charges -6.277e-02 4.653e-02 -1.349
## Total.Revenue 8.410e-02 2.228e-02 3.774
## Satisfaction.Score2 -3.684e+01 6.535e+01 -0.564
## Satisfaction.Score3 -9.454e-01 4.857e+01 -0.019
## Satisfaction.Score4 2.073e+00 5.265e+01 0.039
## Satisfaction.Score5 -1.572e+01 5.715e+01 -0.275
## Pr(>|t|)
## (Intercept) 6.33e-14 ***
## GenderMale 0.820595
## Senior.CitizenYes 0.446772
## PartnerYes 0.188666
## Tenure.Months < 2e-16 ***
## Multiple.LinesNo phone service 0.104394
## Multiple.LinesYes 0.206629
## Internet.ServiceFiber optic 0.165419
## Internet.ServiceNo 0.136309
## Online.SecurityNo internet service NA
## Online.SecurityYes 0.100123
## Online.BackupNo internet service NA
## Online.BackupYes 0.441527
## Device.ProtectionNo internet service NA
## Device.ProtectionYes 0.828037
## Tech.SupportNo internet service NA
## Tech.SupportYes 0.395100
## Streaming.TVNo internet service NA
## Streaming.TVYes 0.212084
## Streaming.MoviesNo internet service NA
## Streaming.MoviesYes 0.137752
## ContractOne year 0.267074
## ContractTwo year 0.641301
## Paperless.BillingYes 0.254933
## Payment.MethodCredit card (automatic) 0.314860
## Payment.MethodElectronic check 0.978471
## Payment.MethodMailed check 0.528704
## Monthly.Charges 0.105063
## Total.Charges 0.671211
## Age 0.737983
## Under.30Yes 0.416342
## Number.of.Dependents1 0.361788
## Number.of.Dependents2 0.941705
## Number.of.Dependents3 0.539079
## Number.of.Dependents4 0.642860
## Number.of.Dependents5 0.321929
## Number.of.Dependents6 0.687557
## Number.of.Dependents8 0.526996
## Number.of.Dependents9 0.798981
## Number.of.Referrals1 0.036001 *
## Number.of.Referrals2 0.385886
## Number.of.Referrals3 0.079817 .
## Number.of.Referrals4 0.075529 .
## Number.of.Referrals5 0.277850
## Number.of.Referrals6 0.042896 *
## Number.of.Referrals7 0.314994
## Number.of.Referrals8 0.061024 .
## Number.of.Referrals9 0.855809
## Number.of.Referrals10 0.042432 *
## Number.of.Referrals11 0.481927
## OfferOffer A 0.334709
## OfferOffer B 0.447475
## OfferOffer C 1.34e-05 ***
## OfferOffer D 0.658953
## OfferOffer E 7.46e-07 ***
## Avg.Monthly.Long.Distance.Charges 0.778718
## Avg.Monthly.GB.Download 0.181214
## Streaming.MusicYes 0.756198
## Unlimited.DataYes 0.617478
## Total.Refunds 0.341126
## Total.Extra.Data.Charges 0.634809
## Total.Long.Distance.Charges 0.177361
## Total.Revenue 0.000162 ***
## Satisfaction.Score2 0.572962
## Satisfaction.Score3 0.984471
## Satisfaction.Score4 0.968594
## Satisfaction.Score5 0.783296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1087 on 5999 degrees of freedom
## Multiple R-squared: 0.1872, Adjusted R-squared: 0.1791
## F-statistic: 23.03 on 60 and 5999 DF, p-value: < 2.2e-16
alias(model2)
## Model :
## CLTV ~ (Gender + Senior.Citizen + Partner + Dependents + Tenure.Months +
## Phone.Service + Multiple.Lines + Internet.Service + Online.Security +
## Online.Backup + Device.Protection + Tech.Support + Streaming.TV +
## Streaming.Movies + Contract + Paperless.Billing + Payment.Method +
## Monthly.Charges + Total.Charges + Age + Under.30 + Number.of.Dependents +
## Referred.a.Friend + Number.of.Referrals + Offer + Avg.Monthly.Long.Distance.Charges +
## Avg.Monthly.GB.Download + Streaming.Music + Unlimited.Data +
## Total.Refunds + Total.Extra.Data.Charges + Total.Long.Distance.Charges +
## Total.Revenue + Satisfaction.Score) - Phone.Service - Dependents -
## Referred.a.Friend
##
## Complete :
## (Intercept) GenderMale
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Senior.CitizenYes PartnerYes
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Tenure.Months
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Multiple.LinesNo phone service
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Multiple.LinesYes
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Internet.ServiceFiber optic
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Internet.ServiceNo Online.SecurityYes
## Online.SecurityNo internet service 1 0
## Online.BackupNo internet service 1 0
## Device.ProtectionNo internet service 1 0
## Tech.SupportNo internet service 1 0
## Streaming.TVNo internet service 1 0
## Streaming.MoviesNo internet service 1 0
## Online.BackupYes Device.ProtectionYes
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Tech.SupportYes Streaming.TVYes
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Streaming.MoviesYes ContractOne year
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## ContractTwo year Paperless.BillingYes
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Payment.MethodCredit card (automatic)
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Payment.MethodElectronic check
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Payment.MethodMailed check
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Monthly.Charges Total.Charges Age
## Online.SecurityNo internet service 0 0 0
## Online.BackupNo internet service 0 0 0
## Device.ProtectionNo internet service 0 0 0
## Tech.SupportNo internet service 0 0 0
## Streaming.TVNo internet service 0 0 0
## Streaming.MoviesNo internet service 0 0 0
## Under.30Yes Number.of.Dependents1
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Number.of.Dependents2
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents3
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents4
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents5
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents6
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents8
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Dependents9
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals1
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals2
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals3
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals4
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals5
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals6
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals7
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals8
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals9
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals10
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Number.of.Referrals11 OfferOffer A
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## OfferOffer B OfferOffer C
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## OfferOffer D OfferOffer E
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Avg.Monthly.Long.Distance.Charges
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Avg.Monthly.GB.Download
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Streaming.MusicYes Unlimited.DataYes
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Total.Refunds
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Total.Extra.Data.Charges
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Total.Long.Distance.Charges
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Total.Revenue Satisfaction.Score2
## Online.SecurityNo internet service 0 0
## Online.BackupNo internet service 0 0
## Device.ProtectionNo internet service 0 0
## Tech.SupportNo internet service 0 0
## Streaming.TVNo internet service 0 0
## Streaming.MoviesNo internet service 0 0
## Satisfaction.Score3
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Satisfaction.Score4
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Satisfaction.Score5
## Online.SecurityNo internet service 0
## Online.BackupNo internet service 0
## Device.ProtectionNo internet service 0
## Tech.SupportNo internet service 0
## Streaming.TVNo internet service 0
## Streaming.MoviesNo internet service 0
## Remove 'no internet service' since it cause perfect multicollinearity.(Majority people will move to 100% internet serviice base on current trend)
train_set = train_set %>% filter(Internet.Service != 'No')
test_set = test_set %>% filter(Internet.Service != 'No')
## Create model 3
model3 = lm(CLTV ~. -Phone.Service -Dependents -Referred.a.Friend, data = train_set)
summary(model3)
##
## Call:
## lm(formula = CLTV ~ . - Phone.Service - Dependents - Referred.a.Friend,
## data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2657.45 -833.86 28.25 866.79 2325.03
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4430.18634 656.17969 6.751
## GenderMale 0.31887 31.48387 0.010
## Senior.CitizenYes -51.92576 69.04435 -0.752
## PartnerYes 225.43093 104.31260 2.161
## Tenure.Months 15.31535 2.75161 5.566
## Multiple.LinesNo phone service -299.75859 287.82532 -1.041
## Multiple.LinesYes 59.75192 79.72014 0.750
## Internet.ServiceFiber optic 353.41775 353.34807 1.000
## Online.SecurityYes 106.27981 80.97497 1.313
## Online.BackupYes 40.43010 78.24082 0.517
## Device.ProtectionYes -11.37229 79.42168 -0.143
## Tech.SupportYes 35.75064 80.32470 0.445
## Streaming.TVYes 122.37592 145.16256 0.843
## Streaming.MoviesYes 181.10993 155.73111 1.163
## ContractOne year 18.09718 50.35453 0.359
## ContractTwo year 85.27555 64.88835 1.314
## Paperless.BillingYes 27.69305 35.22718 0.786
## Payment.MethodCredit card (automatic) -56.06321 47.59585 -1.178
## Payment.MethodElectronic check 2.23972 44.61301 0.050
## Payment.MethodMailed check -95.15602 54.97409 -1.731
## Monthly.Charges -17.55853 14.13583 -1.242
## Total.Charges 0.02673 0.01039 2.571
## Age 0.48653 1.95109 0.249
## Under.30Yes -31.27106 73.19019 -0.427
## Number.of.Dependents1 129.98240 65.82260 1.975
## Number.of.Dependents2 5.26599 68.05899 0.077
## Number.of.Dependents3 82.93283 76.28490 1.087
## Number.of.Dependents4 -202.75907 487.27851 -0.416
## Number.of.Dependents5 -278.89521 446.88625 -0.624
## Number.of.Dependents6 -427.96822 1087.35803 -0.394
## Number.of.Dependents8 -687.35899 1090.08048 -0.631
## Number.of.Dependents9 215.79649 1085.05826 0.199
## Number.of.Referrals1 -299.11940 108.12806 -2.766
## Number.of.Referrals2 -193.82319 132.42357 -1.464
## Number.of.Referrals3 -297.98971 131.33004 -2.269
## Number.of.Referrals4 -299.43144 131.31612 -2.280
## Number.of.Referrals5 -183.74082 132.16486 -1.390
## Number.of.Referrals6 -277.08261 135.57114 -2.044
## Number.of.Referrals7 -199.28583 130.64551 -1.525
## Number.of.Referrals8 -333.37634 137.23455 -2.429
## Number.of.Referrals9 -83.01834 134.36413 -0.618
## Number.of.Referrals10 -338.87695 135.77606 -2.496
## Number.of.Referrals11 394.65583 773.83664 0.510
## OfferOffer A 13.98235 70.18326 0.199
## OfferOffer B 25.82373 54.07775 0.478
## OfferOffer C -239.29993 69.34529 -3.451
## OfferOffer D -36.05501 61.20580 -0.589
## OfferOffer E 222.23184 57.82164 3.843
## Avg.Monthly.Long.Distance.Charges 4.35099 1.87776 2.317
## Avg.Monthly.GB.Download 0.86952 1.31784 0.660
## Streaming.MusicYes -33.24155 65.47848 -0.508
## Unlimited.DataYes -31.99222 71.42018 -0.448
## Total.Refunds -1.02586 1.97139 -0.520
## Total.Extra.Data.Charges 0.51037 0.89141 0.573
## Total.Long.Distance.Charges -0.12489 0.06340 -1.970
## Total.Revenue 0.08548 0.03436 2.488
## Satisfaction.Score2 -55.75242 66.90967 -0.833
## Satisfaction.Score3 -3.43747 50.44763 -0.068
## Satisfaction.Score4 1.18365 55.70122 0.021
## Satisfaction.Score5 -1.56934 64.17527 -0.024
## Pr(>|t|)
## (Intercept) 1.64e-11 ***
## GenderMale 0.991920
## Senior.CitizenYes 0.452050
## PartnerYes 0.030737 *
## Tenure.Months 2.75e-08 ***
## Multiple.LinesNo phone service 0.297716
## Multiple.LinesYes 0.453581
## Internet.ServiceFiber optic 0.317267
## Online.SecurityYes 0.189415
## Online.BackupYes 0.605363
## Device.ProtectionYes 0.886147
## Tech.SupportYes 0.656285
## Streaming.TVYes 0.399256
## Streaming.MoviesYes 0.244903
## ContractOne year 0.719316
## ContractTwo year 0.188847
## Paperless.BillingYes 0.431832
## Payment.MethodCredit card (automatic) 0.238896
## Payment.MethodElectronic check 0.959963
## Payment.MethodMailed check 0.083531 .
## Monthly.Charges 0.214251
## Total.Charges 0.010160 *
## Age 0.803091
## Under.30Yes 0.669211
## Number.of.Dependents1 0.048356 *
## Number.of.Dependents2 0.938329
## Number.of.Dependents3 0.277028
## Number.of.Dependents4 0.677352
## Number.of.Dependents5 0.532602
## Number.of.Dependents6 0.693905
## Number.of.Dependents8 0.528360
## Number.of.Dependents9 0.842365
## Number.of.Referrals1 0.005691 **
## Number.of.Referrals2 0.143354
## Number.of.Referrals3 0.023313 *
## Number.of.Referrals4 0.022638 *
## Number.of.Referrals5 0.164522
## Number.of.Referrals6 0.041027 *
## Number.of.Referrals7 0.127228
## Number.of.Referrals8 0.015168 *
## Number.of.Referrals9 0.536697
## Number.of.Referrals10 0.012600 *
## Number.of.Referrals11 0.610076
## OfferOffer A 0.842094
## OfferOffer B 0.633007
## OfferOffer C 0.000564 ***
## OfferOffer D 0.555837
## OfferOffer E 0.000123 ***
## Avg.Monthly.Long.Distance.Charges 0.020540 *
## Avg.Monthly.GB.Download 0.509410
## Streaming.MusicYes 0.611708
## Unlimited.DataYes 0.654214
## Total.Refunds 0.602828
## Total.Extra.Data.Charges 0.566983
## Total.Long.Distance.Charges 0.048918 *
## Total.Revenue 0.012882 *
## Satisfaction.Score2 0.404747
## Satisfaction.Score3 0.945678
## Satisfaction.Score4 0.983047
## Satisfaction.Score5 0.980492
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1079 on 4698 degrees of freedom
## Multiple R-squared: 0.2018, Adjusted R-squared: 0.1917
## F-statistic: 20.13 on 59 and 4698 DF, p-value: < 2.2e-16
vif(model3)
## GVIF Df GVIF^(1/(2*Df))
## Gender 1.012013 1 1.005989
## Senior.Citizen 3.059255 1 1.749073
## Partner 11.099126 1 3.331535
## Tenure.Months 18.663591 1 4.320138
## Multiple.Lines 62.889593 2 2.816078
## Internet.Service 125.614780 1 11.207800
## Online.Security 6.184359 1 2.486837
## Online.Backup 6.156967 1 2.481324
## Device.Protection 6.346850 1 2.519296
## Tech.Support 6.166034 1 2.483150
## Streaming.TV 21.507346 1 4.637601
## Streaming.Movies 24.761574 1 4.976100
## Contract 2.878311 2 1.302520
## Paperless.Billing 1.113959 1 1.055442
## Payment.Method 1.465679 3 1.065794
## Monthly.Charges 394.896240 1 19.871996
## Total.Charges 1.422116 1 1.192525
## Age 4.638890 1 2.153808
## Under.30 3.423640 1 1.850308
## Number.of.Dependents 1.585674 8 1.029232
## Number.of.Referrals 13.418113 11 1.125275
## Offer 2.358625 5 1.089597
## Avg.Monthly.Long.Distance.Charges 3.605966 1 1.898938
## Avg.Monthly.GB.Download 2.722165 1 1.649898
## Streaming.Music 4.340924 1 2.083488
## Unlimited.Data 2.517348 1 1.586615
## Total.Refunds 1.018983 1 1.009447
## Total.Extra.Data.Charges 2.554012 1 1.598128
## Total.Long.Distance.Charges 12.114837 1 3.480637
## Total.Revenue 43.954081 1 6.629787
## Satisfaction.Score 1.549107 4 1.056234
## Remove Monthly Charges due to high VIF value
model4 = lm(CLTV ~. -Phone.Service -Dependents -Referred.a.Friend -Monthly.Charges, data = train_set)
summary(model4)
##
## Call:
## lm(formula = CLTV ~ . - Phone.Service - Dependents - Referred.a.Friend -
## Monthly.Charges, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2658.0 -839.2 21.1 866.8 2299.6
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3637.77817 153.63199 23.679
## GenderMale 0.05707 31.48498 0.002
## Senior.CitizenYes -50.48219 69.03856 -0.731
## PartnerYes 222.02637 104.28260 2.129
## Tenure.Months 15.55683 2.74489 5.668
## Multiple.LinesNo phone service 48.20255 66.10257 0.729
## Multiple.LinesYes -27.16526 38.19766 -0.711
## Internet.ServiceFiber optic -81.22880 49.09955 -1.654
## Online.SecurityYes 18.75945 39.90646 0.470
## Online.BackupYes -46.07574 35.65952 -1.292
## Device.ProtectionYes -98.86192 36.69934 -2.694
## Tech.SupportYes -51.72175 38.64023 -1.339
## Streaming.TVYes -51.56973 38.23113 -1.349
## Streaming.MoviesYes 6.18784 66.49200 0.093
## ContractOne year 18.06388 50.35743 0.359
## ContractTwo year 85.53247 64.89177 1.318
## Paperless.BillingYes 28.04302 35.22808 0.796
## Payment.MethodCredit card (automatic) -55.90864 47.59844 -1.175
## Payment.MethodElectronic check 2.69869 44.61406 0.060
## Payment.MethodMailed check -93.82658 54.96684 -1.707
## Total.Charges 0.02711 0.01039 2.610
## Age 0.42986 1.95067 0.220
## Under.30Yes -35.93623 73.09798 -0.492
## Number.of.Dependents1 129.03105 65.82194 1.960
## Number.of.Dependents2 4.86349 68.06215 0.071
## Number.of.Dependents3 78.96986 76.22256 1.036
## Number.of.Dependents4 -206.32003 487.29822 -0.423
## Number.of.Dependents5 -285.83310 446.87716 -0.640
## Number.of.Dependents6 -416.14468 1087.37917 -0.383
## Number.of.Dependents8 -690.67209 1090.14018 -0.634
## Number.of.Dependents9 195.82669 1085.00182 0.180
## Number.of.Referrals1 -293.48619 108.03915 -2.716
## Number.of.Referrals2 -190.85747 132.40970 -1.441
## Number.of.Referrals3 -294.78265 131.31224 -2.245
## Number.of.Referrals4 -294.11172 131.25384 -2.241
## Number.of.Referrals5 -181.09987 132.15539 -1.370
## Number.of.Referrals6 -271.13753 135.49445 -2.001
## Number.of.Referrals7 -194.87640 130.60482 -1.492
## Number.of.Referrals8 -330.69069 137.22544 -2.410
## Number.of.Referrals9 -78.15233 134.31477 -0.582
## Number.of.Referrals10 -332.68906 135.69247 -2.452
## Number.of.Referrals11 383.63978 773.83051 0.496
## OfferOffer A 16.59600 70.15577 0.237
## OfferOffer B 26.65728 54.07671 0.493
## OfferOffer C -240.20464 69.34547 -3.464
## OfferOffer D -35.86081 61.20913 -0.586
## OfferOffer E 222.18521 57.82497 3.842
## Avg.Monthly.Long.Distance.Charges 4.22395 1.87508 2.253
## Avg.Monthly.GB.Download 0.94386 1.31655 0.717
## Streaming.MusicYes -31.49184 65.46711 -0.481
## Unlimited.DataYes -30.60813 71.41561 -0.429
## Total.Refunds -1.03192 1.97150 -0.523
## Total.Extra.Data.Charges 0.52579 0.89137 0.590
## Total.Long.Distance.Charges -0.11990 0.06328 -1.895
## Total.Revenue 0.08174 0.03423 2.388
## Satisfaction.Score2 -56.66877 66.90947 -0.847
## Satisfaction.Score3 -4.21424 50.44667 -0.084
## Satisfaction.Score4 1.75911 55.70251 0.032
## Satisfaction.Score5 -2.08950 64.17761 -0.033
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## GenderMale 0.998554
## Senior.CitizenYes 0.464683
## PartnerYes 0.033299 *
## Tenure.Months 1.53e-08 ***
## Multiple.LinesNo phone service 0.465911
## Multiple.LinesYes 0.477010
## Internet.ServiceFiber optic 0.098119 .
## Online.SecurityYes 0.638316
## Online.BackupYes 0.196385
## Device.ProtectionYes 0.007089 **
## Tech.SupportYes 0.180783
## Streaming.TVYes 0.177436
## Streaming.MoviesYes 0.925859
## ContractOne year 0.719826
## ContractTwo year 0.187541
## Paperless.BillingYes 0.426048
## Payment.MethodCredit card (automatic) 0.240218
## Payment.MethodElectronic check 0.951768
## Payment.MethodMailed check 0.087894 .
## Total.Charges 0.009092 **
## Age 0.825596
## Under.30Yes 0.623013
## Number.of.Dependents1 0.050019 .
## Number.of.Dependents2 0.943037
## Number.of.Dependents3 0.300235
## Number.of.Dependents4 0.672026
## Number.of.Dependents5 0.522449
## Number.of.Dependents6 0.701956
## Number.of.Dependents8 0.526397
## Number.of.Dependents9 0.856780
## Number.of.Referrals1 0.006622 **
## Number.of.Referrals2 0.149534
## Number.of.Referrals3 0.024821 *
## Number.of.Referrals4 0.025087 *
## Number.of.Referrals5 0.170641
## Number.of.Referrals6 0.045439 *
## Number.of.Referrals7 0.135738
## Number.of.Referrals8 0.015998 *
## Number.of.Referrals9 0.560689
## Number.of.Referrals10 0.014251 *
## Number.of.Referrals11 0.620082
## OfferOffer A 0.813009
## OfferOffer B 0.622069
## OfferOffer C 0.000537 ***
## OfferOffer D 0.557989
## OfferOffer E 0.000123 ***
## Avg.Monthly.Long.Distance.Charges 0.024325 *
## Avg.Monthly.GB.Download 0.473458
## Streaming.MusicYes 0.630515
## Unlimited.DataYes 0.668240
## Total.Refunds 0.600709
## Total.Extra.Data.Charges 0.555312
## Total.Long.Distance.Charges 0.058189 .
## Total.Revenue 0.016976 *
## Satisfaction.Score2 0.397068
## Satisfaction.Score3 0.933427
## Satisfaction.Score4 0.974808
## Satisfaction.Score5 0.974028
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1079 on 4699 degrees of freedom
## Multiple R-squared: 0.2015, Adjusted R-squared: 0.1916
## F-statistic: 20.45 on 58 and 4699 DF, p-value: < 2.2e-16
vif(model4)
## GVIF Df GVIF^(1/(2*Df))
## Gender 1.011968 1 1.005966
## Senior.Citizen 3.058388 1 1.748825
## Partner 11.091463 1 3.330385
## Tenure.Months 18.570426 1 4.309342
## Multiple.Lines 2.462129 2 1.252644
## Internet.Service 2.425159 1 1.557292
## Online.Security 1.501860 1 1.225504
## Online.Backup 1.278795 1 1.130838
## Device.Protection 1.355022 1 1.164054
## Tech.Support 1.426714 1 1.194451
## Streaming.TV 1.491632 1 1.221324
## Streaming.Movies 4.513528 1 2.124507
## Contract 2.878260 2 1.302514
## Paperless.Billing 1.113888 1 1.055409
## Payment.Method 1.465062 3 1.065719
## Total.Charges 1.420836 1 1.191988
## Age 4.636354 1 2.153219
## Under.30 3.414624 1 1.847870
## Number.of.Dependents 1.582197 8 1.029091
## Number.of.Referrals 13.371591 11 1.125097
## Offer 2.356026 5 1.089477
## Avg.Monthly.Long.Distance.Charges 3.595269 1 1.896119
## Avg.Monthly.GB.Download 2.716549 1 1.648196
## Streaming.Music 4.338915 1 2.083006
## Unlimited.Data 2.516735 1 1.586422
## Total.Refunds 1.018976 1 1.009444
## Total.Extra.Data.Charges 2.553516 1 1.597973
## Total.Long.Distance.Charges 12.066090 1 3.473628
## Total.Revenue 43.616000 1 6.604241
## Satisfaction.Score 1.547808 4 1.056123
## Remove Total.Revenue due to high VIF value
model5 = lm(CLTV ~. -Phone.Service -Dependents -Referred.a.Friend -Monthly.Charges -Total.Revenue, data = train_set)
summary(model5)
##
## Call:
## lm(formula = CLTV ~ . - Phone.Service - Dependents - Referred.a.Friend -
## Monthly.Charges - Total.Revenue, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2653.95 -831.62 14.23 861.73 2283.41
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.568e+03 1.509e+02 23.646
## GenderMale -3.386e-01 3.150e+01 -0.011
## Senior.CitizenYes -4.837e+01 6.907e+01 -0.700
## PartnerYes 2.183e+02 1.043e+02 2.092
## Tenure.Months 2.098e+01 1.542e+00 13.605
## Multiple.LinesNo phone service -4.129e-01 6.292e+01 -0.007
## Multiple.LinesYes -8.023e+00 3.737e+01 -0.215
## Internet.ServiceFiber optic -1.576e+01 4.075e+01 -0.387
## Online.SecurityYes 3.277e+01 3.949e+01 0.830
## Online.BackupYes -3.062e+01 3.509e+01 -0.873
## Device.ProtectionYes -8.420e+01 3.620e+01 -2.326
## Tech.SupportYes -3.617e+01 3.811e+01 -0.949
## Streaming.TVYes -2.507e+01 3.660e+01 -0.685
## Streaming.MoviesYes 3.182e+01 6.565e+01 0.485
## ContractOne year 1.749e+01 5.038e+01 0.347
## ContractTwo year 8.612e+01 6.492e+01 1.326
## Paperless.BillingYes 2.680e+01 3.524e+01 0.760
## Payment.MethodCredit card (automatic) -5.488e+01 4.762e+01 -1.152
## Payment.MethodElectronic check 2.194e+00 4.464e+01 0.049
## Payment.MethodMailed check -8.280e+01 5.480e+01 -1.511
## Total.Charges 3.525e-02 9.820e-03 3.589
## Age 3.581e-01 1.951e+00 0.184
## Under.30Yes -3.644e+01 7.313e+01 -0.498
## Number.of.Dependents1 1.325e+02 6.584e+01 2.013
## Number.of.Dependents2 5.229e+00 6.810e+01 0.077
## Number.of.Dependents3 8.293e+01 7.624e+01 1.088
## Number.of.Dependents4 -2.008e+02 4.875e+02 -0.412
## Number.of.Dependents5 -2.710e+02 4.471e+02 -0.606
## Number.of.Dependents6 -3.566e+02 1.088e+03 -0.328
## Number.of.Dependents8 -7.070e+02 1.091e+03 -0.648
## Number.of.Dependents9 2.153e+02 1.086e+03 0.198
## Number.of.Referrals1 -2.915e+02 1.081e+02 -2.696
## Number.of.Referrals2 -1.846e+02 1.325e+02 -1.394
## Number.of.Referrals3 -2.896e+02 1.314e+02 -2.205
## Number.of.Referrals4 -2.890e+02 1.313e+02 -2.201
## Number.of.Referrals5 -1.802e+02 1.322e+02 -1.363
## Number.of.Referrals6 -2.687e+02 1.356e+02 -1.982
## Number.of.Referrals7 -1.932e+02 1.307e+02 -1.478
## Number.of.Referrals8 -3.244e+02 1.373e+02 -2.363
## Number.of.Referrals9 -7.583e+01 1.344e+02 -0.564
## Number.of.Referrals10 -3.333e+02 1.358e+02 -2.455
## Number.of.Referrals11 3.336e+02 7.739e+02 0.431
## OfferOffer A 2.798e+01 7.003e+01 0.400
## OfferOffer B 2.005e+01 5.403e+01 0.371
## OfferOffer C -2.468e+02 6.933e+01 -3.560
## OfferOffer D -4.137e+01 6.120e+01 -0.676
## OfferOffer E 2.291e+02 5.778e+01 3.965
## Avg.Monthly.Long.Distance.Charges 3.099e+00 1.816e+00 1.707
## Avg.Monthly.GB.Download 9.631e-01 1.317e+00 0.731
## Streaming.MusicYes -3.232e+01 6.550e+01 -0.493
## Unlimited.DataYes -3.237e+01 7.145e+01 -0.453
## Total.Refunds -1.313e+00 1.969e+00 -0.667
## Total.Extra.Data.Charges 5.773e-01 8.916e-01 0.648
## Total.Long.Distance.Charges -5.078e-03 4.116e-02 -0.123
## Satisfaction.Score2 -5.593e+01 6.694e+01 -0.835
## Satisfaction.Score3 -3.876e+00 5.047e+01 -0.077
## Satisfaction.Score4 7.947e+00 5.567e+01 0.143
## Satisfaction.Score5 3.551e+00 6.417e+01 0.055
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## GenderMale 0.991424
## Senior.CitizenYes 0.483791
## PartnerYes 0.036473 *
## Tenure.Months < 2e-16 ***
## Multiple.LinesNo phone service 0.994764
## Multiple.LinesYes 0.829995
## Internet.ServiceFiber optic 0.698985
## Online.SecurityYes 0.406647
## Online.BackupYes 0.382787
## Device.ProtectionYes 0.020058 *
## Tech.SupportYes 0.342517
## Streaming.TVYes 0.493453
## Streaming.MoviesYes 0.627917
## ContractOne year 0.728466
## ContractTwo year 0.184762
## Paperless.BillingYes 0.447042
## Payment.MethodCredit card (automatic) 0.249217
## Payment.MethodElectronic check 0.960798
## Payment.MethodMailed check 0.130886
## Total.Charges 0.000335 ***
## Age 0.854388
## Under.30Yes 0.618317
## Number.of.Dependents1 0.044174 *
## Number.of.Dependents2 0.938791
## Number.of.Dependents3 0.276762
## Number.of.Dependents4 0.680519
## Number.of.Dependents5 0.544412
## Number.of.Dependents6 0.743062
## Number.of.Dependents8 0.516850
## Number.of.Dependents9 0.842758
## Number.of.Referrals1 0.007033 **
## Number.of.Referrals2 0.163357
## Number.of.Referrals3 0.027518 *
## Number.of.Referrals4 0.027777 *
## Number.of.Referrals5 0.172913
## Number.of.Referrals6 0.047552 *
## Number.of.Referrals7 0.139386
## Number.of.Referrals8 0.018147 *
## Number.of.Referrals9 0.572580
## Number.of.Referrals10 0.014130 *
## Number.of.Referrals11 0.666420
## OfferOffer A 0.689485
## OfferOffer B 0.710586
## OfferOffer C 0.000375 ***
## OfferOffer D 0.499021
## OfferOffer E 7.44e-05 ***
## Avg.Monthly.Long.Distance.Charges 0.087940 .
## Avg.Monthly.GB.Download 0.464686
## Streaming.MusicYes 0.621724
## Unlimited.DataYes 0.650515
## Total.Refunds 0.505048
## Total.Extra.Data.Charges 0.517312
## Total.Long.Distance.Charges 0.901815
## Satisfaction.Score2 0.403508
## Satisfaction.Score3 0.938791
## Satisfaction.Score4 0.886486
## Satisfaction.Score5 0.955864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1080 on 4700 degrees of freedom
## Multiple R-squared: 0.2005, Adjusted R-squared: 0.1908
## F-statistic: 20.68 on 57 and 4700 DF, p-value: < 2.2e-16
vif(model5)
## GVIF Df GVIF^(1/(2*Df))
## Gender 1.011940 1 1.005952
## Senior.Citizen 3.057884 1 1.748681
## Partner 11.088934 1 3.330005
## Tenure.Months 5.855407 1 2.419795
## Multiple.Lines 2.056744 2 1.197554
## Internet.Service 1.669102 1 1.291937
## Online.Security 1.469381 1 1.212180
## Online.Backup 1.236694 1 1.112067
## Device.Protection 1.317123 1 1.147660
## Tech.Support 1.386215 1 1.177377
## Streaming.TV 1.365958 1 1.168742
## Streaming.Movies 4.395904 1 2.096641
## Contract 2.878023 2 1.302488
## Paperless.Billing 1.113644 1 1.055293
## Payment.Method 1.450228 3 1.063913
## Total.Charges 1.268110 1 1.126104
## Age 4.635255 1 2.152964
## Under.30 3.414596 1 1.847863
## Number.of.Dependents 1.579609 8 1.028986
## Number.of.Referrals 13.342417 11 1.124986
## Offer 2.318502 5 1.087729
## Avg.Monthly.Long.Distance.Charges 3.368429 1 1.835328
## Avg.Monthly.GB.Download 2.716447 1 1.648165
## Streaming.Music 4.338793 1 2.082977
## Unlimited.Data 2.516466 1 1.586337
## Total.Refunds 1.015356 1 1.007649
## Total.Extra.Data.Charges 2.552020 1 1.597504
## Total.Long.Distance.Charges 5.100261 1 2.258376
## Satisfaction.Score 1.540333 4 1.055484
## Remove Number.of.Referrals due to high VIF value
model6 = lm(CLTV ~. -Phone.Service -Dependents -Referred.a.Friend -Monthly.Charges -Total.Revenue -Number.of.Referrals, data = train_set)
summary(model6)
##
## Call:
## lm(formula = CLTV ~ . - Phone.Service - Dependents - Referred.a.Friend -
## Monthly.Charges - Total.Revenue - Number.of.Referrals, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2659.14 -831.32 14.32 868.25 2298.45
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.554e+03 1.505e+02 23.618
## GenderMale 1.466e+00 3.149e+01 0.047
## Senior.CitizenYes -4.685e+01 6.901e+01 -0.679
## PartnerYes -2.807e+01 3.584e+01 -0.783
## Tenure.Months 2.118e+01 1.540e+00 13.756
## Multiple.LinesNo phone service -2.070e+00 6.284e+01 -0.033
## Multiple.LinesYes -6.690e+00 3.735e+01 -0.179
## Internet.ServiceFiber optic -1.703e+01 4.071e+01 -0.418
## Online.SecurityYes 3.485e+01 3.941e+01 0.884
## Online.BackupYes -3.356e+01 3.506e+01 -0.957
## Device.ProtectionYes -8.569e+01 3.619e+01 -2.368
## Tech.SupportYes -3.802e+01 3.808e+01 -0.999
## Streaming.TVYes -2.445e+01 3.657e+01 -0.668
## Streaming.MoviesYes 2.819e+01 6.556e+01 0.430
## ContractOne year 1.876e+01 5.031e+01 0.373
## ContractTwo year 8.625e+01 6.484e+01 1.330
## Paperless.BillingYes 2.412e+01 3.521e+01 0.685
## Payment.MethodCredit card (automatic) -5.185e+01 4.757e+01 -1.090
## Payment.MethodElectronic check 6.515e+00 4.459e+01 0.146
## Payment.MethodMailed check -7.936e+01 5.477e+01 -1.449
## Total.Charges 3.603e-02 9.811e-03 3.672
## Age 3.342e-01 1.951e+00 0.171
## Under.30Yes -3.711e+01 7.312e+01 -0.507
## Number.of.Dependents1 1.365e+02 6.571e+01 2.078
## Number.of.Dependents2 1.024e+01 6.802e+01 0.151
## Number.of.Dependents3 8.466e+01 7.607e+01 1.113
## Number.of.Dependents4 -2.125e+02 4.870e+02 -0.436
## Number.of.Dependents5 -2.614e+02 4.466e+02 -0.585
## Number.of.Dependents6 -2.877e+02 1.085e+03 -0.265
## Number.of.Dependents8 -6.382e+02 1.088e+03 -0.587
## Number.of.Dependents9 1.659e+02 1.086e+03 0.153
## OfferOffer A 2.815e+01 6.992e+01 0.403
## OfferOffer B 1.257e+01 5.398e+01 0.233
## OfferOffer C -2.439e+02 6.930e+01 -3.519
## OfferOffer D -4.347e+01 6.118e+01 -0.710
## OfferOffer E 2.312e+02 5.779e+01 4.001
## Avg.Monthly.Long.Distance.Charges 3.117e+00 1.814e+00 1.718
## Avg.Monthly.GB.Download 9.505e-01 1.316e+00 0.722
## Streaming.MusicYes -3.049e+01 6.539e+01 -0.466
## Unlimited.DataYes -2.891e+01 7.144e+01 -0.405
## Total.Refunds -1.256e+00 1.968e+00 -0.638
## Total.Extra.Data.Charges 5.683e-01 8.905e-01 0.638
## Total.Long.Distance.Charges -5.748e-03 4.109e-02 -0.140
## Satisfaction.Score2 -5.795e+01 6.695e+01 -0.866
## Satisfaction.Score3 2.685e+00 4.975e+01 0.054
## Satisfaction.Score4 1.655e+01 5.487e+01 0.302
## Satisfaction.Score5 1.298e+01 6.320e+01 0.205
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## GenderMale 0.962884
## Senior.CitizenYes 0.497231
## PartnerYes 0.433484
## Tenure.Months < 2e-16 ***
## Multiple.LinesNo phone service 0.973723
## Multiple.LinesYes 0.857858
## Internet.ServiceFiber optic 0.675687
## Online.SecurityYes 0.376610
## Online.BackupYes 0.338590
## Device.ProtectionYes 0.017944 *
## Tech.SupportYes 0.318080
## Streaming.TVYes 0.503881
## Streaming.MoviesYes 0.667214
## ContractOne year 0.709241
## ContractTwo year 0.183557
## Paperless.BillingYes 0.493359
## Payment.MethodCredit card (automatic) 0.275806
## Payment.MethodElectronic check 0.883836
## Payment.MethodMailed check 0.147376
## Total.Charges 0.000243 ***
## Age 0.864009
## Under.30Yes 0.611848
## Number.of.Dependents1 0.037765 *
## Number.of.Dependents2 0.880365
## Number.of.Dependents3 0.265778
## Number.of.Dependents4 0.662550
## Number.of.Dependents5 0.558397
## Number.of.Dependents6 0.790893
## Number.of.Dependents8 0.557469
## Number.of.Dependents9 0.878564
## OfferOffer A 0.687235
## OfferOffer B 0.815829
## OfferOffer C 0.000437 ***
## OfferOffer D 0.477460
## OfferOffer E 6.4e-05 ***
## Avg.Monthly.Long.Distance.Charges 0.085876 .
## Avg.Monthly.GB.Download 0.470191
## Streaming.MusicYes 0.641056
## Unlimited.DataYes 0.685710
## Total.Refunds 0.523276
## Total.Extra.Data.Charges 0.523363
## Total.Long.Distance.Charges 0.888754
## Satisfaction.Score2 0.386791
## Satisfaction.Score3 0.956951
## Satisfaction.Score4 0.763019
## Satisfaction.Score5 0.837355
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1080 on 4711 degrees of freedom
## Multiple R-squared: 0.1979, Adjusted R-squared: 0.1901
## F-statistic: 25.27 on 46 and 4711 DF, p-value: < 2.2e-16
vif(model6)
## GVIF Df GVIF^(1/(2*Df))
## Gender 1.010425 1 1.005199
## Senior.Citizen 3.049819 1 1.746373
## Partner 1.307625 1 1.143514
## Tenure.Months 5.833284 1 2.415219
## Multiple.Lines 2.046426 2 1.196049
## Internet.Service 1.664427 1 1.290127
## Online.Security 1.461790 1 1.209045
## Online.Backup 1.233893 1 1.110807
## Device.Protection 1.315388 1 1.146903
## Tech.Support 1.382732 1 1.175896
## Streaming.TV 1.362520 1 1.167270
## Streaming.Movies 4.379129 1 2.092637
## Contract 2.849140 2 1.299207
## Paperless.Billing 1.110718 1 1.053906
## Payment.Method 1.438927 3 1.062527
## Total.Charges 1.264584 1 1.124537
## Age 4.629320 1 2.151585
## Under.30 3.410164 1 1.846663
## Number.of.Dependents 1.530260 8 1.026947
## Offer 2.291987 5 1.086479
## Avg.Monthly.Long.Distance.Charges 3.359368 1 1.832858
## Avg.Monthly.GB.Download 2.709188 1 1.645961
## Streaming.Music 4.320691 1 2.078627
## Unlimited.Data 2.513476 1 1.585395
## Total.Refunds 1.013103 1 1.006530
## Total.Extra.Data.Charges 2.543454 1 1.594821
## Total.Long.Distance.Charges 5.078160 1 2.253477
## Satisfaction.Score 1.456183 4 1.048098
## Check the model. Residual plot is random.
options(repr.plot.width = 8, repr.plot.height = 6)
par(mfrow = c(2,2))
plot(model6)
## Check the residual of the model. Residual is normal distribution
model_stat = augment(model1)
head(model_stat)
## # A tibble: 6 x 43
## .rownames CLTV Gender Senior.Citizen Partner Dependents Tenure.Months
## <chr> <dbl> <fct> <fct> <fct> <fct> <dbl>
## 1 1 3239 Male No No No 2
## 2 2 2701 Female No No Yes 2
## 3 3 5372 Female No No Yes 8
## 4 4 5003 Female No Yes Yes 28
## 5 5 5340 Male No No Yes 49
## 6 6 5925 Female No Yes No 10
## # ... with 36 more variables: Phone.Service <fct>, Multiple.Lines <fct>,
## # Internet.Service <fct>, Online.Security <fct>, Online.Backup <fct>,
## # Device.Protection <fct>, Tech.Support <fct>, Streaming.TV <fct>,
## # Streaming.Movies <fct>, Contract <fct>, Paperless.Billing <fct>,
## # Payment.Method <fct>, Monthly.Charges <dbl>, Total.Charges <dbl>,
## # Age <dbl>, Under.30 <fct>, Number.of.Dependents <fct>,
## # Referred.a.Friend <fct>, Number.of.Referrals <fct>, Offer <fct>,
## # Avg.Monthly.Long.Distance.Charges <dbl>,
## # Avg.Monthly.GB.Download <dbl>, Streaming.Music <fct>,
## # Unlimited.Data <fct>, Total.Refunds <dbl>,
## # Total.Extra.Data.Charges <dbl>, Total.Long.Distance.Charges <dbl>,
## # Total.Revenue <dbl>, Satisfaction.Score <fct>, .fitted <dbl>,
## # .se.fit <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
## # .std.resid <dbl>
options(repr.plot.width = 4, repr.plot.height = 3)
model_stat %>%
ggplot(aes(.resid)) + geom_histogram()
## Predict with test_set
predictTest = predict(model6, newdata = test_set)
## Error Calculation
SSE = sum((test_set$CLTV - predictTest)^2)
SST = sum((test_set$CLTV - mean(telco_cltv$CLTV))^2)
R_sq = 1 - (SSE/SST)
round(R_sq, 4)
## [1] 0.0621