Importing the accounts manager data
Removing the duplicates
Sorting of the accounts manager data
# Load the accounts-manager CSV from the working directory; the first row of
# the file supplies the column names.
AccountsManager <- read.csv(file = "AccountsManager.csv", header = TRUE)
View(AccountsManager)
# Bind the same data to the shorter name `df` used by the following steps.
df <- AccountsManager
View(df)
# Row count of the raw import (duplicates still included).
nrow(df)
## [1] 561
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep one copy of each row; with no columns named, distinct() compares rows
# across every column.
Acc <- df %>% distinct()
# Row count after de-duplication.
nrow(Acc)
## [1] 500
Importing the clickstream output data
Importing the customer social score data
Merging accounts manager data and clickstream output data
# Read four more CSV extracts from the working directory, opening each one in
# the data viewer right after it is loaded.
ClickStream_output <- read.csv(file = "ClickStream_output.csv")
click <- ClickStream_output
View(click)

CustomerSocialScore <- read.csv(file = "CustomerSocialScore.csv")
View(CustomerSocialScore)

CustTrans_Internal <- read.csv(file = "CustTrans_Internal.csv")
View(CustTrans_Internal)

equifaxdata_external <- read.csv(file = "equifaxdata_external.csv")
View(equifaxdata_external)

# Join the de-duplicated accounts to the clickstream rows. No `by` is given,
# so merge() matches on every column name the two frames share and keeps only
# rows present in both.
merging1 <- merge(x = Acc, y = click)
View(merging1)
Merging 1 and customer social score data
# Add the social-score columns; merge() again joins on the shared column
# names because no `by` is supplied.
merging2 <- merge(x = merging1, y = CustomerSocialScore)
View(merging2)
Merging 2 and equifaxdata_external data
# Add the external Equifax columns to the running join.
merging3 <- merge(x = merging2, y = equifaxdata_external)
View(merging3)
# Add the internal customer-transaction columns; `merging4` is the table the
# later summaries and filters read from.
merging4 <- merge(x = merging3, y = CustTrans_Internal)
View(merging4)
Creating a new variable for email domain
Frequency of email domain
library(reshape2)
# Split each e-mail address at "@" into a local part (`email`) and a
# `domain`. The split is taken from `merging4` -- the same table the new
# columns are attached to below -- so the rows line up one-to-one. (Splitting
# `merging3` instead would misalign the columns whenever the final merge
# dropped or reordered rows.)
m4 <- colsplit(merging4$Email, "@", c("email", "domain"))
View(m4)
# cbind() keeps the result a data frame; append() on two data frames would
# return a plain list of columns instead.
merging5 <- cbind(merging4, m4)
View(merging5)
# Frequency of each e-mail domain.
m4$domain <- as.factor(m4$domain)
table(m4$domain)
##
## angalich.com ankeny.org aol.com aquas.com
## 1 1 64 1
## arceo.org arias.org barfield.com beech.com
## 1 1 1 1
## berlanga.com bowley.org brachle.org briddick.com
## 1 1 1 1
## brossart.com buemi.com burnard.com caiafa.org
## 1 1 1 1
## canlas.com caudy.org centini.org chaffins.org
## 1 1 1 1
## chavous.org chui.com colaizzo.com cookey.org
## 1 1 1 1
## corrio.com cousey.org cox.net craghead.org
## 1 1 70 1
## cronauer.com crupi.com darakjy.org daufeldt.com
## 1 1 1 1
## degonia.org degroot.org deleo.com denooyer.org
## 1 1 1 1
## dewar.com dickerson.org dopico.org dorshorst.org
## 1 1 1 1
## emard.com engelberg.org felger.org fishburne.com
## 1 1 1 1
## fortino.com frey.com galam.org gato.org
## 1 1 1 1
## gellinger.com gesick.org gillaspie.com glick.com
## 1 1 1 1
## gmail.com gobern.org grenet.org haroldson.org
## 68 1 1 1
## hauenstein.org hellickson.org hixenbaugh.org hoa.org
## 1 1 1 1
## hollack.org honeywell.com hotmail.com jacobos.com
## 1 1 71 1
## julia.org kampa.org kitty.com klimek.org
## 1 1 1 1
## kohnert.com konopacki.org koppinger.com kulzer.org
## 1 1 1 1
## lapage.com lietz.com lindall.com lipkin.com
## 1 1 1 1
## loader.com loder.org lother.com malvin.com
## 1 1 1 1
## mastella.com mccullan.com mclaird.com melnyk.com
## 1 1 1 1
## meteer.com miceli.org mirafuentes.com monarrez.org
## 1 1 1 1
## mondella.com morasca.com mulqueen.org nayar.com
## 1 1 1 1
## newville.com nicolozakes.org nunlee.org onofrio.com
## 1 1 1 1
## ostrosky.com paa.com palaspas.org parvis.com
## 1 1 1 1
## patak.org pelkowski.org perez.org perin.org
## 1 1 1 1
## perruzza.com pontoriero.com poullion.com regusters.com
## 1 1 1 1
## reitler.com restrepo.com rhym.com rim.org
## 1 1 1 1
## royster.com sarao.org saulter.com saylors.org
## 1 1 1 1
## schmierer.org schoeneck.org scipione.com semidey.com
## 1 1 1 1
## setter.org shin.com shinko.com shire.com
## 1 1 1 1
## silvestrini.com similton.com slusarski.com spickerman.com
## 1 1 1 1
## staback.com suffield.org sweigard.com tegarden.com
## 1 1 1 1
## toyama.org uyetake.org vanausdal.org venere.org
## 1 1 1 1
## villanueva.com vocelka.com vonasek.org walthall.org
## 1 1 1 1
## worlds.com yahoo.com yaw.org yglesias.com
## 1 86 1 1
## zane.com zurcher.org
## 1 1
Maximum and minimum number of page visits
# Largest and smallest value of the `no_of_visits` column, each shown in the
# data viewer.
maximum <- max(merging5[["no_of_visits"]])
View(maximum)
minimum <- min(merging5[["no_of_visits"]])
View(minimum)
Average “Interest Score” for each “loan_type”
# Mean of `interest_score` within each `loan_type`; the result is printed,
# not stored.
summarise(group_by(merging4, loan_type), mean1 = mean(interest_score))
## Source: local data frame [4 x 2]
##
## loan_type mean1
## (fctr) (dbl)
## 1 Auto 796.0000
## 2 Children 749.5000
## 3 Mortgage 750.8452
## 4 Personal 748.5370
Frequency of Customers for different categories
# Number of distinct `Customer_Id` values within each `loan_type`; the result
# is printed, not stored.
summarise(group_by(merging4, loan_type), n_distinct(Customer_Id))
## Source: local data frame [4 x 2]
##
## loan_type n_distinct(Customer_Id)
## (fctr) (int)
## 1 Auto 18
## 2 Children 36
## 3 Mortgage 365
## 4 Personal 81
Average of num_late_payment and num_miss_payments
# Overall (ungrouped) means of the late-payment and missed-payment counts,
# returned as a one-row table.
y <- merging4 %>%
  summarise(
    num_late_payment_mean = mean(num_late_payments),
    num_miss_payments_mean = mean(num_miss_payments)
  )
head(y, n = 10)
## num_late_payment_mean num_miss_payments_mean
## 1 2.538 1.84
The average ‘Credit_score’ for each type of ‘refinance’ and zipcode
# Mean `Credit_score` for every (refinance, Zipcode) combination.
# The result is deliberately NOT stored in `T`: that name is R's built-in
# alias for TRUE, and overwriting it with a table silently breaks any later
# code that uses `T` as a logical.
credit_by_refinance_zip <- merging4 %>%
  group_by(refinance, Zipcode) %>%
  summarise(mean1 = mean(Credit_score))
credit_by_refinance_zip
## Source: local data frame [484 x 3]
## Groups: refinance [?]
##
## refinance Zipcode mean1
## (fctr) (int) (dbl)
## 1 For longer ternure 1602 709
## 2 For longer ternure 2138 620
## 3 For longer ternure 2346 756
## 4 For longer ternure 2745 750
## 5 For longer ternure 2909 701
## 6 For longer ternure 3865 744
## 7 For longer ternure 4864 744
## 8 For longer ternure 7009 744
## 9 For longer ternure 7032 650
## 10 For longer ternure 7050 701
## .. ... ... ...
Average ‘Credit_score’ by age group and pie chart
# Mean `Credit_score` within each `Agegroup`.
r <- merging4 %>%
  group_by(Agegroup) %>%
  summarise(mean = mean(Credit_score))
# Label every slice with its age group. `r$mean` carries no names, so without
# `labels` pie() would number the slices 1..n and the chart could not be read.
pie(
  r$mean,
  labels = as.character(r$Agegroup),
  main = "Average Credit_score by Agegroup"
)

The correlation between ‘num_enquiries’ and ‘Credit_score’, and its interpretation
# Pearson correlation (cor()'s default method) between the number of
# enquiries and the credit score.
with(merging4, cor(num_enquiries, Credit_score))
## [1] -0.01114677
# Interpretation: a coefficient of about -0.011 is very close to zero, i.e.
# there is essentially no linear relationship between the two columns.

# Category labels stored in the `open_mortg_loans` factor.
levels(merging4[["open_mortg_loans"]])
## [1] "n" "y"
# First ten rows of the fully merged table.
head(merging4, n = 10)
## Customer_Id Firstname LastName Address1
## 1 1234567890 James Butt Benton John B Jr
## 2 1234567891 Josephine Darakjy Chanay Jeffrey A Esq
## 3 1234567892 Art Venere Chemel James L Cpa
## 4 1234567893 Lenna Paprocki Feltz Printing Service
## 5 1234567894 Donette Foller Printing Dimensions
## 6 1234567895 Simona Morasca Chapman Ross E Esq
## 7 1234567896 Mitsue Tollner Morlong Associates
## 8 1234567897 Leota Dilliard Commercial Press
## 9 1234567898 Sage Wieser Truhlar And Truhlar Attys
## 10 1234567899 Kris Marrier King Christopher A Esq
## Address2 City County State Zipcode
## 1 6649 N Blue Gum St New Orleans Orleans LA 70116
## 2 4 B Blue Ridge Blvd Brighton Livingston MI 48116
## 3 8 W Cerritos Ave #54 Bridgeport Gloucester NJ 8014
## 4 639 Main St Anchorage Anchorage AK 99501
## 5 34 Center St Hamilton Butler OH 45011
## 6 3 Mcauley Dr Ashland Ashland OH 44805
## 7 7 Eads St Chicago Cook IL 60632
## 8 7 W Jackson Blvd San Jose Santa Clara CA 95111
## 9 5 Boston Ave #88 Sioux Falls Minnehaha SD 57105
## 10 228 Runamuck Pl #2808 Baltimore Baltimore City MD 21224
## Phone1 Phone1.1 Email Agegroup
## 1 504-621-8927 504-845-1427 jbutt@gmail.com <20
## 2 810-292-9388 810-374-9840 josephine_darakjy@darakjy.org 20-25
## 3 856-636-8749 856-264-4130 art@venere.org 25-35
## 4 907-385-4412 907-921-2010 lpaprocki@hotmail.com 35-45
## 5 513-570-1893 513-549-4561 donette.foller@cox.net >45
## 6 419-503-2484 419-800-6759 simona@morasca.com <20
## 7 773-573-6914 773-924-8565 mitsue_tollner@yahoo.com 20-25
## 8 408-752-3500 408-813-1105 leota@hotmail.com 25-35
## 9 605-414-2147 605-794-4895 sage_wieser@cox.net 35-45
## 10 410-655-8723 410-804-4694 kris@gmail.com >45
## date_on
## 1 06-12-2014
## 2 06-12-2014
## 3 06-12-2014
## 4 06-12-2014
## 5 06-12-2014
## 6 06-12-2014
## 7 06-12-2014
## 8 06-12-2014
## 9 06-12-2014
## 10 06-12-2014
## page_visit
## 1 http://www.bbva.com/personal-banking/loans/home-loans.html
## 2 http://www.bbva.com/personal-banking/loans/home-improvement-loans.html
## 3 http://www.bbva.com/personal-banking/loans/home-loans.html#product_documentation
## 4 http://www.bbva.com/personal-banking/loans/home-loans.html
## 5 http://www.bbva.com/personal-banking/loans/home-loans.html#product_faqs
## 6 http://www.bbva.com/personal-banking/loans/home-loans.html#product_documentation
## 7 http://www.bbva.com/personal-banking/loans/home-loans.html
## 8 http://www.bbva.com/personal-banking/loans/home-loans.html#product_eligibility
## 9 http://www.bbva.com/personal-banking/loans/home-loans.html#product_eligibility
## 10 http://www.bbva.com/personal-banking/loans/home-loans.html
## no_of_visits area_of_interest interest_score num_new_accounts
## 1 5 Mortgage 1.0 1
## 2 11 Mortgage 1.0 2
## 3 15 Mortgage 28.0 3
## 4 9 Mortgage 2.0 4
## 5 11 Mortgage 15.5 3
## 6 12 Mortgage 18.5 1
## 7 7 Mortgage 21.5 2
## 8 7 Mortgage 24.5 3
## 9 7 Mortgage 27.5 4
## 10 7 Mortgage 30.5 3
## num_enquiries Credit_score Current_credit_limit Highest_Credit
## 1 34 620 200 34
## 2 2 789 400 2341
## 3 5 756 4000 12344
## 4 6 744 343 1234
## 5 7 744 234 1243
## 6 34 750 200 34
## 7 2 701 400 2341
## 8 5 650 4000 12344
## 9 6 705 343 1234
## 10 7 709 234 1243
## num_judge Single_Highest_Credit debt_month revolv_credit
## 1 0 2314 1234 y
## 2 0 1234 2134 n
## 3 0 546 1234 n
## 4 0 56 213 n
## 5 0 324 34 n
## 6 0 2314 1234 n
## 7 0 1234 2134 n
## 8 0 546 1234 n
## 9 0 56 213 n
## 10 1 324 34 n
## open_mortg_loans Value_Segmentation prev_loan_close_Date HasopenLoan
## 1 y High value 6/13/2014 Y
## 2 y Med value 6/13/2014 N
## 3 y Low value 6/13/2014 Y
## 4 y High value 6/13/2014 Y
## 5 y Med value 6/13/2014 N
## 6 n Low value 6/13/2014 Y
## 7 y High value 6/13/2014 Y
## 8 n Med value 6/13/2014 N
## 9 y Low value 6/13/2014 Y
## 10 n High value 6/13/2014 Y
## num_late_payments num_miss_payments refinance loan_type
## 1 1 4 No Refinancing Mortgage
## 2 2 3 For Shortern tenure Mortgage
## 3 3 4 For longer ternure Mortgage
## 4 1 1 No Refinancing Mortgage
## 5 2 1 For Shortern tenure Mortgage
## 6 4 1 For longer ternure Mortgage
## 7 3 1 No Refinancing Personal
## 8 2 2 For Shortern tenure Personal
## 9 1 2 For longer ternure Personal
## 10 1 2 No Refinancing Personal
Exclude records that have Credit score < 700 and have Open Mortgage Loan and have revolving
credit
# Keep only rows that pass all three conditions (comma-separated conditions
# in filter() are combined with AND):
#   * Credit_score of 700 or more. The rule excludes scores *below* 700, so a
#     score of exactly 700 must be kept; a strict `> 700` would drop it.
#   * no open mortgage loan (`open_mortg_loans == "n"`)
#   * no revolving credit (`revolv_credit == "n"`)
# NOTE(review): this reads the requirement as three independent exclusions;
# confirm that is the intended meaning of "and" in the task statement.
A <- filter(
  merging4,
  Credit_score >= 700,
  open_mortg_loans == "n",
  revolv_credit == "n"
)
head(A, n = 10)
## Customer_Id Firstname LastName Address1
## 1 1234567895 Simona Morasca Chapman Ross E Esq
## 2 1234567899 Kris Marrier King Christopher A Esq
## 3 1234567904 Cammy Albares Rousseaux Michael Esq
## 4 1234567905 Mattie Poquette Century Communications
## 5 1234567906 Meaghan Garufi Bolton Wilbur Esq
## 6 1234567907 Gladys Rim T M Byxbee Company Pc
## 7 1234567908 Yuki Whobrey Farmers Insurance Group
## 8 1234567909 Fletcher Flosi Post Box Services Plus
## 9 1234567911 Veronika Inouye C 4 Network Inc
## 10 1234567921 Francine Vocelka Cascade Realty Advisors Inc
## Address2 City County State Zipcode
## 1 3 Mcauley Dr Ashland Ashland OH 44805
## 2 228 Runamuck Pl #2808 Baltimore Baltimore City MD 21224
## 3 56 E Morehead St Laredo Webb TX 78045
## 4 73 State Road 434 E Phoenix Maricopa AZ 85013
## 5 69734 E Carrillo St Mc Minnville Warren TN 37110
## 6 322 New Horizon Blvd Milwaukee Milwaukee WI 53207
## 7 1 State Route 27 Taylor Wayne MI 48180
## 8 394 Manchester Blvd Rockford Winnebago IL 61109
## 9 6 Greenleaf Ave San Jose Santa Clara CA 95111
## 10 366 South Dr Las Cruces Dona Ana NM 88011
## Phone1 Phone1.1 Email Agegroup
## 1 419-503-2484 419-800-6759 simona@morasca.com <20
## 2 410-655-8723 410-804-4694 kris@gmail.com >45
## 3 956-537-6195 956-841-7216 calbares@gmail.com >45
## 4 602-277-4385 602-953-6360 mattie@aol.com <20
## 5 931-313-9635 931-235-7959 meaghan@hotmail.com 20-25
## 6 414-661-9598 414-377-2880 gladys.rim@rim.org 25-35
## 7 313-288-7937 313-341-4470 yuki_whobrey@aol.com 35-45
## 8 815-828-2147 815-426-5657 fletcher.flosi@yahoo.com >45
## 9 408-540-1785 408-813-4592 vinouye@aol.com 20-25
## 10 505-977-3911 505-335-5293 francine_vocelka@vocelka.com 35-45
## date_on
## 1 06-12-2014
## 2 06-12-2014
## 3 06-12-2014
## 4 06-12-2014
## 5 06-12-2014
## 6 06-12-2014
## 7 06-12-2014
## 8 06-12-2014
## 9 06-12-2014
## 10 06-12-2014
## page_visit
## 1 http://www.bbva.com/personal-banking/loans/home-loans.html#product_documentation
## 2 http://www.bbva.com/personal-banking/loans/home-loans.html
## 3 http://www.bbva.com/personal-banking/loans/home-loans.html#product_faqs
## 4 http://www.bbva.com/personal-banking/loans/home-improvement-loans.html
## 5 http://www.bbva.com/personal-banking/loans/home-loans.html
## 6 http://www.bbva.com/personal-banking/loans/home-loans.html#product_documentation
## 7 http://www.bbva.com/personal-banking/loans/home-loans.html#product_faqs
## 8 http://www.bbva.com/personal-banking/loans/home-loans.html
## 9 http://www.bbva.com/personal-banking/loans/home-loans.html#product_documentation
## 10 http://www.bbva.com/personal-banking/loans/home-loans.html
## no_of_visits area_of_interest interest_score num_new_accounts
## 1 12 Mortgage 18.5 1
## 2 7 Mortgage 30.5 3
## 3 1 Mortgage 45.5 3
## 4 1 Mortgage 48.5 1
## 5 3 Mortgage 51.5 2
## 6 2 Mortgage 54.5 3
## 7 1 Mortgage 57.5 4
## 8 5 Mortgage 60.5 3
## 9 15 Mortgage 66.5 2
## 10 1 Mortgage 96.5 2
## num_enquiries Credit_score Current_credit_limit Highest_Credit
## 1 34 750 200 34
## 2 7 709 234 1243
## 3 7 789 234 1243
## 4 34 756 200 34
## 5 2 744 400 2341
## 6 5 744 4000 12344
## 7 6 750 343 1234
## 8 7 701 234 1243
## 9 2 705 400 2341
## 10 2 750 400 1243
## num_judge Single_Highest_Credit debt_month revolv_credit
## 1 0 2314 1234 n
## 2 1 324 34 n
## 3 5 324 34 n
## 4 0 2314 1234 n
## 5 0 1234 2134 n
## 6 0 546 1234 n
## 7 0 56 213 n
## 8 0 324 34 n
## 9 0 1234 2134 n
## 10 0 1234 2134 n
## open_mortg_loans Value_Segmentation prev_loan_close_Date HasopenLoan
## 1 n Low value 6/13/2014 Y
## 2 n High value 6/13/2014 Y
## 3 n Low value 6/13/2014 Y
## 4 n High value 6/13/2014 Y
## 5 n Med value 6/13/2014 N
## 6 n Low value 6/13/2014 Y
## 7 n High value 6/13/2014 Y
## 8 n Med value 6/13/2014 N
## 9 n High value 6/13/2014 Y
## 10 n Med value 6/13/2014 N
## num_late_payments num_miss_payments refinance loan_type
## 1 4 1 For longer ternure Mortgage
## 2 1 2 No Refinancing Personal
## 3 2 3 For longer ternure Mortgage
## 4 2 3 No Refinancing Mortgage
## 5 2 3 For Shortern tenure Mortgage
## 6 2 3 For longer ternure Mortgage
## 7 2 3 No Refinancing Mortgage
## 8 2 3 For Shortern tenure Mortgage
## 9 3 1 No Refinancing Mortgage
## 10 2 2 For Shortern tenure Mortgage
From the AccountManager data, remove records with age group ‘<20’
# Drop the accounts whose `Agegroup` is "<20", keeping every other age group.
H <- Acc %>% filter(Agegroup != "<20")
head(H, n = 10)
## Customer_Id Firstname LastName Address1
## 1 1234568132 Ceola Setter Southern Steel Shelving Co
## 2 1234567891 Josephine Darakjy Chanay Jeffrey A Esq
## 3 1234568121 Eladia Saulter Tyee Productions Inc
## 4 1234568152 Geoffrey Acey Price Business Services
## 5 1234568131 Lauran Burnard Professionals Unlimited
## 6 1234568210 Roosevelt Hoffis Denbrook Myron
## 7 1234568002 Jesusa Shin Carroccio A Thomas Esq
## 8 1234568127 Katina Polidori Cape & Associates Real Estate
## 9 1234568052 Larae Gudroe Lehigh Furn Divsn Lehigh
## 10 1234568123 Gwenn Suffield Deltam Systems Inc
## Address2 City County State Zipcode
## 1 96263 Greenwood Pl Warren Knox ME 4864
## 2 4 B Blue Ridge Blvd Brighton Livingston MI 48116
## 3 3958 S Dupont Hwy #7 Ramsey Bergen NJ 7446
## 4 7 West Ave #1 Palatine Cook IL 60067
## 5 66697 Park Pl #3224 Riverton Fremont WY 82501
## 6 60 Old Dover Rd Hialeah Miami-Dade FL 33014
## 7 2239 Shawnee Mission Pky Tullahoma Coffee TN 37388
## 8 5 Little River Tpke Wilmington Middlesex MA 1887
## 9 6651 Municipal Rd Houma Terrebonne LA 70360
## 10 3270 Dequindre Rd Deer Park Suffolk NY 11729
## Phone1 Phone1.1 Email Agegroup
## 1 207-627-7565 207-297-5029 ceola.setter@setter.org 25-35
## 2 810-292-9388 810-374-9840 josephine_darakjy@darakjy.org 20-25
## 3 201-474-4924 201-365-8698 eladia@saulter.com 20-25
## 4 847-222-1734 847-556-2909 geoffrey@gmail.com 35-45
## 5 307-342-7795 307-453-7589 lburnard@burnard.com 20-25
## 6 305-622-4739 305-302-1135 roosevelt.hoffis@aol.com 20-25
## 7 931-273-8709 931-739-1551 jshin@shin.com 25-35
## 8 978-626-2978 978-679-7429 katina_polidori@aol.com 25-35
## 9 985-890-7262 985-261-5783 larae_gudroe@gmail.com 35-45
## 10 631-258-6558 631-295-9879 gwenn_suffield@suffield.org 35-45
From the CustTrans_Internal data, exclude records with 3 defaults in 12 months and 5 late
payments in 12 months.
# Exclude customers who reached 5 late payments or 3 missed payments, i.e.
# keep rows with fewer than 5 late payments AND fewer than 3 missed payments.
# The earlier predicate `num_miss_payments > 3` did the opposite for missed
# payments: it kept only the heaviest defaulters. Console output recorded
# before this fix (every row showing num_miss_payments = 4) reflects that
# inverted condition and needs to be regenerated.
# NOTE(review): assumes `num_miss_payments` is the "defaults" count and that
# the thresholds mean "3 or more" / "5 or more" -- confirm with the task owner.
S <- filter(
  CustTrans_Internal,
  num_late_payments < 5,
  num_miss_payments < 3
)
head(S, n = 10)
## Customer_Id Value_Segmentation prev_loan_close_Date HasopenLoan
## 1 1234567890 High value 6/13/2014 Y
## 2 1234567892 Low value 6/13/2014 Y
## 3 1234567943 High value 6/13/2014 Y
## 4 1234567945 Low value 6/13/2014 Y
## 5 1234567996 High value 6/13/2014 Y
## 6 1234567998 Low value 6/13/2014 Y
## 7 1234568049 High value 6/13/2014 Y
## 8 1234568051 Low value 6/13/2014 Y
## 9 1234568103 High value 6/13/2014 Y
## 10 1234568105 Low value 6/13/2014 Y
## num_late_payments num_miss_payments refinance loan_type
## 1 1 4 No Refinancing Mortgage
## 2 3 4 For longer ternure Mortgage
## 3 1 4 No Refinancing Mortgage
## 4 3 4 For longer ternure Mortgage
## 5 1 4 No Refinancing Mortgage
## 6 3 4 For longer ternure Mortgage
## 7 1 4 No Refinancing Mortgage
## 8 3 4 For longer ternure Mortgage
## 9 1 4 No Refinancing Mortgage
## 10 3 4 For longer ternure Mortgage