Greg Metcalf works for a national credit card company, and he is performing a customer value analysis on a subset of credit card customers. In order to perform the RFM analysis on the customers, Greg has compiled a data set that contains the dates of the last transaction (Last-TransactionDate), total number of transactions in the past 2 years (Frequency), and total spending during the past 2 years (Spending). A portion of the data set is shown in the accompanying table.

a. Greg wants to calculate the number of days between April 1, 2019 and the last transaction date. Create a new variable “DaysSinceLast” which contains the number of days since the last transaction. What is the average number of days since the last purchase for all the customers? (Round your answer to two decimal places.)

library(readxl)
myData <- read_excel("Ch2_Q45_V03_Data_File.xlsx")
View(myData)

myData$Date = as.Date(myData$LastTransactionDate,format='%m-%d-%Y')
EndDate = as.Date("04/01/2019", format="%m/%d/%Y")
myData$DSL = abs(round(difftime(myData$Date,EndDate),digit=2))
round(mean(myData$DSL), digit=2)
Time difference of 501.82 days

b. Create the RFM scores for each customer. How many customers have an RFM score of 4 4 1? What is their average spending?

# five bins for days Since 1/1/2019, five bins for total spending, five bins for frequency
myData$DateReverse <- as.numeric(myData$DSL * -1)
recencyBins <- quantile(myData$DateReverse, probs =seq(0,1, by=1/5))
myData$Binned_Recency <- cut(myData$DateReverse, breaks = recencyBins, labels = c("1","2","3","4","5"), include.lowest=TRUE, right=FALSE)

FrequencyBins <- quantile(myData$Frequency, probs =seq(0,1, by=1/5))
myData$Binned_Frequency <- cut(myData$Frequency, breaks = FrequencyBins, labels = c("1","2","3","4","5"), include.lowest=TRUE, right=FALSE)

MonetaryBins <- quantile(myData$TotalSpending, probs =seq(0,1, by=1/5))
myData$Binned_Monetary <- cut(myData$TotalSpending, breaks = MonetaryBins, labels = c("1","2","3","4","5"), include.lowest=TRUE, right=FALSE)

myData$RFM <-paste(myData$Binned_Recency, myData$Binned_Frequency, myData$Binned_Monetary)

n = length(which(myData$RFM == "4 4 1"))
myData$AverageOrderSize = myData$TotalSpending/n

myData[which(myData$RFM == "4 4 1"),]

c. Create a new variable called “LogSpending” which contains the natural logarithms for the total spending during the past 2 years. Bin the logarithm values into 5 equal interval groups. Label the groups using numbers 1 (lowest values) to 5 (highest values). How many observations are in group 2?


myData$LogSpending = round(log(myData$TotalSpending),digit=2)
ht = quantile(myData$LogSpending, probs=seq(0,1, by = 1/5))
myData$Binned_Log = cut(myData$LogSpending, breaks=5, labels = c("1","2","3","4","5"), include.lowest=TRUE, right=FALSE)
table(myData$Binned_Log)

  1   2   3   4   5 
  3  10  17  56 114 

d. Create a new variable called “AverageOrderSize” which contains the average spending per order. This is calculated by dividing total spending (Spending) by total number of transactions (Frequency) in the past 2 years. Bin the values of AverageOrderSize into 5 equal interval groups. Label the groups using numbers 1 (lowest values) to 5 (highest values). How many observations are in group 2?


myData$AverageOrderSize <-myData$TotalSpending/myData$Frequency
myData$Binned_AOS <- cut(myData$AverageOrderSize, breaks=5, labels = c("1","2","3","4","5"), include.lowest=TRUE, right=FALSE)
table(myData$Binned_AOS)

  1   2   3   4   5 
169  23   6   1   1 
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAotLS0KCiMjIEdyZWcgTWV0Y2FsZiB3b3JrcyBmb3IgYSBuYXRpb25hbCBjcmVkaXQgY2FyZCBjb21wYW55LCBhbmQgaGUgaXMgcGVyZm9ybWluZyBhIGN1c3RvbWVyIHZhbHVlIGFuYWx5c2lzIG9uIGEgc3Vic2V0IG9mIGNyZWRpdCBjYXJkIGN1c3RvbWVycy4gSW4gb3JkZXIgdG8gcGVyZm9ybSB0aGUgUkZNIGFuYWx5c2lzIG9uIHRoZSBjdXN0b21lcnMsIEdyZWcgaGFzIGNvbXBpbGVkIGEgZGF0YSBzZXQgdGhhdCBjb250YWlucyB0aGUgZGF0ZXMgb2YgdGhlIGxhc3QgdHJhbnNhY3Rpb24gKExhc3QtVHJhbnNhY3Rpb25EYXRlKSwgdG90YWwgbnVtYmVyIG9mIHRyYW5zYWN0aW9ucyBpbiB0aGUgcGFzdCAyIHllYXJzIChGcmVxdWVuY3kpLCBhbmQgdG90YWwgc3BlbmRpbmcgZHVyaW5nIHRoZSBwYXN0IDIgeWVhcnMgKFNwZW5kaW5nKS4gQSBwb3J0aW9uIG9mIHRoZSBkYXRhIHNldCBpcyBzaG93biBpbiB0aGUgYWNjb21wYW55aW5nIHRhYmxlLiAKCiMjIGEuIEdyZWcgd2FudHMgdG8gY2FsY3VsYXRlIHRoZSBudW1iZXIgb2YgZGF5cyBiZXR3ZWVuIEFwcmlsIDEsIDIwMTkgYW5kIHRoZSBsYXN0IHRyYW5zYWN0aW9uIGRhdGUuIENyZWF0ZSBhIG5ldyB2YXJpYWJsZSDigJxEYXlzU2luY2VMYXN04oCdIHdoaWNoIGNvbnRhaW5zIHRoZSBudW1iZXIgb2YgZGF5cyBzaW5jZSB0aGUgbGFzdCB0cmFuc2FjdGlvbi4gIFdoYXQgaXMgdGhlIGF2ZXJhZ2UgbnVtYmVyIG9mIGRheXMgc2luY2UgdGhlIGxhc3QgcHVyY2hhc2UgZm9yIGFsbCB0aGUgY3VzdG9tZXJzPyAoUm91bmQgeW91ciBhbnN3ZXIgdG8gdHdvIGRlY2ltYWwgcGxhY2VzLikKCgpgYGB7cn0KbGlicmFyeShyZWFkeGwpCm15RGF0YSA8LSByZWFkX2V4Y2VsKCJDaDJfUTQ1X1YwM19EYXRhX0ZpbGUueGxzeCIpClZpZXcobXlEYXRhKQoKbXlEYXRhJERhdGUgPSBhcy5EYXRlKG15RGF0YSRMYXN0VHJhbnNhY3Rpb25EYXRlLGZvcm1hdD0nJW0tJWQtJVknKQpFbmREYXRlID0gYXMuRGF0ZSgiMDQvMDEvMjAxOSIsIGZvcm1hdD0iJW0vJWQvJVkiKQpteURhdGEkRFNMID0gYWJzKHJvdW5kKGRpZmZ0aW1lKG15RGF0YSREYXRlLEVuZERhdGUpLGRpZ2l0PTIpKQpyb3VuZChtZWFuKG15RGF0YSREU0wpLCBkaWdpdD0yKQpgYGAKCiMjIGIuIENyZWF0ZSB0aGUgUkZNIHNjb3JlcyBmb3IgZWFjaCBjdXN0b21lci4gSG93IG1hbnkgY3VzdG9tZXJzIGhhdmUgYW4gUkZNIHNjb3JlIG9mIDQgNCAxPyBXaGF0IGlzIHRoZWlyIGF2ZXJhZ2Ugc3BlbmRpbmc/CgpgYGB7cn0KIyBmaXZlIGJpbnMgZm9yIGRheXMgU2luY2UgNC8xLzIwMTksIGZpdmUgYmlucyBmb3IgdG90YWwgc3BlbmRpbmcsIGZpdmUgYmlucyBmb3IgZnJlcXVlbmN5Cm15RGF0YSREYXRlUmV2ZXJzZSA8LSBhcy5udW1lcmljKG15RGF0YSREU0wgKiAtMSkKcmVjZW5jeUJpbnMgPC0gcXVhbnRpbGUobXlEYXRhJERhdGVSZXZlcnNlLCBwcm9icyA9c2VxKDAsMSwgYnk9MS81KSkKbXlEYXRhJEJpbm5lZF9SZWNlbmN5IDwtIGN1dChteURhdGEkRGF0ZVJldmVyc2UsIGJyZWFrcyA9IHJlY2VuY3lCaW5zLCBsYWJlbHMgPSBjKCIxIiwiMiIsIjMiLCI0IiwiNSIpLCBpbmNsdWRlLmxvd2VzdD1UUlVFLCByaWdodD1GQUxTRSkKCkZyZXF1ZW5jeUJpbnMgPC0gcXVhbnRpbGUobXlEYXRhJEZyZXF1ZW5jeSwgcHJvYnMgPXNlcSgwLDEsIGJ5PTEvNSkpCm15RGF0YSRCaW5uZWRfRnJlcXVlbmN5IDwtIGN1dChteURhdGEkRnJlcXVlbmN5LCBicmVha3MgPSBGcmVxdWVuY3lCaW5zLCBsYWJlbHMgPSBjKCIxIiwiMiIsIjMiLCI0IiwiNSIpLCBpbmNsdWRlLmxvd2VzdD1UUlVFLCByaWdodD1GQUxTRSkKCk1vbmV0YXJ5QmlucyA8LSBxdWFudGlsZShteURhdGEkVG90YWxTcGVuZGluZywgcHJvYnMgPXNlcSgwLDEsIGJ5PTEvNSkpCm15RGF0YSRCaW5uZWRfTW9uZXRhcnkgPC0gY3V0KG15RGF0YSRUb3RhbFNwZW5kaW5nLCBicmVha3MgPSBNb25ldGFyeUJpbnMsIGxhYmVscyA9IGMoIjEiLCIyIiwiMyIsIjQiLCI1IiksIGluY2x1ZGUubG93ZXN0PVRSVUUsIHJpZ2h0PUZBTFNFKQoKbXlEYXRhJFJGTSA8LXBhc3RlKG15RGF0YSRCaW5uZWRfUmVjZW5jeSwgbXlEYXRhJEJpbm5lZF9GcmVxdWVuY3ksIG15RGF0YSRCaW5uZWRfTW9uZXRhcnkpCgpuID0gbGVuZ3RoKHdoaWNoKG15RGF0YSRSRk0gPT0gIjQgNCAxIikpCm15RGF0YSRBdmVyYWdlT3JkZXJTaXplID0gbXlEYXRhJFRvdGFsU3BlbmRpbmcvbgoKbXlEYXRhW3doaWNoKG15RGF0YSRSRk0gPT0gIjQgNCAxIiksXQpgYGAKCiMjIGMuIENyZWF0ZSBhIG5ldyB2YXJpYWJsZSBjYWxsZWQg4oCcTG9nU3BlbmRpbmfigJ0gd2hpY2ggY29udGFpbnMgdGhlIG5hdHVyYWwgbG9nYXJpdGhtcyBmb3IgdGhlIHRvdGFsIHNwZW5kaW5nIGR1cmluZyB0aGUgcGFzdCAyIHllYXJzLiBCaW4gdGhlIGxvZ2FyaXRobSB2YWx1ZXMgaW50byA1IGVxdWFsIGludGVydmFsIGdyb3Vwcy4gTGFiZWwgdGhlIGdyb3VwcyB1c2luZyBudW1iZXJzIDEgKGxvd2VzdCB2YWx1ZXMpIHRvIDUgKGhpZ2hlc3QgdmFsdWVzKS4gSG93IG1hbnkgb2JzZXJ2YXRpb25zIGFyZSBpbiBncm91cCAyPwoKYGBge3J9CgpteURhdGEkQmlubmVkX0xvZyA9IGN1dChteURhdGEkTG9nU3BlbmRpbmcsIGJyZWFrcz01LCBsYWJlbHMgPSBjKCIxIiwiMiIsIjMiLCI0IiwiNSIpLCBpbmNsdWRlLmxvd2VzdD1UUlVFLCByaWdodD1GQUxTRSkKdGFibGUobXlEYXRhJEJpbm5lZF9Mb2cpCmBgYAoKIyMgZC4gQ3JlYXRlIGEgbmV3IHZhcmlhYmxlIGNhbGxlZCDigJxBdmVyYWdlT3JkZXJTaXpl4oCdIHdoaWNoIGNvbnRhaW5zIHRoZSBhdmVyYWdlIHNwZW5kaW5nIHBlciBvcmRlci4gVGhpcyBpcyBjYWxjdWxhdGVkIGJ5IGRpdmlkaW5nIHRvdGFsIHNwZW5kaW5nIChTcGVuZGluZykgYnkgdG90YWwgbnVtYmVyIG9mIHRyYW5zYWN0aW9ucyAoRnJlcXVlbmN5KSBpbiB0aGUgcGFzdCAyIHllYXJzLiBCaW4gdGhlIHZhbHVlcyBvZiBBdmVyYWdlT3JkZXJTaXplIGludG8gNSBlcXVhbCBpbnRlcnZhbCBncm91cHMuIExhYmVsIHRoZSBncm91cHMgdXNpbmcgbnVtYmVycyAxIChsb3dlc3QgdmFsdWVzKSB0byA1IChoaWdoZXN0IHZhbHVlcykuIEhvdyBtYW55IG9ic2VydmF0aW9ucyBhcmUgaW4gZ3JvdXAgMj8KYGBge3J9CgpteURhdGEkQXZlcmFnZU9yZGVyU2l6ZSA8LW15RGF0YSRUb3RhbFNwZW5kaW5nL215RGF0YSRGcmVxdWVuY3kKbXlEYXRhJEJpbm5lZF9BT1MgPC0gY3V0KG15RGF0YSRBdmVyYWdlT3JkZXJTaXplLCBicmVha3M9NSwgbGFiZWxzID0gYygiMSIsIjIiLCIzIiwiNCIsIjUiKSwgaW5jbHVkZS5sb3dlc3Q9VFJVRSwgcmlnaHQ9RkFMU0UpCnRhYmxlKG15RGF0YSRCaW5uZWRfQU9TKQoKCmBgYAoKCg==