Primary Analysis
Dependent Variable: COD
Independent Variables: FinalTotalPrice, WebsiteDiscount, HasWebsiteDiscount, Brand, SubCategory, ShippingCity,
ShippingState, ShippingAddressType, BillingCity, BillingState, BillingAddressType, CODCharge
Continuous Variables: OrderItemCode, OrderID, MRP, VendorDiscount, CustomerID, COD, FinalTotalPrice, VAT, WebsiteDiscount
Categorical Variables: OrderDate, Category, SubCategory, Brand, ProductColor, ProductSize, VATPercent, CODCharge, WebsiteDiscountCode, HasVendorDiscount, HasWebsiteDiscount, ShippingName, ShippingCity, ShippingState, ShippingPincode, ShippingAddressType, BillingCity, BillingState, BillingPincode, BillingAddressType
# Put projData's columns on the search path. The calls below still qualify
# every column as projData$<name>, so they do not rely on bare column names.
attach(projData)
# Share of orders with and without a website discount inside each COD group.
# Percentages are taken row-wise; the appended "Sum" column shows the row total.
cod_by_discount <- table(
  projData$COD,
  projData$HasWebsiteDiscount,
  dnn = c("COD", "DISCOUNT")
)
cod_by_discount_pct <- prop.table(cod_by_discount, margin = 1) * 100
round(addmargins(cod_by_discount_pct, margin = 2), digits = 1)
## DISCOUNT
## COD 0 1 Sum
## 0 59.9 40.1 100.0
## 1 68.3 31.7 100.0
# Brand mix inside each COD group, as row percentages with a "Sum" column.
cod_by_brand <- table(
  projData$COD,
  projData$Brand,
  dnn = c("COD", "BRAND")
)
cod_by_brand_pct <- prop.table(cod_by_brand, margin = 1) * 100
round(addmargins(cod_by_brand_pct, margin = 2), digits = 1)
## BRAND
## COD ATHENA FABALLEY GRITSTONES HARPA MEIRA MISS CHASE MONTEIL & MUNERO
## 0 7.5 16.2 14.5 17.6 12.2 3.0 13.3
## 1 10.0 14.3 15.3 14.7 11.6 4.1 14.9
## BRAND
## COD MR BUTTON THE VANCA TSHIRT COMPANY Sum
## 0 1.8 10.8 3.1 100.0
## 1 1.9 10.2 3.0 100.0
# Shipping-state mix inside each COD group, as row percentages with a "Sum"
# column.
cod_by_ship_state <- table(
  projData$COD,
  projData$ShippingState,
  dnn = c("COD", "SHIPPING STATE")
)
cod_by_ship_state_pct <- prop.table(cod_by_ship_state, margin = 1) * 100
round(addmargins(cod_by_ship_state_pct, margin = 2), digits = 1)
## SHIPPING STATE
## COD AN AP AR AS BR CH CT DD DL DN GA
## 0 0.1 7.1 0.0 0.9 0.4 1.1 0.4 0.0 12.9 0.0 0.5
## 1 0.0 5.6 0.0 2.5 1.3 1.3 1.0 0.0 11.0 0.1 1.5
## SHIPPING STATE
## COD GJ HP HR JH JK KA KL MH ML MP MZ
## 0 2.8 0.5 7.9 0.7 0.4 16.4 2.0 21.3 0.2 1.5 0.0
## 1 5.0 0.7 5.7 1.5 2.1 10.1 0.1 17.4 0.8 3.1 0.4
## SHIPPING STATE
## COD OR PB PY RJ SK TN TR UP UT WB Sum
## 0 1.8 1.4 0.0 1.9 0.1 4.6 0.0 7.9 0.9 4.1 100.0
## 1 2.9 2.9 0.1 3.2 0.0 3.3 0.2 9.1 1.3 5.8 100.0
# Billing-state mix inside each COD group, as row percentages with a "Sum"
# column.
cod_by_bill_state <- table(
  projData$COD,
  projData$BillingState,
  dnn = c("COD", "BILLING STATE")
)
cod_by_bill_state_pct <- prop.table(cod_by_bill_state, margin = 1) * 100
round(addmargins(cod_by_bill_state_pct, margin = 2), digits = 1)
## BILLING STATE
## COD AN AP AS BR CH CT DD DL DN GA GJ
## 0 0.1 7.1 0.9 0.4 1.1 0.4 0.0 13.2 0.0 0.5 2.8
## 1 0.0 5.7 2.5 1.3 1.2 1.0 0.0 11.2 0.1 1.5 4.9
## BILLING STATE
## COD HP HR JH JK KA KL MH ML MN MP MZ
## 0 0.5 7.7 0.7 0.4 16.3 2.0 21.2 0.2 0.0 1.6 0.0
## 1 0.8 5.5 1.5 2.1 10.1 0.1 17.4 0.8 0.0 3.1 0.4
## BILLING STATE
## COD NL OR PB PY RJ SK TN TR UP UT WB
## 0 0.0 1.9 1.3 0.1 1.9 0.1 4.6 0.0 7.9 0.9 4.0
## 1 0.0 2.9 2.9 0.1 3.2 0.0 3.2 0.2 9.1 1.3 5.8
## BILLING STATE
## COD Sum
## 0 100.0
## 1 100.0
# Counts of COD against shipping address type. The value "Null" is passed to
# `exclude`, so only the remaining address types appear as columns.
table(
  projData$COD,
  projData$ShippingAddressType,
  dnn = c("COD", "ADDRESS TYPE"),
  exclude = "Null"
)
## ADDRESS TYPE
## COD Home Office
## 0 9697 2891
## 1 10840 2015
# Mean FinalTotalPrice for each value of HasVendorDiscount. The formula keeps
# the projData$ prefixes, so the result's column names carry them too.
aggregate(
  projData$FinalTotalPrice ~ projData$HasVendorDiscount,
  FUN = mean
)
## projData$HasVendorDiscount projData$FinalTotalPrice
## 1 0 808.4329
## 2 1 702.3885
# Distribution of FinalTotalPrice for orders with and without a vendor
# discount. A box plot draws the median and quartiles rather than the mean,
# so the title names the quantity shown instead of "Mean price"; the x-axis
# label also closes its parenthesis.
boxplot(
  projData$FinalTotalPrice ~ projData$HasVendorDiscount,
  ylab = "Final Total Price",
  xlab = "Vendor discount (1 = Present, 0 = Not Present)",
  main = "Final total price by vendor discount"
)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Plot the mean FinalTotalPrice of each vendor-discount group with
# gplots::plotmeans(). The x-axis label now closes its parenthesis.
plotmeans(
  projData$FinalTotalPrice ~ projData$HasVendorDiscount,
  ylab = "Final Total Price",
  xlab = "Vendor discount (1 = Present, 0 = Not Present)",
  main = "Mean price based on vendor discount"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
# Mean FinalTotalPrice for each value of HasWebsiteDiscount. The formula keeps
# the projData$ prefixes, so the result's column names carry them too.
aggregate(
  projData$FinalTotalPrice ~ projData$HasWebsiteDiscount,
  FUN = mean
)
## projData$HasWebsiteDiscount projData$FinalTotalPrice
## 1 0 747.507
## 2 1 768.375
# Distribution of FinalTotalPrice split by website discount, then by COD.
# Box plots draw the median and quartiles rather than the mean, so the titles
# name the quantity shown instead of "Mean price"; both x-axis labels also
# close their parenthesis.
boxplot(
  projData$FinalTotalPrice ~ projData$HasWebsiteDiscount,
  ylab = "Final Total Price",
  xlab = "Website discount (1 = Present, 0 = Not Present)",
  main = "Final total price by website discount"
)
boxplot(
  projData$FinalTotalPrice ~ projData$COD,
  ylab = "Final Total Price",
  xlab = "COD (1 = COD, 0 = Not COD)",
  main = "Final total price by COD"
)
# Keep only the COD column for the rows where COD equals 1, then report the
# dimensions of that subset; its row count is the number of rows with COD == 1.
projdata1 <- subset(projData, subset = COD == 1, select = COD)
dim(projdata1)
## [1] 28144 1
# Sum of the COD column within each shipping address type. The grouping
# column is named "add" in the result.
aggregate(
  x = projData$COD,
  by = list(add = projData$ShippingAddressType),
  FUN = sum
)
## add x
## 1 Home 10840
## 2 Null 15289
## 3 Office 2015
From the data we can see that Home-type addresses account for more COD orders (10,840) than Office-type addresses (2,015).

## Correlations
Pearson’s Correlation Test
# projData was already attached earlier in this analysis, and every column
# used below is referenced as projData$<name> or projData[, ...]. Attaching it
# a second time only produced "objects are masked" messages, so the duplicate
# attach() call has been dropped.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Price and discount columns to correlate. CODselected is reused by the
# corrgram and chart.Correlation calls later in this section.
CODselected <- c(
  "MRP",
  "VendorDiscount",
  "FinalTotalPrice",
  "VAT",
  "WebsiteDiscount"
)
# Hmisc::rcorr() takes a matrix, so convert the selected columns first.
price_matrix <- as.matrix(projData[, CODselected])
rcorr(price_matrix)
## MRP VendorDiscount FinalTotalPrice VAT WebsiteDiscount
## MRP 1.00 0.53 0.82 0.82 0.33
## VendorDiscount 0.53 1.00 0.09 0.08 -0.29
## FinalTotalPrice 0.82 0.09 1.00 1.00 0.19
## VAT 0.82 0.08 1.00 1.00 0.20
## WebsiteDiscount 0.33 -0.29 0.19 0.20 1.00
##
## n= 45898
##
##
## P
## MRP VendorDiscount FinalTotalPrice VAT WebsiteDiscount
## MRP 0 0 0 0
## VendorDiscount 0 0 0 0
## FinalTotalPrice 0 0 0 0
## VAT 0 0 0 0
## WebsiteDiscount 0 0 0 0
Corrgrams
library(corrgram)
##
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
##
## panel.fill
# Corrgram of the selected price and discount columns: scatter plots in the
# lower triangle, pie glyphs in the upper triangle, min/max values on the
# diagonal. order = TRUE lets corrgram reorder the variables.
corrgram(
  projData[, CODselected],
  order = TRUE,
  main = "Impact of COD",
  lower.panel = panel.pts,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt
)
library("PerformanceAnalytics")
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
##
## textplot
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart of the selected price and discount columns, with
# histograms enabled through histogram = TRUE.
chart.Correlation(
  projData[, CODselected],
  histogram = TRUE,
  main = "Correlation between Prices and discount"
)
As expected, VAT and FinalTotalPrice are almost perfectly correlated (r = 1.00), because VAT is a fixed percentage of FinalTotalPrice (excluding the COD charge). MRP is also highly correlated with both FinalTotalPrice and VAT (r = 0.82), since MRP makes up the major part of FinalTotalPrice.