Reading Dataset into R:
# Load the e-commerce pricing data set into a data frame.
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# it is kept so any relative paths used elsewhere keep resolving the same way.
setwd("C:/Office/Capestone Project")
# read.csv() splits on commas by default, so no `sep` argument is needed.
ecommercedata.df <- read.csv("Effect of Price on Order Conversion.csv")
Viewing the dataset in R:
# Open the data frame in the spreadsheet-style data viewer.
# The viewer is a GUI window, so only open it when a user is present; batch
# runs (Rscript, report rendering) skip this step.
if (interactive()) {
  View(ecommercedata.df)
}
Descriptive Statistics:
# Summary statistics (n, mean, sd, median, skew, ...) for columns 5 to 10.
# NOTE(review): the printed output flags Product.Margin with `*`, which psych
# appears to use for non-numeric columns converted to numeric; its statistics
# may describe level codes rather than margins -- confirm this is intended.
library(psych)
describe(ecommercedata.df[, 5:10])
## vars n mean sd median
## Minimum.Price 1 1314 612.83 1351.87 348.0
## Maximum.Price 2 1314 748.56 1646.64 429.0
## Shipping.Fee.Charged.To.Customer 3 1314 65.93 82.77 49.0
## Orders 4 1314 1741.10 4724.07 454.0
## Product_Visits 5 1314 69303.72 152102.85 21625.5
## Product.Margin* 6 1314 23.42 17.72 17.0
## trimmed mad min max range
## Minimum.Price 383.31 208.31 17 29276 29259
## Maximum.Price 470.01 257.23 21 35692 35671
## Shipping.Fee.Charged.To.Customer 52.14 29.65 0 1209 1209
## Orders 830.90 547.82 13 84770 84757
## Product_Visits 37142.63 26522.97 13 1932477 1932464
## Product.Margin* 20.97 11.86 1 66 65
## skew kurtosis se
## Minimum.Price 12.76 229.49 37.29
## Maximum.Price 12.79 230.28 45.43
## Shipping.Fee.Charged.To.Customer 5.52 46.37 2.28
## Orders 8.44 102.75 130.32
## Product_Visits 6.23 53.96 4196.04
## Product.Margin* 1.13 0.27 0.49
One Way Contingency Table:
# Frequency of each distinct value in the Category column.
table(ecommercedata.df$Category)
##
## a. Electronic Accessories b. Home Furnishings
## 380 200
## c. Footwear (Men & Women) d. Ethnic Wear (Men & Women)
## 280 234
## e. Home & Kitchen Appliances
## 220
Two Way Contingency Table:
# Cross-tabulate Category (rows) against the shipping fee charged (columns).
# NOTE(review): the fee takes many distinct values, so the printed table is
# very wide; binning the fee (e.g. with cut()) may be easier to read.
table(
  ecommercedata.df$Category,
  ecommercedata.df$Shipping.Fee.Charged.To.Customer
)
##
## 0 9 19 29 39 49 59 69 79 89 99
## a. Electronic Accessories 46 21 16 28 35 51 125 40 5 0 0
## b. Home Furnishings 47 18 33 26 26 21 11 3 0 2 0
## c. Footwear (Men & Women) 34 8 17 19 77 91 6 15 2 2 2
## d. Ethnic Wear (Men & Women) 0 0 1 1 8 27 96 37 17 3 3
## e. Home & Kitchen Appliances 4 0 1 1 2 12 38 48 12 7 3
##
## 109 119 129 139 149 159 169 179 189 199 209
## a. Electronic Accessories 0 0 4 1 0 0 0 0 0 0 0
## b. Home Furnishings 0 1 5 1 1 0 0 0 3 0 0
## c. Footwear (Men & Women) 0 0 0 3 0 0 0 0 2 1 1
## d. Ethnic Wear (Men & Women) 3 0 22 6 3 0 1 2 3 0 0
## e. Home & Kitchen Appliances 3 13 10 5 7 4 7 1 5 2 3
##
## 229 239 249 259 269 289 299 319 339 379 399
## a. Electronic Accessories 0 1 0 0 0 0 0 0 0 4 1
## b. Home Furnishings 0 0 0 0 0 1 0 0 0 0 0
## c. Footwear (Men & Women) 0 0 0 0 0 0 0 0 0 0 0
## d. Ethnic Wear (Men & Women) 0 0 0 0 0 0 0 1 0 0 0
## e. Home & Kitchen Appliances 1 1 3 2 4 0 1 1 2 0 0
##
## 409 419 429 489 519 559 599 649 719 739
## a. Electronic Accessories 2 0 0 0 0 0 0 0 0 0
## b. Home Furnishings 1 0 0 0 0 0 0 0 0 0
## c. Footwear (Men & Women) 0 0 0 0 0 0 0 0 0 0
## d. Ethnic Wear (Men & Women) 0 0 0 0 0 0 0 0 0 0
## e. Home & Kitchen Appliances 1 2 1 1 4 1 2 1 1 2
##
## 1209
## a. Electronic Accessories 0
## b. Home Furnishings 0
## c. Footwear (Men & Women) 0
## d. Ethnic Wear (Men & Women) 0
## e. Home & Kitchen Appliances 1
BoxPlot:
# Horizontal box plots of the price distribution within each category:
# price runs along the x-axis, one box per Category on the y-axis.
boxplot(
  ecommercedata.df$Minimum.Price ~ ecommercedata.df$Category,
  horizontal = TRUE,
  main = "Minimum Price per Category",
  xlab = "Minimum Price ",
  ylab = "Category"
)
boxplot(
  ecommercedata.df$Maximum.Price ~ ecommercedata.df$Category,
  horizontal = TRUE,
  main = "Maximum Price per Category",
  xlab = "Maximum Price ",
  ylab = "Category"
)
Histograms:
# Histogram of product visits, with the x-axis limited to 0-20000.
hist(
  ecommercedata.df$Product_Visits,
  breaks = 10000,
  xlim = c(0, 20000),
  col = "red"
)
# Histogram of order counts, with the x-axis limited to 0-5000.
hist(
  ecommercedata.df$Orders,
  breaks = 1000,
  xlim = c(0, 5000),
  col = "green"
)
Correlation Matrix:
# Keep columns 5 to 9 (the two price columns, shipping fee, orders and
# product visits) and print their pairwise correlation matrix.
ecommercenumerical <- ecommercedata.df[, 5:9]
cor(ecommercenumerical)
## Minimum.Price Maximum.Price
## Minimum.Price 1.00000000 0.99998775
## Maximum.Price 0.99998775 1.00000000
## Shipping.Fee.Charged.To.Customer 0.24575070 0.24398256
## Orders -0.08599098 -0.08609678
## Product_Visits -0.09005734 -0.08996123
## Shipping.Fee.Charged.To.Customer
## Minimum.Price 0.2457507
## Maximum.Price 0.2439826
## Shipping.Fee.Charged.To.Customer 1.0000000
## Orders -0.1109196
## Product_Visits -0.1004566
## Orders Product_Visits
## Minimum.Price -0.08599098 -0.09005734
## Maximum.Price -0.08609678 -0.08996123
## Shipping.Fee.Charged.To.Customer -0.11091960 -0.10045665
## Orders 1.00000000 0.86298159
## Product_Visits 0.86298159 1.00000000
Visualizing using Corrgram:
# Draw a correlogram of the numeric subset built for the correlation matrix
# (columns 5 to 9 of ecommercedata.df).
library(corrgram)
corrgram(ecommercenumerical)
Scatterplot:
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplots of price (y-axis) against Orders (x-axis), limited to prices
# 0-2000 and orders 0-50000.
# NOTE(review): these plot raw order counts, not a visits-to-orders rate.
scatterplot(
  ecommercedata.df$Minimum.Price ~ ecommercedata.df$Orders,
  xlim = c(0, 50000),
  ylim = c(0, 2000)
)
scatterplot(
  ecommercedata.df$Maximum.Price ~ ecommercedata.df$Orders,
  xlim = c(0, 50000),
  ylim = c(0, 2000)
)
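Higher minimum and maximum prices are associated with fewer orders, suggesting lower conversion of product visits into orders; the relationship is weak (correlation with Orders of about -0.09).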
T-tests:
# Two-sample t-test on the means of Minimum.Price and Orders (t.test() applies
# the Welch unequal-variance form by default).
# NOTE(review): the two vectors look like different quantities (a price and an
# order count), so a difference in their means may not answer the pricing
# question; a test of association such as cor.test() may be what is wanted --
# confirm with the analysis owner.
t.test(ecommercedata.df$Minimum.Price,ecommercedata.df$Orders)
##
## Welch Two Sample t-test
##
## data: ecommercedata.df$Minimum.Price and ecommercedata.df$Orders
## t = -8.3235, df = 1526.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1394.1640 -862.3824
## sample estimates:
## mean of x mean of y
## 612.8295 1741.1027