This is my initial project work on Retail Sales Marketing Profit Cost.
retail.df <- read.csv(paste("RetailSales.csv", sep=""))
attach(retail.df)
library(psych)
describe(retail.df)
## vars n mean sd median trimmed
## Year 1 84672 2005.50 1.12 2005.50 2005.50
## Product.line* 2 84672 3.15 1.66 3.00 3.19
## Product.type* 3 84672 10.51 6.23 10.00 10.39
## Product* 4 84672 72.50 41.57 72.50 72.50
## Order.method.type* 5 84672 4.00 2.00 4.00 4.00
## Retailer.country* 6 84672 11.00 6.06 11.00 11.00
## Revenue 7 24743 189418.25 390750.90 59867.27 106149.19
## Planned.revenue 8 24743 198817.54 402535.53 63906.84 112896.09
## Product.cost 9 24743 111625.14 238415.56 32783.72 61046.41
## Quantity 10 24743 3606.56 8777.72 1043.00 1829.23
## Unit.cost 11 24743 84.89 131.11 36.83 51.88
## Unit.price 12 24743 155.99 246.80 66.77 94.20
## Gross.profit 13 24743 77793.11 158122.34 25793.76 44033.05
## Unit.sale.price 14 24743 147.23 232.05 62.65 89.23
## mad min max range skew
## Year 1.48 2004.00 2007.00 3.00 0.00
## Product.line* 2.97 1.00 5.00 4.00 -0.14
## Product.type* 8.90 1.00 21.00 20.00 0.16
## Product* 53.37 1.00 144.00 143.00 0.00
## Order.method.type* 2.97 1.00 7.00 6.00 0.00
## Retailer.country* 7.41 1.00 21.00 20.00 0.00
## Revenue 75285.35 0.00 10054289.00 10054289.00 6.90
## Planned.revenue 80672.73 15.69 10054289.00 10054273.31 6.68
## Product.cost 42496.83 5.76 6756852.78 6756847.02 7.23
## Quantity 1294.31 1.00 313628.00 313627.00 9.50
## Unit.cost 43.48 0.85 690.00 689.15 2.45
## Unit.price 77.10 2.06 1359.72 1357.66 2.72
## Gross.profit 31742.48 -18159.60 3521097.76 3539257.36 6.90
## Unit.sale.price 72.14 0.00 1307.80 1307.80 2.70
## kurtosis se
## Year -1.36 0.00
## Product.line* -1.62 0.01
## Product.type* -1.27 0.02
## Product* -1.20 0.14
## Order.method.type* -1.25 0.01
## Retailer.country* -1.21 0.02
## Revenue 85.05 2484.13
## Planned.revenue 79.43 2559.05
## Product.cost 96.19 1515.68
## Quantity 175.46 55.80
## Unit.cost 5.72 0.83
## Unit.price 7.74 1.57
## Gross.profit 83.84 1005.23
## Unit.sale.price 7.62 1.48
mytable <- with(retail.df, table(Order.method.type))
mytable
## Order.method.type
## E-mail Fax Mail Sales visit Special Telephone
## 12096 12096 12096 12096 12096 12096
## Web
## 12096
ProductlinebyOrdertype <- xtabs(~ retail.df$Product.line + retail.df$Order.method.type , data=retail.df)
ProductlinebyOrdertype
## retail.df$Order.method.type
## retail.df$Product.line E-mail Fax Mail Sales visit Special Telephone
## Camping Equipment 3444 3444 3444 3444 3444 3444
## Golf Equipment 1260 1260 1260 1260 1260 1260
## Mountaineering Equipment 1764 1764 1764 1764 1764 1764
## Outdoor Protection 1260 1260 1260 1260 1260 1260
## Personal Accessories 4368 4368 4368 4368 4368 4368
## retail.df$Order.method.type
## retail.df$Product.line Web
## Camping Equipment 3444
## Golf Equipment 1260
## Mountaineering Equipment 1764
## Outdoor Protection 1260
## Personal Accessories 4368
addmargins(ProductlinebyOrdertype)
## retail.df$Order.method.type
## retail.df$Product.line E-mail Fax Mail Sales visit Special
## Camping Equipment 3444 3444 3444 3444 3444
## Golf Equipment 1260 1260 1260 1260 1260
## Mountaineering Equipment 1764 1764 1764 1764 1764
## Outdoor Protection 1260 1260 1260 1260 1260
## Personal Accessories 4368 4368 4368 4368 4368
## Sum 12096 12096 12096 12096 12096
## retail.df$Order.method.type
## retail.df$Product.line Telephone Web Sum
## Camping Equipment 3444 3444 24108
## Golf Equipment 1260 1260 8820
## Mountaineering Equipment 1764 1764 12348
## Outdoor Protection 1260 1260 8820
## Personal Accessories 4368 4368 30576
## Sum 12096 12096 84672
ProducttypebyOrdertype <- xtabs(~ retail.df$Product.type + retail.df$Order.method.type , data=retail.df)
ProducttypebyOrdertype
## retail.df$Order.method.type
## retail.df$Product.type E-mail Fax Mail Sales visit Special Telephone Web
## Binoculars 504 504 504 504 504 504 504
## Climbing Accessories 588 588 588 588 588 588 588
## Cooking Gear 840 840 840 840 840 840 840
## Eyewear 1344 1344 1344 1344 1344 1344 1344
## First Aid 420 420 420 420 420 420 420
## Golf Accessories 336 336 336 336 336 336 336
## Insect Repellents 420 420 420 420 420 420 420
## Irons 336 336 336 336 336 336 336
## Knives 588 588 588 588 588 588 588
## Lanterns 1008 1008 1008 1008 1008 1008 1008
## Navigation 840 840 840 840 840 840 840
## Packs 504 504 504 504 504 504 504
## Putters 252 252 252 252 252 252 252
## Rope 336 336 336 336 336 336 336
## Safety 336 336 336 336 336 336 336
## Sleeping Bags 588 588 588 588 588 588 588
## Sunscreen 420 420 420 420 420 420 420
## Tents 504 504 504 504 504 504 504
## Tools 504 504 504 504 504 504 504
## Watches 1092 1092 1092 1092 1092 1092 1092
## Woods 336 336 336 336 336 336 336
addmargins(ProducttypebyOrdertype)
## retail.df$Order.method.type
## retail.df$Product.type E-mail Fax Mail Sales visit Special Telephone
## Binoculars 504 504 504 504 504 504
## Climbing Accessories 588 588 588 588 588 588
## Cooking Gear 840 840 840 840 840 840
## Eyewear 1344 1344 1344 1344 1344 1344
## First Aid 420 420 420 420 420 420
## Golf Accessories 336 336 336 336 336 336
## Insect Repellents 420 420 420 420 420 420
## Irons 336 336 336 336 336 336
## Knives 588 588 588 588 588 588
## Lanterns 1008 1008 1008 1008 1008 1008
## Navigation 840 840 840 840 840 840
## Packs 504 504 504 504 504 504
## Putters 252 252 252 252 252 252
## Rope 336 336 336 336 336 336
## Safety 336 336 336 336 336 336
## Sleeping Bags 588 588 588 588 588 588
## Sunscreen 420 420 420 420 420 420
## Tents 504 504 504 504 504 504
## Tools 504 504 504 504 504 504
## Watches 1092 1092 1092 1092 1092 1092
## Woods 336 336 336 336 336 336
## Sum 12096 12096 12096 12096 12096 12096
## retail.df$Order.method.type
## retail.df$Product.type Web Sum
## Binoculars 504 3528
## Climbing Accessories 588 4116
## Cooking Gear 840 5880
## Eyewear 1344 9408
## First Aid 420 2940
## Golf Accessories 336 2352
## Insect Repellents 420 2940
## Irons 336 2352
## Knives 588 4116
## Lanterns 1008 7056
## Navigation 840 5880
## Packs 504 3528
## Putters 252 1764
## Rope 336 2352
## Safety 336 2352
## Sleeping Bags 588 4116
## Sunscreen 420 2940
## Tents 504 3528
## Tools 504 3528
## Watches 1092 7644
## Woods 336 2352
## Sum 12096 84672
boxplot(Gross.profit ~ Order.method.type ,data=retail.df, main="Effect of Order method type on Gross profit", ylab="Order type", xlab="Gross profit", horizontal=FALSE)
boxplot(Quantity ~ Order.method.type ,data=retail.df, main="Effect of Order method type on quantity", ylab="Order type", xlab="Quantity", horizontal=FALSE)
boxplot(Gross.profit ~ Year ,data=retail.df, main="Distribution of gross profit with year", ylab="Year", xlab="Gross profit", horizontal=FALSE)
library(lattice)
histogram(~Gross.profit, data = retail.df,
main = "Distribution of Gross profit", xlab="gross profit", col='red' )
library(lattice)
histogram(~Revenue, data = retail.df,
main = "Distribution of Revenue", xlab="revenue", col='blue' )
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of Gross profit with order method
scatterplot(Gross.profit ~ Order.method.type ,data=retail.df, main="Scatterplot of Gross profit with order type", xlab="Order method type", ylab="Gross profit", horizontal=TRUE)
## [1] "7457" "7604" "16574" "16584" "9956" "1590" "16731" "58925"
## [9] "16133" "17172" "37912" "16009" "37177" "38059" "16597" "37765"
## [17] "38206" "16163" "80115" "2780" "16113" "16554" "16407" "23031"
## [25] "1568" "14344" "22737" "16848" "1996" "16701" "1497" "22662"
## [33] "22956" "1492" "2668" "1494" "71758" "46033" "1499" "22666"
## [41] "16828" "25061" "16093" "16534" "22996" "16240" "23885" "16823"
## [49] "16088" "16387" "1471" "22639" "1765" "22933" "3529" "1618"
## [57] "7351" "3676" "9850" "17053" "49288" "51787" "52228" "58843"
## [65] "51934" "28561" "52522" "49729" "31060" "72955"
# Scatterplot of Gross profit with year
scatterplot(Gross.profit ~ Year ,data=retail.df, main="Scatterplot of Gross profit with year", ylab="Gross profit", xlab="Year", horizontal=TRUE)
## Warning in plot.window(...): "horizontal" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "horizontal" is not a graphical
## parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter
## Warning in box(...): "horizontal" is not a graphical parameter
## Warning in title(...): "horizontal" is not a graphical parameter
# Average gross profit by order method
avgprofit = aggregate(Gross.profit ~ Order.method.type , data = retail.df, mean)
avgprofit
## Order.method.type Gross.profit
## 1 E-mail 35324.29
## 2 Fax 16319.28
## 3 Mail 16386.31
## 4 Sales visit 29109.29
## 5 Special 14006.19
## 6 Telephone 36904.41
## 7 Web 139377.47
library(car)
scatterplotMatrix(~ Gross.profit + Revenue , data=retail.df,
main="Gross profit")
# Scatterplot Matrices from the car Package
library(car)
scatterplotMatrix(~Gross.profit+Year+Product.type+Order.method.type , data=retail.df)
library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, units
colretails <- c("Gross.profit","Product.cost","Quantity")
corMatrix <- rcorr(as.matrix(retail.df[,colretails]))
corMatrix
## Gross.profit Product.cost Quantity
## Gross.profit 1.00 0.94 0.49
## Product.cost 0.94 1.00 0.51
## Quantity 0.49 0.51 1.00
##
## n= 24743
##
##
## P
## Gross.profit Product.cost Quantity
## Gross.profit 0 0
## Product.cost 0 0
## Quantity 0 0
library(Hmisc)
library(car)
library(corrgram)
colretails <- c("Gross.profit","Product.cost","Quantity")
corrgram(retail.df[,colretails], order=TRUE,
main="Gross profit vs others",
lower.panel=panel.pts, upper.panel=panel.pie,
diag.panel=panel.minmax, text.panel=panel.txt)
cor.test(retail.df[,"Gross.profit"], retail.df[,"Quantity"])
##
## Pearson's product-moment correlation
##
## data: retail.df[, "Gross.profit"] and retail.df[, "Quantity"]
## t = 87.538, df = 24741, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4767203 0.4957483
## sample estimates:
## cor
## 0.486292
cor.test(retail.df[,"Gross.profit"], retail.df[,"Revenue"])
##
## Pearson's product-moment correlation
##
## data: retail.df[, "Gross.profit"] and retail.df[, "Revenue"]
## t = 736.41, df = 24741, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9773903 0.9784778
## sample estimates:
## cor
## 0.9779407
So, Gross profit and Quantity & Gross profit and Revenue are highly correlated.
t.test(retail.df$Revenue,retail.df$Planned.revenue)
##
## Welch Two Sample t-test
##
## data: retail.df$Revenue and retail.df$Planned.revenue
## t = -2.6355, df = 49440, p-value = 0.008405
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -16389.583 -2408.994
## sample estimates:
## mean of x mean of y
## 189418.3 198817.5
Since the p-value is less than 0.05, we fail to reject the null hypothesis that planned revenue and revenue are equal.