This is my initial project work on Retail Sales Marketing Profit Cost.

Read the data set in R

retail.df <- read.csv(paste("RetailSales.csv", sep=""))
attach(retail.df)

Get a summary of data set

library(psych)
describe(retail.df)
##                    vars     n      mean        sd   median   trimmed
## Year                  1 84672   2005.50      1.12  2005.50   2005.50
## Product.line*         2 84672      3.15      1.66     3.00      3.19
## Product.type*         3 84672     10.51      6.23    10.00     10.39
## Product*              4 84672     72.50     41.57    72.50     72.50
## Order.method.type*    5 84672      4.00      2.00     4.00      4.00
## Retailer.country*     6 84672     11.00      6.06    11.00     11.00
## Revenue               7 24743 189418.25 390750.90 59867.27 106149.19
## Planned.revenue       8 24743 198817.54 402535.53 63906.84 112896.09
## Product.cost          9 24743 111625.14 238415.56 32783.72  61046.41
## Quantity             10 24743   3606.56   8777.72  1043.00   1829.23
## Unit.cost            11 24743     84.89    131.11    36.83     51.88
## Unit.price           12 24743    155.99    246.80    66.77     94.20
## Gross.profit         13 24743  77793.11 158122.34 25793.76  44033.05
## Unit.sale.price      14 24743    147.23    232.05    62.65     89.23
##                         mad       min         max       range  skew
## Year                   1.48   2004.00     2007.00        3.00  0.00
## Product.line*          2.97      1.00        5.00        4.00 -0.14
## Product.type*          8.90      1.00       21.00       20.00  0.16
## Product*              53.37      1.00      144.00      143.00  0.00
## Order.method.type*     2.97      1.00        7.00        6.00  0.00
## Retailer.country*      7.41      1.00       21.00       20.00  0.00
## Revenue            75285.35      0.00 10054289.00 10054289.00  6.90
## Planned.revenue    80672.73     15.69 10054289.00 10054273.31  6.68
## Product.cost       42496.83      5.76  6756852.78  6756847.02  7.23
## Quantity            1294.31      1.00   313628.00   313627.00  9.50
## Unit.cost             43.48      0.85      690.00      689.15  2.45
## Unit.price            77.10      2.06     1359.72     1357.66  2.72
## Gross.profit       31742.48 -18159.60  3521097.76  3539257.36  6.90
## Unit.sale.price       72.14      0.00     1307.80     1307.80  2.70
##                    kurtosis      se
## Year                  -1.36    0.00
## Product.line*         -1.62    0.01
## Product.type*         -1.27    0.02
## Product*              -1.20    0.14
## Order.method.type*    -1.25    0.01
## Retailer.country*     -1.21    0.02
## Revenue               85.05 2484.13
## Planned.revenue       79.43 2559.05
## Product.cost          96.19 1515.68
## Quantity             175.46   55.80
## Unit.cost              5.72    0.83
## Unit.price             7.74    1.57
## Gross.profit          83.84 1005.23
## Unit.sale.price        7.62    1.48

One way contigency table

mytable <- with(retail.df, table(Order.method.type))
mytable
## Order.method.type
##      E-mail         Fax        Mail Sales visit     Special   Telephone 
##       12096       12096       12096       12096       12096       12096 
##         Web 
##       12096

Two way contigency tables

ProductlinebyOrdertype <- xtabs(~ retail.df$Product.line + retail.df$Order.method.type , data=retail.df)
ProductlinebyOrdertype
##                           retail.df$Order.method.type
## retail.df$Product.line     E-mail  Fax Mail Sales visit Special Telephone
##   Camping Equipment          3444 3444 3444        3444    3444      3444
##   Golf Equipment             1260 1260 1260        1260    1260      1260
##   Mountaineering Equipment   1764 1764 1764        1764    1764      1764
##   Outdoor Protection         1260 1260 1260        1260    1260      1260
##   Personal Accessories       4368 4368 4368        4368    4368      4368
##                           retail.df$Order.method.type
## retail.df$Product.line      Web
##   Camping Equipment        3444
##   Golf Equipment           1260
##   Mountaineering Equipment 1764
##   Outdoor Protection       1260
##   Personal Accessories     4368
addmargins(ProductlinebyOrdertype)
##                           retail.df$Order.method.type
## retail.df$Product.line     E-mail   Fax  Mail Sales visit Special
##   Camping Equipment          3444  3444  3444        3444    3444
##   Golf Equipment             1260  1260  1260        1260    1260
##   Mountaineering Equipment   1764  1764  1764        1764    1764
##   Outdoor Protection         1260  1260  1260        1260    1260
##   Personal Accessories       4368  4368  4368        4368    4368
##   Sum                       12096 12096 12096       12096   12096
##                           retail.df$Order.method.type
## retail.df$Product.line     Telephone   Web   Sum
##   Camping Equipment             3444  3444 24108
##   Golf Equipment                1260  1260  8820
##   Mountaineering Equipment      1764  1764 12348
##   Outdoor Protection            1260  1260  8820
##   Personal Accessories          4368  4368 30576
##   Sum                          12096 12096 84672
ProducttypebyOrdertype <- xtabs(~ retail.df$Product.type + retail.df$Order.method.type , data=retail.df)
ProducttypebyOrdertype
##                       retail.df$Order.method.type
## retail.df$Product.type E-mail  Fax Mail Sales visit Special Telephone  Web
##   Binoculars              504  504  504         504     504       504  504
##   Climbing Accessories    588  588  588         588     588       588  588
##   Cooking Gear            840  840  840         840     840       840  840
##   Eyewear                1344 1344 1344        1344    1344      1344 1344
##   First Aid               420  420  420         420     420       420  420
##   Golf Accessories        336  336  336         336     336       336  336
##   Insect Repellents       420  420  420         420     420       420  420
##   Irons                   336  336  336         336     336       336  336
##   Knives                  588  588  588         588     588       588  588
##   Lanterns               1008 1008 1008        1008    1008      1008 1008
##   Navigation              840  840  840         840     840       840  840
##   Packs                   504  504  504         504     504       504  504
##   Putters                 252  252  252         252     252       252  252
##   Rope                    336  336  336         336     336       336  336
##   Safety                  336  336  336         336     336       336  336
##   Sleeping Bags           588  588  588         588     588       588  588
##   Sunscreen               420  420  420         420     420       420  420
##   Tents                   504  504  504         504     504       504  504
##   Tools                   504  504  504         504     504       504  504
##   Watches                1092 1092 1092        1092    1092      1092 1092
##   Woods                   336  336  336         336     336       336  336
addmargins(ProducttypebyOrdertype)
##                       retail.df$Order.method.type
## retail.df$Product.type E-mail   Fax  Mail Sales visit Special Telephone
##   Binoculars              504   504   504         504     504       504
##   Climbing Accessories    588   588   588         588     588       588
##   Cooking Gear            840   840   840         840     840       840
##   Eyewear                1344  1344  1344        1344    1344      1344
##   First Aid               420   420   420         420     420       420
##   Golf Accessories        336   336   336         336     336       336
##   Insect Repellents       420   420   420         420     420       420
##   Irons                   336   336   336         336     336       336
##   Knives                  588   588   588         588     588       588
##   Lanterns               1008  1008  1008        1008    1008      1008
##   Navigation              840   840   840         840     840       840
##   Packs                   504   504   504         504     504       504
##   Putters                 252   252   252         252     252       252
##   Rope                    336   336   336         336     336       336
##   Safety                  336   336   336         336     336       336
##   Sleeping Bags           588   588   588         588     588       588
##   Sunscreen               420   420   420         420     420       420
##   Tents                   504   504   504         504     504       504
##   Tools                   504   504   504         504     504       504
##   Watches                1092  1092  1092        1092    1092      1092
##   Woods                   336   336   336         336     336       336
##   Sum                   12096 12096 12096       12096   12096     12096
##                       retail.df$Order.method.type
## retail.df$Product.type   Web   Sum
##   Binoculars             504  3528
##   Climbing Accessories   588  4116
##   Cooking Gear           840  5880
##   Eyewear               1344  9408
##   First Aid              420  2940
##   Golf Accessories       336  2352
##   Insect Repellents      420  2940
##   Irons                  336  2352
##   Knives                 588  4116
##   Lanterns              1008  7056
##   Navigation             840  5880
##   Packs                  504  3528
##   Putters                252  1764
##   Rope                   336  2352
##   Safety                 336  2352
##   Sleeping Bags          588  4116
##   Sunscreen              420  2940
##   Tents                  504  3528
##   Tools                  504  3528
##   Watches               1092  7644
##   Woods                  336  2352
##   Sum                  12096 84672

Effect of order method on gross profit

boxplot(Gross.profit ~ Order.method.type ,data=retail.df, main="Effect of Order method type on Gross profit", ylab="Order type", xlab="Gross profit", horizontal=FALSE)

Effect of order method on quantity

boxplot(Quantity ~ Order.method.type ,data=retail.df, main="Effect of Order method type on quantity", ylab="Order type", xlab="Quantity", horizontal=FALSE)

Gross profit according to year

boxplot(Gross.profit ~ Year ,data=retail.df, main="Distribution of gross profit with year", ylab="Year", xlab="Gross profit", horizontal=FALSE)

Histograms

library(lattice)
histogram(~Gross.profit, data = retail.df,
main = "Distribution of Gross profit", xlab="gross profit", col='red' ) 

library(lattice)
histogram(~Revenue, data = retail.df,
main = "Distribution of Revenue", xlab="revenue", col='blue' ) 

Scatterplots

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot of Gross profit with order method
scatterplot(Gross.profit ~ Order.method.type ,data=retail.df, main="Scatterplot of Gross profit with order type", xlab="Order method type", ylab="Gross profit", horizontal=TRUE)

##  [1] "7457"  "7604"  "16574" "16584" "9956"  "1590"  "16731" "58925"
##  [9] "16133" "17172" "37912" "16009" "37177" "38059" "16597" "37765"
## [17] "38206" "16163" "80115" "2780"  "16113" "16554" "16407" "23031"
## [25] "1568"  "14344" "22737" "16848" "1996"  "16701" "1497"  "22662"
## [33] "22956" "1492"  "2668"  "1494"  "71758" "46033" "1499"  "22666"
## [41] "16828" "25061" "16093" "16534" "22996" "16240" "23885" "16823"
## [49] "16088" "16387" "1471"  "22639" "1765"  "22933" "3529"  "1618" 
## [57] "7351"  "3676"  "9850"  "17053" "49288" "51787" "52228" "58843"
## [65] "51934" "28561" "52522" "49729" "31060" "72955"
# Scatterplot of Gross profit with year
scatterplot(Gross.profit ~ Year ,data=retail.df, main="Scatterplot of Gross profit with year", ylab="Gross profit", xlab="Year", horizontal=TRUE)
## Warning in plot.window(...): "horizontal" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "horizontal" is not a graphical
## parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter
## Warning in box(...): "horizontal" is not a graphical parameter
## Warning in title(...): "horizontal" is not a graphical parameter

# Average gross profit by order method
avgprofit = aggregate(Gross.profit ~ Order.method.type , data = retail.df, mean)
avgprofit
##   Order.method.type Gross.profit
## 1            E-mail     35324.29
## 2               Fax     16319.28
## 3              Mail     16386.31
## 4       Sales visit     29109.29
## 5           Special     14006.19
## 6         Telephone     36904.41
## 7               Web    139377.47
library(car)
scatterplotMatrix(~ Gross.profit + Revenue , data=retail.df,
    main="Gross profit")

# Scatterplot Matrices from the car Package
library(car)
scatterplotMatrix(~Gross.profit+Year+Product.type+Order.method.type , data=retail.df)

CORRELATION MATRIX

library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
colretails <- c("Gross.profit","Product.cost","Quantity")
corMatrix <- rcorr(as.matrix(retail.df[,colretails]))
corMatrix
##              Gross.profit Product.cost Quantity
## Gross.profit         1.00         0.94     0.49
## Product.cost         0.94         1.00     0.51
## Quantity             0.49         0.51     1.00
## 
## n= 24743 
## 
## 
## P
##              Gross.profit Product.cost Quantity
## Gross.profit               0            0      
## Product.cost  0                         0      
## Quantity      0            0

CORRGRAM

library(Hmisc)
library(car)
library(corrgram)
colretails <- c("Gross.profit","Product.cost","Quantity")
corrgram(retail.df[,colretails], order=TRUE,
         main="Gross profit vs others",
         lower.panel=panel.pts, upper.panel=panel.pie,
         diag.panel=panel.minmax, text.panel=panel.txt)

Pearson’s Correlation test

cor.test(retail.df[,"Gross.profit"], retail.df[,"Quantity"])
## 
##  Pearson's product-moment correlation
## 
## data:  retail.df[, "Gross.profit"] and retail.df[, "Quantity"]
## t = 87.538, df = 24741, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4767203 0.4957483
## sample estimates:
##      cor 
## 0.486292
cor.test(retail.df[,"Gross.profit"], retail.df[,"Revenue"])
## 
##  Pearson's product-moment correlation
## 
## data:  retail.df[, "Gross.profit"] and retail.df[, "Revenue"]
## t = 736.41, df = 24741, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9773903 0.9784778
## sample estimates:
##       cor 
## 0.9779407

So, Gross profit and Quantity & Gross profit and Revenue are highly correlated.

T-test

t.test(retail.df$Revenue,retail.df$Planned.revenue)
## 
##  Welch Two Sample t-test
## 
## data:  retail.df$Revenue and retail.df$Planned.revenue
## t = -2.6355, df = 49440, p-value = 0.008405
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16389.583  -2408.994
## sample estimates:
## mean of x mean of y 
##  189418.3  198817.5

Since the p-value is less than 0.05, we fail to reject the null hypothesis that planned revenue and revenue are equal.