PetrolStation

Emilio — Jun 22, 2014, 12:30 PM

# ===============================================================================
# PSDA.r -> Petrol Station Data Analysis by Emilio González González
#           End of Degree Project.  Jan-June 2014
# ===============================================================================
library("plyr")
library("lattice")
library("ggplot2")
# Rebuild a factor from its own values so that the level set is recomputed;
# anything that is not a factor is handed back untouched.
refactor_factor <- function(col) {
    if (!is.factor(col)) {
        return(col)
    }
    factor(col)
}

# Working directory; the CSV files below are read relative to it
setwd("~/Economic Degree/TrabajoFinGrado")

# Global parameters
SampleSize   <- 10   # number of records to show when printing a sample
topX         <- 10   # number of records to show when asking for the top x
maximumLimit <- 80   # threshold used to split the file in two subsets: below and above this quantity
WeekDays     <- c("Sunday", "Monday", "Tuesday", "Wednesday",
                  "Thursday", "Friday", "Saturday")

# Read the data, then show its structure, a summary and 10 random records
data <- read.table(
    "CSVCarburantesFecha.csv",
    header      = TRUE,
    sep         = ";",
    dec         = ",",
    na.strings  = "NA",
    strip.white = TRUE
)
str(data)
'data.frame':   185164 obs. of  5 variables:
 $ date    : Factor w/ 337 levels "01/01/2013","01/02/2013",..: 282 325 325 325 325 325 325 325 325 325 ...
 $ product : Factor w/ 8 levels "DIESEL e+","DIESELe+10",..: 1 7 7 1 1 7 1 1 1 1 ...
 $ quantity: num  234.13 3.44 3.44 7.25 7.25 ...
 $ price   : num  1.44 1.46 1.46 1.38 1.38 ...
 $ amount  : num  335.98 5.01 5.01 10.01 10.01 ...
# Per-column summary of the raw transaction data
summary(data)
         date               product          quantity         price      
 31/03/2013:  2061   DIESEL e+  :125779   Min.   :    0   Min.   :0.651  
 31/08/2012:  1641   GASOLINA 95: 40313   1st Qu.:   23   1st Qu.:1.376  
 14/10/2012:  1468   DIESELe+10 :  6338   Median :   38   Median :1.409  
 31/10/2012:  1364   GAS B      :  5011   Mean   :  169   Mean   :1.400  
 30/09/2012:  1310   gasolina98 :  2910   3rd Qu.:   53   3rd Qu.:1.446  
 30/11/2012:  1180   GAS C      :  1932   Max.   :84601   Max.   :1.688  
 (Other)   :176140   (Other)    :  2881                                  
     amount     
 Min.   :    0  
 1st Qu.:   32  
 Median :   54  
 Mean   :  179  
 3rd Qu.:   75  
 Max.   :63112  

# Print a random sample of records. The sample size comes from the SampleSize
# parameter (capped at the number of rows so sample() cannot fail on a
# small file) instead of a hard-coded 10.
data[sample(nrow(data), min(SampleSize, nrow(data))), ]
             date     product quantity price amount
154602 03/05/2013   DIESEL e+    19.42 1.339  26.00
149647 23/04/2013   DIESEL e+    42.78 1.329  56.85
43477  27/09/2012   DIESEL e+    13.94 1.435  20.00
92331  24/12/2012       GAS C   918.00 0.947 869.35
155902 05/05/2013   DIESEL e+    26.34 1.329  35.01
83296  07/12/2012 GASOLINA 95     7.35 1.418  10.42
76914  24/11/2012   DIESEL e+   107.60 1.394 149.99
23455  28/08/2012   DIESEL e+    44.76 1.430  64.01
34572  12/09/2012   DIESEL e+    31.04 1.450  45.01
170167 01/06/2013   DIESEL e+    43.83 1.369  60.00
# Split the records (transactions) into two subsets depending on the quantity
# being above, or at/below, the given threshold.
# Transactions with a quantity above the threshold (outliers) are not considered.
bigSales   <- data[which(data$quantity > maximumLimit), ]
smallSales <- data[which(data$quantity <= maximumLimit), ]

# Levels of the product column, i.e. the different fuels in the data frame
factor.fuels <- levels(smallSales$product)

# Workaround to get the days of the week in English on a Spanish-localized
# computer: index the WeekDays vector with the numeric weekday.
temp1 <- as.Date(smallSales$date, format = "%d/%m/%Y")
temp2 <- as.POSIXlt(temp1)$wday + 1   # wday is 0-6 (0 = Sunday); shift to 1-7 for indexing
smallSales$weekday <- as.factor(WeekDays[temp2])
summary(smallSales)
         date               product          quantity         price     
 31/03/2013:  1879   DIESEL e+  :115843   Min.   : 0.06   Min.   :0.88  
 14/10/2012:  1452   GASOLINA 95: 40105   1st Qu.:21.52   1st Qu.:1.38  
 31/08/2012:  1282   DIESELe+10 :  6166   Median :35.82   Median :1.42  
 03/08/2012:  1104   gasolina98 :  2846   Mean   :35.91   Mean   :1.42  
 19/08/2012:  1102   GASOLEO B  :   353   3rd Qu.:47.78   3rd Qu.:1.45  
 30/09/2012:  1095   GAS A      :   106   Max.   :80.00   Max.   :1.69  
 (Other)   :157579   (Other)    :    74                                 
     amount           weekday     
 Min.   :  0.1   Friday   :29225  
 1st Qu.: 30.0   Monday   :21588  
 Median : 50.0   Saturday :21339  
 Mean   : 50.9   Sunday   :33101  
 3rd Qu.: 68.0   Thursday :20933  
 Max.   :135.0   Tuesday  :18581  
                 Wednesday:20726  
# Boxplot of quantities by product
#=============================================================================================
# NOTE(review): `rescale` is not among the documented boxplot() arguments -
# confirm whether it has any effect here.
boxplot(
    smallSales$quantity ~ smallSales$product,
    horizontal = TRUE, rescale = TRUE, las = 1,
    col = rainbow(10),
    main = sprintf("Boxplot individual transactions less than %s litres", maximumLimit),
    xlab = "Litres",
    cex.axis = 0.55, cex.main = 1.4, cex.lab = 1.5,
    col.main = "darkblue", col.lab = "darkblue"
)

plot of chunk unnamed-chunk-1

# Using Lattice package: bwplot(~smallSales$quantity | smallSales$product)
# Free the raw data and the temporary date vectors, which are no longer needed
rm(list = c("data", "temp1", "temp2"))

# Histogram of refuels
#=============================================================================================
hist(
    smallSales$quantity,
    main = sprintf("Frequency Histogram\n individual transactions less than %s litres", maximumLimit),
    xlab = "Litres", ylab = "Freq.",
    las = 1,
    col = rainbow(17), col.main = "darkblue"
)

plot of chunk unnamed-chunk-1

# Create a new dataset (dataDP) with the data grouped by date and product
# description (weekday is included as a grouping column as well)
dataDP <- ddply(smallSales, c("date", "product", "weekday"), summarise,
                qty = sum(quantity), avgprice = mean(price),
                amnt = sum(amount), freq = length(product))
# and print out some statistics
# (the summary() call used to be fused into the comment line and never ran)
summary(dataDP$qty)
sd(dataDP$qty)
[1] 5620
# Total quantity over all date/product groups
sum(dataDP$qty)
[1] 5942079
# Boxplot of the quantities grouped by date and product.
# NOTE(review): `rescale` is not among the documented boxplot() arguments -
# confirm whether it has any effect here.
boxplot(
    dataDP$qty,
    horizontal = TRUE, rescale = TRUE, las = 1,
    col = "yellow",
    main = sprintf("Boxplot Quantity individual transactions less than %s litres", maximumLimit),
    xlab = "Litres", ylab = "Sum of all Products",
    cex.lab = 1.2,
    col.main = "darkblue", col.lab = "darkblue"
)

plot of chunk unnamed-chunk-1

# Build a copy of dataDP sorted in chronological order
temp1 <- as.Date(dataDP$date, format = "%d/%m/%Y")
dataDPbyDate <- dataDP[order(temp1), ]
sort.dates   <- temp1[order(temp1)]
# Qty sold by chronological date
# =================================================================
#barchart(dataDPbyDate$qty ~dataDPbyDate$date|factor.fuels)

# Add a column with the day of the month
dataDP$monthDay <- as.POSIXlt(as.Date(dataDP$date, format = "%d/%m/%Y"))$mday
# Create a new data frame grouped by day of the month
dataDM <- ddply(dataDP, c("monthDay"), summarize,
                qty = sum(qty), avgprice = mean(avgprice),
                amnt = sum(amnt), freq = sum(freq))
plot(
    dataDM$qty,
    type = "b", pch = 10, las = 1, col = "blue",
    main = paste("Total quantities per day of the month\n for individual transactions < ", as.character(maximumLimit)),
    xlab = "Day of the month", ylab = "Litres",
    cex.main = 1.4, cex.lab = 1.4,
    col.main = "darkblue", col.lab = "darkblue"
)

plot of chunk unnamed-chunk-1

# Smallest total quantity over the days of the month
min(dataDM$qty)
[1] 154529
# Largest total quantity over the days of the month
max(dataDM$qty)
[1] 244846
# Ratio between the largest and the smallest day-of-month total
max(dataDM$qty)/min(dataDM$qty)
[1] 1.584
# Same aggregation, now by product and day of the month
dataDPM <- ddply(dataDP, c("monthDay", "product"), summarize,
                 qty = sum(qty), avgprice = mean(avgprice),
                 amnt = sum(amnt), freq = sum(freq))
#boxplot(dataDPM$qty ~ dataDPM$product, horizontal=TRUE)
# Tukey five-number summary of the grouped quantities
fivenum(dataDPM$qty)
[1]     43.0    398.2   3652.7  35345.4 188332.0
# ... sales histogram by quantity (lattice version, absolute counts)
#hist(smallSales$quantity, breaks=20)
histogram(
    ~quantity,
    data = smallSales,
    type = "count", breaks = 20,
    col = rainbow(20),
    main = paste("Histogram of Quantities for individual transactions < ", as.character(maximumLimit))
)

plot of chunk unnamed-chunk-1

# Sort dataset (records grouped by date and product) by decreasing quantity
order.qty <- order(dataDP$qty, decreasing = TRUE)
bestSaleDays <- dataDP[order.qty, c("date", "product", "qty")]
# ... showing the top date/product combinations; the number of rows comes
# from the topX parameter, as in the per-date ranking further down
head(bestSaleDays, topX)
           date   product   qty
1521 31/03/2013 DIESEL e+ 45623
1535 31/08/2012 DIESEL e+ 36657
682  14/10/2012 DIESEL e+ 33758
1542 31/10/2012 DIESEL e+ 33296
1496 30/09/2012 DIESEL e+ 30894
1506 30/11/2012 DIESEL e+ 29137
125  03/08/2012 DIESEL e+ 27207
1292 26/08/2012 DIESEL e+ 26193
446  09/12/2012 DIESEL e+ 26168
937  19/08/2012 DIESEL e+ 25703
# Sales by product: group the individual transactions by product
#=================================================================================
dataP <- ddply(smallSales, c("product"), summarise,
               qty = sum(quantity), avgprice = mean(price),
               amnt = sum(amount), freq = length(product))
# Rebuild the product factor and take the fuel names from its levels
dataP$product <- refactor_factor(dataP$product)
factor.fuels  <- levels(dataP$product)
summary(dataP)
        product       qty             avgprice          amnt        
 DIESEL e+  :1   Min.   :   4362   Min.   :0.982   Min.   :   4316  
 DIESELe+10 :1   1st Qu.:  11568   1st Qu.:1.235   1st Qu.:  13496  
 GAS A      :1   Median : 103710   Median :1.398   Median : 165172  
 GAS B      :1   Mean   : 848868   Mean   :1.341   Mean   :1203727  
 GASOLEO B  :1   3rd Qu.: 711507   3rd Qu.:1.473   3rd Qu.:1048836  
 GASOLINA 95:1   Max.   :4387857   Max.   :1.594   Max.   :6131938  
 gasolina98 :1                                                      
      freq       
 Min.   :    74  
 1st Qu.:   230  
 Median :  2846  
 Mean   : 23642  
 3rd Qu.: 23136  
 Max.   :115843  

# Print the per-product totals
dataP
      product     qty avgprice    amnt   freq
1   DIESEL e+ 4387857   1.3978 6131938 115843
2  DIESELe+10  247733   1.4710  364394   6166
3       GAS A    5756   1.3718    7877    106
4       GAS B    4362   0.9816    4316     74
5   GASOLEO B   17380   1.0987   19115    353
6 GASOLINA 95 1175281   1.4755 1733278  40105
7  gasolina98  103710   1.5938  165172   2846
# Per-product totals, largest quantity first
order.p <- order(dataP$qty, decreasing = TRUE)
dataP[order.p, ]
      product     qty avgprice    amnt   freq
1   DIESEL e+ 4387857   1.3978 6131938 115843
6 GASOLINA 95 1175281   1.4755 1733278  40105
2  DIESELe+10  247733   1.4710  364394   6166
7  gasolina98  103710   1.5938  165172   2846
5   GASOLEO B   17380   1.0987   19115    353
3       GAS A    5756   1.3718    7877    106
4       GAS B    4362   0.9816    4316     74
# Export the per-product totals as a semicolon-separated text file
write.table(dataP, "splitproducts.txt", sep = ";")
# Pie chart: distribution of sales by fuel
# dataP is attached here; the statements down to the matching detach(dataP)
# refer to its columns (qty, amnt, freq) by bare name.
attach(dataP)
pct1 <- paste0(round(qty / sum(qty) * 100, digits = 2), "%")
lbls <- paste(factor.fuels, pct1)   # fuel name followed by its percentage
#lbls <- paste(factor.fuels,"%",sep="") # ad % to labels 
pie(qty, labels = pct1, col = rainbow(7), main = "Sales by product (share)")
legend(1.0, 1.0, lbls, fill = rainbow(7), cex = 0.8)

plot of chunk unnamed-chunk-1

#barchart(qty~product, col=rainbow(7))
# Bar chart of the litres sold per fuel type (qty comes from the attached dataP)
barplot(
    qty,
    names.arg = factor.fuels,
    main = "Quantities (litres) sold by fuel type",
    col = rainbow(7), las = 2,
    cex.main = 1.8, cex.axis = 0.8, cex.names = 0.7
)

plot of chunk unnamed-chunk-1

# Bar chart of the sales amount per fuel type (amnt comes from the attached dataP)
barplot(
    amnt,
    names.arg = factor.fuels,
    main = "Amount of sales (euros) by fuel type",
    col = rainbow(7), las = 2,
    cex.main = 1.8, cex.axis = 0.8, cex.names = 0.7
)

plot of chunk unnamed-chunk-1

# Print the per-product totals again before adding the derived column
dataP
      product     qty avgprice    amnt   freq
1   DIESEL e+ 4387857   1.3978 6131938 115843
2  DIESELe+10  247733   1.4710  364394   6166
3       GAS A    5756   1.3718    7877    106
4       GAS B    4362   0.9816    4316     74
5   GASOLEO B   17380   1.0987   19115    353
6 GASOLINA 95 1175281   1.4755 1733278  40105
7  gasolina98  103710   1.5938  165172   2846
# Average litres per transaction for each product. The columns are referenced
# explicitly through dataP rather than through an attach()ed copy, so the
# result does not depend on what happens to be on the search path.
dataP$meanservice <- dataP$qty / dataP$freq
# Products ranked by average litres per transaction, largest first
dataP[order(dataP$meanservice, decreasing = TRUE), c("product", "meanservice")]
      product meanservice
4       GAS B       58.95
3       GAS A       54.30
5   GASOLEO B       49.24
2  DIESELe+10       40.18
1   DIESEL e+       37.88
7  gasolina98       36.44
6 GASOLINA 95       29.31
# Bar chart of the average litres per transaction, one bar per fuel
barplot(
    dataP$meanservice,
    names.arg = factor.fuels,
    main = paste("Average Litres Per Transaction\n transactions < ", as.character(maximumLimit)),
    col = rainbow(9), las = 2,
    cex.main = 1.8, cex.names = 0.7
)

plot of chunk unnamed-chunk-1

# Remove dataP from the search path again (undoes the earlier attach(dataP))
detach(dataP)

# Basic price statistics by product
#================================================================================
# The last column is a list column holding the quantile() vector of each product.
statsP <- ddply(
    smallSales, c("product"), summarise,
    mean = mean(price),
    sd   = sd(price),
    min  = min(price),
    max  = max(price),
    "quantile values (0% 25% 50% 75% 100%)" = list(quantile(price))
)
statsP
      product   mean      sd   min   max
1   DIESEL e+ 1.3978 0.03558 1.314 1.465
2  DIESELe+10 1.4710 0.03944 1.374 1.532
3       GAS A 1.3718 0.03905 1.270 1.448
4       GAS B 0.9816 0.05169 0.880 1.050
5   GASOLEO B 1.0987 0.03044 1.010 1.138
6 GASOLINA 95 1.4755 0.04523 1.376 1.549
7  gasolina98 1.5938 0.04666 1.493 1.688
  quantile values (0% 25% 50% 75% 100%)
1     1.314, 1.369, 1.398, 1.429, 1.465
2     1.374, 1.443, 1.473, 1.500, 1.532
3     1.270, 1.349, 1.370, 1.400, 1.448
4     0.880, 0.935, 0.995, 1.023, 1.050
5     1.010, 1.085, 1.100, 1.123, 1.138
6     1.376, 1.438, 1.476, 1.510, 1.549
7     1.493, 1.554, 1.595, 1.629, 1.688
# Grouping sales by date (summing up all the different products together)
#=====================================================================
# avgqty is computed from the qty and freq totals defined just before it,
# so the order of the summary expressions matters.
dataD <- ddply(dataDP, c("date", "weekday"), summarise,
               qty = sum(qty),
               avgprice = mean(avgprice),
               freq = sum(freq),
               avgqty = qty / freq)
summary(dataD)
         date          weekday        qty           avgprice   
 01/01/2013:  1   Friday   :48   Min.   : 2775   Min.   :1.27  
 01/02/2013:  1   Monday   :48   1st Qu.:12942   1st Qu.:1.40  
 01/03/2013:  1   Saturday :48   Median :15650   Median :1.44  
 01/04/2013:  1   Sunday   :48   Mean   :17685   Mean   :1.44  
 01/05/2013:  1   Thursday :48   3rd Qu.:21371   3rd Qu.:1.48  
 01/06/2013:  1   Tuesday  :48   Max.   :65286   Max.   :1.55  
 (Other)   :330   Wednesday:48                                 
      freq          avgqty    
 Min.   :  79   Min.   :28.8  
 1st Qu.: 351   1st Qu.:34.5  
 Median : 433   Median :36.3  
 Mean   : 492   Mean   :36.1  
 3rd Qu.: 595   3rd Qu.:37.6  
 Max.   :1879   Max.   :41.3  

# Standard deviation of the total quantity per date
sd(dataD$qty)
[1] 7238
# Rank the date-grouped sales by quantity, largest first
# ======================================
order.qty <- order(dataD$qty, decreasing = TRUE)
bestSaleDays <- dataD[order.qty, c("date", "qty")]
# ... and show the topX best-selling days
head(bestSaleDays, topX)
          date   qty
331 31/03/2013 65286
152 14/10/2012 49368
334 31/08/2012 48319
335 31/10/2012 40863
29  03/08/2012 40791
326 30/09/2012 39852
43  04/11/2012 37106
205 19/08/2012 35947
282 26/08/2012 35835
99  09/12/2012 35229
# Group the individual transactions by day of the week and product
# =================================
dataWP <- ddply(smallSales, c("weekday", "product"), summarise,
                qty = sum(quantity),
                avgprice = mean(price),
                amnt = sum(amount))
dataWP
     weekday     product    qty avgprice      amnt
1     Friday   DIESEL e+ 810163   1.4013 1.135e+06
2     Friday  DIESELe+10  44173   1.4777 6.527e+04
3     Friday       GAS A   1390   1.3695 1.902e+03
4     Friday       GAS B   1260   0.9624 1.225e+03
5     Friday   GASOLEO B   4000   1.1047 4.425e+03
6     Friday GASOLINA 95 202988   1.4812 3.007e+05
7     Friday  gasolina98  17796   1.5990 2.849e+04
8     Monday   DIESEL e+ 587487   1.3854 8.137e+05
9     Monday  DIESELe+10  35140   1.4559 5.121e+04
10    Monday       GAS A    879   1.3731 1.201e+03
11    Monday       GAS B    472   0.9903 4.685e+02
12    Monday   GASOLEO B   1366   1.0880 1.487e+03
13    Monday GASOLINA 95 146498   1.4635 2.142e+05
14    Monday  gasolina98  11429   1.5863 1.813e+04
15  Saturday   DIESEL e+ 508908   1.4008 7.126e+05
16  Saturday  DIESELe+10  33934   1.4752 5.000e+04
17  Saturday       GAS A    319   1.3857 4.396e+02
18  Saturday       GAS B    141   0.9706 1.376e+02
19  Saturday   GASOLEO B   2229   1.1068 2.468e+03
20  Saturday GASOLINA 95 163502   1.4808 2.421e+05
21  Saturday  gasolina98  15482   1.5947 2.462e+04
22    Sunday   DIESEL e+ 799238   1.3989 1.118e+06
23    Sunday  DIESELe+10  48206   1.4729 7.104e+04
24    Sunday       GAS A    258   1.4104 3.627e+02
25    Sunday       GAS B     53   0.9800 5.194e+01
26    Sunday   GASOLEO B   2143   1.1026 2.359e+03
27    Sunday GASOLINA 95 259878   1.4748 3.831e+05
28    Sunday  gasolina98  22055   1.5912 3.502e+04
29  Thursday   DIESEL e+ 587633   1.4023 8.240e+05
30  Thursday  DIESELe+10  29419   1.4738 4.334e+04
31  Thursday       GAS A    849   1.3927 1.179e+03
32  Thursday       GAS B    640   0.9825 6.335e+02
33  Thursday   GASOLEO B   2919   1.0969 3.202e+03
34  Thursday GASOLINA 95 137787   1.4776 2.035e+05
35  Thursday  gasolina98  14064   1.5914 2.239e+04
36   Tuesday   DIESEL e+ 520401   1.3952 7.259e+05
37   Tuesday  DIESELe+10  27000   1.4690 3.965e+04
38   Tuesday       GAS A    563   1.3432 7.558e+02
39   Tuesday       GAS B    523   1.0116 5.311e+02
40   Tuesday   GASOLEO B   1754   1.0945 1.926e+03
41   Tuesday GASOLINA 95 121609   1.4717 1.789e+05
42   Tuesday  gasolina98   9556   1.5971 1.525e+04
43 Wednesday   DIESEL e+ 574027   1.3988 8.029e+05
44 Wednesday  DIESELe+10  29860   1.4698 4.389e+04
45 Wednesday       GAS A   1498   1.3626 2.038e+03
46 Wednesday       GAS B   1273   0.9887 1.269e+03
47 Wednesday   GASOLEO B   2969   1.0911 3.247e+03
48 Wednesday GASOLINA 95 143018   1.4763 2.109e+05
49 Wednesday  gasolina98  13329   1.5970 2.128e+04
# ... some graphs with the sales by day of the week for all the products
options(scipen = 999)   # strong penalty on scientific notation for printed numbers
barchart(
    dataWP$qty ~ factor(dataWP$weekday, levels = WeekDays) | dataWP$product,
    main = list(label = "Sales distribution by product and day of the week", cex = 2),
    xlab = "", ylab = "Sales",
    scales = list(x = list(rot = 90)),
    col = rainbow(10)
)

plot of chunk unnamed-chunk-1

#xyplot(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)), main="Sales distribution by dayof the week", ylab="Sales", xlab="")
# Histogram of the individual quantities, one panel per day of the week
histogram(
    ~smallSales$quantity | factor(smallSales$weekday, levels = WeekDays),
    main = list(label = "Histogram by day of the week", cex = 2),
    xlab = "Quantity",
    col = rainbow(22)
)

plot of chunk unnamed-chunk-1

# ... summing up all the quantities (i.e. total litres sold and amount invoiced, considering all the different products)
dataW <- ddply(dataWP, c("weekday"), summarise,
               qty = sum(qty), amnt = sum(amnt))
# Weekdays ranked by invoiced amount, largest first
dataWOrdered <- dataW[order(dataW$amnt, decreasing = TRUE), ]
dataWOrdered
    weekday     qty    amnt
4    Sunday 1131831 1609760
1    Friday 1081769 1536931
2    Monday  783272 1100365
5  Thursday  773312 1098181
7 Wednesday  765973 1085585
3  Saturday  724515 1032396
6   Tuesday  681407  962871
# Pie chart: % of quantity sold (litres) by day of the week.
# dataWOrdered is sorted by amount, so each label must come from its own
# weekday column; pairing the percentages with the calendar-ordered WeekDays
# vector attached the wrong day name to every slice.
pct1 <- round(dataWOrdered$qty / sum(dataWOrdered$qty) * 100, digits = 2)
lbls <- paste(dataWOrdered$weekday, pct1)   # add percents to labels
lbls <- paste(lbls, "%", sep = "")          # add % sign to labels
pie(dataWOrdered$qty, main = "Sales by day of the week (litres)",
    labels = lbls, col = rainbow(7))

plot of chunk unnamed-chunk-1

# Pie chart: % of total sales amount (euros) by day of the week.
# dataWOrdered is sorted by amount, so each label must come from its own
# weekday column; pairing the percentages with the calendar-ordered WeekDays
# vector attached the wrong day name to every slice.
pct2 <- round(dataWOrdered$amnt / sum(dataWOrdered$amnt) * 100, digits = 2)
lbls <- paste(dataWOrdered$weekday, pct2)   # add percents to labels
lbls <- paste(lbls, "%", sep = "")          # add % sign to labels
pie(dataWOrdered$amnt, main = "Sales by day of the week (Euros)",
    labels = lbls, col = rainbow(7))

plot of chunk unnamed-chunk-1

# On which days are the most expensive products sold?
# Difference between each day's share of the amount (pct2) and its share of
# the litres (pct1). Both vectors follow the row order of dataWOrdered, so the
# day names are taken from that same data frame; building the table as a
# data frame (not via a character matrix) keeps the difference numeric.
diffAmntLitres <- data.frame(weekday = dataWOrdered$weekday,
                             diff = pct2 - pct1)
(diffAmntLitres)
         X1                   X2
1    Sunday   0.0500000000000007
2    Monday   0.0299999999999976
3   Tuesday   -0.119999999999999
4 Wednesday   0.0199999999999996
5  Thursday -0.00999999999999979
6    Friday   0.0600000000000005
7  Saturday  -0.0400000000000009
rm(list = c("lbls", "pct1", "pct2"))    # ... some cleaning

# Sort chronologically by date
dataDbyDate <- dataD[order(as.Date(dataD$date, format = "%d/%m/%Y")), ]
#dataDbyDate$date <- reorder(dataDbyDate$date, neworder=order(dataDbyDate$date))
#xyplot(qty ~ date, data=dataDP, scales=list(x=list(rot=90)))

# graph of a subset of the first 50 records
#xyplot(freq ~ date[order(as.Date(date,format="%d%m%Y"))] | product, data = dataDP[1:50,],  scales=list(x=list(rot=90)))

#plot(dataD$avgqty ~ dataD$date)
# From here on dataD holds the contents of CSVDataDate.csv instead of the
# aggregation computed earlier
dataD <- read.table("CSVDataDate.csv", header = TRUE, sep = ";",
                    dec = ",", na.strings = "NA", strip.white = TRUE)
#xyplot(quantity ~ date, data=dataD, scales=list(x=list(rot=90)))

# Plot of the prices ordered by increasing price across the week
#
# Prints the per-weekday average price of one product (sorted by increasing
# price) followed by the standard deviation of those prices, then plots price
# against weekday with the weekdays laid out in that same price order.
#
#   data    : data frame with columns `product`, `weekday` and `avgprice`
#   product : value of `product` whose rows are selected
#   ...     : further arguments, forwarded to plot()
plotPriceDOF <- function(data, product, ...) {
    o <- data[data$product == product, c("weekday", "avgprice")]
    o <- o[order(o$avgprice), ]
    # `o` is already sorted by price, so its weekday column directly gives the
    # level order; there is no need to sort the data a second time
    o$weekday <- factor(o$weekday, levels = unique(as.character(o$weekday)))
    print(o)
    print(sd(o$avgprice))
    plot(o$avgprice ~ o$weekday, type = "n",
         main = paste(product, " price"),
         xlab = "", ylab = "Price in Euros", las = 2, col = "darkblue", ...)
}
# Lay the plots out on a 2x2 grid and draw one price plot per product
par(mfrow = c(2, 2))
for (description in dataP$product) {
    plotPriceDOF(dataWP, description)
}
     weekday avgprice
8     Monday    1.385
36   Tuesday    1.395
43 Wednesday    1.399
22    Sunday    1.399
15  Saturday    1.401
1     Friday    1.401
29  Thursday    1.402
[1] 0.005809
     weekday avgprice
9     Monday    1.456
37   Tuesday    1.469
44 Wednesday    1.470
23    Sunday    1.473
30  Thursday    1.474
16  Saturday    1.475
2     Friday    1.478
[1] 0.007158
     weekday avgprice
38   Tuesday    1.343
45 Wednesday    1.363
3     Friday    1.369
10    Monday    1.373
17  Saturday    1.386
31  Thursday    1.393
24    Sunday    1.410
[1] 0.02184
     weekday avgprice
4     Friday   0.9624
18  Saturday   0.9706
25    Sunday   0.9800
32  Thursday   0.9825
46 Wednesday   0.9887
11    Monday   0.9903
39   Tuesday   1.0116
[1] 0.01575

plot of chunk unnamed-chunk-1

     weekday avgprice
12    Monday    1.088
47 Wednesday    1.091
40   Tuesday    1.094
33  Thursday    1.097
26    Sunday    1.103
5     Friday    1.105
19  Saturday    1.107
[1] 0.007125
     weekday avgprice
13    Monday    1.463
41   Tuesday    1.472
27    Sunday    1.475
48 Wednesday    1.476
34  Thursday    1.478
20  Saturday    1.481
6     Friday    1.481
[1] 0.006115
     weekday avgprice
14    Monday    1.586
28    Sunday    1.591
35  Thursday    1.591
21  Saturday    1.595
49 Wednesday    1.597
42   Tuesday    1.597
7     Friday    1.599
[1] 0.004446
# Back to a single plot per page (undoes the earlier 2x2 layout)
par(mfrow=c(1,1))

plot of chunk unnamed-chunk-1

# Pairwise comparison between group means (p < 0.05 --> different means)
# Keep only the "DIESEL e+" transactions ...
dataWDiesel <- smallSales[smallSales$product=="DIESEL e+",]

# ... and compare their prices between weekdays. No options are passed, so the
# function's defaults apply (the printed result reports pooled SD and the
# Holm p-value adjustment).
pairwise.t.test(dataWDiesel$price, dataWDiesel$weekday)

    Pairwise comparisons using t tests with pooled SD 

data:  dataWDiesel$price and dataWDiesel$weekday 

          Friday               Monday               Saturday            
Monday    < 0.0000000000000002 -                    -                   
Saturday  0.4460               < 0.0000000000000002 -                   
Sunday    0.0000000000239605   < 0.0000000000000002 0.0000028989867463  
Thursday  0.0272               < 0.0000000000000002 0.0017              
Tuesday   < 0.0000000000000002 < 0.0000000000000002 < 0.0000000000000002
Wednesday 0.0000000002001815   < 0.0000000000000002 0.0000032825472895  
          Sunday               Thursday             Tuesday           
Monday    -                    -                    -                 
Saturday  -                    -                    -                 
Sunday    -                    -                    -                 
Thursday  < 0.0000000000000002 -                    -                 
Tuesday   < 0.0000000000000002 < 0.0000000000000002 -                 
Wednesday 0.6629               < 0.0000000000000002 0.0000000000000004

P value adjustment method: holm 
# Histogram of the individual quantities per product, absolute counts
histogram(
    ~quantity | product,
    data = smallSales,
    type = "count", breaks = 20,
    main = list(label = paste("Histogram of frecuencies for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
    xlab = "Litres", ylab = "Frequency",
    col = rainbow(20)
)

plot of chunk unnamed-chunk-1

# Histogram of the individual quantities per product. No `type` is given, so
# lattice falls back to its default for equally spaced bins, which is the
# percentage of the total; the y axis is labelled accordingly
# (it used to read "Frequency", the label of the count histogram).
histogram(
    ~quantity | product,
    data = smallSales,
    breaks = 20,
    main = list(label = paste("Histogram of probabilities for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
    xlab = "Litres", ylab = "Percent of Total",
    col = rainbow(20)
)

plot of chunk unnamed-chunk-1