PetrolStation2

Emilio — Jun 22, 2014, 10:43 PM

# ===============================================================================
# PSDA.r -> Petrol Station Data Analysis by Emilio González González
#           End of Degree Project.  Jan-June 2014
# ===============================================================================
library("plyr")
library("lattice")
library("ggplot2")
# Rebuild a factor so that only the levels actually present remain.
# Anything that is not a factor is handed back unchanged.
refactor_factor <- function(col) {
    if (!is.factor(col)) {
        return(col)
    }
    factor(col)
}

setwd("~/Economic Degree/TrabajoFinGrado")
# Global parameters used throughout the script
SampleSize <- 10     # Number of records to show when printing a random sample
topX       <- 10     # Number of records to show when asking for the top x
maximumLimit  <- 2000 # Threshold on quantity used to split the data in two subsets: at or below, and above, this value
# English day names, Sunday first, so they can be indexed with POSIXlt's wday + 1
WeekDays <-c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# Read the data, then show its structure, a summary and 10 random records
data <- read.table("CSVCarburantesFecha.csv", header=TRUE, sep=";", na.strings="NA", dec=",", strip.white=TRUE)
str(data)
'data.frame':   185164 obs. of  5 variables:
 $ date    : Factor w/ 337 levels "01/01/2013","01/02/2013",..: 282 325 325 325 325 325 325 325 325 325 ...
 $ product : Factor w/ 8 levels "DIESEL e+","DIESELe+10",..: 1 7 7 1 1 7 1 1 1 1 ...
 $ quantity: num  234.13 3.44 3.44 7.25 7.25 ...
 $ price   : num  1.44 1.46 1.46 1.38 1.38 ...
 $ amount  : num  335.98 5.01 5.01 10.01 10.01 ...
summary(data)
         date               product          quantity         price      
 31/03/2013:  2061   DIESEL e+  :125779   Min.   :    0   Min.   :0.651  
 31/08/2012:  1641   GASOLINA 95: 40313   1st Qu.:   23   1st Qu.:1.376  
 14/10/2012:  1468   DIESELe+10 :  6338   Median :   38   Median :1.409  
 31/10/2012:  1364   GAS B      :  5011   Mean   :  169   Mean   :1.400  
 30/09/2012:  1310   gasolina98 :  2910   3rd Qu.:   53   3rd Qu.:1.446  
 30/11/2012:  1180   GAS C      :  1932   Max.   :84601   Max.   :1.688  
 (Other)   :176140   (Other)    :  2881                                  
     amount     
 Min.   :    0  
 1st Qu.:   32  
 Median :   54  
 Mean   :  179  
 3rd Qu.:   75  
 Max.   :63112  

# Print a random sample of SampleSize records (rows drawn without replacement)
data[sample(nrow(data), SampleSize), ]
             date     product quantity price amount
85323  10/12/2012   DIESEL e+    14.83 1.349  20.01
136869 31/03/2013 GASOLINA 95    19.88 1.509  30.00
66290  03/11/2012 GASOLINA 95    41.14 1.410  58.01
23089  28/08/2012 GASOLINA 95     9.44 1.535  14.49
171310 03/06/2013   DIESEL e+    37.15 1.346  50.00
74407  18/11/2012   DIESEL e+    69.47 1.382  96.01
131462 21/03/2013   DIESEL e+   112.38 1.389 156.10
168398 29/05/2013       GAS B   449.00 0.935 419.82
68684  07/11/2012 GASOLINA 95    50.47 1.402  70.76
117164 16/02/2013   DIESEL e+    13.84 1.445  20.00
# Partition the transactions by quantity: rows strictly above maximumLimit are
# treated as outliers (bigSales) and are not considered in the analysis; the
# remaining rows form smallSales. Rows whose quantity is NA land in neither subset.
bigSales   <- subset(data, quantity > maximumLimit)
smallSales <- subset(data, quantity <= maximumLimit)

# Factor variable with the different fuels available in the dataframe 
factor.fuels <- levels(smallSales$product)

# Workaround to get the days of the week in English on a Spanish-localized
# computer: the English WeekDays vector is indexed with POSIXlt's numeric
# weekday instead of relying on locale-dependent day names.
temp1 <- as.Date(smallSales$date, format="%d/%m/%Y")
temp2 <- as.POSIXlt(temp1)$wday + 1   # wday runs 0 (Sunday) .. 6 (Saturday); +1 makes it a 1-based index
smallSales$weekday <- as.factor(WeekDays[temp2])
summary(smallSales)
         date               product          quantity          price      
 31/03/2013:  2051   DIESEL e+  :125532   Min.   :   0.1   Min.   :0.845  
 31/08/2012:  1619   GASOLINA 95: 40232   1st Qu.:  22.4   1st Qu.:1.379  
 14/10/2012:  1468   DIESELe+10 :  6294   Median :  37.6   Median :1.410  
 31/10/2012:  1343   GAS B      :  4180   Mean   :  67.9   Mean   :1.404  
 30/09/2012:  1298   gasolina98 :  2891   3rd Qu.:  52.6   3rd Qu.:1.447  
 03/08/2012:  1167   GAS C      :  1733   Max.   :2000.0   Max.   :1.688  
 (Other)   :174601   (Other)    :  2685                                   
     amount            weekday     
 Min.   :   0.1   Friday   :32693  
 1st Qu.:  31.0   Monday   :24850  
 Median :  53.5   Saturday :22420  
 Mean   :  86.0   Sunday   :34105  
 3rd Qu.:  75.0   Thursday :24030  
 Max.   :2692.0   Tuesday  :21459  
                  Wednesday:23990  
#Boxplot of quantities by product 
#=============================================================================================
# One horizontal box per product; the title reports the quantity threshold in use.
boxplot(smallSales$quantity ~ smallSales$product,
        cex.axis=0.55, las=1, horizontal=TRUE,
        main=sprintf("Boxplot individual transactions less than %s litres", maximumLimit), col.main="darkblue", col=rainbow(10),
        cex.main=1.4, cex.lab=1.5, col.lab="darkblue", xlab="Litres")

plot of chunk unnamed-chunk-1

# Using Lattice package: bwplot(~smallSales$quantity | smallSales$product)
# Some cleaning of temporary variables/space
rm(data, temp1, temp2)

#Histogram of refuels 
#=============================================================================================
hist(smallSales$quantity, las=1, xlab="Litres", ylab="Freq.",
     main=sprintf("Frequency Histogram\n individual transactions less than %s litres", maximumLimit), 
     col.main="darkblue", col=rainbow(17))

plot of chunk unnamed-chunk-1

# Creation a new dataset (dataDP) with data grouped by date and product description
dataDP <-ddply(smallSales, c("date", "product", "weekday"), summarise, qty=sum(quantity), avgprice=mean(price), amnt=sum(amount), freq=length(product))
# and print out some statistics
#summary(dataDP$qty)
sd(dataDP$qty)
[1] 10013
sum(dataDP$qty)
[1] 12462380
ggplot(dataDP, aes(x=avgprice, fill=product))+geom_density() + ggtitle("Density Plot price per product")

plot of chunk unnamed-chunk-1

# Single horizontal box for the quantities summed per date, product and weekday (dataDP$qty)
boxplot(dataDP$qty, horizontal=TRUE,
        las=1, main=sprintf("Boxplot Quantity individual transactions less than %s litres", maximumLimit), col.main="darkblue", col="yellow",
        cex.lab=1.2, col.lab="darkblue", xlab="Litres", ylab="Sum of all Products")

plot of chunk unnamed-chunk-1

# Creation of a new dataset from dataDP, sorted in chronological date order
temp1 = as.Date(dataDP$date, format="%d/%m/%Y")
sort.dates <- temp1[order(temp1)]
dataDPbyDate <- dataDP[order(temp1),]
# Qty sold by cronological Date
# =================================================================
#barchart(dataDPbyDate$qty ~dataDPbyDate$date|factor.fuels)

# Add a column with the day of the month
dataDP$monthDay <- as.POSIXlt(as.Date(dataDP$date, format="%d/%m/%Y"))$mday
#Create new frameset grouping by day of the month
dataDM <- ddply(dataDP, c("monthDay"), summarize, qty=sum(qty), avgprice=mean(avgprice), amnt=sum(amnt), freq=sum(freq) )
plot(dataDM$qty,  las=1, type="b",xlab="Day of the month", ylab="Litres", 
     cex.main=1.4,col.lab="darkblue", col.main="darkblue", col="blue",
     cex.lab=1.4,  pch=10,
     main=paste("Total quantities per day of the month\n for individual transactions < ",as.character(maximumLimit)))

plot of chunk unnamed-chunk-1

min(dataDM$qty)
[1] 300567
max(dataDM$qty)
[1] 1132072
max(dataDM$qty)/min(dataDM$qty)
[1] 3.766
# by product and day of the month
dataDPM <- ddply(dataDP, c("monthDay","product"), summarize, qty=sum(qty), avgprice=mean(avgprice), amnt=sum(amnt), freq=sum(freq) )
#boxplot(dataDPM$qty ~ dataDPM$product, horizontal=TRUE)
fivenum(dataDPM$qty)
[1]   2246   6495  15376  44420 614567
# ... sales histogram by quantity
#hist(smallSales$quantity, breaks=20)
histogram(~quantity, data=smallSales, breaks=20,type="count", 
          main=paste("Histogram of Quantities for individual transactions < ",as.character(maximumLimit)),
        col=rainbow(20))

plot of chunk unnamed-chunk-1

# Sort dataset (records grouped by date and product) by decreasing quantity
order.qty <- order(dataDP$qty, decreasing=TRUE)
bestSaleDays <- dataDP[order.qty, c("date", "product","qty")]
# ... showing the top (date, product) records; topX sets how many are listed
head(bestSaleDays, topX)
           date product    qty
2164 31/08/2012   GAS B 128429
2171 31/10/2012   GAS B 112666
1048 15/10/2012   GAS B 105331
2123 30/11/2012   GAS B 103506
998  15/02/2013   GAS B  97092
1034 15/08/2012   GAS B  89206
1041 15/09/2012   GAS B  82066
1056 15/11/2012   GAS B  70055
983  14/12/2012   GAS B  68408
1006 15/03/2013   GAS C  63176
# Show sales by product. Grouping by product 
#=================================================================================
dataP <-ddply(smallSales, c("product"), summarise, qty=sum(quantity), avgprice=mean(price), amnt=sum(amount), freq=length(product))
dataP$product <-refactor_factor(dataP$product)
factor.fuels <- levels(dataP$product)
summary(dataP)
       product       qty             avgprice          amnt        
 DIESEL e+ :1   Min.   : 107509   Min.   :0.949   Min.   : 171273  
 DIESELe+10:1   1st Qu.: 238745   1st Qu.:1.068   1st Qu.: 334498  
 GAS A     :1   Median : 777508   Median :1.376   Median : 820168  
 GAS B     :1   Mean   :1557798   Mean   :1.290   Mean   :1972459  
 GAS C     :1   3rd Qu.:1584928   3rd Qu.:1.472   3rd Qu.:1987621  
 GASOLEO B :1   Max.   :6396032   Max.   :1.594   Max.   :8940817  
 (Other)   :2                                                      
      freq       
 Min.   :  1254  
 1st Qu.:  1658  
 Median :  3536  
 Mean   : 22943  
 3rd Qu.: 14778  
 Max.   :125532  

dataP
      product     qty avgprice    amnt   freq
1   DIESEL e+ 6396032   1.3978 8940817 125532
2  DIESELe+10  258926   1.4710  380865   6294
3       GAS A  441361   1.3538  587695   1254
4       GAS B 2780185   0.9785 2701237   4180
5       GAS C 1113655   0.9489 1052641   1733
6   GASOLEO B  178203   1.0979  195398   1431
7 GASOLINA 95 1186509   1.4755 1749749  40232
8  gasolina98  107509   1.5940  171273   2891
order.p <- order(dataP$qty, decreasing=TRUE)
dataP[order.p,]
      product     qty avgprice    amnt   freq
1   DIESEL e+ 6396032   1.3978 8940817 125532
4       GAS B 2780185   0.9785 2701237   4180
7 GASOLINA 95 1186509   1.4755 1749749  40232
5       GAS C 1113655   0.9489 1052641   1733
3       GAS A  441361   1.3538  587695   1254
2  DIESELe+10  258926   1.4710  380865   6294
6   GASOLEO B  178203   1.0979  195398   1431
8  gasolina98  107509   1.5940  171273   2891
write.table(dataP,"splitproducts.txt", sep=";")
# Pie Chart Distribution of sales by fuel
attach(dataP)   # makes the columns of dataP (qty, amnt, freq, ...) visible as bare names
pct1  <- round(qty/sum(qty)*100, digits=2)
pct1  <- paste0(pct1, "%")
lbls <- paste(factor.fuels, pct1)   #add percents to labels
#lbls <- paste(factor.fuels,"%",sep="") # ad % to labels 
# One colour per product, so that no two slices (or legend entries) share a colour
pie(qty, main="Sales by product (share)", labels=pct1, col=rainbow(length(qty)))
legend(1.0, 1.0, lbls, cex=0.8, fill=rainbow(length(qty)))

plot of chunk unnamed-chunk-1

#barchart(qty~product, col=rainbow(7))
# Litres sold per fuel type; one colour per bar so that no colour is reused
barplot(qty, col=rainbow(length(qty)), las=2, 
        main="Quantities (litres) sold by fuel type", cex.main=1.8,
        names.arg=factor.fuels, cex.axis=0.8, cex.names = 0.7)

plot of chunk unnamed-chunk-1

# Sales amount per fuel type; one colour per bar so that no colour is reused
barplot(amnt, col=rainbow(length(amnt)), las=2, 
        main="Amount of sales (euros) by fuel type", cex.main=1.8,
        names.arg=factor.fuels, cex.axis=0.8, cex.names = 0.7)

plot of chunk unnamed-chunk-1

dataP
      product     qty avgprice    amnt   freq
1   DIESEL e+ 6396032   1.3978 8940817 125532
2  DIESELe+10  258926   1.4710  380865   6294
3       GAS A  441361   1.3538  587695   1254
4       GAS B 2780185   0.9785 2701237   4180
5       GAS C 1113655   0.9489 1052641   1733
6   GASOLEO B  178203   1.0979  195398   1431
7 GASOLINA 95 1186509   1.4755 1749749  40232
8  gasolina98  107509   1.5940  171273   2891
# Average quantity per transaction for each product. The columns are referenced
# through dataP explicitly so the result does not depend on what is on the search path.
dataP$meanservice <- dataP$qty / dataP$freq
dataP[order(dataP$meanservice, decreasing=TRUE),c("product","meanservice")]
      product meanservice
4       GAS B      665.12
5       GAS C      642.62
3       GAS A      351.96
6   GASOLEO B      124.53
1   DIESEL e+       50.95
2  DIESELe+10       41.14
8  gasolina98       37.19
7 GASOLINA 95       29.49
barplot(dataP$meanservice, col=rainbow(9), las=2, cex.main=1.8, cex.names= 0.7,
        main=paste("Average Litres Per Transaction\n transactions < ",as.character(maximumLimit)),
        names.arg=factor.fuels)

plot of chunk unnamed-chunk-1

detach(dataP)

# Basic statistics by product
#================================================================================
statsP <-ddply(smallSales, c("product"),  summarise, mean=mean(price), sd=sd(price), min=min(price), max=max(price), "quantile values (0% 25% 50% 75% 100%)"=list(quantile(price)))
statsP              
      product   mean      sd   min   max
1   DIESEL e+ 1.3978 0.03562 1.296 1.465
2  DIESELe+10 1.4710 0.03931 1.374 1.532
3       GAS A 1.3538 0.04391 1.168 1.465
4       GAS B 0.9785 0.04908 0.855 1.380
5       GAS C 0.9489 0.02634 0.845 1.000
6   GASOLEO B 1.0979 0.03450 1.003 1.138
7 GASOLINA 95 1.4755 0.04521 1.376 1.549
8  gasolina98 1.5940 0.04653 1.493 1.688
  quantile values (0% 25% 50% 75% 100%)
1     1.296, 1.369, 1.398, 1.429, 1.465
2     1.374, 1.443, 1.473, 1.500, 1.532
3     1.168, 1.340, 1.355, 1.380, 1.465
4     0.855, 0.940, 0.983, 1.020, 1.380
5     0.845, 0.935, 0.950, 0.970, 1.000
6     1.003, 1.085, 1.105, 1.123, 1.138
7     1.376, 1.438, 1.476, 1.510, 1.549
8     1.493, 1.554, 1.595, 1.629, 1.688
# Grouping sales by date (summing up altogeher the different products)
#=====================================================================
dataD <-ddply(dataDP, c("date", "weekday"), summarise, qty=sum(qty), avgprice=mean(avgprice), freq=sum(freq), avgqty=qty/freq )
summary(dataD)
         date          weekday        qty            avgprice   
 01/01/2013:  1   Friday   :48   Min.   :   234   Min.   :1.15  
 01/02/2013:  1   Monday   :48   1st Qu.: 23320   1st Qu.:1.28  
 01/03/2013:  1   Saturday :48   Median : 30047   Median :1.33  
 01/04/2013:  1   Sunday   :48   Mean   : 36980   Mean   :1.33  
 01/05/2013:  1   Thursday :49   3rd Qu.: 39444   3rd Qu.:1.38  
 01/06/2013:  1   Tuesday  :48   Max.   :213260   Max.   :1.53  
 (Other)   :331   Wednesday:48                                  
      freq          avgqty     
 Min.   :   1   Min.   : 34.6  
 1st Qu.: 402   1st Qu.: 42.8  
 Median : 487   Median : 64.0  
 Mean   : 545   Mean   : 68.1  
 3rd Qu.: 649   3rd Qu.: 78.7  
 Max.   :2051   Max.   :284.3  

sd(dataD$qty)
[1] 28251
# Sorting date-grouped sales by Quantity
# ======================================
order.qty <- order(dataD$qty, decreasing=T)
bestSaleDays <- dataD[order.qty,c("date", "qty")]
# ... showing the top best seller days
head(bestSaleDays, topX)
          date    qty
335 31/08/2012 213260
336 31/10/2012 212530
329 30/11/2012 198730
163 15/10/2012 164745
154 14/12/2012 154063
332 31/03/2013 153038
331 31/01/2013 131348
156 15/02/2013 130803
164 15/11/2012 125366
161 15/08/2012 123980
# Grouping sales by day of the Week
# =================================
dataWP <-ddply(smallSales, c("weekday", "product"), summarise, qty=sum(quantity), avgprice=mean(price), amnt=sum(amount))
dataWP
     weekday     product     qty avgprice    amnt
1     Friday   DIESEL e+ 1101728   1.4013 1543662
2     Friday  DIESELe+10   46628   1.4776   68886
3     Friday       GAS A  104869   1.3457  138986
4     Friday       GAS B  759356   0.9773  737556
5     Friday       GAS C  285520   0.9515  270380
6     Friday   GASOLEO B   21837   1.0992   23922
7     Friday GASOLINA 95  205155   1.4811  303813
8     Friday  gasolina98   18575   1.5990   29732
9     Monday   DIESEL e+  971203   1.3856 1346817
10    Monday  DIESELe+10   36866   1.4558   53720
11    Monday       GAS A   75234   1.3527  100092
12    Monday       GAS B  493217   0.9793  478483
13    Monday       GAS C  220170   0.9480  208153
14    Monday   GASOLEO B   35947   1.0938   39347
15    Monday GASOLINA 95  147822   1.4635  216127
16    Monday  gasolina98   11926   1.5864   18919
17  Saturday   DIESEL e+  622577   1.4009  872176
18  Saturday  DIESELe+10   34919   1.4753   51464
19  Saturday       GAS A   21474   1.3712   29177
20  Saturday       GAS B  150607   1.0001  149997
21  Saturday       GAS C   45117   0.9484   42484
22  Saturday   GASOLEO B   13265   1.1053   14647
23  Saturday GASOLINA 95  164191   1.4808  243093
24  Saturday  gasolina98   16132   1.5956   25688
25    Sunday   DIESEL e+  905489   1.3989 1266499
26    Sunday  DIESELe+10   49860   1.4730   73490
27    Sunday       GAS A   20676   1.3824   28251
28    Sunday       GAS B   95732   1.0016   94426
29    Sunday       GAS C   42766   0.9402   40163
30    Sunday   GASOLEO B   13488   1.0960   14745
31    Sunday GASOLINA 95  262063   1.4748  386296
32    Sunday  gasolina98   22654   1.5916   35999
33  Thursday   DIESEL e+  977275   1.4023 1370729
34  Thursday  DIESELe+10   30918   1.4740   45562
35  Thursday       GAS A   62653   1.3564   83385
36  Thursday       GAS B  404642   0.9784  393853
37  Thursday       GAS C  166777   0.9522  158436
38  Thursday   GASOLEO B   28778   1.0988   31629
39  Thursday GASOLINA 95  139241   1.4776  205643
40  Thursday  gasolina98   14569   1.5914   23189
41   Tuesday   DIESEL e+  883052   1.3952 1231954
42   Tuesday  DIESELe+10   27782   1.4691   40803
43   Tuesday       GAS A   59738   1.3501   79325
44   Tuesday       GAS B  346757   0.9671  332047
45   Tuesday       GAS C  194313   0.9446  182886
46   Tuesday   GASOLEO B   32193   1.0998   35377
47   Tuesday GASOLINA 95  123361   1.4717  181454
48   Tuesday  gasolina98    9884   1.5966   15766
49 Wednesday   DIESEL e+  934707   1.3991 1308980
50 Wednesday  DIESELe+10   31953   1.4694   46940
51 Wednesday       GAS A   96717   1.3517  128478
52 Wednesday       GAS B  529874   0.9759  514875
53 Wednesday       GAS C  158992   0.9500  150140
54 Wednesday   GASOLEO B   32695   1.0957   35731
55 Wednesday GASOLINA 95  144676   1.4762  213325
56 Wednesday  gasolina98   13769   1.5970   21980
# ... some graphs with the sales by day of the week for all the products
options(scipen=999)
barchart(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)), 
       col=rainbow(10), main=list(label="Sales distribution by product and day of the week",cex=2), ylab="Sales",xlab="")

plot of chunk unnamed-chunk-1

#xyplot(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)), main="Sales distribution by dayof the week", ylab="Sales", xlab="")
histogram(~smallSales$quantity | factor(smallSales$weekday, levels=WeekDays), 
          main=list(label="Histogram by day of the week",cex=2), 
          xlab="Quantity", col=rainbow(22))

plot of chunk unnamed-chunk-1

# ... summing up all the quantities (i.e. total litres sold and euros invoiced, considering all the different products)
dataW <- ddply(dataWP, c("weekday"), summarise, qty=sum(qty), amnt=sum(amnt))
dataWOrdered <-dataW[order(dataW$amnt, decreasing=T),]
dataWOrdered
    weekday     qty    amnt
1    Friday 2543668 3116937
2    Monday 1992386 2461658
7 Wednesday 1943382 2420448
5  Thursday 1824853 2312426
6   Tuesday 1677081 2099613
4    Sunday 1412728 1939869
3  Saturday 1068282 1428726
# Pie Chart %Qty sold (litre) by day of the week
pct1  <- round(dataWOrdered$qty/sum(dataWOrdered$qty)*100, digits=2)
# Label every slice with the weekday of its own row: dataWOrdered is sorted by
# amount, so its rows are not in the calendar order of WeekDays.
lbls <- paste(dataWOrdered$weekday, pct1)   # add percents to labels
lbls <- paste0(lbls, "%")                   # add % to labels
pie(dataWOrdered$qty, main="Sales by day of the week (litres)", labels=lbls, col=rainbow(7))

plot of chunk unnamed-chunk-1

# Pie Chart % Total Sales Amount (euros) by day of the week
pct2  <- round(dataWOrdered$amnt/sum(dataWOrdered$amnt)*100, digits=2)
# Label every slice with the weekday of its own row: dataWOrdered is sorted by
# amount, so its rows are not in the calendar order of WeekDays.
lbls <- paste(dataWOrdered$weekday, pct2)   # add percents to labels
lbls <- paste0(lbls, "%")                   # add % to labels
pie(dataWOrdered$amnt, main="Sales by day of the week (Euros)", labels=lbls, col=rainbow(7))

plot of chunk unnamed-chunk-1

# Which days are sold the most expensive products?
# For each weekday: its share of the euros minus its share of the litres, in
# percentage points. pct1 and pct2 follow the row order of dataWOrdered, so the
# weekday names are taken from that same data frame; the difference stays numeric.
# NOTE(review): the table printed after this call in this transcript came from
# the earlier version, which paired these values with WeekDays in calendar order.
diffAmntLitres <- data.frame(weekday=dataWOrdered$weekday, diff=pct2-pct1)
(diffAmntLitres)
         X1                  X2
1    Sunday               -0.66
2    Monday  -0.390000000000001
3   Tuesday               -0.25
4 Wednesday 0.00999999999999979
5  Thursday               -0.15
6    Friday   0.949999999999999
7  Saturday                0.48
rm(lbls, pct1, pct2)    # ... some cleaning

# Sort chronologically by date
dataDbyDate = dataD[order(as.Date(dataD$date, format="%d/%m/%Y")),]
#dataDbyDate$date <- reorder(dataDbyDate$date, neworder=order(dataDbyDate$date))
#xyplot(qty ~ date, data=dataDP, scales=list(x=list(rot=90)))

# graph of a subset of the first 50 registers
#xyplot(freq ~ date[order(as.Date(date,format="%d%m%Y"))] | product, data = dataDP[1:50,],  scales=list(x=list(rot=90)))

#plot(dataD$avgqty ~ dataD$date)
dataD <- read.table("CSVDataDate.csv", header=TRUE, sep=";", na.strings="NA", dec=",", strip.white=TRUE)
#xyplot(quantity ~ date, data=dataD, scales=list(x=list(rot=90)))

# Plot the average price of one product by day of the week, with the days
# ordered by increasing price.
#
# Args:
#   data:    data frame with columns `weekday`, `product` and `avgprice`.
#   product: value of data$product that selects the rows to plot.
#   ...:     accepted but not used.
#
# Side effects: prints the selected rows (sorted by avgprice) and the standard
# deviation of their avgprice, then draws the plot on the current device.
plotPriceDOF <- function ( data, product, ...) {
    o <- data[data$product == product, c("weekday","avgprice")]
    o <- o[order(o$avgprice),]
    # Factor of weekdays whose levels follow the price ordering. o is already
    # sorted, so its weekday column gives the level order directly; unique()
    # keeps factor() from failing on duplicated levels when a weekday repeats.
    o$weekday <- factor(o$weekday, levels=unique(as.character(o$weekday)))
    print(o)
    print(sd(o$avgprice))
    plot(o$avgprice ~ o$weekday, type="n",
         main=paste(product, " price"),
         xlab ="", ylab="Price in Euros", las=2, col="darkblue" )
}
par(mfrow=c(2,2))
for (description in dataP$product) plotPriceDOF(dataWP, description)
     weekday avgprice
9     Monday    1.386
41   Tuesday    1.395
25    Sunday    1.399
49 Wednesday    1.399
17  Saturday    1.401
1     Friday    1.401
33  Thursday    1.402
[1] 0.005769
     weekday avgprice
10    Monday    1.456
42   Tuesday    1.469
50 Wednesday    1.469
26    Sunday    1.473
34  Thursday    1.474
18  Saturday    1.475
2     Friday    1.478
[1] 0.007215
     weekday avgprice
3     Friday    1.346
43   Tuesday    1.350
51 Wednesday    1.352
11    Monday    1.353
35  Thursday    1.356
19  Saturday    1.371
27    Sunday    1.382
[1] 0.01323
     weekday avgprice
44   Tuesday   0.9671
52 Wednesday   0.9759
4     Friday   0.9773
36  Thursday   0.9784
12    Monday   0.9793
20  Saturday   1.0001
28    Sunday   1.0016
[1] 0.01298

plot of chunk unnamed-chunk-1

     weekday avgprice
29    Sunday   0.9402
45   Tuesday   0.9446
13    Monday   0.9480
21  Saturday   0.9484
53 Wednesday   0.9500
5     Friday   0.9515
37  Thursday   0.9522
[1] 0.004193
     weekday avgprice
14    Monday    1.094
54 Wednesday    1.096
30    Sunday    1.096
38  Thursday    1.099
6     Friday    1.099
46   Tuesday    1.100
22  Saturday    1.105
[1] 0.003752
     weekday avgprice
15    Monday    1.464
47   Tuesday    1.472
31    Sunday    1.475
55 Wednesday    1.476
39  Thursday    1.478
23  Saturday    1.481
7     Friday    1.481
[1] 0.00608
     weekday avgprice
16    Monday    1.586
40  Thursday    1.591
32    Sunday    1.592
24  Saturday    1.596
48   Tuesday    1.597
56 Wednesday    1.597
8     Friday    1.599
[1] 0.00437

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))

# PairWise comparison between group means (p<0.05--> different mean)
dataWDiesel <- smallSales[smallSales$product=="DIESEL e+",]

pairwise.t.test(dataWDiesel$price, dataWDiesel$weekday)

    Pairwise comparisons using t tests with pooled SD 

data:  dataWDiesel$price and dataWDiesel$weekday 

          Friday               Monday               Saturday            
Monday    < 0.0000000000000002 -                    -                   
Saturday  0.6729               < 0.0000000000000002 -                   
Sunday    0.0000000000144801   < 0.0000000000000002 0.0000006118621281  
Thursday  0.0144               < 0.0000000000000002 0.0022              
Tuesday   < 0.0000000000000002 < 0.0000000000000002 < 0.0000000000000002
Wednesday 0.0000000342377153   < 0.0000000000000002 0.0000504246623320  
          Sunday               Thursday             Tuesday             
Monday    -                    -                    -                   
Saturday  -                    -                    -                   
Sunday    -                    -                    -                   
Thursday  < 0.0000000000000002 -                    -                   
Tuesday   < 0.0000000000000002 < 0.0000000000000002 -                   
Wednesday 0.6729               0.0000000000000046   < 0.0000000000000002

P value adjustment method: holm 
histogram(~quantity | product, data=smallSales, 
          main=list(label=paste("Histogram of frecuencies for individual transactions < ",as.character(maximumLimit)),cex=2.0),
          breaks=20,type="count",  xlab="Litres", ylab="Frequency",        col=rainbow(20))

plot of chunk unnamed-chunk-1

# Same histogram per product, but without type="count": lattice then uses its
# default type, so the y axis is labelled as a percentage rather than a frequency.
histogram(~quantity | product, data=smallSales, 
          main=list(label=paste("Histogram of probabilities for individual transactions < ",as.character(maximumLimit)),cex=2.0),
          breaks=20,  xlab="Litres", ylab="Percent of Total",        col=rainbow(20))

plot of chunk unnamed-chunk-1