Emilio — Jun 22, 2014, 10:43 PM
# ===============================================================================
# PSDA.r -> Petrol Station Data Analysis by Emilio González González
# End of Degree Project. Jan-June 2014
# ===============================================================================
library("plyr")
library("lattice")
library("ggplot2")
# Re-encode a factor so that only the levels still present in it remain.
# Anything that is not a factor is handed back unchanged.
refactor_factor <- function(col) {
  if (!is.factor(col)) {
    return(col)
  }
  factor(col)
}
# NOTE(review): setwd() ties the script to one machine's folder layout; the
# relative file names used below are resolved against this directory.
setwd("~/Economic Degree/TrabajoFinGrado")
# Setting of global parameters
SampleSize <- 10 # Number of records to show when printing a sample
topX <- 10 # Number of records to show when asking for the top x
maximumLimit <- 2000 # Threshold used to split the file in two subsets: at most / above this quantity
WeekDays <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Data read. Show structure, summary and a few random records.
# stringsAsFactors = TRUE is spelled out because the rest of the script relies
# on `date` and `product` being factors (levels(), per-level counts in
# summary()); since R 4.0.0 read.table() no longer converts strings by default.
data <- read.table("CSVCarburantesFecha.csv", header = TRUE, sep = ";",
                   na.strings = "NA", dec = ",", strip.white = TRUE,
                   stringsAsFactors = TRUE)
str(data)
'data.frame': 185164 obs. of 5 variables:
$ date : Factor w/ 337 levels "01/01/2013","01/02/2013",..: 282 325 325 325 325 325 325 325 325 325 ...
$ product : Factor w/ 8 levels "DIESEL e+","DIESELe+10",..: 1 7 7 1 1 7 1 1 1 1 ...
$ quantity: num 234.13 3.44 3.44 7.25 7.25 ...
$ price : num 1.44 1.46 1.46 1.38 1.38 ...
$ amount : num 335.98 5.01 5.01 10.01 10.01 ...
# Per-column summary of the raw data as read from the CSV file
summary(data)
date product quantity price
31/03/2013: 2061 DIESEL e+ :125779 Min. : 0 Min. :0.651
31/08/2012: 1641 GASOLINA 95: 40313 1st Qu.: 23 1st Qu.:1.376
14/10/2012: 1468 DIESELe+10 : 6338 Median : 38 Median :1.409
31/10/2012: 1364 GAS B : 5011 Mean : 169 Mean :1.400
30/09/2012: 1310 gasolina98 : 2910 3rd Qu.: 53 3rd Qu.:1.446
30/11/2012: 1180 GAS C : 1932 Max. :84601 Max. :1.688
(Other) :176140 (Other) : 2881
amount
Min. : 0
1st Qu.: 32
Median : 54
Mean : 179
3rd Qu.: 75
Max. :63112
# Print a random sample of records; the sample size comes from the SampleSize
# global parameter instead of a hard-coded literal.
data[sample(nrow(data), SampleSize), ]
date product quantity price amount
85323 10/12/2012 DIESEL e+ 14.83 1.349 20.01
136869 31/03/2013 GASOLINA 95 19.88 1.509 30.00
66290 03/11/2012 GASOLINA 95 41.14 1.410 58.01
23089 28/08/2012 GASOLINA 95 9.44 1.535 14.49
171310 03/06/2013 DIESEL e+ 37.15 1.346 50.00
74407 18/11/2012 DIESEL e+ 69.47 1.382 96.01
131462 21/03/2013 DIESEL e+ 112.38 1.389 156.10
168398 29/05/2013 GAS B 449.00 0.935 419.82
68684 07/11/2012 GASOLINA 95 50.47 1.402 70.76
117164 16/02/2013 DIESEL e+ 13.84 1.445 20.00
# Split records (transactions) into two subsets depending on quantity being at
# most, or bigger than, a given threshold. Transactions with a quantity bigger
# than the threshold are treated as outliers and are not analysed further.
bigSales <- data[which(data$quantity > maximumLimit), ]
smallSales <- data[which(data$quantity <= maximumLimit), ]
# Names of the different fuels available in the data frame
factor.fuels <- levels(smallSales$product)
# Workaround to get the days of the week in English on a Spanish-localized
# computer: index WeekDays with the numeric day of the week. POSIXlt's wday is
# 0-based starting on Sunday, hence the +1.
temp1 <- as.Date(smallSales$date, format = "%d/%m/%Y")
temp2 <- as.POSIXlt(temp1)$wday + 1
smallSales$weekday <- as.factor(WeekDays[temp2])
summary(smallSales)
date product quantity price
31/03/2013: 2051 DIESEL e+ :125532 Min. : 0.1 Min. :0.845
31/08/2012: 1619 GASOLINA 95: 40232 1st Qu.: 22.4 1st Qu.:1.379
14/10/2012: 1468 DIESELe+10 : 6294 Median : 37.6 Median :1.410
31/10/2012: 1343 GAS B : 4180 Mean : 67.9 Mean :1.404
30/09/2012: 1298 gasolina98 : 2891 3rd Qu.: 52.6 3rd Qu.:1.447
03/08/2012: 1167 GAS C : 1733 Max. :2000.0 Max. :1.688
(Other) :174601 (Other) : 2685
amount weekday
Min. : 0.1 Friday :32693
1st Qu.: 31.0 Monday :24850
Median : 53.5 Saturday :22420
Mean : 86.0 Sunday :34105
3rd Qu.: 75.0 Thursday :24030
Max. :2692.0 Tuesday :21459
Wednesday:23990
# Boxplot of quantities by product
# =============================================================================================
boxplot(
  smallSales$quantity ~ smallSales$product,
  rescale = TRUE,
  cex.axis = 0.55, las = 1, horizontal = TRUE,
  main = sprintf("Boxplot individual transactions less than %s litres", maximumLimit),
  col.main = "darkblue", col = rainbow(10),
  cex.main = 1.4, cex.lab = 1.5, col.lab = "darkblue", xlab = "Litres"
)
# Using Lattice package: bwplot(~smallSales$quantity | smallSales$product)
# Drop the raw data and the temporaries that are no longer needed
rm(list = c("data", "temp1", "temp2"))
# Histogram of refuels
# =============================================================================================
hist(
  smallSales$quantity,
  las = 1, xlab = "Litres", ylab = "Freq.",
  main = sprintf("Frequency Histogram\n individual transactions less than %s litres", maximumLimit),
  col.main = "darkblue", col = rainbow(17)
)
# New dataset (dataDP): one row per date / product / weekday combination with
# total quantity, mean price, total amount and number of transactions
dataDP <- ddply(
  smallSales, c("date", "product", "weekday"), summarise,
  qty = sum(quantity),
  avgprice = mean(price),
  amnt = sum(amount),
  freq = length(product)
)
# ... and print out some statistics
# summary(dataDP$qty)
sd(dataDP$qty)
[1] 10013
# Total quantity over all the date/product groups of dataDP
sum(dataDP$qty)
[1] 12462380
# Density of the average price, one filled curve per product
ggplot(dataDP, aes(x = avgprice, fill = product)) +
  geom_density() +
  ggtitle("Density Plot price per product")
# Boxplot of the grouped quantities (all products together)
boxplot(
  dataDP$qty,
  rescale = TRUE, horizontal = TRUE, las = 1,
  main = sprintf("Boxplot Quantity individual transactions less than %s litres", maximumLimit),
  col.main = "darkblue", col = "yellow",
  cex.lab = 1.2, col.lab = "darkblue",
  xlab = "Litres", ylab = "Sum of all Products"
)
# New dataset built from dataDP, sorted chronologically by date
temp1 <- as.Date(dataDP$date, format = "%d/%m/%Y")
sort.dates <- temp1[order(temp1)]
dataDPbyDate <- dataDP[order(temp1), ]
# Qty sold by chronological date
# =================================================================
#barchart(dataDPbyDate$qty ~dataDPbyDate$date|factor.fuels)
# Add a column with the day of the month (temp1 holds the parsed dates of dataDP)
dataDP$monthDay <- as.POSIXlt(temp1)$mday
# New data frame grouping by day of the month
dataDM <- ddply(
  dataDP, c("monthDay"), summarize,
  qty = sum(qty),
  avgprice = mean(avgprice),
  amnt = sum(amnt),
  freq = sum(freq)
)
plot(
  dataDM$qty,
  las = 1, type = "b", pch = 10, col = "blue",
  xlab = "Day of the month", ylab = "Litres",
  cex.main = 1.4, cex.lab = 1.4,
  col.lab = "darkblue", col.main = "darkblue",
  main = paste("Total quantities per day of the month\n for individual transactions < ", as.character(maximumLimit))
)
# Smallest total quantity among the days of the month
min(dataDM$qty)
[1] 300567
# Largest total quantity among the days of the month
max(dataDM$qty)
[1] 1132072
# Ratio between the largest and the smallest day-of-the-month totals
max(dataDM$qty)/min(dataDM$qty)
[1] 3.766
# Same aggregation, now by day of the month and product
dataDPM <- ddply(
  dataDP, c("monthDay", "product"), summarize,
  qty = sum(qty),
  avgprice = mean(avgprice),
  amnt = sum(amnt),
  freq = sum(freq)
)
#boxplot(dataDPM$qty ~ dataDPM$product, horizontal=TRUE)
# Tukey five-number summary of the grouped quantities
fivenum(dataDPM$qty)
[1] 2246 6495 15376 44420 614567
# ... sales histogram by quantity
#hist(smallSales$quantity, breaks=20)
histogram(~quantity, data = smallSales, breaks = 20, type = "count",
          main = paste("Histogram of Quantities for individual transactions < ", as.character(maximumLimit)),
          col = rainbow(20))
# Sort dataset (records grouped by date and product) by decreasing quantity.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
order.qty <- order(dataDP$qty, decreasing = TRUE)
bestSaleDays <- dataDP[order.qty, c("date", "product", "qty")]
# Show the top records; the count comes from the topX global parameter
head(bestSaleDays, topX)
date product qty
2164 31/08/2012 GAS B 128429
2171 31/10/2012 GAS B 112666
1048 15/10/2012 GAS B 105331
2123 30/11/2012 GAS B 103506
998 15/02/2013 GAS B 97092
1034 15/08/2012 GAS B 89206
1041 15/09/2012 GAS B 82066
1056 15/11/2012 GAS B 70055
983 14/12/2012 GAS B 68408
1006 15/03/2013 GAS C 63176
# Sales by product: one row per product
# =================================================================================
dataP <- ddply(
  smallSales, c("product"), summarise,
  qty = sum(quantity),
  avgprice = mean(price),
  amnt = sum(amount),
  freq = length(product)
)
# Re-encode the product factor and keep its level names for chart labels
dataP$product <- refactor_factor(dataP$product)
factor.fuels <- levels(dataP$product)
summary(dataP)
product qty avgprice amnt
DIESEL e+ :1 Min. : 107509 Min. :0.949 Min. : 171273
DIESELe+10:1 1st Qu.: 238745 1st Qu.:1.068 1st Qu.: 334498
GAS A :1 Median : 777508 Median :1.376 Median : 820168
GAS B :1 Mean :1557798 Mean :1.290 Mean :1972459
GAS C :1 3rd Qu.:1584928 3rd Qu.:1.472 3rd Qu.:1987621
GASOLEO B :1 Max. :6396032 Max. :1.594 Max. :8940817
(Other) :2
freq
Min. : 1254
1st Qu.: 1658
Median : 3536
Mean : 22943
3rd Qu.: 14778
Max. :125532
# Print the per-product totals
dataP
product qty avgprice amnt freq
1 DIESEL e+ 6396032 1.3978 8940817 125532
2 DIESELe+10 258926 1.4710 380865 6294
3 GAS A 441361 1.3538 587695 1254
4 GAS B 2780185 0.9785 2701237 4180
5 GAS C 1113655 0.9489 1052641 1733
6 GASOLEO B 178203 1.0979 195398 1431
7 GASOLINA 95 1186509 1.4755 1749749 40232
8 gasolina98 107509 1.5940 171273 2891
# Row permutation that lists the products from the largest to the smallest
# total quantity, then the table printed in that order
order.p <- order(dataP$qty, decreasing = TRUE)
dataP[order.p, ]
product qty avgprice amnt freq
1 DIESEL e+ 6396032 1.3978 8940817 125532
4 GAS B 2780185 0.9785 2701237 4180
7 GASOLINA 95 1186509 1.4755 1749749 40232
5 GAS C 1113655 0.9489 1052641 1733
3 GAS A 441361 1.3538 587695 1254
2 DIESELe+10 258926 1.4710 380865 6294
6 GASOLEO B 178203 1.0979 195398 1431
8 gasolina98 107509 1.5940 171273 2891
write.table(dataP, "splitproducts.txt", sep = ";")
# Pie chart: distribution of sales by fuel.
# Columns are referenced explicitly through dataP$ rather than attach()/detach():
# attached copies do not follow later changes to dataP and can be masked by
# globals of the same name.
# One colour per product, so that no two slices or bars share a colour.
fuel.colours <- rainbow(nrow(dataP))
pct1 <- round(dataP$qty / sum(dataP$qty) * 100, digits = 2)
pct1 <- paste0(pct1, "%")
lbls <- paste(factor.fuels, pct1) # add percents to labels
#lbls <- paste(factor.fuels,"%",sep="") # add % to labels
pie(dataP$qty, main = "Sales by product (share)", labels = pct1, col = fuel.colours)
legend(1.0, 1.0, lbls, cex = 0.8, fill = fuel.colours)
#barchart(qty~product, col=rainbow(7))
barplot(dataP$qty, col = fuel.colours, las = 2,
        main = "Quantities (litres) sold by fuel type", cex.main = 1.8,
        names.arg = factor.fuels, cex.axis = 0.8, cex.names = 0.7)
barplot(dataP$amnt, col = fuel.colours, las = 2,
        main = "Amount of sales (euros) by fuel type", cex.main = 1.8,
        names.arg = factor.fuels, cex.axis = 0.8, cex.names = 0.7)
dataP
product qty avgprice amnt freq
1 DIESEL e+ 6396032 1.3978 8940817 125532
2 DIESELe+10 258926 1.4710 380865 6294
3 GAS A 441361 1.3538 587695 1254
4 GAS B 2780185 0.9785 2701237 4180
5 GAS C 1113655 0.9489 1052641 1733
6 GASOLEO B 178203 1.0979 195398 1431
7 GASOLINA 95 1186509 1.4755 1749749 40232
8 gasolina98 107509 1.5940 171273 2891
# Average litres per transaction for each product
dataP$meanservice <- dataP$qty / dataP$freq
dataP[order(dataP$meanservice, decreasing = TRUE), c("product", "meanservice")]
product meanservice
4 GAS B 665.12
5 GAS C 642.62
3 GAS A 351.96
6 GASOLEO B 124.53
1 DIESEL e+ 50.95
2 DIESELe+10 41.14
8 gasolina98 37.19
7 GASOLINA 95 29.49
barplot(dataP$meanservice, col = fuel.colours, las = 2, cex.main = 1.8, cex.names = 0.7,
        main = paste("Average Litres Per Transaction\n transactions < ", as.character(maximumLimit)),
        names.arg = factor.fuels)
# Basic price statistics by product: mean, standard deviation, extremes and
# the quantile() values, stored as a list column
# ================================================================================
statsP <- ddply(
  smallSales, c("product"), summarise,
  mean = mean(price),
  sd = sd(price),
  min = min(price),
  max = max(price),
  "quantile values (0% 25% 50% 75% 100%)" = list(quantile(price))
)
statsP
product mean sd min max
1 DIESEL e+ 1.3978 0.03562 1.296 1.465
2 DIESELe+10 1.4710 0.03931 1.374 1.532
3 GAS A 1.3538 0.04391 1.168 1.465
4 GAS B 0.9785 0.04908 0.855 1.380
5 GAS C 0.9489 0.02634 0.845 1.000
6 GASOLEO B 1.0979 0.03450 1.003 1.138
7 GASOLINA 95 1.4755 0.04521 1.376 1.549
8 gasolina98 1.5940 0.04653 1.493 1.688
quantile values (0% 25% 50% 75% 100%)
1 1.296, 1.369, 1.398, 1.429, 1.465
2 1.374, 1.443, 1.473, 1.500, 1.532
3 1.168, 1.340, 1.355, 1.380, 1.465
4 0.855, 0.940, 0.983, 1.020, 1.380
5 0.845, 0.935, 0.950, 0.970, 1.000
6 1.003, 1.085, 1.105, 1.123, 1.138
7 1.376, 1.438, 1.476, 1.510, 1.549
8 1.493, 1.554, 1.595, 1.629, 1.688
# Grouping sales by date (adding up all the different products together)
# =====================================================================
# NOTE(review): avgqty presumably divides the per-date qty and freq totals
# computed just before it in the same call -- confirm against plyr::summarise.
dataD <- ddply(
  dataDP, c("date", "weekday"), summarise,
  qty = sum(qty),
  avgprice = mean(avgprice),
  freq = sum(freq),
  avgqty = qty / freq
)
summary(dataD)
date weekday qty avgprice
01/01/2013: 1 Friday :48 Min. : 234 Min. :1.15
01/02/2013: 1 Monday :48 1st Qu.: 23320 1st Qu.:1.28
01/03/2013: 1 Saturday :48 Median : 30047 Median :1.33
01/04/2013: 1 Sunday :48 Mean : 36980 Mean :1.33
01/05/2013: 1 Thursday :49 3rd Qu.: 39444 3rd Qu.:1.38
01/06/2013: 1 Tuesday :48 Max. :213260 Max. :1.53
(Other) :331 Wednesday:48
freq avgqty
Min. : 1 Min. : 34.6
1st Qu.: 402 1st Qu.: 42.8
Median : 487 Median : 64.0
Mean : 545 Mean : 68.1
3rd Qu.: 649 3rd Qu.: 78.7
Max. :2051 Max. :284.3
# Standard deviation of the per-date total quantities
sd(dataD$qty)
[1] 28251
# Sorting date-grouped sales by quantity
# ======================================
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
order.qty <- order(dataD$qty, decreasing = TRUE)
bestSaleDays <- dataD[order.qty, c("date", "qty")]
# ... showing the top best-selling days
head(bestSaleDays, topX)
date qty
335 31/08/2012 213260
336 31/10/2012 212530
329 30/11/2012 198730
163 15/10/2012 164745
154 14/12/2012 154063
332 31/03/2013 153038
331 31/01/2013 131348
156 15/02/2013 130803
164 15/11/2012 125366
161 15/08/2012 123980
# Grouping sales by day of the week and product
# =================================
dataWP <- ddply(
  smallSales, c("weekday", "product"), summarise,
  qty = sum(quantity),
  avgprice = mean(price),
  amnt = sum(amount)
)
dataWP
weekday product qty avgprice amnt
1 Friday DIESEL e+ 1101728 1.4013 1543662
2 Friday DIESELe+10 46628 1.4776 68886
3 Friday GAS A 104869 1.3457 138986
4 Friday GAS B 759356 0.9773 737556
5 Friday GAS C 285520 0.9515 270380
6 Friday GASOLEO B 21837 1.0992 23922
7 Friday GASOLINA 95 205155 1.4811 303813
8 Friday gasolina98 18575 1.5990 29732
9 Monday DIESEL e+ 971203 1.3856 1346817
10 Monday DIESELe+10 36866 1.4558 53720
11 Monday GAS A 75234 1.3527 100092
12 Monday GAS B 493217 0.9793 478483
13 Monday GAS C 220170 0.9480 208153
14 Monday GASOLEO B 35947 1.0938 39347
15 Monday GASOLINA 95 147822 1.4635 216127
16 Monday gasolina98 11926 1.5864 18919
17 Saturday DIESEL e+ 622577 1.4009 872176
18 Saturday DIESELe+10 34919 1.4753 51464
19 Saturday GAS A 21474 1.3712 29177
20 Saturday GAS B 150607 1.0001 149997
21 Saturday GAS C 45117 0.9484 42484
22 Saturday GASOLEO B 13265 1.1053 14647
23 Saturday GASOLINA 95 164191 1.4808 243093
24 Saturday gasolina98 16132 1.5956 25688
25 Sunday DIESEL e+ 905489 1.3989 1266499
26 Sunday DIESELe+10 49860 1.4730 73490
27 Sunday GAS A 20676 1.3824 28251
28 Sunday GAS B 95732 1.0016 94426
29 Sunday GAS C 42766 0.9402 40163
30 Sunday GASOLEO B 13488 1.0960 14745
31 Sunday GASOLINA 95 262063 1.4748 386296
32 Sunday gasolina98 22654 1.5916 35999
33 Thursday DIESEL e+ 977275 1.4023 1370729
34 Thursday DIESELe+10 30918 1.4740 45562
35 Thursday GAS A 62653 1.3564 83385
36 Thursday GAS B 404642 0.9784 393853
37 Thursday GAS C 166777 0.9522 158436
38 Thursday GASOLEO B 28778 1.0988 31629
39 Thursday GASOLINA 95 139241 1.4776 205643
40 Thursday gasolina98 14569 1.5914 23189
41 Tuesday DIESEL e+ 883052 1.3952 1231954
42 Tuesday DIESELe+10 27782 1.4691 40803
43 Tuesday GAS A 59738 1.3501 79325
44 Tuesday GAS B 346757 0.9671 332047
45 Tuesday GAS C 194313 0.9446 182886
46 Tuesday GASOLEO B 32193 1.0998 35377
47 Tuesday GASOLINA 95 123361 1.4717 181454
48 Tuesday gasolina98 9884 1.5966 15766
49 Wednesday DIESEL e+ 934707 1.3991 1308980
50 Wednesday DIESELe+10 31953 1.4694 46940
51 Wednesday GAS A 96717 1.3517 128478
52 Wednesday GAS B 529874 0.9759 514875
53 Wednesday GAS C 158992 0.9500 150140
54 Wednesday GASOLEO B 32695 1.0957 35731
55 Wednesday GASOLINA 95 144676 1.4762 213325
56 Wednesday gasolina98 13769 1.5970 21980
# ... some graphs with the sales by day of the week for all the products
# scipen = 999 makes R print numbers in fixed rather than scientific notation
options(scipen = 999)
barchart(dataWP$qty ~ factor(dataWP$weekday, levels = WeekDays) | dataWP$product, scales = list(x = list(rot = 90)),
         col = rainbow(10), main = list(label = "Sales distribution by product and day of the week", cex = 2), ylab = "Sales", xlab = "")
#xyplot(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)), main="Sales distribution by dayof the week", ylab="Sales", xlab="")
histogram(~smallSales$quantity | factor(smallSales$weekday, levels = WeekDays),
          main = list(label = "Histogram by day of the week", cex = 2),
          xlab = "Quantity", col = rainbow(22))
# ... summing up all the quantities (i.e. total litres sold and euros invoiced,
# considering all the different products)
dataW <- ddply(dataWP, c("weekday"), summarise, qty = sum(qty), amnt = sum(amnt))
# Weekdays from the highest to the lowest invoiced amount.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
dataWOrdered <- dataW[order(dataW$amnt, decreasing = TRUE), ]
dataWOrdered
weekday qty amnt
1 Friday 2543668 3116937
2 Monday 1992386 2461658
7 Wednesday 1943382 2420448
5 Thursday 1824853 2312426
6 Tuesday 1677081 2099613
4 Sunday 1412728 1939869
3 Saturday 1068282 1428726
# Pie chart: % quantity sold (litres) by day of the week.
# dataWOrdered is sorted by amount, so the day names must come from its own
# weekday column; pairing the values with WeekDays (calendar order) would
# attach the wrong day to every slice.
pct1 <- round(dataWOrdered$qty / sum(dataWOrdered$qty) * 100, digits = 2)
lbls <- paste(dataWOrdered$weekday, pct1) # add percents to labels
lbls <- paste0(lbls, "%") # add % to labels
pie(dataWOrdered$qty, main = "Sales by day of the week (litres)", labels = lbls,
    col = rainbow(nrow(dataWOrdered)))
# Pie chart: % total sales amount (euros) by day of the week
pct2 <- round(dataWOrdered$amnt / sum(dataWOrdered$amnt) * 100, digits = 2)
lbls <- paste(dataWOrdered$weekday, pct2) # add percents to labels
lbls <- paste0(lbls, "%") # add % to labels
pie(dataWOrdered$amnt, main = "Sales by day of the week (Euros)", labels = lbls,
    col = rainbow(nrow(dataWOrdered)))
# On which days are the most expensive products sold?
# Share of the amount minus share of the litres, per weekday. The columns are
# built directly (not through a character matrix) so X2 stays numeric; rounding
# removes floating-point noise from subtracting two rounded percentages.
diffAmntLitres <- data.frame(X1 = dataWOrdered$weekday, X2 = round(pct2 - pct1, digits = 2))
(diffAmntLitres)
X1 X2
1 Sunday -0.66
2 Monday -0.390000000000001
3 Tuesday -0.25
4 Wednesday 0.00999999999999979
5 Thursday -0.15
6 Friday 0.949999999999999
7 Saturday 0.48
rm(list = c("lbls", "pct1", "pct2")) # ... some cleaning
# Chronologically sort by date
dataDbyDate <- dataD[order(as.Date(dataD$date, format = "%d/%m/%Y")), ]
#dataDbyDate$date <- reorder(dataDbyDate$date, neworder=order(dataDbyDate$date))
#xyplot(qty ~ date, data=dataDP, scales=list(x=list(rot=90)))
# graph of a subset of the first 50 records
#xyplot(freq ~ date[order(as.Date(date,format="%d%m%Y"))] | product, data = dataDP[1:50,], scales=list(x=list(rot=90)))
#plot(dataD$avgqty ~ dataD$date)
# NOTE(review): this replaces the dataD computed earlier with the contents of
# a CSV file -- confirm that overwriting it is intended.
dataD <- read.table(
  "CSVDataDate.csv",
  header = TRUE, sep = ";", na.strings = "NA", dec = ",", strip.white = TRUE
)
#xyplot(quantity ~ date, data=dataD, scales=list(x=list(rot=90)))
# Plot of the prices ordered by increasing price across the week
# Print and plot the average price of one product per day of the week, with
# the days arranged from the cheapest to the most expensive.
#
# data    : data frame with columns `product`, `weekday` and `avgprice`
# product : value of `data$product` whose rows are selected
# ...     : further arguments forwarded to plot() (e.g. ylim); do not repeat
#           the ones already set here (type, main, xlab, ylab, las, col)
#
# Side effects: prints the selected rows sorted by price, prints the standard
# deviation of their prices, and draws on the current graphics device.
plotPriceDOF <- function ( data, product, ...) {
  o <- data[data$product == product, c("weekday", "avgprice")]
  o <- o[order(o$avgprice), ]
  # Rows are already sorted by price, so their weekday order is the level order;
  # no second sort is needed.
  o$weekday <- factor(o$weekday, levels = as.character(o$weekday))
  print(o)
  print(sd(o$avgprice))
  # A factor x with a numeric y is what the formula call y ~ x plots as well;
  # the x, y form lets the extra arguments in ... be passed on as usual.
  plot(o$weekday, o$avgprice, type = "n",
       main = paste(product, " price"),
       xlab = "", ylab = "Price in Euros", las = 2, col = "darkblue", ...)
  invisible(NULL)
}
# 2 x 2 grid of panels, then one price-by-weekday plot per product
par(mfrow = c(2, 2))
for (description in dataP$product) {
  plotPriceDOF(dataWP, description)
}
weekday avgprice
9 Monday 1.386
41 Tuesday 1.395
25 Sunday 1.399
49 Wednesday 1.399
17 Saturday 1.401
1 Friday 1.401
33 Thursday 1.402
[1] 0.005769
weekday avgprice
10 Monday 1.456
42 Tuesday 1.469
50 Wednesday 1.469
26 Sunday 1.473
34 Thursday 1.474
18 Saturday 1.475
2 Friday 1.478
[1] 0.007215
weekday avgprice
3 Friday 1.346
43 Tuesday 1.350
51 Wednesday 1.352
11 Monday 1.353
35 Thursday 1.356
19 Saturday 1.371
27 Sunday 1.382
[1] 0.01323
weekday avgprice
44 Tuesday 0.9671
52 Wednesday 0.9759
4 Friday 0.9773
36 Thursday 0.9784
12 Monday 0.9793
20 Saturday 1.0001
28 Sunday 1.0016
[1] 0.01298
weekday avgprice
29 Sunday 0.9402
45 Tuesday 0.9446
13 Monday 0.9480
21 Saturday 0.9484
53 Wednesday 0.9500
5 Friday 0.9515
37 Thursday 0.9522
[1] 0.004193
weekday avgprice
14 Monday 1.094
54 Wednesday 1.096
30 Sunday 1.096
38 Thursday 1.099
6 Friday 1.099
46 Tuesday 1.100
22 Saturday 1.105
[1] 0.003752
weekday avgprice
15 Monday 1.464
47 Tuesday 1.472
31 Sunday 1.475
55 Wednesday 1.476
39 Thursday 1.478
23 Saturday 1.481
7 Friday 1.481
[1] 0.00608
weekday avgprice
16 Monday 1.586
40 Thursday 1.591
32 Sunday 1.592
24 Saturday 1.596
48 Tuesday 1.597
56 Wednesday 1.597
8 Friday 1.599
[1] 0.00437
# Back to a single plot per page
par(mfrow = c(1, 1))
# Pairwise comparison between group means (p < 0.05 --> different mean):
# prices of the "DIESEL e+" transactions, grouped by day of the week
dataWDiesel <- smallSales[smallSales$product == "DIESEL e+", ]
pairwise.t.test(x = dataWDiesel$price, g = dataWDiesel$weekday)
Pairwise comparisons using t tests with pooled SD
data: dataWDiesel$price and dataWDiesel$weekday
Friday Monday Saturday
Monday < 0.0000000000000002 - -
Saturday 0.6729 < 0.0000000000000002 -
Sunday 0.0000000000144801 < 0.0000000000000002 0.0000006118621281
Thursday 0.0144 < 0.0000000000000002 0.0022
Tuesday < 0.0000000000000002 < 0.0000000000000002 < 0.0000000000000002
Wednesday 0.0000000342377153 < 0.0000000000000002 0.0000504246623320
Sunday Thursday Tuesday
Monday - - -
Saturday - - -
Sunday - - -
Thursday < 0.0000000000000002 - -
Tuesday < 0.0000000000000002 < 0.0000000000000002 -
Wednesday 0.6729 0.0000000000000046 < 0.0000000000000002
P value adjustment method: holm
# Histogram of the transaction quantities per product, as counts
histogram(~quantity | product, data = smallSales,
          main = list(label = paste("Histogram of frecuencies for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
          breaks = 20, type = "count", xlab = "Litres", ylab = "Frequency", col = rainbow(20))
# Same histogram without `type`, which lattice draws on its default percent
# scale; the y-axis label now says so instead of "Frequency".
histogram(~quantity | product, data = smallSales,
          main = list(label = paste("Histogram of probabilities for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
          breaks = 20, xlab = "Litres", ylab = "Percent of Total", col = rainbow(20))