Emilio — Jun 22, 2014, 12:30 PM
# ===============================================================================
# PSDA.r -> Petrol Station Data Analysis by Emilio González González
# End of Degree Project. Jan-June 2014
# ===============================================================================
library("plyr")
library("lattice")
library("ggplot2")
# Rebuild a factor so that levels no longer present in its values are dropped.
# Any input that is not a factor is handed back unchanged.
refactor_factor <- function(col) {
  if (!is.factor(col)) {
    return(col)
  }
  factor(col)
}
# NOTE(review): hard-coded, machine-specific working directory; the CSV files
# read by this script use relative names and are resolved against it.
setwd("~/Economic Degree/TrabajoFinGrado")
# Global parameters
SampleSize <- 10 # Number of records to show when printing a sample
topX <- 10 # Number of records to show when asking for the top x
maximumLimit <- 80 # Threshold used to split the data in two subsets: at or below, and above, this quantity
# Day names in English, Sunday first
WeekDays <-c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Read the data (semicolon-separated, decimal comma) and show its structure;
# a summary and a random sample of records are printed right after
data <- read.table("CSVCarburantesFecha.csv", header=TRUE, sep=";", na.strings="NA", dec=",", strip.white=TRUE)
str(data)
'data.frame': 185164 obs. of 5 variables:
$ date : Factor w/ 337 levels "01/01/2013","01/02/2013",..: 282 325 325 325 325 325 325 325 325 325 ...
$ product : Factor w/ 8 levels "DIESEL e+","DIESELe+10",..: 1 7 7 1 1 7 1 1 1 1 ...
$ quantity: num 234.13 3.44 3.44 7.25 7.25 ...
$ price : num 1.44 1.46 1.46 1.38 1.38 ...
$ amount : num 335.98 5.01 5.01 10.01 10.01 ...
summary(data)
date product quantity price
31/03/2013: 2061 DIESEL e+ :125779 Min. : 0 Min. :0.651
31/08/2012: 1641 GASOLINA 95: 40313 1st Qu.: 23 1st Qu.:1.376
14/10/2012: 1468 DIESELe+10 : 6338 Median : 38 Median :1.409
31/10/2012: 1364 GAS B : 5011 Mean : 169 Mean :1.400
30/09/2012: 1310 gasolina98 : 2910 3rd Qu.: 53 3rd Qu.:1.446
30/11/2012: 1180 GAS C : 1932 Max. :84601 Max. :1.688
(Other) :176140 (Other) : 2881
amount
Min. : 0
1st Qu.: 32
Median : 54
Mean : 179
3rd Qu.: 75
Max. :63112
# Print a random sample of records. The sample size comes from the SampleSize
# parameter (capped at the number of rows so sample() cannot fail on a small
# data set) instead of a hard-coded literal.
data[sample(nrow(data), min(SampleSize, nrow(data))), ]
date product quantity price amount
154602 03/05/2013 DIESEL e+ 19.42 1.339 26.00
149647 23/04/2013 DIESEL e+ 42.78 1.329 56.85
43477 27/09/2012 DIESEL e+ 13.94 1.435 20.00
92331 24/12/2012 GAS C 918.00 0.947 869.35
155902 05/05/2013 DIESEL e+ 26.34 1.329 35.01
83296 07/12/2012 GASOLINA 95 7.35 1.418 10.42
76914 24/11/2012 DIESEL e+ 107.60 1.394 149.99
23455 28/08/2012 DIESEL e+ 44.76 1.430 64.01
34572 12/09/2012 DIESEL e+ 31.04 1.450 45.01
170167 01/06/2013 DIESEL e+ 43.83 1.369 60.00
# Split the records (transactions) into two subsets depending on whether the
# quantity is above the threshold (bigSales) or not (smallSales).
# Transactions above the threshold are treated as outliers and are not
# considered in the rest of the analysis.
bigSales <- data[which(data$quantity > maximumLimit), ]
smallSales <- data[which(data$quantity <= maximumLimit), ]
# Levels of the product column, i.e. the fuel names
factor.fuels <- levels(smallSales$product)
# Workaround to get the days of the week in English on a Spanish-localised
# computer: take the numeric weekday and use it to index WeekDays.
temp1 <- as.Date(smallSales$date, format = "%d/%m/%Y")
# $wday counts from 0 (Sunday) while WeekDays is indexed from 1, hence the + 1
temp2 <- as.POSIXlt(temp1)$wday + 1
smallSales$weekday <- as.factor(WeekDays[temp2])
summary(smallSales)
date product quantity price
31/03/2013: 1879 DIESEL e+ :115843 Min. : 0.06 Min. :0.88
14/10/2012: 1452 GASOLINA 95: 40105 1st Qu.:21.52 1st Qu.:1.38
31/08/2012: 1282 DIESELe+10 : 6166 Median :35.82 Median :1.42
03/08/2012: 1104 gasolina98 : 2846 Mean :35.91 Mean :1.42
19/08/2012: 1102 GASOLEO B : 353 3rd Qu.:47.78 3rd Qu.:1.45
30/09/2012: 1095 GAS A : 106 Max. :80.00 Max. :1.69
(Other) :157579 (Other) : 74
amount weekday
Min. : 0.1 Friday :29225
1st Qu.: 30.0 Monday :21588
Median : 50.0 Saturday :21339
Mean : 50.9 Sunday :33101
3rd Qu.: 68.0 Thursday :20933
Max. :135.0 Tuesday :18581
Wednesday:20726
# Box plot of the litres per transaction, one box per product
#=============================================================================================
boxplot(
  smallSales$quantity ~ smallSales$product,
  rescale = TRUE,
  horizontal = TRUE,
  las = 1,
  cex.axis = 0.55,
  col = rainbow(10),
  main = sprintf("Boxplot individual transactions less than %s litres", maximumLimit),
  cex.main = 1.4,
  col.main = "darkblue",
  xlab = "Litres",
  cex.lab = 1.5,
  col.lab = "darkblue"
)
# Using Lattice package: bwplot(~smallSales$quantity | smallSales$product)
# Drop the raw data and the temporary variables that are no longer needed
rm(list = c("data", "temp1", "temp2"))
# Histogram of the litres per transaction
#=============================================================================================
hist(
  smallSales$quantity,
  main = sprintf("Frequency Histogram\n individual transactions less than %s litres", maximumLimit),
  col.main = "darkblue",
  xlab = "Litres",
  ylab = "Freq.",
  las = 1,
  col = rainbow(17)
)
# New dataset (dataDP): one row per date, product and weekday with the total
# litres, mean price, total amount and number of transactions
dataDP <- ddply(
  smallSales,
  c("date", "product", "weekday"),
  summarise,
  qty = sum(quantity),
  avgprice = mean(price),
  amnt = sum(amount),
  freq = length(product)
)
# and print out some statistics
# (the summary() call had been fused onto the end of the comment line above,
# so it was never executed)
summary(dataDP$qty)
sd(dataDP$qty)
[1] 5620
sum(dataDP$qty)
[1] 5942079
# Box plot of the quantities in dataDP (totals per date and product)
boxplot(
  dataDP$qty,
  rescale = TRUE,
  horizontal = TRUE,
  las = 1,
  col = "yellow",
  main = sprintf("Boxplot Quantity individual transactions less than %s litres", maximumLimit),
  col.main = "darkblue",
  xlab = "Litres",
  ylab = "Sum of all Products",
  cex.lab = 1.2,
  col.lab = "darkblue"
)
# Copy of dataDP sorted in chronological order
temp1 <- as.Date(dataDP$date, format = "%d/%m/%Y")
sort.dates <- temp1[order(temp1)]
dataDPbyDate <- dataDP[order(temp1), ]
# Qty sold by chronological date
# =================================================================
#barchart(dataDPbyDate$qty ~dataDPbyDate$date|factor.fuels)
# Add a column with the day of the month (temp1 already holds the parsed dates
# of dataDP, row for row)
dataDP$monthDay <- as.POSIXlt(temp1)$mday
# New data frame grouped by day of the month
dataDM <- ddply(
  dataDP,
  c("monthDay"),
  summarize,
  qty = sum(qty),
  avgprice = mean(avgprice),
  amnt = sum(amnt),
  freq = sum(freq)
)
plot(
  dataDM$qty,
  type = "b",
  pch = 10,
  col = "blue",
  las = 1,
  main = paste("Total quantities per day of the month\n for individual transactions < ", as.character(maximumLimit)),
  cex.main = 1.4,
  col.main = "darkblue",
  xlab = "Day of the month",
  ylab = "Litres",
  cex.lab = 1.4,
  col.lab = "darkblue"
)
min(dataDM$qty)
[1] 154529
max(dataDM$qty)
[1] 244846
max(dataDM$qty)/min(dataDM$qty)
[1] 1.584
# by product and day of the month
dataDPM <- ddply(dataDP, c("monthDay","product"), summarize, qty=sum(qty), avgprice=mean(avgprice), amnt=sum(amnt), freq=sum(freq) )
#boxplot(dataDPM$qty ~ dataDPM$product, horizontal=TRUE)
fivenum(dataDPM$qty)
[1] 43.0 398.2 3652.7 35345.4 188332.0
# ... sales histogram by quantity (transaction counts in 20 bins)
#hist(smallSales$quantity, breaks=20)
histogram(~quantity, data = smallSales, breaks = 20, type = "count",
  main = paste("Histogram of Quantities for individual transactions < ", as.character(maximumLimit)),
  col = rainbow(20))
# Sort the dataset (records grouped by date and product) by decreasing quantity.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
order.qty <- order(dataDP$qty, decreasing = TRUE)
bestSaleDays <- dataDP[order.qty, c("date", "product", "qty")]
# Show the top records; the count comes from the topX parameter
head(bestSaleDays, topX)
date product qty
1521 31/03/2013 DIESEL e+ 45623
1535 31/08/2012 DIESEL e+ 36657
682 14/10/2012 DIESEL e+ 33758
1542 31/10/2012 DIESEL e+ 33296
1496 30/09/2012 DIESEL e+ 30894
1506 30/11/2012 DIESEL e+ 29137
125 03/08/2012 DIESEL e+ 27207
1292 26/08/2012 DIESEL e+ 26193
446 09/12/2012 DIESEL e+ 26168
937 19/08/2012 DIESEL e+ 25703
# Show sales by product. Grouping by product
#=================================================================================
dataP <-ddply(smallSales, c("product"), summarise, qty=sum(quantity), avgprice=mean(price), amnt=sum(amount), freq=length(product))
dataP$product <-refactor_factor(dataP$product)
factor.fuels <- levels(dataP$product)
summary(dataP)
product qty avgprice amnt
DIESEL e+ :1 Min. : 4362 Min. :0.982 Min. : 4316
DIESELe+10 :1 1st Qu.: 11568 1st Qu.:1.235 1st Qu.: 13496
GAS A :1 Median : 103710 Median :1.398 Median : 165172
GAS B :1 Mean : 848868 Mean :1.341 Mean :1203727
GASOLEO B :1 3rd Qu.: 711507 3rd Qu.:1.473 3rd Qu.:1048836
GASOLINA 95:1 Max. :4387857 Max. :1.594 Max. :6131938
gasolina98 :1
freq
Min. : 74
1st Qu.: 230
Median : 2846
Mean : 23642
3rd Qu.: 23136
Max. :115843
dataP
product qty avgprice amnt freq
1 DIESEL e+ 4387857 1.3978 6131938 115843
2 DIESELe+10 247733 1.4710 364394 6166
3 GAS A 5756 1.3718 7877 106
4 GAS B 4362 0.9816 4316 74
5 GASOLEO B 17380 1.0987 19115 353
6 GASOLINA 95 1175281 1.4755 1733278 40105
7 gasolina98 103710 1.5938 165172 2846
order.p <- order(dataP$qty, decreasing=TRUE)
dataP[order.p,]
product qty avgprice amnt freq
1 DIESEL e+ 4387857 1.3978 6131938 115843
6 GASOLINA 95 1175281 1.4755 1733278 40105
2 DIESELe+10 247733 1.4710 364394 6166
7 gasolina98 103710 1.5938 165172 2846
5 GASOLEO B 17380 1.0987 19115 353
3 GAS A 5756 1.3718 7877 106
4 GAS B 4362 0.9816 4316 74
write.table(dataP,"splitproducts.txt", sep=";")
# Pie chart: distribution of sales by fuel.
# The columns of dataP are referenced explicitly instead of through
# attach()/detach(): an attached copy does not follow later changes to dataP
# (such as the meanservice column added below) and its names can be masked by
# variables in the global environment.
pct1 <- round(dataP$qty / sum(dataP$qty) * 100, digits = 2)
pct1 <- paste0(pct1, "%")
lbls <- paste(factor.fuels, pct1) # fuel name followed by its share
#lbls <- paste(factor.fuels,"%",sep="") # ad % to labels
pie(dataP$qty, main = "Sales by product (share)", labels = pct1, col = rainbow(7))
legend(1.0, 1.0, lbls, cex = 0.8, fill = rainbow(7))
#barchart(qty~product, col=rainbow(7))
# Bar charts of the litres and of the euros sold per fuel
barplot(dataP$qty, col = rainbow(7), las = 2,
  main = "Quantities (litres) sold by fuel type", cex.main = 1.8,
  names.arg = factor.fuels, cex.axis = 0.8, cex.names = 0.7)
barplot(dataP$amnt, col = rainbow(7), las = 2,
  main = "Amount of sales (euros) by fuel type", cex.main = 1.8,
  names.arg = factor.fuels, cex.axis = 0.8, cex.names = 0.7)
dataP
product qty avgprice amnt freq
1 DIESEL e+ 4387857 1.3978 6131938 115843
2 DIESELe+10 247733 1.4710 364394 6166
3 GAS A 5756 1.3718 7877 106
4 GAS B 4362 0.9816 4316 74
5 GASOLEO B 17380 1.0987 19115 353
6 GASOLINA 95 1175281 1.4755 1733278 40105
7 gasolina98 103710 1.5938 165172 2846
# Average litres per transaction for each fuel
dataP$meanservice <- dataP$qty / dataP$freq
dataP[order(dataP$meanservice, decreasing = TRUE), c("product", "meanservice")]
product meanservice
4 GAS B 58.95
3 GAS A 54.30
5 GASOLEO B 49.24
2 DIESELe+10 40.18
1 DIESEL e+ 37.88
7 gasolina98 36.44
6 GASOLINA 95 29.31
barplot(dataP$meanservice, col = rainbow(9), las = 2, cex.main = 1.8, cex.names = 0.7,
  main = paste("Average Litres Per Transaction\n transactions < ", as.character(maximumLimit)),
  names.arg = factor.fuels)
# Basic statistics by product
#================================================================================
statsP <-ddply(smallSales, c("product"), summarise, mean=mean(price), sd=sd(price), min=min(price), max=max(price), "quantile values (0% 25% 50% 75% 100%)"=list(quantile(price)))
statsP
product mean sd min max
1 DIESEL e+ 1.3978 0.03558 1.314 1.465
2 DIESELe+10 1.4710 0.03944 1.374 1.532
3 GAS A 1.3718 0.03905 1.270 1.448
4 GAS B 0.9816 0.05169 0.880 1.050
5 GASOLEO B 1.0987 0.03044 1.010 1.138
6 GASOLINA 95 1.4755 0.04523 1.376 1.549
7 gasolina98 1.5938 0.04666 1.493 1.688
quantile values (0% 25% 50% 75% 100%)
1 1.314, 1.369, 1.398, 1.429, 1.465
2 1.374, 1.443, 1.473, 1.500, 1.532
3 1.270, 1.349, 1.370, 1.400, 1.448
4 0.880, 0.935, 0.995, 1.023, 1.050
5 1.010, 1.085, 1.100, 1.123, 1.138
6 1.376, 1.438, 1.476, 1.510, 1.549
7 1.493, 1.554, 1.595, 1.629, 1.688
# Grouping sales by date (summing up altogeher the different products)
#=====================================================================
dataD <-ddply(dataDP, c("date", "weekday"), summarise, qty=sum(qty), avgprice=mean(avgprice), freq=sum(freq), avgqty=qty/freq )
summary(dataD)
date weekday qty avgprice
01/01/2013: 1 Friday :48 Min. : 2775 Min. :1.27
01/02/2013: 1 Monday :48 1st Qu.:12942 1st Qu.:1.40
01/03/2013: 1 Saturday :48 Median :15650 Median :1.44
01/04/2013: 1 Sunday :48 Mean :17685 Mean :1.44
01/05/2013: 1 Thursday :48 3rd Qu.:21371 3rd Qu.:1.48
01/06/2013: 1 Tuesday :48 Max. :65286 Max. :1.55
(Other) :330 Wednesday:48
freq avgqty
Min. : 79 Min. :28.8
1st Qu.: 351 1st Qu.:34.5
Median : 433 Median :36.3
Mean : 492 Mean :36.1
3rd Qu.: 595 3rd Qu.:37.6
Max. :1879 Max. :41.3
sd(dataD$qty)
[1] 7238
# Sorting date-grouped sales by quantity
# ======================================
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
order.qty <- order(dataD$qty, decreasing = TRUE)
bestSaleDays <- dataD[order.qty, c("date", "qty")]
# ... showing the topX best seller days
head(bestSaleDays, topX)
date qty
331 31/03/2013 65286
152 14/10/2012 49368
334 31/08/2012 48319
335 31/10/2012 40863
29 03/08/2012 40791
326 30/09/2012 39852
43 04/11/2012 37106
205 19/08/2012 35947
282 26/08/2012 35835
99 09/12/2012 35229
# Grouping sales by day of the Week
# =================================
dataWP <-ddply(smallSales, c("weekday", "product"), summarise, qty=sum(quantity), avgprice=mean(price), amnt=sum(amount))
dataWP
weekday product qty avgprice amnt
1 Friday DIESEL e+ 810163 1.4013 1.135e+06
2 Friday DIESELe+10 44173 1.4777 6.527e+04
3 Friday GAS A 1390 1.3695 1.902e+03
4 Friday GAS B 1260 0.9624 1.225e+03
5 Friday GASOLEO B 4000 1.1047 4.425e+03
6 Friday GASOLINA 95 202988 1.4812 3.007e+05
7 Friday gasolina98 17796 1.5990 2.849e+04
8 Monday DIESEL e+ 587487 1.3854 8.137e+05
9 Monday DIESELe+10 35140 1.4559 5.121e+04
10 Monday GAS A 879 1.3731 1.201e+03
11 Monday GAS B 472 0.9903 4.685e+02
12 Monday GASOLEO B 1366 1.0880 1.487e+03
13 Monday GASOLINA 95 146498 1.4635 2.142e+05
14 Monday gasolina98 11429 1.5863 1.813e+04
15 Saturday DIESEL e+ 508908 1.4008 7.126e+05
16 Saturday DIESELe+10 33934 1.4752 5.000e+04
17 Saturday GAS A 319 1.3857 4.396e+02
18 Saturday GAS B 141 0.9706 1.376e+02
19 Saturday GASOLEO B 2229 1.1068 2.468e+03
20 Saturday GASOLINA 95 163502 1.4808 2.421e+05
21 Saturday gasolina98 15482 1.5947 2.462e+04
22 Sunday DIESEL e+ 799238 1.3989 1.118e+06
23 Sunday DIESELe+10 48206 1.4729 7.104e+04
24 Sunday GAS A 258 1.4104 3.627e+02
25 Sunday GAS B 53 0.9800 5.194e+01
26 Sunday GASOLEO B 2143 1.1026 2.359e+03
27 Sunday GASOLINA 95 259878 1.4748 3.831e+05
28 Sunday gasolina98 22055 1.5912 3.502e+04
29 Thursday DIESEL e+ 587633 1.4023 8.240e+05
30 Thursday DIESELe+10 29419 1.4738 4.334e+04
31 Thursday GAS A 849 1.3927 1.179e+03
32 Thursday GAS B 640 0.9825 6.335e+02
33 Thursday GASOLEO B 2919 1.0969 3.202e+03
34 Thursday GASOLINA 95 137787 1.4776 2.035e+05
35 Thursday gasolina98 14064 1.5914 2.239e+04
36 Tuesday DIESEL e+ 520401 1.3952 7.259e+05
37 Tuesday DIESELe+10 27000 1.4690 3.965e+04
38 Tuesday GAS A 563 1.3432 7.558e+02
39 Tuesday GAS B 523 1.0116 5.311e+02
40 Tuesday GASOLEO B 1754 1.0945 1.926e+03
41 Tuesday GASOLINA 95 121609 1.4717 1.789e+05
42 Tuesday gasolina98 9556 1.5971 1.525e+04
43 Wednesday DIESEL e+ 574027 1.3988 8.029e+05
44 Wednesday DIESELe+10 29860 1.4698 4.389e+04
45 Wednesday GAS A 1498 1.3626 2.038e+03
46 Wednesday GAS B 1273 0.9887 1.269e+03
47 Wednesday GASOLEO B 2969 1.0911 3.247e+03
48 Wednesday GASOLINA 95 143018 1.4763 2.109e+05
49 Wednesday gasolina98 13329 1.5970 2.128e+04
# ... some graphs with the sales by day of the week for all the products
options(scipen=999)
barchart(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)),
col=rainbow(10), main=list(label="Sales distribution by product and day of the week",cex=2), ylab="Sales",xlab="")
#xyplot(dataWP$qty ~ factor(dataWP$weekday, levels=WeekDays) | dataWP$product, scales=list(x=list(rot=90)), main="Sales distribution by dayof the week", ylab="Sales", xlab="")
histogram(~smallSales$quantity | factor(smallSales$weekday, levels=WeekDays),
main=list(label="Histogram by day of the week",cex=2),
xlab="Quantity", col=rainbow(22))
# ... summing up all the quantities per weekday (i.e. total litres sold and
# euros invoiced, considering all the different products)
dataW <- ddply(dataWP, c("weekday"), summarise, qty = sum(qty), amnt = sum(amnt))
# Weekdays sorted by decreasing amount; TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
dataWOrdered <- dataW[order(dataW$amnt, decreasing = TRUE), ]
dataWOrdered
weekday qty amnt
4 Sunday 1131831 1609760
1 Friday 1081769 1536931
2 Monday 783272 1100365
5 Thursday 773312 1098181
7 Wednesday 765973 1085585
3 Saturday 724515 1032396
6 Tuesday 681407 962871
# Pie chart: % of quantity sold (litres) by day of the week.
# dataWOrdered is sorted by decreasing amount, so the labels must be taken from
# its own weekday column. Pairing the values with the calendar-ordered WeekDays
# vector attaches the wrong day name to every row that is not already in
# calendar position.
pct1 <- round(dataWOrdered$qty / sum(dataWOrdered$qty) * 100, digits = 2)
lbls <- paste0(as.character(dataWOrdered$weekday), " ", pct1, "%")
pie(dataWOrdered$qty, main = "Sales by day of the week (litres)", labels = lbls, col = rainbow(7))
# Pie chart: % of total sales amount (euros) by day of the week
pct2 <- round(dataWOrdered$amnt / sum(dataWOrdered$amnt) * 100, digits = 2)
lbls <- paste0(as.character(dataWOrdered$weekday), " ", pct2, "%")
pie(dataWOrdered$amnt, main = "Sales by day of the week (Euros)", labels = lbls, col = rainbow(7))
# On which days are the most expensive products sold?
# Share of euros minus share of litres, per weekday. The table is built with
# data.frame() directly so the differences stay numeric (going through a
# character matrix turned them into strings). Column names X1/X2 are kept.
# NOTE: output recorded before this fix pairs the values with the wrong days.
diffAmntLitres <- data.frame(X1 = as.character(dataWOrdered$weekday), X2 = pct2 - pct1)
(diffAmntLitres)
X1 X2
1 Sunday 0.0500000000000007
2 Monday 0.0299999999999976
3 Tuesday -0.119999999999999
4 Wednesday 0.0199999999999996
5 Thursday -0.00999999999999979
6 Friday 0.0600000000000005
7 Saturday -0.0400000000000009
rm(lbls, pct1, pct2) # remove the temporary label/percentage vectors
# Chronologically sorted copy of dataD (dates are parsed as day/month/year)
dataDbyDate = dataD[order(as.Date(dataD$date, format="%d/%m/%Y")),]
#dataDbyDate$date <- reorder(dataDbyDate$date, neworder=order(dataDbyDate$date))
#xyplot(qty ~ date, data=dataDP, scales=list(x=list(rot=90)))
# graph of a subset of the first 50 records
#xyplot(freq ~ date[order(as.Date(date,format="%d%m%Y"))] | product, data = dataDP[1:50,], scales=list(x=list(rot=90)))
#plot(dataD$avgqty ~ dataD$date)
# NOTE(review): this replaces the in-memory dataD with the contents of
# CSVDataDate.csv; only commented-out code below refers to it -- confirm that
# overwriting the date-grouped data frame is intended.
dataD <- read.table("CSVDataDate.csv", header=TRUE, sep=";", na.strings="NA", dec=",", strip.white=TRUE)
#xyplot(quantity ~ date, data=dataD, scales=list(x=list(rot=90)))
# Plot the average price of one product per weekday, with the weekdays ordered
# by increasing price.
#
# Args:
#   data:    data frame with the columns `product`, `weekday` and `avgprice`.
#   product: value of `data$product` selecting the rows to plot.
#   ...:     accepted for backward compatibility; currently unused.
#
# Side effects: prints the selected rows (sorted by price) and the standard
#   deviation of their prices, then draws the plot on the active device.
# Returns: invisible NULL, with a warning, when no row matches `product`;
#   otherwise the value of the plot() call.
plotPriceDOF <- function(data, product, ...) {
  o <- data[data$product == product, c("weekday", "avgprice")]
  if (nrow(o) == 0) {
    # Without rows plot() would fail with an obscure error about axis limits
    warning("plotPriceDOF: no rows found for product '", product, "'", call. = FALSE)
    return(invisible(NULL))
  }
  o <- o[order(o$avgprice), ]
  # The rows are already sorted by price, so their weekday sequence gives the
  # factor levels directly (no second sort needed); unique() keeps factor()
  # from failing on duplicated levels if a weekday appears more than once.
  o$weekday <- factor(o$weekday, levels = unique(as.character(o$weekday)))
  print(o)
  print(sd(o$avgprice))
  plot(o$avgprice ~ o$weekday, type = "n",
    main = paste(product, " price"),
    xlab = "", ylab = "Price in Euros", las = 2, col = "darkblue")
}
par(mfrow=c(2,2))
for (description in dataP$product) plotPriceDOF(dataWP, description)
weekday avgprice
8 Monday 1.385
36 Tuesday 1.395
43 Wednesday 1.399
22 Sunday 1.399
15 Saturday 1.401
1 Friday 1.401
29 Thursday 1.402
[1] 0.005809
weekday avgprice
9 Monday 1.456
37 Tuesday 1.469
44 Wednesday 1.470
23 Sunday 1.473
30 Thursday 1.474
16 Saturday 1.475
2 Friday 1.478
[1] 0.007158
weekday avgprice
38 Tuesday 1.343
45 Wednesday 1.363
3 Friday 1.369
10 Monday 1.373
17 Saturday 1.386
31 Thursday 1.393
24 Sunday 1.410
[1] 0.02184
weekday avgprice
4 Friday 0.9624
18 Saturday 0.9706
25 Sunday 0.9800
32 Thursday 0.9825
46 Wednesday 0.9887
11 Monday 0.9903
39 Tuesday 1.0116
[1] 0.01575
weekday avgprice
12 Monday 1.088
47 Wednesday 1.091
40 Tuesday 1.094
33 Thursday 1.097
26 Sunday 1.103
5 Friday 1.105
19 Saturday 1.107
[1] 0.007125
weekday avgprice
13 Monday 1.463
41 Tuesday 1.472
27 Sunday 1.475
48 Wednesday 1.476
34 Thursday 1.478
20 Saturday 1.481
6 Friday 1.481
[1] 0.006115
weekday avgprice
14 Monday 1.586
28 Sunday 1.591
35 Thursday 1.591
21 Saturday 1.595
49 Wednesday 1.597
42 Tuesday 1.597
7 Friday 1.599
[1] 0.004446
par(mfrow=c(1,1))
# PairWise comparison between group means (p<0.05--> different mean)
dataWDiesel <- smallSales[smallSales$product=="DIESEL e+",]
pairwise.t.test(dataWDiesel$price, dataWDiesel$weekday)
Pairwise comparisons using t tests with pooled SD
data: dataWDiesel$price and dataWDiesel$weekday
Friday Monday Saturday
Monday < 0.0000000000000002 - -
Saturday 0.4460 < 0.0000000000000002 -
Sunday 0.0000000000239605 < 0.0000000000000002 0.0000028989867463
Thursday 0.0272 < 0.0000000000000002 0.0017
Tuesday < 0.0000000000000002 < 0.0000000000000002 < 0.0000000000000002
Wednesday 0.0000000002001815 < 0.0000000000000002 0.0000032825472895
Sunday Thursday Tuesday
Monday - - -
Saturday - - -
Sunday - - -
Thursday < 0.0000000000000002 - -
Tuesday < 0.0000000000000002 < 0.0000000000000002 -
Wednesday 0.6629 < 0.0000000000000002 0.0000000000000004
P value adjustment method: holm
# Histogram of the litres per transaction, one panel per product, showing the
# number of transactions in each bin
histogram(~quantity | product, data = smallSales,
  main = list(label = paste("Histogram of frecuencies for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
  breaks = 20, type = "count", xlab = "Litres", ylab = "Frequency", col = rainbow(20))
# Same histogram without `type`: with a fixed number of bins lattice then plots
# the percentage of each panel's total, so the y axis is labelled accordingly
# (it was previously labelled "Frequency").
histogram(~quantity | product, data = smallSales,
  main = list(label = paste("Histogram of probabilities for individual transactions < ", as.character(maximumLimit)), cex = 2.0),
  breaks = 20, xlab = "Litres", ylab = "Percent of Total", col = rainbow(20))