library(XML)
library(ggplot2)
library(maps)
library(scales)
library(RColorBrewer)
library(plyr)
## 
## Attaching package: 'plyr'
## 
## L'objet suivant est masqué from 'package:maps':
## 
##     ozone
date.available <- Sys.Date() - 7
day <- format(date.available, "%Y%m%d")
baseurl <- "http://donnees.roulez-eco.fr/opendata/jour/"

# Récupération des données
temp <- tempfile()
download.file(paste0(baseurl, day), temp)
xmlsource <- unzip(temp, paste0("PrixCarburants_quotidien_", day, ".xml"))
unlink(temp)
parsed <- xmlParse(xmlsource)

#parsed <- xmlParse("~/R/PrixCarburants_quotidien_20150902.xml")

gazole <- data.frame(
    id = xpathSApply(parsed, "//prix[@nom='Gazole']/../@id"),
    cp = xpathSApply(parsed, "//prix[@nom='Gazole']/../@cp"),
    pop = xpathSApply(parsed, "//prix[@nom='Gazole']/../@pop"),
    ville = xpathSApply(parsed, "//prix[@nom='Gazole']/../ville/text()", xmlValue),
    adresse = xpathSApply(parsed, "//prix[@nom='Gazole']/../adresse/text()", xmlValue),
    latitude = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole']/../@latitude")) / 100000,
    longitude = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole']/../@longitude")) / 100000,
    prix = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole' and position()=1]/@valeur")) / 1000,
    date = as.Date(xpathSApply(parsed, "//prix[@nom='Gazole' and position()=1]/@maj"))
)

# Ajout départements. Cas particuliers : Corse et COM
gazole$dep <- ifelse(substr(gazole$cp, 1, 2) == "97" | substr(gazole$cp, 1, 2) == "20",
                     substr(gazole$cp, 1, 3),
                     substr(gazole$cp, 1, 2)
                     )
gazole$dep[which(gazole$dep == "200" | gazole$dep == "201")] <- "2A"
gazole$dep[which(gazole$dep == "202" | gazole$dep == "206")] <- "2B"

gazole$dep <- as.factor(gazole$dep)

# Régions
dep <- read.table("http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2015/txt/depts2015.txt", 
                  fill = TRUE, header = TRUE, fileEncoding = "windows-1252", sep="\t", quote="")
reg <- read.table("http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2015/txt/reg2015.txt", 
                  fill = TRUE, header = TRUE, fileEncoding = "windows-1252", sep="\t", quote="")
dep$REGION <- as.factor(dep$REGION)
dep <- rename(dep, c("DEP" = "dep", "NCCENR" = "lib.dep"))  
reg <- rename(reg, c("NCCENR" = "lib.reg")) 

gazole <- join(gazole, dep, by = "dep")
gazole <- join(gazole, reg, by = "REGION")

# Suppression données très anciennes (avant début de l'année)
gazole <- subset(gazole, date >= paste0(substr(day, 1, 4), "-01-01"))

ancien <- data.frame(j = as.numeric(date.available - gazole$date))
ggplot(ancien, aes(x=j)) + 
    geom_histogram() +
    ggtitle("Gazole - Ancienneté des mises à jour") +
    xlab("Jours") +
    ylab("N") 
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

quantile(ancien$j)
##   0%  25%  50%  75% 100% 
##    0    0    3    8  259
# Suppression données anciennes (plus d'un mois)
gazole <- subset(gazole, date >= (date.available - 30))

ggplot(gazole, aes(x=date)) + 
    geom_histogram() +
    ggtitle("Gazole - Date des mises à jour (sur un mois)") +
    xlab("Date") +
    ylab("N") 
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

quantile(ancien$j)
##   0%  25%  50%  75% 100% 
##    0    0    3    8  259
# Suppression Réunion (et mauvaises loc ?)
gazole.metro <- subset(gazole, longitude < 10 & latitude > 40 & dep != "974")

# Moyenne prix et intervalle de confiance
nrow(gazole)
## [1] 9416
mean(gazole.metro$prix)
## [1] 1.12452
confint(lm(gazole.metro$prix ~ 1))
##                2.5 %   97.5 %
## (Intercept) 1.123328 1.125712
# GLM : différences entre départements
res <- glm(data = gazole, prix~dep+pop)
summary(res)
## 
## Call:
## glm(formula = prix ~ dep + pop, data = gazole)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.16385  -0.03364  -0.01873   0.03180   0.47373  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.2430377  0.0052718 235.792  < 2e-16 ***
## dep02       -0.0106877  0.0071693  -1.491 0.136060    
## dep03        0.0105272  0.0073487   1.433 0.152028    
## dep04        0.0207560  0.0087788   2.364 0.018083 *  
## dep05        0.0109675  0.0095256   1.151 0.249610    
## dep06        0.0365456  0.0062638   5.834 5.58e-09 ***
## dep07       -0.0021501  0.0081469  -0.264 0.791846    
## dep08       -0.0058474  0.0087797  -0.666 0.505419    
## dep09        0.0141406  0.0092462   1.529 0.126215    
## dep10       -0.0118953  0.0079645  -1.494 0.135333    
## dep11       -0.0035889  0.0073209  -0.490 0.623984    
## dep12        0.0052542  0.0071948   0.730 0.465242    
## dep13       -0.0016685  0.0056515  -0.295 0.767821    
## dep14       -0.0145767  0.0065701  -2.219 0.026536 *  
## dep15        0.0341395  0.0086475   3.948 7.94e-05 ***
## dep16       -0.0474384  0.0078831  -6.018 1.84e-09 ***
## dep17       -0.0476883  0.0066522  -7.169 8.14e-13 ***
## dep18       -0.0132953  0.0079645  -1.669 0.095091 .  
## dep19        0.0158401  0.0079221   1.999 0.045586 *  
## dep21       -0.0040978  0.0066669  -0.615 0.538798    
## dep22       -0.0096065  0.0066111  -1.453 0.146231    
## dep23        0.0115380  0.0108149   1.067 0.286062    
## dep24        0.0053851  0.0071019   0.758 0.448317    
## dep25        0.0001585  0.0070151   0.023 0.981979    
## dep26       -0.0117037  0.0067918  -1.723 0.084883 .  
## dep27       -0.0156417  0.0068970  -2.268 0.023358 *  
## dep28       -0.0253806  0.0075886  -3.345 0.000827 ***
## dep29       -0.0315465  0.0063616  -4.959 7.21e-07 ***
## dep2A        0.1029913  0.0081478  12.640  < 2e-16 ***
## dep2B        0.0916872  0.0077295  11.862  < 2e-16 ***
## dep30       -0.0039588  0.0065182  -0.607 0.543633    
## dep31       -0.0121426  0.0059589  -2.038 0.041604 *  
## dep32        0.0322383  0.0085844   3.755 0.000174 ***
## dep33       -0.0149292  0.0057989  -2.575 0.010054 *  
## dep34       -0.0072301  0.0062186  -1.163 0.244999    
## dep35       -0.0124069  0.0062923  -1.972 0.048666 *  
## dep36       -0.0050901  0.0085825  -0.593 0.553141    
## dep37       -0.0205999  0.0070144  -2.937 0.003324 ** 
## dep38       -0.0016368  0.0060324  -0.271 0.786134    
## dep39       -0.0002252  0.0075563  -0.030 0.976225    
## dep40       -0.0047732  0.0069540  -0.686 0.492478    
## dep41       -0.0045999  0.0079224  -0.581 0.561505    
## dep42       -0.0026118  0.0066093  -0.395 0.692730    
## dep43       -0.0043743  0.0084635  -0.517 0.605280    
## dep44       -0.0191075  0.0060945  -3.135 0.001723 ** 
## dep45       -0.0216796  0.0067112  -3.230 0.001241 ** 
## dep46        0.0156323  0.0084058   1.860 0.062958 .  
## dep47       -0.0005720  0.0075250  -0.076 0.939411    
## dep48        0.0333007  0.0103524   3.217 0.001301 ** 
## dep49       -0.0289588  0.0068971  -4.199 2.71e-05 ***
## dep50       -0.0041511  0.0067125  -0.618 0.536321    
## dep51       -0.0187650  0.0071932  -2.609 0.009103 ** 
## dep52        0.0054801  0.0084056   0.652 0.514446    
## dep53       -0.0120942  0.0080985  -1.493 0.135373    
## dep54       -0.0021005  0.0070353  -0.299 0.765275    
## dep55        0.0073408  0.0100848   0.728 0.466685    
## dep56       -0.0134971  0.0063413  -2.128 0.033326 *  
## dep57       -0.0015426  0.0063805  -0.242 0.808969    
## dep58       -0.0012962  0.0081956  -0.158 0.874334    
## dep59       -0.0246285  0.0056737  -4.341 1.43e-05 ***
## dep60       -0.0190027  0.0065965  -2.881 0.003977 ** 
## dep61       -0.0068850  0.0082967  -0.830 0.406648    
## dep62       -0.0237618  0.0057778  -4.113 3.95e-05 ***
## dep63       -0.0030110  0.0065689  -0.458 0.646690    
## dep64        0.0056006  0.0066234   0.846 0.397812    
## dep65        0.0156550  0.0085844   1.824 0.068235 .  
## dep66       -0.0062293  0.0079234  -0.786 0.431775    
## dep67        0.0012363  0.0062902   0.197 0.844191    
## dep68        0.0133681  0.0066969   1.996 0.045943 *  
## dep69        0.0022243  0.0058986   0.377 0.706121    
## dep70        0.0026699  0.0081478   0.328 0.743158    
## dep71        0.0009250  0.0064353   0.144 0.885705    
## dep72       -0.0152938  0.0070144  -2.180 0.029257 *  
## dep73       -0.0011322  0.0071934  -0.157 0.874938    
## dep74        0.0124531  0.0065686   1.896 0.058012 .  
## dep75        0.1117572  0.0078432  14.249  < 2e-16 ***
## dep76       -0.0147427  0.0061019  -2.416 0.015708 *  
## dep77       -0.0168801  0.0060520  -2.789 0.005295 ** 
## dep78       -0.0013494  0.0062844  -0.215 0.829983    
## dep79       -0.0414613  0.0075560  -5.487 4.19e-08 ***
## dep80       -0.0099409  0.0067585  -1.471 0.141360    
## dep81        0.0037956  0.0077281   0.491 0.623332    
## dep82       -0.0052923  0.0079223  -0.668 0.504135    
## dep83       -0.0023402  0.0060728  -0.385 0.699976    
## dep84       -0.0006568  0.0070568  -0.093 0.925848    
## dep85       -0.0058469  0.0067118  -0.871 0.383699    
## dep86       -0.0303222  0.0074041  -4.095 4.25e-05 ***
## dep87       -0.0055292  0.0075245  -0.735 0.462462    
## dep88        0.0018739  0.0072436   0.259 0.795875    
## dep89       -0.0055120  0.0073213  -0.753 0.451547    
## dep90        0.0089441  0.0126052   0.710 0.477999    
## dep91       -0.0193505  0.0063907  -3.028 0.002469 ** 
## dep92        0.0449213  0.0072192   6.222 5.10e-10 ***
## dep93        0.0240817  0.0069168   3.482 0.000501 ***
## dep94       -0.0041893  0.0070356  -0.595 0.551559    
## dep95       -0.0043596  0.0066284  -0.658 0.510741    
## dep974       0.0358842  0.0497715   0.721 0.470941    
## popN        -0.0565786  0.0287952  -1.965 0.049460 *  
## popR        -0.1199219  0.0024779 -48.397  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.002454652)
## 
##     Null deviance: 32.653  on 9415  degrees of freedom
## Residual deviance: 22.870  on 9317  degrees of freedom
## AIC: -29766
## 
## Number of Fisher Scoring iterations: 2
# Graphiques boxplot prix par région
ggplot(gazole, aes(lib.reg, prix)) + 
    geom_boxplot() +
    stat_summary(fun.y = mean,  geom = "point") +
    ggtitle(paste("Gazole", date.available)) +
    xlab("Région") +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(gazole, aes(x=reorder(lib.reg, prix, FUN=median), prix)) + 
    geom_boxplot(notch = TRUE) +
    stat_summary(fun.y = mean,  geom = "point") +
    ggtitle(paste("Gazole", date.available)) +
    xlab("Région") +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graphiques boxplot prix par département 
ggplot(gazole, aes(dep, prix)) + 
    geom_boxplot() +
    stat_summary(fun.y = mean,  geom = "point") +
    ggtitle(paste("Gazole", date.available)) +
    xlab("Département") +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    theme(axis.text.x=element_text(angle = 90, hjust = 0))

ggplot(gazole, aes(x=reorder(dep, prix, FUN=median), prix)) + 
    geom_boxplot(notch = TRUE) +
    stat_summary(fun.y = mean,  geom = "point") +
    ggtitle(paste("Gazole", date.available)) +
    xlab("Département") +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    theme(axis.text.x=element_text(angle = 90, hjust = 0))
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

#  densité route/autoroute
ggplot(subset(gazole, gazole$pop !="N"), aes(x = prix, fill = pop)) + 
    #geom_histogram() +
    ggtitle(paste("Gazole", date.available)) +
    geom_density(alpha = 0.4) +
    xlab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    ylab("N") +
    scale_fill_manual(name="Localisation",
                        values=c("blue", "red"),
                        breaks=c("A", "R"),
                        labels=c("Autoroute", "Route"))

# Carte
france.map <- map_data("france")
ggplot(france.map, aes(long, lat)) +
    geom_polygon(aes(group = group), fill = NA, colour = "grey60") +
    geom_point(data = gazole.metro, aes(longitude, latitude, color = cut(prix, quantile(prix, seq(0,1,0.2))))) +
    scale_colour_manual("Prix (€)", values = rev(brewer.pal(5, "RdBu")), guide = guide_legend(reverse = TRUE)) +
    coord_fixed(ratio = 1.45) +
    ggtitle(paste("Gazole", date.available)) +
    xlab("") +
    ylab("") 

nstations.dep <- aggregate(gazole$id, by = list(gazole$dep), FUN = length)
colnames(nstations.dep) <- c("dep", "n")
nstations.dep[order(nstations.dep$n),] 
##    dep   n
## 97 974   1
## 91  90  18
## 22  23  26
## 49  48  29
## 56  55  31
## 5   05  36
## 9   09  39
## 4   04  45
## 8   08  45
## 15  15  47
## 33  32  48
## 37  36  48
## 66  65  48
## 44  43  50
## 47  46  51
## 53  52  51
## 62  61  53
## 59  58  55
## 7   07  56
## 29  2A  56
## 71  70  56
## 54  53  57
## 10  10  60
## 18  18  60
## 19  19  61
## 42  41  61
## 67  66  61
## 83  82  61
## 16  16  62
## 76  75  63
## 30  2B  66
## 82  81  66
## 27  28  70
## 40  39  71
## 80  79  71
## 48  47  72
## 88  87  72
## 87  86  76
## 3   03  78
## 11  11  79
## 90  89  79
## 89  88  82
## 93  92  83
## 12  12  84
## 52  51  84
## 74  73  84
## 2   02  85
## 23  24  88
## 85  84  90
## 55  54  91
## 95  94  91
## 24  25  92
## 38  37  92
## 73  72  92
## 41  40  95
## 94  93  97
## 26  27  98
## 50  49  98
## 25  26 104
## 81  80 106
## 1   01 109
## 46  45 109
## 51  50 109
## 86  85 109
## 69  68 110
## 20  21 112
## 17  17 113
## 65  64 115
## 96  95 115
## 21  22 116
## 43  42 116
## 61  60 117
## 14  14 119
## 64  63 119
## 75  74 119
## 31  30 123
## 72  71 130
## 92  91 134
## 58  57 135
## 28  29 137
## 57  56 139
## 36  35 144
## 68  67 144
## 79  78 145
## 6   06 147
## 35  34 152
## 77  76 167
## 45  44 168
## 84  83 171
## 78  77 174
## 39  38 177
## 32  31 189
## 70  69 200
## 34  33 221
## 63  62 226
## 60  59 254
## 13  13 261
# Ain
gazole.01 <- subset(gazole.metro, gazole.metro$dep == "01")

mean(gazole.metro$prix)
## [1] 1.12452
mean(gazole.01$prix)
## [1] 1.131917
comp1 <- rbind(data.frame(terr = "France", prix = gazole.metro$prix), data.frame(terr = "Ain", prix = gazole.01$prix))
res.01 <- aov(data=comp1, prix ~ terr)
summary(res.01)
##               Df Sum Sq  Mean Sq F value Pr(>F)
## terr           1   0.01 0.005896   1.697  0.193
## Residuals   9495  32.99 0.003474
# les 25 % de stations les moins chères du dép (hors autoroute)
stmp <- subset(gazole.01, prix < quantile(subset(gazole.01, pop == "R")$prix)[2])
stmp[order(stmp$prix),]
##          id    cp pop                    ville                    adresse
## 16  1120004 01120   R                 MONTLUEL       Cours de la Portelle
## 17  1120005 01120   R                LA BOISSE              Route de Thil
## 103 1700004 01700   R                  BEYNOST           ZAC des Beterses
## 95  1600001 01700   R                  MIRIBEL             Rue du Figuier
## 96  1600002 01600   R                  TREVOUX          324 Route de Lyon
## 106 1800001 01800   R                MEXIMIEUX           47 Route de Lyon
## 97  1600003 01600   R                 MASSIEUX           Avenue Lavoisier
## 10  1090001 01090   R                MONTCEAUX           LE GRAND RIVOLET
## 92  1570001 01570   R                 FEILLENS                 Grande Rue
## 70  1400005 01400   R CHATILLON SUR CHALARONNE        Avenue Jean Jaurès
## 1   1000001 01000   R   SAINT-DENIS-LèS-BOURG      596 AVENUE DE TREVOUX
## 2   1000002 01000   R          BOURG-EN-BRESSE        16 Avenue de Marboz
## 5   1000007 01000   R          Bourg-en-Bresse    Avenue Amédée Mercier
## 6   1000008 01000   R          BOURG-EN-BRESSE       Bd Charles de Gaulle
## 7   1000009 01000   R          Bourg-en-Bresse            56 Rue du Stand
## 29  1170001 01170   R                   SéGNY                        RN5
## 38  1210003 01210   R          FERNEY-VOLTAIRE            Route de Meyrin
## 58  1340003 01340   R                    Jayat              Les Cézilles
## 99  1630003 01630   R      SAINT-GENIS-POUILLY        148 Rue des Chalets
## 100 1630004 01630   R                   PéRON            ZA DU PRE MUNNY
## 104 1710001 01710   R                   THOIRY        route de Bellegarde
## 108 1800003 01800   R                Meximieux         ROUTE DE CHALAMONT
## 102 1700003 01700   R                  Miribel                      RN 83
## 56  1340001 01340   R      MONTREVEL-EN-BRESSE             Rue des Luyers
## 81  1480001 01480   R         JASSANS-RIOTTIER  Rue Ã\u0089douard Herriot
##     latitude longitude  prix       date dep REGION CHEFLIEU TNCC NCC
## 16  45.85251  5.058720 1.068 2015-09-21  01     82    01053    5 AIN
## 17  45.84681  5.047780 1.068 2015-09-19  01     82    01053    5 AIN
## 103 45.82650  4.998950 1.068 2015-09-19  01     82    01053    5 AIN
## 95  45.82834  4.971830 1.071 2015-09-22  01     82    01053    5 AIN
## 96  45.93472  4.790628 1.071 2015-09-24  01     82    01053    5 AIN
## 106 45.90119  5.190522 1.071 2015-09-24  01     82    01053    5 AIN
## 97  45.90391  4.818722 1.072 2015-09-23  01     82    01053    5 AIN
## 10  46.09636  4.774550 1.074 2015-09-23  01     82    01053    5 AIN
## 92  46.32904  4.884930 1.077 2015-09-22  01     82    01053    5 AIN
## 70  46.11765  4.960360 1.078 2015-09-22  01     82    01053    5 AIN
## 1   46.20114  5.197910 1.079 2015-09-22  01     82    01053    5 AIN
## 2   46.21842  5.227670 1.079 2015-09-22  01     82    01053    5 AIN
## 5   46.20105  5.248910 1.079 2015-09-24  01     82    01053    5 AIN
## 6   46.19942  5.240704 1.079 2015-09-24  01     82    01053    5 AIN
## 7   46.19566  5.229350 1.079 2015-09-22  01     82    01053    5 AIN
## 29  46.29301  6.081496 1.079 2015-09-24  01     82    01053    5 AIN
## 38  46.24370  6.092662 1.079 2015-09-17  01     82    01053    5 AIN
## 58  46.34627  5.126150 1.079 2015-09-22  01     82    01053    5 AIN
## 99  46.26326  6.029500 1.079 2015-09-18  01     82    01053    5 AIN
## 100 46.18322  5.928918 1.079 2015-09-18  01     82    01053    5 AIN
## 104 46.22732  5.992776 1.079 2015-09-17  01     82    01053    5 AIN
## 108 45.91162  5.189714 1.079 2015-09-22  01     82    01053    5 AIN
## 102 45.87147  4.912410 1.080 2015-09-24  01     82    01053    5 AIN
## 56  46.33208  5.122780 1.082 2015-09-24  01     82    01053    5 AIN
## 81  45.98403  4.756940 1.082 2015-09-18  01     82    01053    5 AIN
##     lib.dep CHEFLIEU.1 TNCC.1       NCC.1     lib.reg
## 16      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 17      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 103     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 95      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 96      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 106     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 97      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 10      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 92      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 70      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 1       Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 2       Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 5       Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 6       Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 7       Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 29      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 38      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 58      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 99      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 100     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 104     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 108     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 102     Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 56      Ain      69123      0 RHONE-ALPES Rhône-Alpes
## 81      Ain      69123      0 RHONE-ALPES Rhône-Alpes
# Savoie
gazole.73 <- subset(gazole.metro, gazole.metro$dep == "73")
subset(gazole.73, prix < quantile(subset(gazole.73, pop == "R")$prix)[2])
##            id    cp pop                 ville
## 7376 73000001 73000   R               BASSENS
## 7377 73000002 73000   R             CHAMBéRY
## 7378 73000007 73000   R             Chambéry
## 7379 73000009 73000   R              CHAMBERY
## 7380 73000010 73000   R             CHAMBéRY
## 7381 73000012 73000   R              CHAMBERY
## 7382 73000013 73000   R              CHAMBERY
## 7387 73100005 73100   R         Aix-les-Bains
## 7401 73160002 73160   R                Cognin
## 7419 73290001 73290   R     LA MOTTE-SERVOLEX
## 7427 73330001 73330   R Le Pont-de-Beauvoisin
## 7428 73330002 73330   R Le Pont-de-Beauvoisin
## 7436 73420001 73420   R   Drumettaz-Clarafond
## 7437 73420002 73420   R        VIVIERS-DU-LAC
## 7438 73420004 73420   R               Voglans
## 7443 73490002 73490   R            LA RAVOIRE
## 7444 73490003 73490   R            LA RAVOIRE
##                            adresse latitude longitude  prix       date dep
## 7376               21 RUE CENTRALE 45.57566  5.944094 1.075 2015-09-24  73
## 7377      1097 Avenue des Landiers 45.59483  5.898027 1.075 2015-09-24  73
## 7378    204 Avenue du Grand Verger 45.57977  5.909180 1.075 2015-09-24  73
## 7379       345 Quai du 11 Novembre 45.56727  5.935046 1.084 2015-09-22  73
## 7380       583 Avenue des Landiers 45.58905  5.905130 1.069 2015-09-24  73
## 7381         VOIE RAP. URB. RN 201 45.60964  5.886971 1.075 2015-09-24  73
## 7382           1388 AVENUE DE LYON 45.56332  5.902070 1.077 2015-09-24  73
## 7387 50 AVENUE GD PORT RP HOPITAUX 45.69676  5.907780 1.084 2015-09-24  73
## 7401          Rue de l'Ã\u0089pine 45.56230  5.894290 1.075 2015-09-19  73
## 7419              75 Rue Lavoisier 45.60217  5.875460 1.079 2015-09-19  73
## 7427                ZI LA BARONNIE 45.52954  5.689600 1.059 2015-09-15  73
## 7428                   zi Baronnie 45.52956  5.684770 1.069 2015-09-10  73
## 7436    214 Chemin de la Boisière 45.65712  5.910790 1.079 2015-09-16  73
## 7437                       La Coua 45.64736  5.893918 1.079 2015-09-16  73
## 7438          Route de l'aéroport 45.63796  5.889880 1.079 2015-09-16  73
## 7443            rue du pré Renaud 45.56279  5.963060 1.079 2015-09-24  73
## 7444     RN 6 CARREFOUR LA TROUSSE 45.56953  5.961659 1.079 2015-09-24  73
##      REGION CHEFLIEU TNCC    NCC lib.dep CHEFLIEU.1 TNCC.1       NCC.1
## 7376     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7377     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7378     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7379     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7380     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7381     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7382     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7387     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7401     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7419     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7427     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7428     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7436     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7437     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7438     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7443     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
## 7444     82    73065    3 SAVOIE  Savoie      69123      0 RHONE-ALPES
##          lib.reg
## 7376 Rhône-Alpes
## 7377 Rhône-Alpes
## 7378 Rhône-Alpes
## 7379 Rhône-Alpes
## 7380 Rhône-Alpes
## 7381 Rhône-Alpes
## 7382 Rhône-Alpes
## 7387 Rhône-Alpes
## 7401 Rhône-Alpes
## 7419 Rhône-Alpes
## 7427 Rhône-Alpes
## 7428 Rhône-Alpes
## 7436 Rhône-Alpes
## 7437 Rhône-Alpes
## 7438 Rhône-Alpes
## 7443 Rhône-Alpes
## 7444 Rhône-Alpes
# Rhône-Alpes
gazole.ra <- subset(gazole.metro, gazole.metro$REGION == "82")

mean(gazole.metro$prix)
## [1] 1.12452
mean(gazole.ra$prix)
## [1] 1.131214
comp2 <- rbind(data.frame(terr = "France", prix = gazole.metro$prix), data.frame(terr = "RA", prix = gazole.ra$prix))
res.ra <- aov(data=comp2, prix ~ terr)
summary(res.ra)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## terr            1   0.04 0.03914   11.25 0.000798 ***
## Residuals   10349  36.00 0.00348                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Prix par date de mise à jour depuis un mois
ggplot(gazole, aes(date, prix)) + 
    geom_point() +
    stat_smooth() +
    ggtitle("Gazole - Prix par date de mise à jour") +
    xlab("Date") +
    scale_x_date(labels = date_format("%d %b")) +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")")))  
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

ggplot(subset(gazole, gazole$pop !="N"), aes(date, prix, color = pop)) + 
    geom_point(position = position_jitter(w = 0.1)) +
    stat_smooth(method = "glm") +
    ggtitle("Gazole - Prix par date de mise à jour") +
    xlab("Date") +
    scale_x_date(labels = date_format("%d %b")) +
    ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
    scale_color_manual(name="Localisation",
                      values=c("blue", "red"),
                      breaks=c("A", "R"),
                      labels=c("Autoroute", "Route"))

# Groupes
library(agricolae)
out <- HSD.test(res, "dep", group = TRUE, console = TRUE, main = "Prix par dép.")
## 
## Study: Prix par dép.
## 
## HSD Test for prix 
## 
## Mean Square Error:  0.002454652 
## 
## dep,  means
## 
##         prix        std   r   Min   Max
## 01  1.131917 0.06178384 109 1.068 1.279
## 02  1.119482 0.05335742  85 1.067 1.280
## 03  1.136718 0.04734343  78 1.080 1.249
## 04  1.151867 0.05496305  45 1.066 1.280
## 05  1.134083 0.05161471  36 1.059 1.250
## 06  1.162109 0.06362014 147 1.067 1.330
## 07  1.123107 0.04331352  56 1.075 1.230
## 08  1.119933 0.04010294  45 1.087 1.240
## 09  1.137256 0.04890846  39 1.083 1.250
## 10  1.117217 0.05306219  60 1.067 1.290
## 11  1.131671 0.05984075  79 1.059 1.256
## 12  1.129798 0.04923286  84 1.058 1.250
## 13  1.128123 0.05352591 261 1.046 1.290
## 14  1.110555 0.05140116 119 1.059 1.254
## 15  1.157255 0.04949944  47 1.097 1.266
## 16  1.075677 0.04681312  62 1.030 1.227
## 17  1.079673 0.06111427 113 1.011 1.300
## 18  1.115817 0.05345345  60 1.057 1.280
## 19  1.146820 0.05795156  61 1.091 1.279
## 21  1.132937 0.05461417 112 1.067 1.260
## 22  1.114543 0.06282555 116 1.053 1.430
## 23  1.134654 0.04043458  26 1.099 1.240
## 24  1.129864 0.06045102  88 1.069 1.300
## 25  1.127185 0.04130879  92 1.078 1.254
## 26  1.124096 0.05731594 104 1.065 1.269
## 27  1.114816 0.05915005  98 1.054 1.272
## 28  1.108014 0.05785451  70 1.057 1.300
## 29  1.091569 0.03724300 137 1.059 1.270
## 2A  1.226107 0.02043142  56 1.190 1.280
## 2B  1.214803 0.02530322  66 1.169 1.280
## 30  1.123057 0.05132128 123 1.059 1.276
## 31  1.118587 0.05592661 189 1.056 1.276
## 32  1.155354 0.06333279  48 1.080 1.310
## 33  1.116869 0.05892612 221 1.055 1.290
## 34  1.122197 0.05436010 152 1.054 1.264
## 35  1.111542 0.05682323 144 1.065 1.360
## 36  1.125521 0.06046029  48 1.075 1.270
## 37  1.110337 0.05508201  92 1.055 1.280
## 38  1.128254 0.05800429 177 1.059 1.300
## 39  1.127958 0.05127334  71 1.074 1.290
## 40  1.127179 0.05689202  95 1.084 1.290
## 41  1.130311 0.07385426  61 1.065 1.370
## 42  1.126707 0.05675164 116 1.058 1.380
## 43  1.121140 0.04529270  50 1.064 1.259
## 44  1.107577 0.05467361 168 1.054 1.309
## 45  1.109138 0.05461472 109 1.059 1.280
## 46  1.143451 0.06226277  51 1.088 1.349
## 47  1.125875 0.05486396  72 1.064 1.260
## 48  1.160552 0.05854033  29 1.089 1.270
## 49  1.100276 0.05361985  98 1.060 1.280
## 50  1.121165 0.04816898 109 1.064 1.255
## 51  1.112917 0.06021292  84 1.054 1.294
## 52  1.140353 0.05351666  51 1.070 1.270
## 53  1.117333 0.05685299  57 1.069 1.260
## 54  1.127604 0.05632384  91 1.057 1.300
## 55  1.138194 0.05615124  31 1.073 1.269
## 56  1.109619 0.05692194 139 1.050 1.363
## 57  1.126015 0.04654046 135 1.071 1.269
## 58  1.124000 0.04258847  55 1.070 1.249
## 59  1.103209 0.04856339 254 1.054 1.299
## 60  1.107188 0.05024559 117 1.053 1.254
## 61  1.123019 0.05837626  53 1.055 1.280
## 62  1.106252 0.05134323 226 1.053 1.275
## 63  1.126151 0.05595877 119 1.055 1.280
## 64  1.133930 0.05878309 115 1.066 1.279
## 65  1.138771 0.04499042  48 1.092 1.230
## 66  1.118852 0.04794887  61 1.063 1.260
## 67  1.131847 0.05300789 144 1.070 1.294
## 68  1.139755 0.05397182 110 1.082 1.280
## 69  1.133135 0.06139539 200 1.036 1.349
## 70  1.125786 0.03885391  56 1.085 1.240
## 71  1.127731 0.05208532 130 1.059 1.280
## 72  1.118250 0.05896052  92 1.065 1.259
## 73  1.133405 0.06593774  84 1.059 1.290
## 74  1.144639 0.06241356 119 1.075 1.314
## 75  1.234873 0.08995545  63 1.086 1.560
## 76  1.111246 0.05379451 167 1.050 1.265
## 77  1.115195 0.05443686 174 1.051 1.290
## 78  1.127166 0.06017729 145 1.057 1.259
## 79  1.091789 0.05931777  71 1.035 1.229
## 80  1.121094 0.05917650 106 1.059 1.320
## 81  1.130545 0.06565298  66 1.075 1.299
## 82  1.123721 0.04342854  61 1.079 1.249
## 83  1.126386 0.05556398 171 1.055 1.430
## 84  1.127789 0.05601815  90 1.067 1.270
## 85  1.121670 0.07478043 109 1.049 1.591
## 86  1.099105 0.06030624  76 1.035 1.230
## 87  1.122583 0.05140279  72 1.079 1.250
## 88  1.127915 0.04638092  82 1.069 1.274
## 89  1.131266 0.05850810  79 1.056 1.280
## 90  1.138722 0.04892829  18 1.089 1.230
## 91  1.110030 0.05621253 134 1.049 1.359
## 92  1.169482 0.05873138  83 1.067 1.290
## 93  1.149670 0.06577435  97 1.075 1.490
## 94  1.124198 0.05347091  91 1.059 1.259
## 95  1.123478 0.05026393 115 1.070 1.249
## 974 1.159000         NA   1 1.159 1.159
## 
## alpha: 0.05 ; Df Error: 9317 
## Critical Value of Studentized Range: 6.068435 
## 
## Harmonic Mean of Cell Sizes  42.74826
## Honestly Significant Difference: 0.0459846 
## 
## Means with the same letter are not significantly different.
## 
## Groups, Treatments and means
## a     75      1.235 
## a     2A      1.226 
## a     2B      1.215 
## b     92      1.169 
## b     06      1.162 
## bc    48      1.161 
## bcd   974     1.159 
## bcd   15      1.157 
## bcd   32      1.155 
## bcd   04      1.152 
## bcd   93      1.15 
## bcd   19      1.147 
## bcd   74      1.145 
## bcd   46      1.143 
## bcd   52      1.14 
## bcd   68      1.14 
## bcd   65      1.139 
## bcd   90      1.139 
## bcd   55      1.138 
## bcd   09      1.137 
## bcd   03      1.137 
## bcd   23      1.135 
## bcd   05      1.134 
## cd    64      1.134 
## cd    73      1.133 
## cd    69      1.133 
## cd    21      1.133 
## cd    01      1.132 
## cd    67      1.132 
## cd    11      1.132 
## cd    89      1.131 
## cd    81      1.131 
## cd    41      1.13 
## cd    24      1.13 
## cd    12      1.13 
## cd    38      1.128 
## cd    13      1.128 
## cd    39      1.128 
## cd    88      1.128 
## cd    84      1.128 
## cd    71      1.128 
## cd    54      1.128 
## cd    25      1.127 
## cd    40      1.127 
## cd    78      1.127 
## cd    42      1.127 
## cd    83      1.126 
## cd    63      1.126 
## cd    57      1.126 
## cd    47      1.126 
## cd    70      1.126 
## cd    36      1.126 
## cd    94      1.124 
## cd    26      1.124 
## cd    58      1.124 
## cd    82      1.124 
## cd    95      1.123 
## cd    07      1.123 
## cd    30      1.123 
## cd    61      1.123 
## cd    87      1.123 
## cd    34      1.122 
## cd    85      1.122 
## cd    50      1.121 
## cd    43      1.121 
## cd    80      1.121 
## cd    08      1.12 
## cd    02      1.119 
## cd    66      1.119 
## cd    31      1.119 
## cd    72      1.118 
## cd    53      1.117 
## cd    10      1.117 
## d     33      1.117 
## d     18      1.116 
## d     77      1.115 
## d     27      1.115 
## d     22      1.115 
## d     51      1.113 
## d     35      1.112 
## d     76      1.111 
## d     14      1.111 
## d     37      1.11 
## d     91      1.11 
## d     56      1.11 
## d     45      1.109 
## d     28      1.108 
## d     44      1.108 
## d     60      1.107 
## d     62      1.106 
## d     59      1.103 
## d     49      1.1 
## d     86      1.099 
## d     79      1.092 
## d     29      1.092 
## d     17      1.08 
## d     16      1.076
bar.group(out$groups, ylim=c(0, 1.8))

res.reg <-glm(data = gazole, prix~lib.reg)
out <- HSD.test(res.reg, "lib.reg", group = TRUE, console = TRUE, main = "Prix par région")
## 
## Study: Prix par région
## 
## HSD Test for prix 
## 
## Mean Square Error:  0.003209802 
## 
## lib.reg,  means
## 
##                                prix        std   r   Min   Max
## Alsace                     1.135272 0.05346560 254 1.070 1.294
## Aquitaine                  1.124878 0.05851376 591 1.055 1.300
## Auvergne                   1.133075 0.05214388 294 1.055 1.280
## Basse-Normandie            1.117021 0.05169664 281 1.055 1.280
## Bourgogne                  1.129479 0.05290585 376 1.056 1.280
## Bretagne                   1.106588 0.05459222 536 1.050 1.430
## Centre                     1.114843 0.05895147 440 1.055 1.370
## Champagne-Ardenne          1.121137 0.05436990 240 1.054 1.294
## Corse                      1.219992 0.02378398 122 1.169 1.280
## Franche-Comté              1.127962 0.04440962 237 1.074 1.290
## Haute-Normandie            1.112566 0.05575050 265 1.050 1.272
## Île-de-France              1.135378 0.06773668 902 1.049 1.560
## Languedoc-Roussillon       1.126167 0.05467494 444 1.054 1.276
## La Réunion                 1.159000         NA   1 1.159 1.159
## Limousin                   1.133855 0.05333164 159 1.079 1.279
## Lorraine                   1.128015 0.05009940 339 1.057 1.300
## Midi-Pyrénées              1.130147 0.05583528 586 1.056 1.349
## Nord-Pas-de-Calais         1.104642 0.04986228 480 1.053 1.299
## Pays de la Loire           1.112078 0.06045626 524 1.049 1.591
## Picardie                   1.115367 0.05451723 308 1.053 1.320
## Poitou-Charentes           1.086161 0.05847527 322 1.011 1.300
## Provence-Alpes-Côte d'Azur 1.136059 0.05795672 750 1.046 1.430
## Rhône-Alpes                1.131216 0.05958438 965 1.036 1.380
## 
## alpha: 0.05 ; Df Error: 9393 
## Critical Value of Studentized Range: 5.114829 
## 
## Harmonic Mean of Cell Sizes  21.56054
## Honestly Significant Difference: 0.06240803 
## 
## Means with the same letter are not significantly different.
## 
## Groups, Treatments and means
## a     Corse                           1.22 
## ab    La Réunion                      1.159 
## b     Provence-Alpes-Côte d'Azur      1.136 
## b     Île-de-France                   1.135 
## b     Alsace                          1.135 
## b     Limousin                        1.134 
## b     Auvergne                        1.133 
## b     Rhône-Alpes                     1.131 
## b     Midi-Pyrénées                   1.13 
## b     Bourgogne                       1.129 
## b     Lorraine                        1.128 
## b     Franche-Comté                   1.128 
## b     Languedoc-Roussillon            1.126 
## b     Aquitaine                       1.125 
## b     Champagne-Ardenne               1.121 
## b     Basse-Normandie                 1.117 
## b     Picardie                        1.115 
## b     Centre                          1.115 
## b     Haute-Normandie                 1.113 
## b     Pays de la Loire                1.112 
## b     Bretagne                        1.107 
## b     Nord-Pas-de-Calais              1.105 
## b     Poitou-Charentes                1.086
par(mar=c(12,5,1,1))
bar.group(out$groups, ylim=c(0, 1.8), las=2)