library(XML)
library(ggplot2)
library(maps)
library(scales)
library(RColorBrewer)
library(plyr)
##
## Attaching package: 'plyr'
##
## L'objet suivant est masqué from 'package:maps':
##
## ozone
date.available <- Sys.Date() - 7
day <- format(date.available, "%Y%m%d")
baseurl <- "http://donnees.roulez-eco.fr/opendata/jour/"
# Récupération des données
temp <- tempfile()
download.file(paste0(baseurl, day), temp)
xmlsource <- unzip(temp, paste0("PrixCarburants_quotidien_", day, ".xml"))
unlink(temp)
parsed <- xmlParse(xmlsource)
#parsed <- xmlParse("~/R/PrixCarburants_quotidien_20150902.xml")
gazole <- data.frame(
id = xpathSApply(parsed, "//prix[@nom='Gazole']/../@id"),
cp = xpathSApply(parsed, "//prix[@nom='Gazole']/../@cp"),
pop = xpathSApply(parsed, "//prix[@nom='Gazole']/../@pop"),
ville = xpathSApply(parsed, "//prix[@nom='Gazole']/../ville/text()", xmlValue),
adresse = xpathSApply(parsed, "//prix[@nom='Gazole']/../adresse/text()", xmlValue),
latitude = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole']/../@latitude")) / 100000,
longitude = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole']/../@longitude")) / 100000,
prix = as.numeric(xpathSApply(parsed, "//prix[@nom='Gazole' and position()=1]/@valeur")) / 1000,
date = as.Date(xpathSApply(parsed, "//prix[@nom='Gazole' and position()=1]/@maj"))
)
# Ajout départements. Cas particuliers : Corse et COM
gazole$dep <- ifelse(substr(gazole$cp, 1, 2) == "97" | substr(gazole$cp, 1, 2) == "20",
substr(gazole$cp, 1, 3),
substr(gazole$cp, 1, 2)
)
gazole$dep[which(gazole$dep == "200" | gazole$dep == "201")] <- "2A"
gazole$dep[which(gazole$dep == "202" | gazole$dep == "206")] <- "2B"
gazole$dep <- as.factor(gazole$dep)
# Régions
dep <- read.table("http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2015/txt/depts2015.txt",
fill = TRUE, header = TRUE, fileEncoding = "windows-1252", sep="\t", quote="")
reg <- read.table("http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2015/txt/reg2015.txt",
fill = TRUE, header = TRUE, fileEncoding = "windows-1252", sep="\t", quote="")
dep$REGION <- as.factor(dep$REGION)
dep <- rename(dep, c("DEP" = "dep", "NCCENR" = "lib.dep"))
reg <- rename(reg, c("NCCENR" = "lib.reg"))
gazole <- join(gazole, dep, by = "dep")
gazole <- join(gazole, reg, by = "REGION")
# Suppression données très anciennes (avant début de l'année)
gazole <- subset(gazole, date >= paste0(substr(day, 1, 4), "-01-01"))
ancien <- data.frame(j = as.numeric(date.available - gazole$date))
ggplot(ancien, aes(x=j)) +
geom_histogram() +
ggtitle("Gazole - Ancienneté des mises à jour") +
xlab("Jours") +
ylab("N")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

quantile(ancien$j)
## 0% 25% 50% 75% 100%
## 0 0 3 8 259
# Suppression données anciennes (plus d'un mois)
gazole <- subset(gazole, date >= (date.available - 30))
ggplot(gazole, aes(x=date)) +
geom_histogram() +
ggtitle("Gazole - Date des mises à jour (sur un mois)") +
xlab("Date") +
ylab("N")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

quantile(ancien$j)
## 0% 25% 50% 75% 100%
## 0 0 3 8 259
# Suppression Réunion (et mauvaises loc ?)
gazole.metro <- subset(gazole, longitude < 10 & latitude > 40 & dep != "974")
# Moyenne prix et intervalle de confiance
nrow(gazole)
## [1] 9416
mean(gazole.metro$prix)
## [1] 1.12452
confint(lm(gazole.metro$prix ~ 1))
## 2.5 % 97.5 %
## (Intercept) 1.123328 1.125712
# GLM : différences entre départements
res <- glm(data = gazole, prix~dep+pop)
summary(res)
##
## Call:
## glm(formula = prix ~ dep + pop, data = gazole)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.16385 -0.03364 -0.01873 0.03180 0.47373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2430377 0.0052718 235.792 < 2e-16 ***
## dep02 -0.0106877 0.0071693 -1.491 0.136060
## dep03 0.0105272 0.0073487 1.433 0.152028
## dep04 0.0207560 0.0087788 2.364 0.018083 *
## dep05 0.0109675 0.0095256 1.151 0.249610
## dep06 0.0365456 0.0062638 5.834 5.58e-09 ***
## dep07 -0.0021501 0.0081469 -0.264 0.791846
## dep08 -0.0058474 0.0087797 -0.666 0.505419
## dep09 0.0141406 0.0092462 1.529 0.126215
## dep10 -0.0118953 0.0079645 -1.494 0.135333
## dep11 -0.0035889 0.0073209 -0.490 0.623984
## dep12 0.0052542 0.0071948 0.730 0.465242
## dep13 -0.0016685 0.0056515 -0.295 0.767821
## dep14 -0.0145767 0.0065701 -2.219 0.026536 *
## dep15 0.0341395 0.0086475 3.948 7.94e-05 ***
## dep16 -0.0474384 0.0078831 -6.018 1.84e-09 ***
## dep17 -0.0476883 0.0066522 -7.169 8.14e-13 ***
## dep18 -0.0132953 0.0079645 -1.669 0.095091 .
## dep19 0.0158401 0.0079221 1.999 0.045586 *
## dep21 -0.0040978 0.0066669 -0.615 0.538798
## dep22 -0.0096065 0.0066111 -1.453 0.146231
## dep23 0.0115380 0.0108149 1.067 0.286062
## dep24 0.0053851 0.0071019 0.758 0.448317
## dep25 0.0001585 0.0070151 0.023 0.981979
## dep26 -0.0117037 0.0067918 -1.723 0.084883 .
## dep27 -0.0156417 0.0068970 -2.268 0.023358 *
## dep28 -0.0253806 0.0075886 -3.345 0.000827 ***
## dep29 -0.0315465 0.0063616 -4.959 7.21e-07 ***
## dep2A 0.1029913 0.0081478 12.640 < 2e-16 ***
## dep2B 0.0916872 0.0077295 11.862 < 2e-16 ***
## dep30 -0.0039588 0.0065182 -0.607 0.543633
## dep31 -0.0121426 0.0059589 -2.038 0.041604 *
## dep32 0.0322383 0.0085844 3.755 0.000174 ***
## dep33 -0.0149292 0.0057989 -2.575 0.010054 *
## dep34 -0.0072301 0.0062186 -1.163 0.244999
## dep35 -0.0124069 0.0062923 -1.972 0.048666 *
## dep36 -0.0050901 0.0085825 -0.593 0.553141
## dep37 -0.0205999 0.0070144 -2.937 0.003324 **
## dep38 -0.0016368 0.0060324 -0.271 0.786134
## dep39 -0.0002252 0.0075563 -0.030 0.976225
## dep40 -0.0047732 0.0069540 -0.686 0.492478
## dep41 -0.0045999 0.0079224 -0.581 0.561505
## dep42 -0.0026118 0.0066093 -0.395 0.692730
## dep43 -0.0043743 0.0084635 -0.517 0.605280
## dep44 -0.0191075 0.0060945 -3.135 0.001723 **
## dep45 -0.0216796 0.0067112 -3.230 0.001241 **
## dep46 0.0156323 0.0084058 1.860 0.062958 .
## dep47 -0.0005720 0.0075250 -0.076 0.939411
## dep48 0.0333007 0.0103524 3.217 0.001301 **
## dep49 -0.0289588 0.0068971 -4.199 2.71e-05 ***
## dep50 -0.0041511 0.0067125 -0.618 0.536321
## dep51 -0.0187650 0.0071932 -2.609 0.009103 **
## dep52 0.0054801 0.0084056 0.652 0.514446
## dep53 -0.0120942 0.0080985 -1.493 0.135373
## dep54 -0.0021005 0.0070353 -0.299 0.765275
## dep55 0.0073408 0.0100848 0.728 0.466685
## dep56 -0.0134971 0.0063413 -2.128 0.033326 *
## dep57 -0.0015426 0.0063805 -0.242 0.808969
## dep58 -0.0012962 0.0081956 -0.158 0.874334
## dep59 -0.0246285 0.0056737 -4.341 1.43e-05 ***
## dep60 -0.0190027 0.0065965 -2.881 0.003977 **
## dep61 -0.0068850 0.0082967 -0.830 0.406648
## dep62 -0.0237618 0.0057778 -4.113 3.95e-05 ***
## dep63 -0.0030110 0.0065689 -0.458 0.646690
## dep64 0.0056006 0.0066234 0.846 0.397812
## dep65 0.0156550 0.0085844 1.824 0.068235 .
## dep66 -0.0062293 0.0079234 -0.786 0.431775
## dep67 0.0012363 0.0062902 0.197 0.844191
## dep68 0.0133681 0.0066969 1.996 0.045943 *
## dep69 0.0022243 0.0058986 0.377 0.706121
## dep70 0.0026699 0.0081478 0.328 0.743158
## dep71 0.0009250 0.0064353 0.144 0.885705
## dep72 -0.0152938 0.0070144 -2.180 0.029257 *
## dep73 -0.0011322 0.0071934 -0.157 0.874938
## dep74 0.0124531 0.0065686 1.896 0.058012 .
## dep75 0.1117572 0.0078432 14.249 < 2e-16 ***
## dep76 -0.0147427 0.0061019 -2.416 0.015708 *
## dep77 -0.0168801 0.0060520 -2.789 0.005295 **
## dep78 -0.0013494 0.0062844 -0.215 0.829983
## dep79 -0.0414613 0.0075560 -5.487 4.19e-08 ***
## dep80 -0.0099409 0.0067585 -1.471 0.141360
## dep81 0.0037956 0.0077281 0.491 0.623332
## dep82 -0.0052923 0.0079223 -0.668 0.504135
## dep83 -0.0023402 0.0060728 -0.385 0.699976
## dep84 -0.0006568 0.0070568 -0.093 0.925848
## dep85 -0.0058469 0.0067118 -0.871 0.383699
## dep86 -0.0303222 0.0074041 -4.095 4.25e-05 ***
## dep87 -0.0055292 0.0075245 -0.735 0.462462
## dep88 0.0018739 0.0072436 0.259 0.795875
## dep89 -0.0055120 0.0073213 -0.753 0.451547
## dep90 0.0089441 0.0126052 0.710 0.477999
## dep91 -0.0193505 0.0063907 -3.028 0.002469 **
## dep92 0.0449213 0.0072192 6.222 5.10e-10 ***
## dep93 0.0240817 0.0069168 3.482 0.000501 ***
## dep94 -0.0041893 0.0070356 -0.595 0.551559
## dep95 -0.0043596 0.0066284 -0.658 0.510741
## dep974 0.0358842 0.0497715 0.721 0.470941
## popN -0.0565786 0.0287952 -1.965 0.049460 *
## popR -0.1199219 0.0024779 -48.397 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.002454652)
##
## Null deviance: 32.653 on 9415 degrees of freedom
## Residual deviance: 22.870 on 9317 degrees of freedom
## AIC: -29766
##
## Number of Fisher Scoring iterations: 2
# Graphiques boxplot prix par région
ggplot(gazole, aes(lib.reg, prix)) +
geom_boxplot() +
stat_summary(fun.y = mean, geom = "point") +
ggtitle(paste("Gazole", date.available)) +
xlab("Région") +
ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(gazole, aes(x=reorder(lib.reg, prix, FUN=median), prix)) +
geom_boxplot(notch = TRUE) +
stat_summary(fun.y = mean, geom = "point") +
ggtitle(paste("Gazole", date.available)) +
xlab("Région") +
ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graphiques boxplot prix par département
ggplot(gazole, aes(dep, prix)) +
geom_boxplot() +
stat_summary(fun.y = mean, geom = "point") +
ggtitle(paste("Gazole", date.available)) +
xlab("Département") +
ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
theme(axis.text.x=element_text(angle = 90, hjust = 0))

ggplot(gazole, aes(x=reorder(dep, prix, FUN=median), prix)) +
geom_boxplot(notch = TRUE) +
stat_summary(fun.y = mean, geom = "point") +
ggtitle(paste("Gazole", date.available)) +
xlab("Département") +
ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
theme(axis.text.x=element_text(angle = 90, hjust = 0))
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

# densité route/autoroute
ggplot(subset(gazole, gazole$pop !="N"), aes(x = prix, fill = pop)) +
#geom_histogram() +
ggtitle(paste("Gazole", date.available)) +
geom_density(alpha = 0.4) +
xlab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
ylab("N") +
scale_fill_manual(name="Localisation",
values=c("blue", "red"),
breaks=c("A", "R"),
labels=c("Autoroute", "Route"))

# Carte
france.map <- map_data("france")
ggplot(france.map, aes(long, lat)) +
geom_polygon(aes(group = group), fill = NA, colour = "grey60") +
geom_point(data = gazole.metro, aes(longitude, latitude, color = cut(prix, quantile(prix, seq(0,1,0.2))))) +
scale_colour_manual("Prix (€)", values = rev(brewer.pal(5, "RdBu")), guide = guide_legend(reverse = TRUE)) +
coord_fixed(ratio = 1.45) +
ggtitle(paste("Gazole", date.available)) +
xlab("") +
ylab("")

nstations.dep <- aggregate(gazole$id, by = list(gazole$dep), FUN = length)
colnames(nstations.dep) <- c("dep", "n")
nstations.dep[order(nstations.dep$n),]
## dep n
## 97 974 1
## 91 90 18
## 22 23 26
## 49 48 29
## 56 55 31
## 5 05 36
## 9 09 39
## 4 04 45
## 8 08 45
## 15 15 47
## 33 32 48
## 37 36 48
## 66 65 48
## 44 43 50
## 47 46 51
## 53 52 51
## 62 61 53
## 59 58 55
## 7 07 56
## 29 2A 56
## 71 70 56
## 54 53 57
## 10 10 60
## 18 18 60
## 19 19 61
## 42 41 61
## 67 66 61
## 83 82 61
## 16 16 62
## 76 75 63
## 30 2B 66
## 82 81 66
## 27 28 70
## 40 39 71
## 80 79 71
## 48 47 72
## 88 87 72
## 87 86 76
## 3 03 78
## 11 11 79
## 90 89 79
## 89 88 82
## 93 92 83
## 12 12 84
## 52 51 84
## 74 73 84
## 2 02 85
## 23 24 88
## 85 84 90
## 55 54 91
## 95 94 91
## 24 25 92
## 38 37 92
## 73 72 92
## 41 40 95
## 94 93 97
## 26 27 98
## 50 49 98
## 25 26 104
## 81 80 106
## 1 01 109
## 46 45 109
## 51 50 109
## 86 85 109
## 69 68 110
## 20 21 112
## 17 17 113
## 65 64 115
## 96 95 115
## 21 22 116
## 43 42 116
## 61 60 117
## 14 14 119
## 64 63 119
## 75 74 119
## 31 30 123
## 72 71 130
## 92 91 134
## 58 57 135
## 28 29 137
## 57 56 139
## 36 35 144
## 68 67 144
## 79 78 145
## 6 06 147
## 35 34 152
## 77 76 167
## 45 44 168
## 84 83 171
## 78 77 174
## 39 38 177
## 32 31 189
## 70 69 200
## 34 33 221
## 63 62 226
## 60 59 254
## 13 13 261
# Ain
gazole.01 <- subset(gazole.metro, gazole.metro$dep == "01")
mean(gazole.metro$prix)
## [1] 1.12452
mean(gazole.01$prix)
## [1] 1.131917
comp1 <- rbind(data.frame(terr = "France", prix = gazole.metro$prix), data.frame(terr = "Ain", prix = gazole.01$prix))
res.01 <- aov(data=comp1, prix ~ terr)
summary(res.01)
## Df Sum Sq Mean Sq F value Pr(>F)
## terr 1 0.01 0.005896 1.697 0.193
## Residuals 9495 32.99 0.003474
# les 25 % de stations les moins chères du dép (hors autoroute)
stmp <- subset(gazole.01, prix < quantile(subset(gazole.01, pop == "R")$prix)[2])
stmp[order(stmp$prix),]
## id cp pop ville adresse
## 16 1120004 01120 R MONTLUEL Cours de la Portelle
## 17 1120005 01120 R LA BOISSE Route de Thil
## 103 1700004 01700 R BEYNOST ZAC des Beterses
## 95 1600001 01700 R MIRIBEL Rue du Figuier
## 96 1600002 01600 R TREVOUX 324 Route de Lyon
## 106 1800001 01800 R MEXIMIEUX 47 Route de Lyon
## 97 1600003 01600 R MASSIEUX Avenue Lavoisier
## 10 1090001 01090 R MONTCEAUX LE GRAND RIVOLET
## 92 1570001 01570 R FEILLENS Grande Rue
## 70 1400005 01400 R CHATILLON SUR CHALARONNE Avenue Jean Jaurès
## 1 1000001 01000 R SAINT-DENIS-LèS-BOURG 596 AVENUE DE TREVOUX
## 2 1000002 01000 R BOURG-EN-BRESSE 16 Avenue de Marboz
## 5 1000007 01000 R Bourg-en-Bresse Avenue Amédée Mercier
## 6 1000008 01000 R BOURG-EN-BRESSE Bd Charles de Gaulle
## 7 1000009 01000 R Bourg-en-Bresse 56 Rue du Stand
## 29 1170001 01170 R SéGNY RN5
## 38 1210003 01210 R FERNEY-VOLTAIRE Route de Meyrin
## 58 1340003 01340 R Jayat Les Cézilles
## 99 1630003 01630 R SAINT-GENIS-POUILLY 148 Rue des Chalets
## 100 1630004 01630 R PéRON ZA DU PRE MUNNY
## 104 1710001 01710 R THOIRY route de Bellegarde
## 108 1800003 01800 R Meximieux ROUTE DE CHALAMONT
## 102 1700003 01700 R Miribel RN 83
## 56 1340001 01340 R MONTREVEL-EN-BRESSE Rue des Luyers
## 81 1480001 01480 R JASSANS-RIOTTIER Rue Ã\u0089douard Herriot
## latitude longitude prix date dep REGION CHEFLIEU TNCC NCC
## 16 45.85251 5.058720 1.068 2015-09-21 01 82 01053 5 AIN
## 17 45.84681 5.047780 1.068 2015-09-19 01 82 01053 5 AIN
## 103 45.82650 4.998950 1.068 2015-09-19 01 82 01053 5 AIN
## 95 45.82834 4.971830 1.071 2015-09-22 01 82 01053 5 AIN
## 96 45.93472 4.790628 1.071 2015-09-24 01 82 01053 5 AIN
## 106 45.90119 5.190522 1.071 2015-09-24 01 82 01053 5 AIN
## 97 45.90391 4.818722 1.072 2015-09-23 01 82 01053 5 AIN
## 10 46.09636 4.774550 1.074 2015-09-23 01 82 01053 5 AIN
## 92 46.32904 4.884930 1.077 2015-09-22 01 82 01053 5 AIN
## 70 46.11765 4.960360 1.078 2015-09-22 01 82 01053 5 AIN
## 1 46.20114 5.197910 1.079 2015-09-22 01 82 01053 5 AIN
## 2 46.21842 5.227670 1.079 2015-09-22 01 82 01053 5 AIN
## 5 46.20105 5.248910 1.079 2015-09-24 01 82 01053 5 AIN
## 6 46.19942 5.240704 1.079 2015-09-24 01 82 01053 5 AIN
## 7 46.19566 5.229350 1.079 2015-09-22 01 82 01053 5 AIN
## 29 46.29301 6.081496 1.079 2015-09-24 01 82 01053 5 AIN
## 38 46.24370 6.092662 1.079 2015-09-17 01 82 01053 5 AIN
## 58 46.34627 5.126150 1.079 2015-09-22 01 82 01053 5 AIN
## 99 46.26326 6.029500 1.079 2015-09-18 01 82 01053 5 AIN
## 100 46.18322 5.928918 1.079 2015-09-18 01 82 01053 5 AIN
## 104 46.22732 5.992776 1.079 2015-09-17 01 82 01053 5 AIN
## 108 45.91162 5.189714 1.079 2015-09-22 01 82 01053 5 AIN
## 102 45.87147 4.912410 1.080 2015-09-24 01 82 01053 5 AIN
## 56 46.33208 5.122780 1.082 2015-09-24 01 82 01053 5 AIN
## 81 45.98403 4.756940 1.082 2015-09-18 01 82 01053 5 AIN
## lib.dep CHEFLIEU.1 TNCC.1 NCC.1 lib.reg
## 16 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 17 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 103 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 95 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 96 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 106 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 97 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 10 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 92 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 70 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 1 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 2 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 5 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 6 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 7 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 29 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 38 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 58 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 99 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 100 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 104 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 108 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 102 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 56 Ain 69123 0 RHONE-ALPES Rhône-Alpes
## 81 Ain 69123 0 RHONE-ALPES Rhône-Alpes
# Savoie
gazole.73 <- subset(gazole.metro, gazole.metro$dep == "73")
subset(gazole.73, prix < quantile(subset(gazole.73, pop == "R")$prix)[2])
## id cp pop ville
## 7376 73000001 73000 R BASSENS
## 7377 73000002 73000 R CHAMBéRY
## 7378 73000007 73000 R Chambéry
## 7379 73000009 73000 R CHAMBERY
## 7380 73000010 73000 R CHAMBéRY
## 7381 73000012 73000 R CHAMBERY
## 7382 73000013 73000 R CHAMBERY
## 7387 73100005 73100 R Aix-les-Bains
## 7401 73160002 73160 R Cognin
## 7419 73290001 73290 R LA MOTTE-SERVOLEX
## 7427 73330001 73330 R Le Pont-de-Beauvoisin
## 7428 73330002 73330 R Le Pont-de-Beauvoisin
## 7436 73420001 73420 R Drumettaz-Clarafond
## 7437 73420002 73420 R VIVIERS-DU-LAC
## 7438 73420004 73420 R Voglans
## 7443 73490002 73490 R LA RAVOIRE
## 7444 73490003 73490 R LA RAVOIRE
## adresse latitude longitude prix date dep
## 7376 21 RUE CENTRALE 45.57566 5.944094 1.075 2015-09-24 73
## 7377 1097 Avenue des Landiers 45.59483 5.898027 1.075 2015-09-24 73
## 7378 204 Avenue du Grand Verger 45.57977 5.909180 1.075 2015-09-24 73
## 7379 345 Quai du 11 Novembre 45.56727 5.935046 1.084 2015-09-22 73
## 7380 583 Avenue des Landiers 45.58905 5.905130 1.069 2015-09-24 73
## 7381 VOIE RAP. URB. RN 201 45.60964 5.886971 1.075 2015-09-24 73
## 7382 1388 AVENUE DE LYON 45.56332 5.902070 1.077 2015-09-24 73
## 7387 50 AVENUE GD PORT RP HOPITAUX 45.69676 5.907780 1.084 2015-09-24 73
## 7401 Rue de l'Ã\u0089pine 45.56230 5.894290 1.075 2015-09-19 73
## 7419 75 Rue Lavoisier 45.60217 5.875460 1.079 2015-09-19 73
## 7427 ZI LA BARONNIE 45.52954 5.689600 1.059 2015-09-15 73
## 7428 zi Baronnie 45.52956 5.684770 1.069 2015-09-10 73
## 7436 214 Chemin de la Boisière 45.65712 5.910790 1.079 2015-09-16 73
## 7437 La Coua 45.64736 5.893918 1.079 2015-09-16 73
## 7438 Route de l'aéroport 45.63796 5.889880 1.079 2015-09-16 73
## 7443 rue du pré Renaud 45.56279 5.963060 1.079 2015-09-24 73
## 7444 RN 6 CARREFOUR LA TROUSSE 45.56953 5.961659 1.079 2015-09-24 73
## REGION CHEFLIEU TNCC NCC lib.dep CHEFLIEU.1 TNCC.1 NCC.1
## 7376 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7377 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7378 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7379 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7380 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7381 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7382 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7387 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7401 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7419 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7427 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7428 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7436 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7437 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7438 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7443 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## 7444 82 73065 3 SAVOIE Savoie 69123 0 RHONE-ALPES
## lib.reg
## 7376 Rhône-Alpes
## 7377 Rhône-Alpes
## 7378 Rhône-Alpes
## 7379 Rhône-Alpes
## 7380 Rhône-Alpes
## 7381 Rhône-Alpes
## 7382 Rhône-Alpes
## 7387 Rhône-Alpes
## 7401 Rhône-Alpes
## 7419 Rhône-Alpes
## 7427 Rhône-Alpes
## 7428 Rhône-Alpes
## 7436 Rhône-Alpes
## 7437 Rhône-Alpes
## 7438 Rhône-Alpes
## 7443 Rhône-Alpes
## 7444 Rhône-Alpes
# Rhône-Alpes
gazole.ra <- subset(gazole.metro, gazole.metro$REGION == "82")
mean(gazole.metro$prix)
## [1] 1.12452
mean(gazole.ra$prix)
## [1] 1.131214
comp2 <- rbind(data.frame(terr = "France", prix = gazole.metro$prix), data.frame(terr = "RA", prix = gazole.ra$prix))
res.ra <- aov(data=comp2, prix ~ terr)
summary(res.ra)
## Df Sum Sq Mean Sq F value Pr(>F)
## terr 1 0.04 0.03914 11.25 0.000798 ***
## Residuals 10349 36.00 0.00348
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Prix par date de mise à jour depuis un mois
ggplot(gazole, aes(date, prix)) +
geom_point() +
stat_smooth() +
ggtitle("Gazole - Prix par date de mise à jour") +
xlab("Date") +
scale_x_date(labels = date_format("%d %b")) +
ylab(expression(paste("Prix (€⋅", l^"-1", ")")))
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

ggplot(subset(gazole, gazole$pop !="N"), aes(date, prix, color = pop)) +
geom_point(position = position_jitter(w = 0.1)) +
stat_smooth(method = "glm") +
ggtitle("Gazole - Prix par date de mise à jour") +
xlab("Date") +
scale_x_date(labels = date_format("%d %b")) +
ylab(expression(paste("Prix (€⋅", l^"-1", ")"))) +
scale_color_manual(name="Localisation",
values=c("blue", "red"),
breaks=c("A", "R"),
labels=c("Autoroute", "Route"))

# Groupes
library(agricolae)
out <- HSD.test(res, "dep", group = TRUE, console = TRUE, main = "Prix par dép.")
##
## Study: Prix par dép.
##
## HSD Test for prix
##
## Mean Square Error: 0.002454652
##
## dep, means
##
## prix std r Min Max
## 01 1.131917 0.06178384 109 1.068 1.279
## 02 1.119482 0.05335742 85 1.067 1.280
## 03 1.136718 0.04734343 78 1.080 1.249
## 04 1.151867 0.05496305 45 1.066 1.280
## 05 1.134083 0.05161471 36 1.059 1.250
## 06 1.162109 0.06362014 147 1.067 1.330
## 07 1.123107 0.04331352 56 1.075 1.230
## 08 1.119933 0.04010294 45 1.087 1.240
## 09 1.137256 0.04890846 39 1.083 1.250
## 10 1.117217 0.05306219 60 1.067 1.290
## 11 1.131671 0.05984075 79 1.059 1.256
## 12 1.129798 0.04923286 84 1.058 1.250
## 13 1.128123 0.05352591 261 1.046 1.290
## 14 1.110555 0.05140116 119 1.059 1.254
## 15 1.157255 0.04949944 47 1.097 1.266
## 16 1.075677 0.04681312 62 1.030 1.227
## 17 1.079673 0.06111427 113 1.011 1.300
## 18 1.115817 0.05345345 60 1.057 1.280
## 19 1.146820 0.05795156 61 1.091 1.279
## 21 1.132937 0.05461417 112 1.067 1.260
## 22 1.114543 0.06282555 116 1.053 1.430
## 23 1.134654 0.04043458 26 1.099 1.240
## 24 1.129864 0.06045102 88 1.069 1.300
## 25 1.127185 0.04130879 92 1.078 1.254
## 26 1.124096 0.05731594 104 1.065 1.269
## 27 1.114816 0.05915005 98 1.054 1.272
## 28 1.108014 0.05785451 70 1.057 1.300
## 29 1.091569 0.03724300 137 1.059 1.270
## 2A 1.226107 0.02043142 56 1.190 1.280
## 2B 1.214803 0.02530322 66 1.169 1.280
## 30 1.123057 0.05132128 123 1.059 1.276
## 31 1.118587 0.05592661 189 1.056 1.276
## 32 1.155354 0.06333279 48 1.080 1.310
## 33 1.116869 0.05892612 221 1.055 1.290
## 34 1.122197 0.05436010 152 1.054 1.264
## 35 1.111542 0.05682323 144 1.065 1.360
## 36 1.125521 0.06046029 48 1.075 1.270
## 37 1.110337 0.05508201 92 1.055 1.280
## 38 1.128254 0.05800429 177 1.059 1.300
## 39 1.127958 0.05127334 71 1.074 1.290
## 40 1.127179 0.05689202 95 1.084 1.290
## 41 1.130311 0.07385426 61 1.065 1.370
## 42 1.126707 0.05675164 116 1.058 1.380
## 43 1.121140 0.04529270 50 1.064 1.259
## 44 1.107577 0.05467361 168 1.054 1.309
## 45 1.109138 0.05461472 109 1.059 1.280
## 46 1.143451 0.06226277 51 1.088 1.349
## 47 1.125875 0.05486396 72 1.064 1.260
## 48 1.160552 0.05854033 29 1.089 1.270
## 49 1.100276 0.05361985 98 1.060 1.280
## 50 1.121165 0.04816898 109 1.064 1.255
## 51 1.112917 0.06021292 84 1.054 1.294
## 52 1.140353 0.05351666 51 1.070 1.270
## 53 1.117333 0.05685299 57 1.069 1.260
## 54 1.127604 0.05632384 91 1.057 1.300
## 55 1.138194 0.05615124 31 1.073 1.269
## 56 1.109619 0.05692194 139 1.050 1.363
## 57 1.126015 0.04654046 135 1.071 1.269
## 58 1.124000 0.04258847 55 1.070 1.249
## 59 1.103209 0.04856339 254 1.054 1.299
## 60 1.107188 0.05024559 117 1.053 1.254
## 61 1.123019 0.05837626 53 1.055 1.280
## 62 1.106252 0.05134323 226 1.053 1.275
## 63 1.126151 0.05595877 119 1.055 1.280
## 64 1.133930 0.05878309 115 1.066 1.279
## 65 1.138771 0.04499042 48 1.092 1.230
## 66 1.118852 0.04794887 61 1.063 1.260
## 67 1.131847 0.05300789 144 1.070 1.294
## 68 1.139755 0.05397182 110 1.082 1.280
## 69 1.133135 0.06139539 200 1.036 1.349
## 70 1.125786 0.03885391 56 1.085 1.240
## 71 1.127731 0.05208532 130 1.059 1.280
## 72 1.118250 0.05896052 92 1.065 1.259
## 73 1.133405 0.06593774 84 1.059 1.290
## 74 1.144639 0.06241356 119 1.075 1.314
## 75 1.234873 0.08995545 63 1.086 1.560
## 76 1.111246 0.05379451 167 1.050 1.265
## 77 1.115195 0.05443686 174 1.051 1.290
## 78 1.127166 0.06017729 145 1.057 1.259
## 79 1.091789 0.05931777 71 1.035 1.229
## 80 1.121094 0.05917650 106 1.059 1.320
## 81 1.130545 0.06565298 66 1.075 1.299
## 82 1.123721 0.04342854 61 1.079 1.249
## 83 1.126386 0.05556398 171 1.055 1.430
## 84 1.127789 0.05601815 90 1.067 1.270
## 85 1.121670 0.07478043 109 1.049 1.591
## 86 1.099105 0.06030624 76 1.035 1.230
## 87 1.122583 0.05140279 72 1.079 1.250
## 88 1.127915 0.04638092 82 1.069 1.274
## 89 1.131266 0.05850810 79 1.056 1.280
## 90 1.138722 0.04892829 18 1.089 1.230
## 91 1.110030 0.05621253 134 1.049 1.359
## 92 1.169482 0.05873138 83 1.067 1.290
## 93 1.149670 0.06577435 97 1.075 1.490
## 94 1.124198 0.05347091 91 1.059 1.259
## 95 1.123478 0.05026393 115 1.070 1.249
## 974 1.159000 NA 1 1.159 1.159
##
## alpha: 0.05 ; Df Error: 9317
## Critical Value of Studentized Range: 6.068435
##
## Harmonic Mean of Cell Sizes 42.74826
## Honestly Significant Difference: 0.0459846
##
## Means with the same letter are not significantly different.
##
## Groups, Treatments and means
## a 75 1.235
## a 2A 1.226
## a 2B 1.215
## b 92 1.169
## b 06 1.162
## bc 48 1.161
## bcd 974 1.159
## bcd 15 1.157
## bcd 32 1.155
## bcd 04 1.152
## bcd 93 1.15
## bcd 19 1.147
## bcd 74 1.145
## bcd 46 1.143
## bcd 52 1.14
## bcd 68 1.14
## bcd 65 1.139
## bcd 90 1.139
## bcd 55 1.138
## bcd 09 1.137
## bcd 03 1.137
## bcd 23 1.135
## bcd 05 1.134
## cd 64 1.134
## cd 73 1.133
## cd 69 1.133
## cd 21 1.133
## cd 01 1.132
## cd 67 1.132
## cd 11 1.132
## cd 89 1.131
## cd 81 1.131
## cd 41 1.13
## cd 24 1.13
## cd 12 1.13
## cd 38 1.128
## cd 13 1.128
## cd 39 1.128
## cd 88 1.128
## cd 84 1.128
## cd 71 1.128
## cd 54 1.128
## cd 25 1.127
## cd 40 1.127
## cd 78 1.127
## cd 42 1.127
## cd 83 1.126
## cd 63 1.126
## cd 57 1.126
## cd 47 1.126
## cd 70 1.126
## cd 36 1.126
## cd 94 1.124
## cd 26 1.124
## cd 58 1.124
## cd 82 1.124
## cd 95 1.123
## cd 07 1.123
## cd 30 1.123
## cd 61 1.123
## cd 87 1.123
## cd 34 1.122
## cd 85 1.122
## cd 50 1.121
## cd 43 1.121
## cd 80 1.121
## cd 08 1.12
## cd 02 1.119
## cd 66 1.119
## cd 31 1.119
## cd 72 1.118
## cd 53 1.117
## cd 10 1.117
## d 33 1.117
## d 18 1.116
## d 77 1.115
## d 27 1.115
## d 22 1.115
## d 51 1.113
## d 35 1.112
## d 76 1.111
## d 14 1.111
## d 37 1.11
## d 91 1.11
## d 56 1.11
## d 45 1.109
## d 28 1.108
## d 44 1.108
## d 60 1.107
## d 62 1.106
## d 59 1.103
## d 49 1.1
## d 86 1.099
## d 79 1.092
## d 29 1.092
## d 17 1.08
## d 16 1.076
bar.group(out$groups, ylim=c(0, 1.8))

res.reg <-glm(data = gazole, prix~lib.reg)
out <- HSD.test(res.reg, "lib.reg", group = TRUE, console = TRUE, main = "Prix par région")
##
## Study: Prix par région
##
## HSD Test for prix
##
## Mean Square Error: 0.003209802
##
## lib.reg, means
##
## prix std r Min Max
## Alsace 1.135272 0.05346560 254 1.070 1.294
## Aquitaine 1.124878 0.05851376 591 1.055 1.300
## Auvergne 1.133075 0.05214388 294 1.055 1.280
## Basse-Normandie 1.117021 0.05169664 281 1.055 1.280
## Bourgogne 1.129479 0.05290585 376 1.056 1.280
## Bretagne 1.106588 0.05459222 536 1.050 1.430
## Centre 1.114843 0.05895147 440 1.055 1.370
## Champagne-Ardenne 1.121137 0.05436990 240 1.054 1.294
## Corse 1.219992 0.02378398 122 1.169 1.280
## Franche-Comté 1.127962 0.04440962 237 1.074 1.290
## Haute-Normandie 1.112566 0.05575050 265 1.050 1.272
## Île-de-France 1.135378 0.06773668 902 1.049 1.560
## Languedoc-Roussillon 1.126167 0.05467494 444 1.054 1.276
## La Réunion 1.159000 NA 1 1.159 1.159
## Limousin 1.133855 0.05333164 159 1.079 1.279
## Lorraine 1.128015 0.05009940 339 1.057 1.300
## Midi-Pyrénées 1.130147 0.05583528 586 1.056 1.349
## Nord-Pas-de-Calais 1.104642 0.04986228 480 1.053 1.299
## Pays de la Loire 1.112078 0.06045626 524 1.049 1.591
## Picardie 1.115367 0.05451723 308 1.053 1.320
## Poitou-Charentes 1.086161 0.05847527 322 1.011 1.300
## Provence-Alpes-Côte d'Azur 1.136059 0.05795672 750 1.046 1.430
## Rhône-Alpes 1.131216 0.05958438 965 1.036 1.380
##
## alpha: 0.05 ; Df Error: 9393
## Critical Value of Studentized Range: 5.114829
##
## Harmonic Mean of Cell Sizes 21.56054
## Honestly Significant Difference: 0.06240803
##
## Means with the same letter are not significantly different.
##
## Groups, Treatments and means
## a Corse 1.22
## ab La Réunion 1.159
## b Provence-Alpes-Côte d'Azur 1.136
## b Île-de-France 1.135
## b Alsace 1.135
## b Limousin 1.134
## b Auvergne 1.133
## b Rhône-Alpes 1.131
## b Midi-Pyrénées 1.13
## b Bourgogne 1.129
## b Lorraine 1.128
## b Franche-Comté 1.128
## b Languedoc-Roussillon 1.126
## b Aquitaine 1.125
## b Champagne-Ardenne 1.121
## b Basse-Normandie 1.117
## b Picardie 1.115
## b Centre 1.115
## b Haute-Normandie 1.113
## b Pays de la Loire 1.112
## b Bretagne 1.107
## b Nord-Pas-de-Calais 1.105
## b Poitou-Charentes 1.086
par(mar=c(12,5,1,1))
bar.group(out$groups, ylim=c(0, 1.8), las=2)
