#install.packages(c("Factoshiny"))
#install.packages("ggpubr")
#install.packages("rJava")
#install.packages("xlsx")
#install.packages("tseries")
#install.packages("dataseries")
#install.packages("tidyverse")
#install.packages("caschrono")
repertoire_sources <- "/home/user/Documents/formations/oc/Formation Analyst/P9/sources/"
repertoire_images <- "/home/user/Documents/formations/oc/Formation Analyst/P9/images/"
fichier <- paste (repertoire_sources,"tab.csv",sep = "")
data <- read.table(fichier, header=TRUE, sep=',', fileEncoding = "ISO-8859-15")
head(data)
data_max=max(data$Mois)
data_min=min(data$Mois)
cat("La série de données est entre : " , data_min,"et : " , data_max)
df_fr <- subset(data, Territoire == "France")
df_fr <- df_fr[,c("Mois","Consommation.totale")]
sapply(df_fr, function(x) sum(is.na(x)))
df_fr$Mois<-as.Date(as.yearmon(df_fr$Mois, "%Y-%m"))
df_fr %>%
ggplot(aes(x = Mois, y = `Consommation.totale`)) +
geom_line()
ggsave("img/01 - consommationTotale.png", width = 7.29, height = 4.5)
fichier_dju <- paste (repertoire_sources,"dju.csv",sep = "")
dju <- read.csv2(fichier_dju,header = TRUE,sep = ",")
dju$Mois<-as.Date(as.yearmon(dju$Mois, "%Y-%m"))
dju %>%
ggplot(aes(x = Mois, y = `DJU`)) +
geom_line()
ggsave("img/02 - DJU.png", width = 7.29, height = 4.5)
tail(dju,10)
tail(df,10)
df_fr <- subset(df_fr, Mois < "2020-06-01")
dju <- subset(dju, Mois < "2020-06-01")
df <- cbind( df_fr, dju$DJU)
names(df)[names(df) == 'Consommation.totale'] <- 'conso'
names(df)[names(df) == 'dju$DJU'] <- 'dju'
names(df)[names(df) == 'Mois'] <- 'mois'
dju %>%
ggplot(aes(x = Mois, y = `DJU`)) +
geom_line()
df_scaled <- data.frame(df, scale(df[,2:3]))
ggplot(df_scaled, aes(x = mois)) +
geom_line(aes(y = conso.1, color = "conso")) +
geom_line(aes(y = dju.1, color = "dju"))
ggsave("img/03 - consommation et DJU.png", width = 7.29, height = 4.5)
plot(conso~dju, data = df)
model <- lm(conso~dju, data = df)
plot(conso~dju, data = df) + abline(model, col = "red")
png("img/04 - regressionLineaire.png", width = 7.25, height = 4.5, units = 'in', res = 300)
plot(conso~dju, data = df) + abline(model, col = "red")
summary(model)
model$coefficients[2]
df$corrige<-df$conso-model$coefficients[2]*df$dju
ggplot(df, aes(x = mois)) +
geom_line(aes(y = conso, color = "réelle")) +
geom_line(aes(y = corrige, color = "corrigée"))
ggsave("img/05 - consoReelleetCorrigee.png", width = 7.29, height = 4.5)
res<-resid(model)
plot(df$mois,res,main="Résidus") + abline (v = as.Date("2012-08-01") , col = "blue") + abline (v = as.Date("2013-08-01") , col = "blue") + abline (v = as.Date("2014-08-01") , col = "blue") + abline (v = as.Date("2015-08-01") , col = "blue") - abline(h=0,col="red")
consommation <- ts(df$conso, start=c(2012, 1), end=c(2020, 4), frequency = 12)
consommation_dec <- decompose(consommation) #,'additive') je ne vois pas la différence avec ou cette valeur.
plot(consommation_dec)
png("img/05b - boxPlotConsommationsMensuelles.png", width = 7.25, height = 4.5, units = 'in', res = 300)
boxplot(consommation~ cycle(consommation))
boxplot(consommation~ cycle(consommation))
corrigee <- ts(df$corrige, start=c(2012, 1), end=c(2020, 4), frequency = 12)
corrigee_dec <- decompose(corrigee) #,'additive') je ne vois pas la différence avec ou cette valeur.
png("img/05C - decompositionConsoCorrigee.png", width = 7.25, height = 4.5, units = 'in', res = 300)
plot(corrigee_dec)
plot(corrigee_dec)
consommation_cvs = consommation-consommation_dec$seasonal
png("img/06 - consoDessainalisee.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(consommation, consommation_cvs, xlab="t", ylab="consommations",col=c(1,2),lwd=c(1,2))
title("Consommation réelle sans les variations saisonnières")
ts.plot(consommation, consommation_cvs, xlab="t", ylab="consommations",col=c(1,2),lwd=c(1,2))
title("Consommation réelle sans les variations saisonnières")
corrigee_cvs = corrigee-corrigee_dec$seasonal
png("img/07 - consoCorrigeeDessainalisee.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(corrigee, corrigee_cvs, xlab="temps", ylab="consommation",col=c(1,2),lwd=c(1,2))
legend("bottomleft",legend=c("corrigée","désaisonnalisée"),col=c(1,2),lwd=c(1,2))
title("Consommation corrigée des variations saisonnières")
ts.plot(corrigee, corrigee_cvs, xlab="temps", ylab="consommation",col=c(1,2),lwd=c(1,2))
legend("bottomleft",legend=c("corrigée","désaisonnalisée"),col=c(1,2),lwd=c(1,2))
title("Consommation corrigée des variations saisonnières")
Box.test.2(corrigee_dec$random,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(corrigee_dec$random)
howi=ets(corrigee,model="AAA")
summary(howi)
x_tronc=window(corrigee,end=c(2019,4))
x_a_prevoir=window(corrigee,start=c(2019,5))
modelhowi_tronc=ets(x_tronc,model="AAA")
summary(modelhowi_tronc)
pred_modelhowi_tronc=forecast(modelhowi_tronc,h=12,level=95)
predhowi_tronc=pred_modelhowi_tronc$mean
predhowi_l_tronc=ts(pred_modelhowi_tronc$lower,start=c(2019,5),frequency=12)
predhowi_u_tronc=ts(pred_modelhowi_tronc$upper,start=c(2019,5),frequency=12)
png("img/08 - analysePosterioriHW.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(x_a_prevoir,predhowi_tronc,predhowi_l_tronc,predhowi_u_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(3,3,2,2))
legend("topleft",legend=c("X","X_prev"),col=c(1,2,3,3),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Analyse a posteriori : Holt Winters")
ts.plot(x_a_prevoir,predhowi_tronc,predhowi_l_tronc,predhowi_u_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(3,3,2,2))
legend("topleft",legend=c("X","X_prev"),col=c(1,2,3,3),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Analyse a posteriori : Holt Winters")
Box.test.2(pred_modelhowi_tronc$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(pred_modelhowi_tronc$residuals)
rmse_howi=sqrt(mean((x_a_prevoir-predhowi_tronc)^2))
mape_howi=mean(abs(1-predhowi_tronc/x_a_prevoir))*100
cat('RMSE Holt Winters : ',rmse_howi,'\n')
cat('MAPE Hot Winters : ',mape_howi,'\n')
pred_model_howi=forecast(howi,h=12,level=95) # prédiction modèle HW sur séries de 12 avec un niveau de 5%
pred_howi=pred_model_howi$mean # moyenne
pred_howi_l=ts(pred_model_howi$lower,start=c(2020,5),frequency=12) # estimation basse de l'intervalle
pred_howi_u=ts(pred_model_howi$upper,start=c(2020,5),frequency=12) # estimation haute
png("img/09 - previsionHW.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(corrigee,pred_howi,pred_howi_l,pred_howi_u,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : Holt Winters")
ts.plot(corrigee,pred_howi,pred_howi_l,pred_howi_u,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : Holt Winters")
png("img/10 - previsionHW_ZOOM.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(window(corrigee,start=c(2019,5)),pred_howi,pred_howi_l,pred_howi_u,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : Holt Winters - ZOOM ")
ts.plot(window(corrigee,start=c(2019,5)),pred_howi,pred_howi_l,pred_howi_u,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : Holt Winters - ZOOM ")
# la série est-elle stationnaire ?^
stationnaire_test<-adf.test(corrigee,k=12)
stationnaire_test
acf=acf(corrigee,lag.max=36,ylim=c(-1,1))
acf$lag=acf$lag*12
png("img/11 - ACF01 .png", width = 7.25, height = 4.5, units = 'in', res = 300)
acf(corrigee,lag.max=36,ylim=c(-1,1))
y_dif_1_12=diff(corrigee,lag = 12,differences = 1)
acf=acf(y_dif_1_12,lag.max=36,ylim=c(-1,1), plot=FALSE)
acf$lag=acf$lag*12
plot(acf)
png("img/12 - ACF02-différenciée-Lag12 .png", width = 7.25, height = 4.5, units = 'in', res = 300)
acf(y_dif_1_12,lag.max=36,ylim=c(-1,1))
###€ La sortiE ACF de la série différenciée semble pouvoir être interprétée comme un autocorrélogramme simple empirique. On ne voit plus les décroissances lentes précédentes. La série est donc potentiellement stationnaire. On identifiera donc un modèle SARIMA . ## 5.2.2 ACF - La série est-elle stationnaire ?
stationnaire_test<-adf.test(y_dif_1_12)
stationnaire_test
pacf=pacf(y_dif_1_12,lag.max=36,ylim=c(-1,1), plot=FALSE)
pacf$lag=pacf$lag*12
plot(pacf,ylim = c(-1,1))
png("img/13 - PACF01-différenciée-Lag12 .png", width = 7.25, height = 4.5, units = 'in', res = 300)
pacf(y_dif_1_12,lag.max=36,ylim=c(-1,1))
d = ndiffs(corrigee)
D = nsdiffs(corrigee)
cat('d = ',d,' - différentiation en tendances / D = ', D,'différenciations en saisonnalité')
model1=Arima(corrigee,order=c(0,0,0),list(order=c(1,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model1)
t_stat(model1)
Box.test.2(model1$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model1$residuals)
model2=Arima(corrigee,order=c(0,0,1),list(order=c(1,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model2)
t_stat(model2)
Box.test.2(model2$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model2$residuals)
model3=Arima(corrigee,order=c(1,0,1),list(order=c(1,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model3)
t_stat(model3)
Box.test.2(model3$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model3$residuals)
model4=Arima(corrigee,order=c(0,1,1),list(order=c(0,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model4)
t_stat(model4)
Box.test.2(model4$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model4$residuals)
model5=Arima(corrigee,order=c(1,0,1),list(order=c(0,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model5)
t_stat(model5)
Box.test.2(model5$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model5$residuals)
model_auto <- auto.arima(corrigee, stepwise=TRUE, approximation=TRUE, allowdrift=FALSE)
t_stat(model_auto)
Box.test.2(model_auto$residuals,nlag = c(6,12,18,24,30,36), type = "Ljung-Box",decim = 5)
shapiro.test(model_auto$residuals)
model5tronc=Arima(x_tronc,order=c(1,0,1),list(order=c(0,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model5tronc)
t_stat(model5tronc)
Box.test.2(model5tronc$residuals,nlag=c(6,12,18,24,30,36),type="Ljung-Box",decim=5)
shapiro.test(model5tronc$residuals)
model4tronc=Arima(x_tronc,order=c(0,1,1),list(order=c(0,1,1),period=12),include.mean=FALSE,method="CSS-ML")
summary(model3tronc)
t_stat(model4tronc)
Box.test.2(model4tronc$residuals,nlag=c(6,12,18,24,30,36),type="Ljung-Box",decim=5)
shapiro.test(model4tronc$residuals)
pred_model4tronc=forecast(model4tronc,h=12,level=95)
pred4_tronc=pred_model4tronc$mean
pred4_l_tronc=ts(pred_model4tronc$lower,start=c(2019,5),frequency=12)
pred4_u_tronc=ts(pred_model4tronc$upper,start=c(2019,5),frequency=12)
ts.plot(window(x_a_prevoir),pred4_tronc,pred4_l_tronc,pred4_u_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(3,3,2,2))
legend("topleft",legend=c("X","X_prev"),col=c(1,2,3,3),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Analyse a posteriori : SARIMA ZOOM modèle 4 (0,1,1)(0,1,1)[12]")
png("img/14 - Analyse a posteriori SARIMA ZOOM modèle 4.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(window(x_a_prevoir),pred4_tronc,pred4_l_tronc,pred4_u_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(3,3,2,2))
legend("topleft",legend=c("X","X_prev"),col=c(1,2,3,3),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Analyse a posteriori : SARIMA ZOOM modèle 4 (0,1,1)(0,1,1)[12]")
rmse_sarima=sqrt(mean((x_a_prevoir-pred4_tronc)^2))
mape_sarima=mean(abs(1-pred4_tronc/x_a_prevoir))*100
cat('RMSE SARIMA : ',rmse_sarima,'\n')
cat('MAPE SARIMA : ',mape_sarima,'%\n')
pred_model4=forecast(model4,h=12,level=95) # prédiction modèl3 sur séries de 12 avec un niveau de 5%
pred4=pred_model4$mean
pred4_l=ts(pred_model4$lower,start=c(2020,5),frequency=12)
pred4_u=ts(pred_model4$upper,start=c(2020,5),frequency=12) # estimation haute
ts.plot(corrigee,pred4,pred4_l,pred4_u,xlab="temps",ylab="Consommation corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : SARIMA")
ts.plot(window(corrigee,start=c(2018,5)),pred4,pred4_l,pred4_u,xlab="temps",ylab="Conso corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : SARIMA - Zoom ")
png("img/15 - Prédiction SARIMA ZOOM avec modèle 4.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(window(corrigee,start=c(2018,5)),pred4,pred4_l,pred4_u,xlab="temps",ylab="Conso corrigée",col=c(1,2,3,3),lty=c(1,1,2,2),lwd=c(1,3,2,2))
legend("topleft",legend=c("int95%_inf","int95%_sup"),col=c(3,3),lty=c(2,2),lwd=c(2,2))
title("Prévision : SARIMA - Zoom ")
ts.plot(window(x_a_prevoir),pred4_tronc,predhowi_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3),lty=c(1,1,1),lwd=c(3,3,3))
legend("topleft",legend=c("Réelle","SARIMA"),col=c(1,2),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("Holt Winters"),col=c(3),lty=c(1),lwd=c(3))
title("Analyse a posteriori : SARIMA + Holt Winters ")
png("img/16 - Analyse Posteriori SARIMA et Holt-Winters.png", width = 7.25, height = 4.5, units = 'in', res = 300)
ts.plot(window(x_a_prevoir),pred4_tronc,predhowi_tronc,xlab="t",ylab="Consommation corrigée",col=c(1,2,3),lty=c(1,1,1),lwd=c(3,3,3))
legend("topleft",legend=c("Réelle","SARIMA"),col=c(1,2),lty=c(1,1),lwd=c(3,3))
legend("topright",legend=c("Holt Winters"),col=c(3),lty=c(1),lwd=c(3))
title("Analyse a posteriori : SARIMA + Holt Winters ")
cat('RMSE Holt Winters : ',rmse_howi,'\n')
cat('RMSE SARIMA : ',rmse_sarima,'\n')
cat('MAPE Hot Winters : ',mape_howi,'\n')
cat('MAPE SARIMA : ',mape_sarima,'\n')