library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library(stats) # Modelos ARIMA
library(e1071) # Assimetria e Curtose
library(fGarch) # Modelos de heterocedasticidade
## Loading required package: timeDate
##
## Attaching package: 'timeDate'
##
## The following objects are masked from 'package:e1071':
##
## kurtosis, skewness
##
## Loading required package: timeSeries
## Loading required package: fBasics
##
##
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
library(tseries) #
library(urca) # Testes de Co-integração e de raízes unitárias
library("randtests")
##
## Attaching package: 'randtests'
##
## The following object is masked from 'package:tseries':
##
## runs.test
library("forecast") # pacote para modelos de series temporais
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following object is masked from 'package:timeSeries':
##
## time<-
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## This is forecast 6.2
library("caret")
## Loading required package: lattice
busao.novo <- read.csv("analises.csv", header=T ,sep=",")
aggregate(duracao ~ rota, busao.novo, mean)
## rota duracao
## 1 0003 44.32137
## 2 0004 66.97883
## 3 0009 118.10055
## 4 0020 75.47311
## 5 0022 71.49215
## 6 003B 49.25674
## 7 004A 63.08690
## 8 0055 130.83750
## 9 0066 67.44947
## 10 0077 80.60924
## 11 0092 124.17293
## 12 0101 95.59682
## 13 0111 100.04074
## 14 0202 83.67443
## 15 0220 74.62922
## 16 0222 81.05355
## 17 0303 87.69010
## 18 0333 94.72258
## 19 0400 60.86355
## 20 0404 94.52087
## 21 0444 93.48219
## 22 0500 60.97748
## 23 0505 105.24415
## 24 0550 91.71273
## 25 0555 104.25737
## 26 0660 65.29057
## 27 0900 82.48807
## 28 0902 135.29959
## 29 0904 82.00000
## 30 0909 76.53721
## 31 090A 85.02852
## 32 090B 85.11761
## 33 0910 77.01067
## 34 0922 93.65654
## 35 0944 86.50760
## 36 0955 105.60477
## 37 202A 41.00194
## 38 245A 103.02946
## 39 245B 88.05284
## 40 263A 87.02372
## 41 263B 82.53700
## 42 300B 60.58034
## 43 900A 74.46154
## 44 900B 70.37500
## 45 903A 85.61243
## 46 903B 125.61508
## 47 903C 104.79953
## 48 904A 75.95238
## 49 944A 89.84527
# resumo do banco de dados
summary(busao.novo)
## rota data hora_inicio hora_fim
## 0909 : 23151 2015-03-02: 1780 13:04:00: 669 13:04:00: 785
## 0077 : 17164 2015-04-13: 1761 13:10:00: 643 13:05:00: 760
## 0092 : 16244 2015-02-10: 1755 11:50:00: 639 13:06:00: 744
## 0404 : 16148 2015-05-26: 1753 13:51:00: 638 13:07:00: 729
## 0444 : 16089 2015-03-11: 1751 7:28:00 : 633 13:03:00: 727
## 0111 : 15438 2015-03-03: 1750 13:03:00: 631 13:08:00: 698
## (Other):345444 (Other) :439128 (Other) :445825 (Other) :445235
## duracao numero_onibus hora_inicio_tb hora_fim_tb
## Min. : 0.00 Min. : 826 8:00:00 : 2655 15:00:00: 2482
## 1st Qu.: 61.00 1st Qu.:40015 11:40:00: 2480 8:40:00 : 2339
## Median : 76.00 Median :40084 12:20:00: 2458 11:40:00: 2323
## Mean : 85.61 Mean :42639 10:00:00: 2443 12:30:00: 2240
## 3rd Qu.: 94.00 3rd Qu.:60111 15:00:00: 2399 12:20:00: 2192
## Max. :1423.00 Max. :90220 12:00:00: 2324 18:30:00: 2163
## (Other) :434919 (Other) :435939
## duracao_tb del operador
## Min. : 3.00 Min. :-204.0 ANTONIO FELICIANO DA SILVA : 2437
## 1st Qu.: 64.00 1st Qu.: -5.0 LEONARDO VASCONCELOS DE LIMA : 2375
## Median : 75.00 Median : 1.0 RAFAEL SILVA RODRIGUES : 2313
## Mean : 74.01 Mean : 11.6 ARIOSVALDO GOMES DA SILVA : 2310
## 3rd Qu.: 84.00 3rd Qu.: 11.0 JAQUERLANDO ROBSON SANTIAGO DE: 2277
## Max. :204.00 Max. :1348.0 ALTAMIR DE ATAIDE LEITE : 2252
## (Other) :435714
## empresa linha pareado
## Borborema : 29515 INTER-AREA: 62079 True:449678
## Cabral :106763 VERMELHA : 61619
## Cruzeiro : 55223 BRANCA : 59530
## INDEFINIDO : 19 AMARELA : 54054
## Nacional :153922 VERDE : 44065
## SJ : 16196 MARROM : 41516
## Transnacional: 88040 (Other) :126815
A seguir observa-se uma série temporal para os atrasos da rota 500 e a partir dela realizar análises descritivas, testes de hipóteses e encontrar o melhor modelo para ajustar a essa série e com base nele gerar previsões para os possíveis próximos atrasos.
Rota.500<-busao.novo[busao.novo$rota=="0500",]
Media.atraso<-Rota.500 %>%
group_by(data) %>%
summarise (media_dia_500 = mean(del))
Rota.500<- Media.atraso$media_dia_500
mean(Rota.500) # Média
## [1] 17.42591
sd(Rota.500) #Desvio Padrão
## [1] 14.45506
skewness(Rota.500, type = 3) # Assimetria
## [1] 5.995961
## attr(,"method")
## [1] "moment"
kurtosis(Rota.500, type = 3) # Curtose
## [1] 61.35573
## attr(,"method")
## [1] "excess"
# Se a série possuir valores missing, coloque na.rm=TRUE
hist(Rota.500)
#Observe no t teste o intervalo de confiança
t.test(Rota.500)
##
## One Sample t-test
##
## data: Rota.500
## t = 18.869, df = 244, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 15.60686 19.24496
## sample estimates:
## mean of x
## 17.42591
Rota.500<- ts(Rota.500,freq=365)
# plot da série Rota-500
plot.ts(Rota.500)
Podemos observar existem oscilações de subida e de queda na série,que podem ser causadas por ruidos(erros de mesunração) ou a componentes não observáveis(componentes modelavéis).
cox.stuart.test(Rota.500)
##
## Cox Stuart test
##
## data: Rota.500
## statistic = 36, n = 122, p-value = 6.91e-06
## alternative hypothesis: non randomness
Foi Realizado dois testes de hipótese, apartir do teste de Cox Stuart foi obtido o p-valor = 0,00000691 < 0,05, logo ao nível de 5% de significância temos evidências que a série possui tendencia.
runs.test(Rota.500)
##
## Runs Test
##
## data: Rota.500
## statistic = -2.1811, runs = 106, n1 = 122, n2 = 122, n = 244,
## p-value = 0.02917
## alternative hypothesis: nonrandomness
O Runs Test também nos informou que a componente tendência estava presente p-valor = 0.02146,o teste de Runs é mais rigoroso se em uma parte dos dados for observado uma subida ou uma descida ele irá acusar tendência por ele ser um teste para aleatoridade
tempo=seq(1:nrow(Media.atraso))
kruskal.test( Rota.500 ~ tempo)
##
## Kruskal-Wallis rank sum test
##
## data: Rota.500 by tempo
## Kruskal-Wallis chi-squared = 244, df = 244, p-value = 0.488
Para sabermos se série tem indícios de possuir ou não a componente sazonal realizamos o teste kruskal.test que nos informou que o p-valor = 0.488> 0,05.Logo ao nível de 5% de significância temos evidências estatisticas de que a série não possui sazonalidade.
#teste de aleatoriadade e estacionaridade
adf.test(Rota.500)
## Warning in adf.test(Rota.500): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Rota.500
## Dickey-Fuller = -4.762, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
acf(Rota.500)
pacf(Rota.500)
Rota.500.per= spec.pgram(Rota.500, taper=0, log="no",main="Periodograma",
ylab="espectro")
Rota.500.per$spec
## [1] 0.637604647 1.890824448 0.545408552 0.099189674 0.319815396
## [6] 0.620844067 0.303719773 0.634057024 0.283770510 0.602763830
## [11] 1.188331843 1.587625724 0.773327856 0.410119879 0.009039154
## [16] 0.314773881 0.021355594 0.594332403 0.123532581 0.320655415
## [21] 0.219504280 0.747531118 0.288047586 0.929133578 0.006677240
## [26] 0.585857689 0.099861536 0.271211945 0.449395718 0.631603352
## [31] 0.334976685 0.224655487 0.516893125 0.303900856 0.332184036
## [36] 0.204443909 0.629221548 0.366408059 0.464499224 0.790149029
## [41] 0.181872201 0.875908871 1.169407403 2.431035250 0.088536735
## [46] 0.683148324 0.070557470 0.368581104 0.250077591 1.327335527
## [51] 0.478074649 0.760511977 0.586845803 0.300223998 0.751297500
## [56] 0.186537415 0.724144570 0.528085338 0.416504456 0.326174628
## [61] 0.089352533 1.036772291 0.500492527 1.272833198 0.384740642
## [66] 0.923331503 0.160652362 1.763950268 0.234787788 0.836768912
## [71] 0.012653083 0.435351181 0.481837429 0.746794817 0.276666930
## [76] 0.590367539 0.502432157 0.148896767 0.819042423 0.197603480
## [81] 0.178807843 0.344358351 0.775152240 0.391884879 1.197659129
## [86] 0.176434926 0.036903732 1.952384942 0.398537366 0.974752068
## [91] 0.203129505 0.128177955 0.092195134 0.515728599 0.452076583
## [96] 0.850597109 0.340014288 1.353196645 0.970255999 0.800671870
## [101] 0.035474344 0.308982918 0.402311039 0.334178649 0.302607891
## [106] 0.779593874 0.921101430 0.436039175 0.066466104 0.772626100
## [111] 0.416451379 1.283530690 1.259560609 0.643557452 0.099541173
## [116] 0.312804345 0.733884124 0.497766146 1.448660601 0.118665051
## [121] 0.123870545 1.402001714 0.076882667 0.984043300 0.372045636
max(Rota.500.per$spec)
## [1] 2.431035
Rota.500.per$freq
## [1] 1.46 2.92 4.38 5.84 7.30 8.76 10.22 11.68 13.14 14.60
## [11] 16.06 17.52 18.98 20.44 21.90 23.36 24.82 26.28 27.74 29.20
## [21] 30.66 32.12 33.58 35.04 36.50 37.96 39.42 40.88 42.34 43.80
## [31] 45.26 46.72 48.18 49.64 51.10 52.56 54.02 55.48 56.94 58.40
## [41] 59.86 61.32 62.78 64.24 65.70 67.16 68.62 70.08 71.54 73.00
## [51] 74.46 75.92 77.38 78.84 80.30 81.76 83.22 84.68 86.14 87.60
## [61] 89.06 90.52 91.98 93.44 94.90 96.36 97.82 99.28 100.74 102.20
## [71] 103.66 105.12 106.58 108.04 109.50 110.96 112.42 113.88 115.34 116.80
## [81] 118.26 119.72 121.18 122.64 124.10 125.56 127.02 128.48 129.94 131.40
## [91] 132.86 134.32 135.78 137.24 138.70 140.16 141.62 143.08 144.54 146.00
## [101] 147.46 148.92 150.38 151.84 153.30 154.76 156.22 157.68 159.14 160.60
## [111] 162.06 163.52 164.98 166.44 167.90 169.36 170.82 172.28 173.74 175.20
## [121] 176.66 178.12 179.58 181.04 182.50
a<-1/(Rota.500.per$freq[44])
365*a
## [1] 5.681818
#sazonalidade semanal
Como a série em estudo apresenta uma componente de tendência, então o processo estocástico gerador da série é não estacionário. Neste caso a série deve passar por d diferenças simples para tornar-se estacionária, e para remover a tendência linearbasta tomar a primeira diferença da série (d=1):
ROTA.500.dif=diff(Rota.500)#Primeira diferença
plot(ROTA.500.dif)
Logo após diferenciar a série observamos que a mesma encontra-se de forma estacionária com seus valores em torno de 0 e com pouca variabilidade.
par(mfrow=c(2,1))
acf(ROTA.500.dif)
pacf(ROTA.500.dif)
modelo<-arima(Rota.500,order=c(5,1,1), seasonal=list(order=c(0,1,1),period=7),fixed=c(0,0,0,0,NA,NA,NA))# modelo
## Warning in arima(Rota.500, order = c(5, 1, 1), seasonal = list(order =
## c(0, : some AR parameters were fixed: setting transform.pars = FALSE
summary(modelo)
##
## Call:
## arima(x = Rota.500, order = c(5, 1, 1), seasonal = list(order = c(0, 1, 1),
## period = 7), fixed = c(0, 0, 0, 0, NA, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 sma1
## 0 0 0 0 0.0573 -1.0342 -1.0011
## s.e. 0 0 0 0 0.0680 0.0285 0.2700
##
## sigma^2 estimated as 194.8: log likelihood = -984.27, aic = 1976.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.8604503 13.72556 7.045278 -18.94975 89.87836 0.6242988
## ACF1
## Training set -0.04835525
A função tsdiag plota os resíduos, muitas vezes padronizados, a função de autocorrelação dos resíduos, e os p-valores.
tsdiag(modelo)
plot(modelo$residuals)
acf(modelo$residuals)
pacf(modelo$residuals)
Box.test(modelo$residuals,lag=1)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 0.57287, df = 1, p-value = 0.4491
Box.test(modelo$residuals,lag=2)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 0.71504, df = 2, p-value = 0.6994
Box.test(modelo$residuals,lag=3)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 0.80594, df = 3, p-value = 0.848
Box.test(modelo$residuals,lag=4)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 1.4921, df = 4, p-value = 0.828
Box.test(modelo$residuals,lag=5)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 1.5019, df = 5, p-value = 0.9129
Box.test(modelo$residuals,lag=6)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 1.9368, df = 6, p-value = 0.9254
Box.test(modelo$residuals,lag=7)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 1.9369, df = 7, p-value = 0.9633
Box.test(modelo$residuals,lag=8)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 1.9509, df = 8, p-value = 0.9825
Box.test(modelo$residuals,lag=9)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 2.9754, df = 9, p-value = 0.9653
Box.test(modelo$residuals,lag=10)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 3.6635, df = 10, p-value = 0.9613
Box.test(modelo$residuals,lag=11)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 4.0622, df = 11, p-value = 0.9681
Box.test(modelo$residuals,lag=12)
##
## Box-Pierce test
##
## data: modelo$residuals
## X-squared = 4.0861, df = 12, p-value = 0.9818
previsoes<-predict(modelo, n.ahead=7)$pred
## Warning in predict.Arima(modelo, n.ahead = 7): MA part of model is not
## invertible
## Warning in predict.Arima(modelo, n.ahead = 7): seasonal MA part of model is
## not invertible
previsoes
## Time Series:
## Start = c(1, 246)
## End = c(1, 252)
## Frequency = 365
## [1] 13.38100 22.10615 15.16303 17.64663 13.58656 17.32351 15.40190
previsoes1 <- forecast(modelo,h=7)# prevê valores futuros, baseados no modelo estimado
## Warning in predict.Arima(object, n.ahead = h): MA part of model is not
## invertible
## Warning in predict.Arima(object, n.ahead = h): seasonal MA part of model is
## not invertible
previsoes1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1.671233 13.38100 -5.400981 32.16298 -15.343562 42.10556
## 1.673973 22.10615 3.313941 40.89836 -6.634053 50.84635
## 1.676712 15.16303 -3.639646 33.96571 -13.593181 43.91924
## 1.679452 17.64663 -1.166263 36.45951 -11.125204 46.41845
## 1.682192 13.58656 -5.236548 32.40967 -15.200901 42.37403
## 1.684932 17.32351 -1.577098 36.22411 -11.582474 46.22949
## 1.687671 15.40190 -3.510064 34.31387 -13.521453 44.32525
plot(previsoes1) # plota o gráfico com a série temporal junto dos valores previstos