Intro

Script com código comentado realizando análise de mudanças bruscas (change-points) numa série temporal com distribuição normal, com emprego de teste de hipótese para média e variância. Para tal, foi utilizado o pacote changepoint com software R.

library(changepoint)
library(tidyverse) # Conjunto de pacotes para manipulacao de dados e plot

Change-Points

Para melhor acompanhamento do script observe os comentários ao longo do code.

set.seed(1)

# Gerando vetor com tres change-point
# TS com distribuicao normal alternando valores de media e variancia
mv1 <- c(rnorm(50, mean = 2, sd = 1), 
         rnorm(50, mean = 3, sd = 3), 
         rnorm(70, mean = 4, sd = 1))

# Estimativa de número máximo de change-points
Q <- floor((length(mv1)/2)) + 1
Q
## [1] 86
# Penalidade na função de max verossimilhança
Penalidade <- "SIC"

# Change-point para media e variancia com:
# Criterio de penalizacao SIC
# Distribuicao normal de dados
mv1.binseg <- changepoint::cpt.meanvar(mv1, method = 'BinSeg', 
                                       Q = Q, penalty = Penalidade)

changepoint::cpts(mv1.binseg)      # locais dos pontos de mudanca signiticativos
## [1]  50 100
changepoint::param.est(mv1.binseg) # estimativa dos parâmetros media e variancia
## $mean
## [1] 2.100448 3.351979 3.885182
## 
## $variance
## [1] 0.6773916 8.2786946 0.8684638
mv1.binseg@pen.value.full # Valores da penalidade para Q segmentos
##  [1] 67.6255798 67.6255798  9.0804455  9.0804455  8.0319029  8.0319029
##  [7]  8.0319029  8.0319029  6.9948185  6.1316917  5.9693947  5.1028660
## [13]  4.0647108  3.8274112  3.8274112  3.8274112  3.8274112  3.8274112
## [19]  3.8274112  3.7836740  3.7836740  3.7836740  3.7836740  3.7753155
## [25]  3.7753155  3.1325866  3.1206390  3.1206390  3.1206390  3.0789866
## [31]  3.0789866  3.0789866  3.0789866  3.0789866  3.0789866  2.5337391
## [37]  2.4352385  2.4172312  2.4172312  2.0869593  2.0387632  2.0387632
## [43]  1.8879680  1.8773159  1.8773159  1.7650534  1.7650534  1.7256170
## [49]  1.6150718  1.6150718  1.5964967  1.5808667  1.4437534  1.3331240
## [55]  1.3331240  0.5776597  0.4385834  0.3443125  0.0000000  0.0000000
## [61]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
## [67]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
## [73]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
## [79]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
## [85]  0.0000000  0.0000000
# observar que os valores caem com o passar da quantidade de pontos
# ou seja, a partir do terceiro ponto, não há incremento de informacao

##
plot_n_segmentos <- mv1.binseg@pen.value.full %>% 
  data.frame()
colnames(plot_n_segmentos) <- Penalidade

plot1 <- plot_n_segmentos %>% 
  dplyr::mutate(Pontos = (1:nrow(plot_n_segmentos))) %>% 
  ggplot2::ggplot() +
  geom_path(aes(x = Pontos, 
                y = SIC)) +
  geom_point(aes(x = Pontos,
                 y = SIC,
                 fill = SIC),
             size = 4, shape = 21, show.legend = F) +
  scale_fill_gradient2(low = "red", high = "blue") +
  theme_classic()

plot1

x <- length(changepoint::cpts(mv1.binseg)) + 5

plot2 <- plot_n_segmentos %>% 
  dplyr::slice(1:x) %>% 
  dplyr::mutate(Pontos = (1:x)) %>% 
  ggplot2::ggplot() +
  geom_path(aes(x = Pontos, 
                y = SIC)) +
  geom_point(aes(x = Pontos,
                 y = SIC,
                 fill = SIC),
             size = 4, shape = 21, show.legend = F) +
  theme_classic()

plot2

##
mv1.binseg@data.set    # Conjunto de dados em uso
## Time Series:
## Start = 1 
## End = 170 
## Frequency = 1 
##   [1]  1.3735462  2.1836433  1.1643714  3.5952808  2.3295078  1.1795316
##   [7]  2.4874291  2.7383247  2.5757814  1.6946116  3.5117812  2.3898432
##  [13]  1.3787594 -0.2146999  3.1249309  1.9550664  1.9838097  2.9438362
##  [19]  2.8212212  2.5939013  2.9189774  2.7821363  2.0745650  0.0106483
##  [25]  2.6198257  1.9438713  1.8442045  0.5292476  1.5218499  2.4179416
##  [31]  3.3586796  1.8972123  2.3876716  1.9461950  0.6229404  1.5850054
##  [37]  1.6057100  1.9406866  3.1000254  2.7631757  1.8354764  1.7466383
##  [43]  2.6969634  2.5566632  1.3112443  1.2925048  2.3645820  2.7685329
##  [49]  1.8876538  2.8811077  4.1943176  1.1639208  4.0233591 -0.3880893
##  [55]  7.2990711  8.9411997  1.8983356 -0.1324039  4.7091589  2.5948362
##  [61] 10.2048533  2.8822800  5.0692181  3.0840065  0.7701804  3.5663769
##  [67] -2.4148759  7.3966646  3.4597600  9.5178350  4.4265286  0.8701607
##  [73]  4.8321791  0.1977071 -0.7609002  3.8743387  1.6701244  3.0033161
##  [79]  3.2230240  1.2314372  1.2939938  2.5944642  6.5342610 -1.5707004
##  [85]  4.7818386  3.9988511  6.1892995  2.0874482  4.1100564  3.8012964
##  [91]  1.3724399  6.6236034  6.4812078  5.1006409  7.7605004  4.6754593
##  [97] -0.8297766  1.2802038 -0.6738378  1.5797981  3.3796333  4.0421159
## [103]  3.0890784  4.1580288  3.3454154  5.7672873  4.7167075  4.9101742
## [109]  4.3841854  5.6821761  3.3642635  3.5383553  5.4322822  3.3493036
## [115]  3.7926193  3.6071921  3.6800071  3.7208867  4.4941883  3.8226695
## [121]  3.4940425  5.3430388  3.7854206  3.8204435  3.8998093  4.7126663
## [127]  3.9264356  3.9623658  3.3183395  3.6757297  4.0601604  3.4111055
## [133]  4.5314962  2.4816059  4.3065579  2.4635502  3.6990239  3.4717201
## [139]  3.3479052  3.9431032  2.0856406  5.1765833  2.3350276  3.5364696
## [145]  2.8840799  3.2491810  6.0871665  4.0173956  2.7136995  2.3593945
## [151]  4.4501871  3.9814402  3.6819316  3.0706379  2.5125397  2.9248077
## [157]  5.0000288  3.3787333  2.6155732  5.8692906  4.4251004  3.7613529
## [163]  5.0584830  4.8864227  3.3807570  6.2061025  3.7449730  2.5755053
## [169]  3.8556004  4.2075383
mv1.binseg@cpttype     # parametros de criterio em uso
## [1] "mean and variance"
mv1.binseg@method      # metodo de segmentacao
## [1] "BinSeg"
mv1.binseg@test.stat   # Distribuicao utilizada no teste
## [1] "Normal"
mv1.binseg@pen.type    # Tipo da penalidade (SIC, BIC, etc)
## [1] "SIC"
mv1.binseg@pen.value   # Valor da penalidade (limiar)
## [1] 15.4074
##
# https://stats.stackexchange.com/a/248872/205888
plot1 + geom_hline(yintercept = mv1.binseg@pen.value, linetype = "dashed", 
                   color = "red", size = 1) +
  annotate("text", x = Q/2, y = (mv1.binseg@pen.value + 2), 
           label = c("Threshold for number of change-points") , 
           size = 5 , fontface = "bold")

plot2 + geom_hline(yintercept = mv1.binseg@pen.value, linetype = "dashed", 
                   color = "red", size = 1) +
  annotate("text", x = (x/2)+1.5, y = (mv1.binseg@pen.value + 3), 
           label = c("Threshold for number of change-points") , 
           size = 5 , fontface = "bold")

# Threshold for number of change-points

mv1.binseg@minseglen   # Quantidade de objetos que podem estar dentro de um mesmo cluster
## [1] 2
mv1.binseg@cpts        # Ponto de mudanca (170 e o fim da serie. Deve ser desconsiderado)
## [1]  50 100 170
mv1.binseg@ncpts.max
## [1] 86
mv1.binseg@param.est   # estimativa dos parâmetros media e variancia
## $mean
## [1] 2.100448 3.351979 3.885182
## 
## $variance
## [1] 0.6773916 8.2786946 0.8684638
mv1.binseg@date        # Data do pacote
## [1] "Fri Aug 24 02:15:16 2018"
mv1.binseg@version     # Versao do pacote
## [1] "2.2.2"
##
plot(mv1.binseg, cpt.width = 3, cpt.col = 'blue')

# df com a série em análise
mv1 <- data.frame(ts = mv1,
                  Obs = (1:length(mv1)))

# Medias em cada segmento/cluster
md <- changepoint::param.est(mv1.binseg)[[1]]

# plots
ggplot2::ggplot(mv1) +
  geom_line(aes(x = Obs, 
                y = ts)) +
  geom_segment(aes(x = 0, 
                   y = md[1], 
                   xend = changepoint::cpts(mv1.binseg)[1], 
                   yend = md[1]), size = 1, col = "blue") +
  geom_segment(aes(x = changepoint::cpts(mv1.binseg)[1], 
                   y = md[2], 
                   xend = changepoint::cpts(mv1.binseg)[2], 
                   yend = md[2]), size = 1, col = "blue") +
  geom_segment(aes(x = changepoint::cpts(mv1.binseg)[2], 
                   y = md[3], 
                   xend = length(mv1$ts), 
                   yend = md[3]), size = 1, col = "blue") +
  theme_classic()

ggplot2::ggplot(mv1) +
  geom_line(aes(x = Obs, 
                y = ts)) +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed", 
             color = "red", size = 1) +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed", 
             color = "red", size = 1) +
  theme_classic()

ggplot2::ggplot(mv1) +
  geom_line(aes(x = Obs, 
                y = ts)) +
  annotate("rect", xmin = 0, xmax = changepoint::cpts(mv1.binseg)[1],
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2) +
  annotate("rect", xmin = changepoint::cpts(mv1.binseg)[1], 
           xmax = changepoint::cpts(mv1.binseg)[2],
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2, fill = "blue") +
  annotate("rect", xmin = changepoint::cpts(mv1.binseg)[2], 
           xmax = length(mv1$ts),
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2, fill = "green") +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed", 
             color = "red", size = 1) +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed", 
             color = "red", size = 1) +
  theme_classic()

ggplot2::ggplot(mv1) +
  geom_line(aes(x = Obs, 
                y = ts)) +
  annotate("rect", xmin = 0, xmax = changepoint::cpts(mv1.binseg)[1],
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2) +
  annotate("rect", xmin = changepoint::cpts(mv1.binseg)[1], 
           xmax = changepoint::cpts(mv1.binseg)[2],
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2, fill = "blue") +
  annotate("rect", xmin = changepoint::cpts(mv1.binseg)[2], 
           xmax = length(mv1$ts),
           ymin = min(mv1$ts), ymax = max(mv1$ts),
           alpha = .2, fill = "green") +
  geom_segment(aes(x = 0, 
                   y = md[1], 
                   xend = changepoint::cpts(mv1.binseg)[1], 
                   yend = md[1]), size = 1) +
  geom_segment(aes(x = changepoint::cpts(mv1.binseg)[1], 
                   y = md[2], 
                   xend = changepoint::cpts(mv1.binseg)[2], 
                   yend = md[2]), size = 1) +
  geom_segment(aes(x = changepoint::cpts(mv1.binseg)[2], 
                   y = md[3], 
                   xend = length(mv1$ts), 
                   yend = md[3]), size = 1) +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed", 
             color = "red", size = 1) +
  geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed", 
             color = "red", size = 1) +
  xlab("Tempo") + ylab("Valore de Y") +
  theme_classic()


Autor:
Brenner Silva