Script com código comentado realizando análise de mudanças bruscas (change-points) numa série temporal com distribuição normal, com emprego de teste de hipótese para média e variância. Para tal, foi utilizado o pacote changepoint com software R.
library(changepoint)
library(tidyverse) # Conjunto de pacotes para manipulacao de dados e plotPara melhor acompanhamento do script observe os comentários ao longo do code.
set.seed(1)
# Gerando vetor com tres change-point
# TS com distribuicao normal alternando valores de media e variancia
mv1 <- c(rnorm(50, mean = 2, sd = 1),
rnorm(50, mean = 3, sd = 3),
rnorm(70, mean = 4, sd = 1))
# Estimativa de número máximo de change-points
Q <- floor((length(mv1)/2)) + 1
Q## [1] 86
# Penalidade na função de max verossimilhança
Penalidade <- "SIC"
# Change-point para media e variancia com:
# Criterio de penalizacao SIC
# Distribuicao normal de dados
mv1.binseg <- changepoint::cpt.meanvar(mv1, method = 'BinSeg',
Q = Q, penalty = Penalidade)
changepoint::cpts(mv1.binseg) # locais dos pontos de mudanca signiticativos## [1] 50 100
changepoint::param.est(mv1.binseg) # estimativa dos parâmetros media e variancia## $mean
## [1] 2.100448 3.351979 3.885182
##
## $variance
## [1] 0.6773916 8.2786946 0.8684638
mv1.binseg@pen.value.full # Valores da penalidade para Q segmentos## [1] 67.6255798 67.6255798 9.0804455 9.0804455 8.0319029 8.0319029
## [7] 8.0319029 8.0319029 6.9948185 6.1316917 5.9693947 5.1028660
## [13] 4.0647108 3.8274112 3.8274112 3.8274112 3.8274112 3.8274112
## [19] 3.8274112 3.7836740 3.7836740 3.7836740 3.7836740 3.7753155
## [25] 3.7753155 3.1325866 3.1206390 3.1206390 3.1206390 3.0789866
## [31] 3.0789866 3.0789866 3.0789866 3.0789866 3.0789866 2.5337391
## [37] 2.4352385 2.4172312 2.4172312 2.0869593 2.0387632 2.0387632
## [43] 1.8879680 1.8773159 1.8773159 1.7650534 1.7650534 1.7256170
## [49] 1.6150718 1.6150718 1.5964967 1.5808667 1.4437534 1.3331240
## [55] 1.3331240 0.5776597 0.4385834 0.3443125 0.0000000 0.0000000
## [61] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [67] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [73] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [79] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [85] 0.0000000 0.0000000
# observar que os valores caem com o passar da quantidade de pontos
# ou seja, a partir do terceiro ponto, não há incremento de informacao
##
plot_n_segmentos <- mv1.binseg@pen.value.full %>%
data.frame()
colnames(plot_n_segmentos) <- Penalidade
plot1 <- plot_n_segmentos %>%
dplyr::mutate(Pontos = (1:nrow(plot_n_segmentos))) %>%
ggplot2::ggplot() +
geom_path(aes(x = Pontos,
y = SIC)) +
geom_point(aes(x = Pontos,
y = SIC,
fill = SIC),
size = 4, shape = 21, show.legend = F) +
scale_fill_gradient2(low = "red", high = "blue") +
theme_classic()
plot1x <- length(changepoint::cpts(mv1.binseg)) + 5
plot2 <- plot_n_segmentos %>%
dplyr::slice(1:x) %>%
dplyr::mutate(Pontos = (1:x)) %>%
ggplot2::ggplot() +
geom_path(aes(x = Pontos,
y = SIC)) +
geom_point(aes(x = Pontos,
y = SIC,
fill = SIC),
size = 4, shape = 21, show.legend = F) +
theme_classic()
plot2##
mv1.binseg@data.set # Conjunto de dados em uso## Time Series:
## Start = 1
## End = 170
## Frequency = 1
## [1] 1.3735462 2.1836433 1.1643714 3.5952808 2.3295078 1.1795316
## [7] 2.4874291 2.7383247 2.5757814 1.6946116 3.5117812 2.3898432
## [13] 1.3787594 -0.2146999 3.1249309 1.9550664 1.9838097 2.9438362
## [19] 2.8212212 2.5939013 2.9189774 2.7821363 2.0745650 0.0106483
## [25] 2.6198257 1.9438713 1.8442045 0.5292476 1.5218499 2.4179416
## [31] 3.3586796 1.8972123 2.3876716 1.9461950 0.6229404 1.5850054
## [37] 1.6057100 1.9406866 3.1000254 2.7631757 1.8354764 1.7466383
## [43] 2.6969634 2.5566632 1.3112443 1.2925048 2.3645820 2.7685329
## [49] 1.8876538 2.8811077 4.1943176 1.1639208 4.0233591 -0.3880893
## [55] 7.2990711 8.9411997 1.8983356 -0.1324039 4.7091589 2.5948362
## [61] 10.2048533 2.8822800 5.0692181 3.0840065 0.7701804 3.5663769
## [67] -2.4148759 7.3966646 3.4597600 9.5178350 4.4265286 0.8701607
## [73] 4.8321791 0.1977071 -0.7609002 3.8743387 1.6701244 3.0033161
## [79] 3.2230240 1.2314372 1.2939938 2.5944642 6.5342610 -1.5707004
## [85] 4.7818386 3.9988511 6.1892995 2.0874482 4.1100564 3.8012964
## [91] 1.3724399 6.6236034 6.4812078 5.1006409 7.7605004 4.6754593
## [97] -0.8297766 1.2802038 -0.6738378 1.5797981 3.3796333 4.0421159
## [103] 3.0890784 4.1580288 3.3454154 5.7672873 4.7167075 4.9101742
## [109] 4.3841854 5.6821761 3.3642635 3.5383553 5.4322822 3.3493036
## [115] 3.7926193 3.6071921 3.6800071 3.7208867 4.4941883 3.8226695
## [121] 3.4940425 5.3430388 3.7854206 3.8204435 3.8998093 4.7126663
## [127] 3.9264356 3.9623658 3.3183395 3.6757297 4.0601604 3.4111055
## [133] 4.5314962 2.4816059 4.3065579 2.4635502 3.6990239 3.4717201
## [139] 3.3479052 3.9431032 2.0856406 5.1765833 2.3350276 3.5364696
## [145] 2.8840799 3.2491810 6.0871665 4.0173956 2.7136995 2.3593945
## [151] 4.4501871 3.9814402 3.6819316 3.0706379 2.5125397 2.9248077
## [157] 5.0000288 3.3787333 2.6155732 5.8692906 4.4251004 3.7613529
## [163] 5.0584830 4.8864227 3.3807570 6.2061025 3.7449730 2.5755053
## [169] 3.8556004 4.2075383
mv1.binseg@cpttype # parametros de criterio em uso## [1] "mean and variance"
mv1.binseg@method # metodo de segmentacao## [1] "BinSeg"
mv1.binseg@test.stat # Distribuicao utilizada no teste## [1] "Normal"
mv1.binseg@pen.type # Tipo da penalidade (SIC, BIC, etc)## [1] "SIC"
mv1.binseg@pen.value # Valor da penalidade (limiar)## [1] 15.4074
##
# https://stats.stackexchange.com/a/248872/205888
plot1 + geom_hline(yintercept = mv1.binseg@pen.value, linetype = "dashed",
color = "red", size = 1) +
annotate("text", x = Q/2, y = (mv1.binseg@pen.value + 2),
label = c("Threshold for number of change-points") ,
size = 5 , fontface = "bold")plot2 + geom_hline(yintercept = mv1.binseg@pen.value, linetype = "dashed",
color = "red", size = 1) +
annotate("text", x = (x/2)+1.5, y = (mv1.binseg@pen.value + 3),
label = c("Threshold for number of change-points") ,
size = 5 , fontface = "bold")# Threshold for number of change-points
mv1.binseg@minseglen # Quantidade de objetos que podem estar dentro de um mesmo cluster## [1] 2
mv1.binseg@cpts # Ponto de mudanca (170 e o fim da serie. Deve ser desconsiderado)## [1] 50 100 170
mv1.binseg@ncpts.max## [1] 86
mv1.binseg@param.est # estimativa dos parâmetros media e variancia## $mean
## [1] 2.100448 3.351979 3.885182
##
## $variance
## [1] 0.6773916 8.2786946 0.8684638
mv1.binseg@date # Data do pacote## [1] "Fri Aug 24 02:15:16 2018"
mv1.binseg@version # Versao do pacote## [1] "2.2.2"
##
plot(mv1.binseg, cpt.width = 3, cpt.col = 'blue')# df com a série em análise
mv1 <- data.frame(ts = mv1,
Obs = (1:length(mv1)))
# Medias em cada segmento/cluster
md <- changepoint::param.est(mv1.binseg)[[1]]
# plots
ggplot2::ggplot(mv1) +
geom_line(aes(x = Obs,
y = ts)) +
geom_segment(aes(x = 0,
y = md[1],
xend = changepoint::cpts(mv1.binseg)[1],
yend = md[1]), size = 1, col = "blue") +
geom_segment(aes(x = changepoint::cpts(mv1.binseg)[1],
y = md[2],
xend = changepoint::cpts(mv1.binseg)[2],
yend = md[2]), size = 1, col = "blue") +
geom_segment(aes(x = changepoint::cpts(mv1.binseg)[2],
y = md[3],
xend = length(mv1$ts),
yend = md[3]), size = 1, col = "blue") +
theme_classic()ggplot2::ggplot(mv1) +
geom_line(aes(x = Obs,
y = ts)) +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed",
color = "red", size = 1) +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed",
color = "red", size = 1) +
theme_classic()ggplot2::ggplot(mv1) +
geom_line(aes(x = Obs,
y = ts)) +
annotate("rect", xmin = 0, xmax = changepoint::cpts(mv1.binseg)[1],
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2) +
annotate("rect", xmin = changepoint::cpts(mv1.binseg)[1],
xmax = changepoint::cpts(mv1.binseg)[2],
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2, fill = "blue") +
annotate("rect", xmin = changepoint::cpts(mv1.binseg)[2],
xmax = length(mv1$ts),
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2, fill = "green") +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed",
color = "red", size = 1) +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed",
color = "red", size = 1) +
theme_classic()ggplot2::ggplot(mv1) +
geom_line(aes(x = Obs,
y = ts)) +
annotate("rect", xmin = 0, xmax = changepoint::cpts(mv1.binseg)[1],
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2) +
annotate("rect", xmin = changepoint::cpts(mv1.binseg)[1],
xmax = changepoint::cpts(mv1.binseg)[2],
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2, fill = "blue") +
annotate("rect", xmin = changepoint::cpts(mv1.binseg)[2],
xmax = length(mv1$ts),
ymin = min(mv1$ts), ymax = max(mv1$ts),
alpha = .2, fill = "green") +
geom_segment(aes(x = 0,
y = md[1],
xend = changepoint::cpts(mv1.binseg)[1],
yend = md[1]), size = 1) +
geom_segment(aes(x = changepoint::cpts(mv1.binseg)[1],
y = md[2],
xend = changepoint::cpts(mv1.binseg)[2],
yend = md[2]), size = 1) +
geom_segment(aes(x = changepoint::cpts(mv1.binseg)[2],
y = md[3],
xend = length(mv1$ts),
yend = md[3]), size = 1) +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[1], linetype = "dashed",
color = "red", size = 1) +
geom_vline(xintercept = changepoint::cpts(mv1.binseg)[2], linetype = "dashed",
color = "red", size = 1) +
xlab("Tempo") + ylab("Valore de Y") +
theme_classic()Autor:
Brenner Silva