Las paqueterías necesarias para el análisis:
library(tidyverse)
library(tsibble)
library(feasts)
library(fable)
library(tsibbledata)
library(fpp3)Write a function to compute the mean and standard deviation of a time series, and apply it to the
PBSdata. Plot the series with the highest mean, and the series with the lowest standard deviation.
PBSmedia_desvstd <- function(x){
c(
media = mean(x, na.rm = TRUE),
desv_std = sd(x, na.rm = TRUE)
)
}
# PBS
pbs_features <- PBS %>%
features(Cost, media_desvstd)
pbs_featuresLa serie con la media más alta:
pbs_features %>%
filter(media == max(media))
# Opción 1 (no recomedada)
PBS %>%
filter(Concession == "Concessional",
Type == "Co-payments",
ATC1 == "C",
ATC2 == "C10") %>%
autoplot()Plot variable not specified, automatically selected `.vars = Scripts`
pbs_features %>%
filter(media == max(media)) %>%
inner_join(PBS)Joining, by = c("Concession", "Type", "ATC1", "ATC2")
pbs_features %>%
filter(media == max(media)) %>%
right_join(PBS)Joining, by = c("Concession", "Type", "ATC1", "ATC2")
pbs_features %>%
filter(media == max(media)) %>%
full_join(PBS)Joining, by = c("Concession", "Type", "ATC1", "ATC2")
pbs_joined <- pbs_features %>%
filter(media == max(media)) %>%
left_join(PBS)Joining, by = c("Concession", "Type", "ATC1", "ATC2")
pbs_joined # la tabla es una tibble
# Usando ggplot()
pbs_joined %>%
ggplot(aes(x = Month, y = Cost)) +
geom_line()
# convirtiendo a tsibble y usando autoplot()
pbs_joined %>%
as_tsibble(index = Month, key = Concession:ATC2) %>%
autoplot(Cost)pbs_features %>%
filter(desv_std == min(desv_std))Use
GGally::ggpairs()to look at the relationships between the STL-based features for the tourism data. You might wish to changeseasonal_peak_yearandseasonal_trough_yearto factors. Which is the peak quarter for holidays in each state?
tourism %>%
features(Trips, feat_stl) %>%
select(-Region, -State, -Purpose) %>%
mutate(
seasonal_peak_year = factor(seasonal_peak_year),
seasonal_trough_year = factor(seasonal_trough_year),
) %>%
GGally::ggpairs()Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
plot: [1,1] [-------------------------------] 1% est: 0s
plot: [1,2] [>------------------------------] 2% est: 5s
plot: [1,3] [>------------------------------] 4% est: 7s
plot: [1,4] [=>-----------------------------] 5% est:11s
plot: [1,5] [=>-----------------------------] 6% est:12s
plot: [1,6] [=>-----------------------------] 7% est:11s
plot: [1,7] [==>----------------------------] 9% est:11s
plot: [1,8] [==>----------------------------] 10% est:10s
plot: [1,9] [==>----------------------------] 11% est:10s
plot: [2,1] [===>---------------------------] 12% est: 9s
plot: [2,2] [===>---------------------------] 14% est: 9s
plot: [2,3] [====>--------------------------] 15% est: 9s
plot: [2,4] [====>--------------------------] 16% est: 9s
plot: [2,5] [====>--------------------------] 17% est: 9s
plot: [2,6] [=====>-------------------------] 19% est: 9s
plot: [2,7] [=====>-------------------------] 20% est: 9s
plot: [2,8] [======>------------------------] 21% est: 9s
plot: [2,9] [======>------------------------] 22% est: 9s
plot: [3,1] [======>------------------------] 23% est: 9s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [3,2] [=======>-----------------------] 25% est:10s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [3,3] [=======>-----------------------] 26% est:11s
plot: [3,4] [=======>-----------------------] 27% est:10s
plot: [3,5] [========>----------------------] 28% est:10s
plot: [3,6] [========>----------------------] 30% est:10s
plot: [3,7] [=========>---------------------] 31% est:10s
plot: [3,8] [=========>---------------------] 32% est: 9s
plot: [3,9] [=========>---------------------] 33% est: 9s
plot: [4,1] [==========>--------------------] 35% est: 9s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [4,2] [==========>--------------------] 36% est:10s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [4,3] [==========>--------------------] 37% est:10s
plot: [4,4] [===========>-------------------] 38% est:10s
plot: [4,5] [===========>-------------------] 40% est:10s
plot: [4,6] [============>------------------] 41% est:10s
plot: [4,7] [============>------------------] 42% est:10s
plot: [4,8] [============>------------------] 43% est:10s
plot: [4,9] [=============>-----------------] 44% est: 9s
plot: [5,1] [=============>-----------------] 46% est: 9s
plot: [5,2] [==============>----------------] 47% est: 9s
plot: [5,3] [==============>----------------] 48% est: 8s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [5,4] [==============>----------------] 49% est: 8s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [5,5] [===============>---------------] 51% est: 9s
plot: [5,6] [===============>---------------] 52% est: 8s
plot: [5,7] [===============>---------------] 53% est: 8s
plot: [5,8] [================>--------------] 54% est: 8s
plot: [5,9] [================>--------------] 56% est: 7s
plot: [6,1] [=================>-------------] 57% est: 7s
plot: [6,2] [=================>-------------] 58% est: 7s
plot: [6,3] [=================>-------------] 59% est: 7s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [6,4] [==================>------------] 60% est: 7s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [6,5] [==================>------------] 62% est: 7s
plot: [6,6] [===================>-----------] 63% est: 6s
plot: [6,7] [===================>-----------] 64% est: 6s
plot: [6,8] [===================>-----------] 65% est: 6s
plot: [6,9] [====================>----------] 67% est: 5s
plot: [7,1] [====================>----------] 68% est: 5s
plot: [7,2] [====================>----------] 69% est: 5s
plot: [7,3] [=====================>---------] 70% est: 5s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [7,4] [=====================>---------] 72% est: 5s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [7,5] [======================>--------] 73% est: 5s
plot: [7,6] [======================>--------] 74% est: 4s
plot: [7,7] [======================>--------] 75% est: 4s
plot: [7,8] [=======================>-------] 77% est: 4s
plot: [7,9] [=======================>-------] 78% est: 4s
plot: [8,1] [=======================>-------] 79% est: 3s
plot: [8,2] [========================>------] 80% est: 3s
plot: [8,3] [========================>------] 81% est: 3s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [8,4] [=========================>-----] 83% est: 3s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [8,5] [=========================>-----] 84% est: 3s
plot: [8,6] [=========================>-----] 85% est: 2s
plot: [8,7] [==========================>----] 86% est: 2s
plot: [8,8] [==========================>----] 88% est: 2s
plot: [8,9] [===========================>---] 89% est: 2s
plot: [9,1] [===========================>---] 90% est: 2s
plot: [9,2] [===========================>---] 91% est: 1s
plot: [9,3] [============================>--] 93% est: 1s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [9,4] [============================>--] 94% est: 1s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
plot: [9,5] [============================>--] 95% est: 1s
plot: [9,6] [=============================>-] 96% est: 1s
plot: [9,7] [=============================>-] 98% est: 0s
plot: [9,8] [==============================>] 99% est: 0s
plot: [9,9] [===============================]100% est: 0s
Para obtener cuál fue el trimestre pico en cada estado:
tourism %>%
group_by(State) %>%
summarise(Trips = sum(Trips)) %>%
features(Trips, feat_stl) %>%
select(State, seasonal_peak_year)Use a feature-based approach to look for outlying series in the
PBSdata. What is unusual about the series you identify as “outliers”.
Calcular características (todas las que arroja “feasts”), y omite las series que tengan características faltantes.
PBS_feat <- PBS %>%
features(Cost, feature_set(pkgs = "feasts")) %>%
na.omit()Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period, :
NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period, :
NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period, :
NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period, :
NA/Inf replaced by maximum positive value
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
zero-variance series
Warning: `n_flat_spots()` was deprecated in feasts 0.1.5.
Please use `longest_flat_spot()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
Warning: 2 errors (1 unique) encountered for feature 7
[2] subscript out of bounds
PBS_featEstimamos los componentes principales:
PBS_prcomp <- PBS_feat %>%
select(-Concession, -Type, -ATC1, -ATC2) %>%
prcomp(scale = TRUE) %>%
augment(PBS_feat)
PBS_prcompGraficar los primeros dos componentes principales:
p1 <- PBS_prcomp %>%
unite("serie", Concession:ATC2, sep = "-", remove = FALSE) %>%
ggplot(aes(x = .fittedPC1, y = .fittedPC2, label = serie)) +
geom_point()
p1
plotly::ggplotly(p1)Filtramos para solo quedarnos con las series más inusuales del primer componente principal.
outliers <- PBS_prcomp %>%
filter(.fittedPC1 > 6)
outliers %>%
select(ATC1, ATC2, Type, Concession)Visualizamos las series inusuales:
PBS %>%
semi_join(outliers, by = c("Concession", "Type", "ATC1", "ATC2")) %>%
autoplot(Cost) +
facet_grid(vars(Concession, Type, ATC1, ATC2)) +
labs(title = "Outlying time series in PC space")