Las paqueterías necesarias para el análisis:

library(tidyverse)
library(tsibble)
library(feasts)
library(fable)
library(tsibbledata)
library(fpp3)

Ejercicio 1

Write a function to compute the mean and standard deviation of a time series, and apply it to the PBS data. Plot the series with the highest mean, and the series with the lowest standard deviation.

PBS

media_desvstd <- function(x){
  c(
    media    = mean(x, na.rm = TRUE),
    desv_std = sd(x, na.rm = TRUE)
  )
}

# PBS  
pbs_features <- PBS %>% 
  features(Cost, media_desvstd)

pbs_features

La serie con la media más alta:

pbs_features %>% 
  filter(media == max(media))


# Opción 1 (no recomedada)
PBS %>% 
  filter(Concession == "Concessional",
         Type == "Co-payments",
         ATC1 == "C",
         ATC2 == "C10") %>% 
  autoplot()

Plot variable not specified, automatically selected `.vars = Scripts`

pbs_features %>% 
  filter(media == max(media)) %>% 
  inner_join(PBS)

Joining, by = c("Concession", "Type", "ATC1", "ATC2")

pbs_features %>% 
  filter(media == max(media)) %>% 
  right_join(PBS)

Joining, by = c("Concession", "Type", "ATC1", "ATC2")

pbs_features %>% 
  filter(media == max(media)) %>% 
  full_join(PBS)

Joining, by = c("Concession", "Type", "ATC1", "ATC2")

pbs_joined <- pbs_features %>% 
  filter(media == max(media)) %>% 
  left_join(PBS)

Joining, by = c("Concession", "Type", "ATC1", "ATC2")

pbs_joined # la tabla es una tibble


# Usando ggplot()
pbs_joined %>% 
  ggplot(aes(x = Month, y = Cost)) +
  geom_line()


# convirtiendo a tsibble y usando autoplot()
pbs_joined %>% 
  as_tsibble(index = Month, key = Concession:ATC2) %>% 
  autoplot(Cost)

pbs_features %>% 
  filter(desv_std == min(desv_std))

Ejercicio 2

Use GGally::ggpairs() to look at the relationships between the STL-based features for the tourism data. You might wish to change seasonal_peak_year and seasonal_trough_year to factors. Which is the peak quarter for holidays in each state?

tourism %>%
  features(Trips, feat_stl) %>%
  select(-Region, -State, -Purpose) %>%
  mutate(
    seasonal_peak_year = factor(seasonal_peak_year),
    seasonal_trough_year = factor(seasonal_trough_year),
  ) %>%
  GGally::ggpairs()

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

 plot: [1,1] [-------------------------------]  1% est: 0s 
 plot: [1,2] [>------------------------------]  2% est: 5s 
 plot: [1,3] [>------------------------------]  4% est: 7s 
 plot: [1,4] [=>-----------------------------]  5% est:11s 
 plot: [1,5] [=>-----------------------------]  6% est:12s 
 plot: [1,6] [=>-----------------------------]  7% est:11s 
 plot: [1,7] [==>----------------------------]  9% est:11s 
 plot: [1,8] [==>----------------------------] 10% est:10s 
 plot: [1,9] [==>----------------------------] 11% est:10s 
 plot: [2,1] [===>---------------------------] 12% est: 9s 
 plot: [2,2] [===>---------------------------] 14% est: 9s 
 plot: [2,3] [====>--------------------------] 15% est: 9s 
 plot: [2,4] [====>--------------------------] 16% est: 9s 
 plot: [2,5] [====>--------------------------] 17% est: 9s 
 plot: [2,6] [=====>-------------------------] 19% est: 9s 
 plot: [2,7] [=====>-------------------------] 20% est: 9s 
 plot: [2,8] [======>------------------------] 21% est: 9s 
 plot: [2,9] [======>------------------------] 22% est: 9s 
 plot: [3,1] [======>------------------------] 23% est: 9s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [3,2] [=======>-----------------------] 25% est:10s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [3,3] [=======>-----------------------] 26% est:11s 
 plot: [3,4] [=======>-----------------------] 27% est:10s 
 plot: [3,5] [========>----------------------] 28% est:10s 
 plot: [3,6] [========>----------------------] 30% est:10s 
 plot: [3,7] [=========>---------------------] 31% est:10s 
 plot: [3,8] [=========>---------------------] 32% est: 9s 
 plot: [3,9] [=========>---------------------] 33% est: 9s 
 plot: [4,1] [==========>--------------------] 35% est: 9s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [4,2] [==========>--------------------] 36% est:10s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [4,3] [==========>--------------------] 37% est:10s 
 plot: [4,4] [===========>-------------------] 38% est:10s 
 plot: [4,5] [===========>-------------------] 40% est:10s 
 plot: [4,6] [============>------------------] 41% est:10s 
 plot: [4,7] [============>------------------] 42% est:10s 
 plot: [4,8] [============>------------------] 43% est:10s 
 plot: [4,9] [=============>-----------------] 44% est: 9s 
 plot: [5,1] [=============>-----------------] 46% est: 9s 
 plot: [5,2] [==============>----------------] 47% est: 9s 
 plot: [5,3] [==============>----------------] 48% est: 8s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [5,4] [==============>----------------] 49% est: 8s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [5,5] [===============>---------------] 51% est: 9s 
 plot: [5,6] [===============>---------------] 52% est: 8s 
 plot: [5,7] [===============>---------------] 53% est: 8s 
 plot: [5,8] [================>--------------] 54% est: 8s 
 plot: [5,9] [================>--------------] 56% est: 7s 
 plot: [6,1] [=================>-------------] 57% est: 7s 
 plot: [6,2] [=================>-------------] 58% est: 7s 
 plot: [6,3] [=================>-------------] 59% est: 7s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [6,4] [==================>------------] 60% est: 7s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [6,5] [==================>------------] 62% est: 7s 
 plot: [6,6] [===================>-----------] 63% est: 6s 
 plot: [6,7] [===================>-----------] 64% est: 6s 
 plot: [6,8] [===================>-----------] 65% est: 6s 
 plot: [6,9] [====================>----------] 67% est: 5s 
 plot: [7,1] [====================>----------] 68% est: 5s 
 plot: [7,2] [====================>----------] 69% est: 5s 
 plot: [7,3] [=====================>---------] 70% est: 5s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [7,4] [=====================>---------] 72% est: 5s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [7,5] [======================>--------] 73% est: 5s 
 plot: [7,6] [======================>--------] 74% est: 4s 
 plot: [7,7] [======================>--------] 75% est: 4s 
 plot: [7,8] [=======================>-------] 77% est: 4s 
 plot: [7,9] [=======================>-------] 78% est: 4s 
 plot: [8,1] [=======================>-------] 79% est: 3s 
 plot: [8,2] [========================>------] 80% est: 3s 
 plot: [8,3] [========================>------] 81% est: 3s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [8,4] [=========================>-----] 83% est: 3s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [8,5] [=========================>-----] 84% est: 3s 
 plot: [8,6] [=========================>-----] 85% est: 2s 
 plot: [8,7] [==========================>----] 86% est: 2s 
 plot: [8,8] [==========================>----] 88% est: 2s 
 plot: [8,9] [===========================>---] 89% est: 2s 
 plot: [9,1] [===========================>---] 90% est: 2s 
 plot: [9,2] [===========================>---] 91% est: 1s 
 plot: [9,3] [============================>--] 93% est: 1s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [9,4] [============================>--] 94% est: 1s `stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.

 plot: [9,5] [============================>--] 95% est: 1s 
 plot: [9,6] [=============================>-] 96% est: 1s 
 plot: [9,7] [=============================>-] 98% est: 0s 
 plot: [9,8] [==============================>] 99% est: 0s 
 plot: [9,9] [===============================]100% est: 0s

Para obtener cuál fue el trimestre pico en cada estado:

tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  features(Trips, feat_stl) %>%
  select(State, seasonal_peak_year)

Ejercicio 3

Use a feature-based approach to look for outlying series in the PBS data. What is unusual about the series you identify as “outliers”.

Calcular características (todas las que arroja “feasts”), y omite las series que tengan características faltantes.

PBS_feat <- PBS %>%
  features(Cost, feature_set(pkgs = "feasts")) %>%
  na.omit()

Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period,  :
  NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period,  :
  NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period,  :
  NA/Inf replaced by maximum positive value
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period = max(.period,  :
  NA/Inf replaced by maximum positive value
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action,  : 
  zero-variance series
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action,  : 
  zero-variance series
Warning: `n_flat_spots()` was deprecated in feasts 0.1.5.
Please use `longest_flat_spot()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
Warning: 2 errors (1 unique) encountered for feature 7
[2] subscript out of bounds

PBS_feat

Estimamos los componentes principales:

PBS_prcomp <- PBS_feat %>%
  select(-Concession, -Type, -ATC1, -ATC2) %>%
  prcomp(scale = TRUE) %>%
  augment(PBS_feat)

PBS_prcomp

Graficar los primeros dos componentes principales:

p1 <- PBS_prcomp %>% 
   unite("serie", Concession:ATC2, sep = "-", remove = FALSE) %>% 
  ggplot(aes(x = .fittedPC1, y = .fittedPC2, label = serie)) +
  geom_point()

p1


plotly::ggplotly(p1)

Filtramos para solo quedarnos con las series más inusuales del primer componente principal.

outliers <- PBS_prcomp %>%
  filter(.fittedPC1 > 6)
outliers %>%
  select(ATC1, ATC2, Type, Concession)

Visualizamos las series inusuales:

PBS %>%
  semi_join(outliers, by = c("Concession", "Type", "ATC1", "ATC2")) %>%
  autoplot(Cost) +
  facet_grid(vars(Concession, Type, ATC1, ATC2)) +
  labs(title = "Outlying time series in PC space")

LS0tDQp0aXRsZTogIkVqZXJjaWNpb3MgY2FwLiA0Ig0Kc3VidGl0bGU6ICJUaW1lIFNlcmllcyBmZWF0dXJlcyINCmF1dGhvcjogIlBhYmxvIEJlbmF2aWRlcyBIZXJyZXJhIg0Kb3V0cHV0OiANCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQotLS0NCg0KTGFzIHBhcXVldGVyw61hcyBuZWNlc2FyaWFzIHBhcmEgZWwgYW7DoWxpc2lzOg0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHRzaWJibGUpDQpsaWJyYXJ5KGZlYXN0cykNCmxpYnJhcnkoZmFibGUpDQpsaWJyYXJ5KHRzaWJibGVkYXRhKQ0KbGlicmFyeShmcHAzKQ0KYGBgDQoNCg0KDQojIEVqZXJjaWNpbyAxDQoNCj4gV3JpdGUgYSBmdW5jdGlvbiB0byBjb21wdXRlIHRoZSBtZWFuIGFuZCBzdGFuZGFyZCBkZXZpYXRpb24gb2YgYSB0aW1lIHNlcmllcywgYW5kIGFwcGx5IGl0IHRvIHRoZSBgUEJTYCBkYXRhLiBQbG90IHRoZSBzZXJpZXMgd2l0aCB0aGUgaGlnaGVzdCBtZWFuLCBhbmQgdGhlIHNlcmllcyB3aXRoIHRoZSBsb3dlc3Qgc3RhbmRhcmQgZGV2aWF0aW9uLg0KDQpgYGB7cn0NClBCUw0KYGBgDQoNCg0KYGBge3J9DQptZWRpYV9kZXN2c3RkIDwtIGZ1bmN0aW9uKHgpew0KICBjKG1lZGlhICAgID0gbWVhbih4LCBuYS5ybSA9IFRSVUUpLA0KICAgIGRlc3Zfc3RkID0gc2QoeCwgbmEucm0gPSBUUlVFKQ0KICAgICkNCn0NCg0KIyBQQlMgIA0KcGJzX2ZlYXR1cmVzIDwtIFBCUyAlPiUgDQogIGZlYXR1cmVzKENvc3QsIG1lZGlhX2Rlc3ZzdGQpDQoNCnBic19mZWF0dXJlcw0KYGBgDQoNCkxhIHNlcmllIGNvbiBsYSBtZWRpYSBtw6FzIGFsdGE6DQoNCmBgYHtyfQ0KcGJzX2ZlYXR1cmVzICU+JSANCiAgZmlsdGVyKG1lZGlhID09IG1heChtZWRpYSkpDQoNCiMgT3BjacOzbiAxIChubyByZWNvbWVkYWRhKQ0KUEJTICU+JSANCiAgZmlsdGVyKENvbmNlc3Npb24gPT0gIkNvbmNlc3Npb25hbCIsDQogICAgICAgICBUeXBlID09ICJDby1wYXltZW50cyIsDQogICAgICAgICBBVEMxID09ICJDIiwNCiAgICAgICAgIEFUQzIgPT0gIkMxMCIpICU+JSANCiAgYXV0b3Bsb3QoKQ0KYGBgDQoNCmBgYHtyfQ0KcGJzX2ZlYXR1cmVzICU+JSANCiAgZmlsdGVyKG1lZGlhID09IG1heChtZWRpYSkpICU+JSANCiAgaW5uZXJfam9pbihQQlMpDQoNCnBic19mZWF0dXJlcyAlPiUgDQogIGZpbHRlcihtZWRpYSA9PSBtYXgobWVkaWEpKSAlPiUgDQogIHJpZ2h0X2pvaW4oUEJTKQ0KDQpwYnNfZmVhdHVyZXMgJT4lIA0KICBmaWx0ZXIobWVkaWEgPT0gbWF4KG1lZGlhKSkgJT4lIA0KICBmdWxsX2pvaW4oUEJTKQ0KYGBgDQoNCg0KYGBge3J9DQpwYnNfam9pbmVkIDwtIHBic19mZWF0dXJlcyAlPiUgDQogIGZpbHRlcihtZWRpYSA9PSBtYXgobWVkaWEpKSAlPiUgDQogIGxlZnRfam9pbihQQlMpDQoNCnBic19qb2luZWQgIyBsYSB0YWJsYSBlcyB1bmEgdGliYmxlDQoNCiMgVXNhbmRvIGdncGxvdCgpDQpwYnNfam9pbmVkICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gTW9udGgsIHkgPSBDb3N0KSkgKw0KICBnZW9tX2xpbmUoKQ0KDQojIGNvbnZpcnRpZW5kbyBhIHRzaWJibGUgeSB1c2FuZG8gYXV0b3Bsb3QoKQ0KcGJzX2pvaW5lZCAlPiUgDQogIGFzX3RzaWJibGUoaW5kZXggPSBNb250aCwga2V5ID0gQ29uY2Vzc2lvbjpBVEMyKSAlPiUgDQogIGF1dG9wbG90KENvc3QpDQpgYGANCg0KYGBge3J9DQpwYnNfZmVhdHVyZXMgJT4lIA0KICBmaWx0ZXIoZGVzdl9zdGQgPT0gbWluKGRlc3Zfc3RkKSkNCmBgYA0KDQojIEVqZXJjaWNpbyAyDQoNCj4gVXNlIGBHR2FsbHk6OmdncGFpcnMoKWAgdG8gbG9vayBhdCB0aGUgcmVsYXRpb25zaGlwcyBiZXR3ZWVuIHRoZSBTVEwtYmFzZWQgZmVhdHVyZXMgZm9yIHRoZSB0b3VyaXNtIGRhdGEuIFlvdSBtaWdodCB3aXNoIHRvIGNoYW5nZSBgc2Vhc29uYWxfcGVha195ZWFyYCBhbmQgYHNlYXNvbmFsX3Ryb3VnaF95ZWFyYCB0byBmYWN0b3JzLiBXaGljaCBpcyB0aGUgcGVhayBxdWFydGVyIGZvciBob2xpZGF5cyBpbiBlYWNoIHN0YXRlPw0KDQpgYGB7ciwgZmlnLmhlaWdodD0gOCwgZmlnLndpZHRoPSAxMH0NCnRvdXJpc20gJT4lDQogIGZlYXR1cmVzKFRyaXBzLCBmZWF0X3N0bCkgJT4lDQogIHNlbGVjdCgtUmVnaW9uLCAtU3RhdGUsIC1QdXJwb3NlKSAlPiUNCiAgbXV0YXRlKA0KICAgIHNlYXNvbmFsX3BlYWtfeWVhciA9IGZhY3RvcihzZWFzb25hbF9wZWFrX3llYXIpLA0KICAgIHNlYXNvbmFsX3Ryb3VnaF95ZWFyID0gZmFjdG9yKHNlYXNvbmFsX3Ryb3VnaF95ZWFyKSwNCiAgKSAlPiUNCiAgR0dhbGx5OjpnZ3BhaXJzKCkNCmBgYA0KDQpQYXJhIG9idGVuZXIgY3XDoWwgZnVlIGVsIHRyaW1lc3RyZSBwaWNvIGVuIGNhZGEgZXN0YWRvOg0KDQpgYGB7cn0NCnRvdXJpc20gJT4lDQogIGdyb3VwX2J5KFN0YXRlKSAlPiUNCiAgc3VtbWFyaXNlKFRyaXBzID0gc3VtKFRyaXBzKSkgJT4lDQogIGZlYXR1cmVzKFRyaXBzLCBmZWF0X3N0bCkgJT4lDQogIHNlbGVjdChTdGF0ZSwgc2Vhc29uYWxfcGVha195ZWFyKQ0KYGBgDQoNCiMgRWplcmNpY2lvIDMNCg0KPiBVc2UgYSBmZWF0dXJlLWJhc2VkIGFwcHJvYWNoIHRvIGxvb2sgZm9yIG91dGx5aW5nIHNlcmllcyBpbiB0aGUgYFBCU2AgZGF0YS4gV2hhdCBpcyB1bnVzdWFsIGFib3V0IHRoZSBzZXJpZXMgeW91IGlkZW50aWZ5IGFzIOKAnG91dGxpZXJz4oCdLg0KDQpDYWxjdWxhciBjYXJhY3RlcsOtc3RpY2FzICh0b2RhcyBsYXMgcXVlIGFycm9qYSAiZmVhc3RzIiksIHkgb21pdGUgbGFzIHNlcmllcyBxdWUgdGVuZ2FuIGNhcmFjdGVyw61zdGljYXMgZmFsdGFudGVzLg0KDQpgYGB7cn0NClBCU19mZWF0IDwtIFBCUyAlPiUNCiAgZmVhdHVyZXMoQ29zdCwgZmVhdHVyZV9zZXQocGtncyA9ICJmZWFzdHMiKSkgJT4lDQogIG5hLm9taXQoKQ0KDQpQQlNfZmVhdA0KYGBgDQoNCkVzdGltYW1vcyBsb3MgW2NvbXBvbmVudGVzIHByaW5jaXBhbGVzXShodHRwczovL2VzLndpa2lwZWRpYS5vcmcvd2lraS9BbiVDMyVBMWxpc2lzX2RlX2NvbXBvbmVudGVzX3ByaW5jaXBhbGVzKToNCg0KYGBge3J9DQpQQlNfcHJjb21wIDwtIFBCU19mZWF0ICU+JQ0KICBzZWxlY3QoLUNvbmNlc3Npb24sIC1UeXBlLCAtQVRDMSwgLUFUQzIpICU+JQ0KICBwcmNvbXAoc2NhbGUgPSBUUlVFKSAlPiUNCiAgYXVnbWVudChQQlNfZmVhdCkNCg0KUEJTX3ByY29tcA0KYGBgDQoNCkdyYWZpY2FyIGxvcyBwcmltZXJvcyBkb3MgY29tcG9uZW50ZXMgcHJpbmNpcGFsZXM6DQoNCmBgYHtyfQ0KcDEgPC0gUEJTX3ByY29tcCAlPiUgDQogICB1bml0ZSgic2VyaWUiLCBDb25jZXNzaW9uOkFUQzIsIHNlcCA9ICItIiwgcmVtb3ZlID0gRkFMU0UpICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gLmZpdHRlZFBDMSwgeSA9IC5maXR0ZWRQQzIsIGxhYmVsID0gc2VyaWUpKSArDQogIGdlb21fcG9pbnQoKQ0KDQpwMQ0KDQpwbG90bHk6OmdncGxvdGx5KHAxKQ0KYGBgDQoNCkZpbHRyYW1vcyBwYXJhIHNvbG8gcXVlZGFybm9zIGNvbiBsYXMgc2VyaWVzIG3DoXMgaW51c3VhbGVzIGRlbCBwcmltZXIgY29tcG9uZW50ZSBwcmluY2lwYWwuDQoNCmBgYHtyfQ0Kb3V0bGllcnMgPC0gUEJTX3ByY29tcCAlPiUNCiAgZmlsdGVyKC5maXR0ZWRQQzEgPiA2KQ0Kb3V0bGllcnMgJT4lDQogIHNlbGVjdChBVEMxLCBBVEMyLCBUeXBlLCBDb25jZXNzaW9uKQ0KYGBgDQoNClZpc3VhbGl6YW1vcyBsYXMgc2VyaWVzIGludXN1YWxlczoNCg0KYGBge3J9DQpQQlMgJT4lDQogIHNlbWlfam9pbihvdXRsaWVycywgYnkgPSBjKCJDb25jZXNzaW9uIiwgIlR5cGUiLCAiQVRDMSIsICJBVEMyIikpICU+JQ0KICBhdXRvcGxvdChDb3N0KSArDQogIGZhY2V0X2dyaWQodmFycyhDb25jZXNzaW9uLCBUeXBlLCBBVEMxLCBBVEMyKSkgKw0KICBsYWJzKHRpdGxlID0gIk91dGx5aW5nIHRpbWUgc2VyaWVzIGluIFBDIHNwYWNlIikNCg0KYGBgDQoNCg==

Ejercicios cap. 4

Time Series features

Pablo Benavides Herrera

Ejercicio 1

Ejercicio 2

Ejercicio 3