Stat. 674 Homework03

Author

Yogesh Gupta

Section 4.6, Exercise 1

Code and Comments:

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(tsibble)

Attaching package: 'tsibble'

The following objects are masked from 'package:base':

    intersect, setdiff, union
library(feasts)
Loading required package: fabletools
library(fable)
library(tsibbledata)
library(fpp3)
── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
✔ lubridate 1.9.1     
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()     masks base::date()
✖ dplyr::filter()       masks stats::filter()
✖ tsibble::intersect()  masks base::intersect()
✖ lubridate::interval() masks tsibble::interval()
✖ dplyr::lag()          masks stats::lag()
✖ tsibble::setdiff()    masks base::setdiff()
✖ tsibble::union()      masks base::union()
PBS
# A tsibble: 67,596 x 9 [1M]
# Key:       Concession, Type, ATC1, ATC2 [336]
      Month Concession   Type        ATC1  ATC1_desc ATC2  ATC2_…¹ Scripts  Cost
      <mth> <chr>        <chr>       <chr> <chr>     <chr> <chr>     <dbl> <dbl>
 1 1991 Jul Concessional Co-payments A     Alimenta… A01   STOMAT…   18228 67877
 2 1991 Aug Concessional Co-payments A     Alimenta… A01   STOMAT…   15327 57011
 3 1991 Sep Concessional Co-payments A     Alimenta… A01   STOMAT…   14775 55020
 4 1991 Oct Concessional Co-payments A     Alimenta… A01   STOMAT…   15380 57222
 5 1991 Nov Concessional Co-payments A     Alimenta… A01   STOMAT…   14371 52120
 6 1991 Dec Concessional Co-payments A     Alimenta… A01   STOMAT…   15028 54299
 7 1992 Jan Concessional Co-payments A     Alimenta… A01   STOMAT…   11040 39753
 8 1992 Feb Concessional Co-payments A     Alimenta… A01   STOMAT…   15165 54405
 9 1992 Mar Concessional Co-payments A     Alimenta… A01   STOMAT…   16898 61108
10 1992 Apr Concessional Co-payments A     Alimenta… A01   STOMAT…   18141 65356
# … with 67,586 more rows, and abbreviated variable name ¹​ATC2_desc
media_desvstd <- function(x){
  c(
    media    = mean(x, na.rm = TRUE),
    desv_std = sd(x, na.rm = TRUE)
  )
}

# PBS  
pbs_features <- PBS %>% 
  features(Cost, media_desvstd)

pbs_features
# A tibble: 336 × 6
   Concession   Type        ATC1  ATC2      media desv_std
   <chr>        <chr>       <chr> <chr>     <dbl>    <dbl>
 1 Concessional Co-payments A     A01      67673.   14763.
 2 Concessional Co-payments A     A02   16455044. 7498596.
 3 Concessional Co-payments A     A03     476221.  370696.
 4 Concessional Co-payments A     A04     463392.  154020.
 5 Concessional Co-payments A     A05     147604.   74190.
 6 Concessional Co-payments A     A06     417889.  163040.
 7 Concessional Co-payments A     A07     917795.  338081.
 8 Concessional Co-payments A     A09     343881.   89815.
 9 Concessional Co-payments A     A10    5680010. 3180294.
10 Concessional Co-payments A     A11     651863.  387439.
# … with 326 more rows
pbs_features %>% 
  filter(media == max(media))
# A tibble: 1 × 6
  Concession   Type        ATC1  ATC2      media  desv_std
  <chr>        <chr>       <chr> <chr>     <dbl>     <dbl>
1 Concessional Co-payments C     C10   24845501. 18384824.
pbs_features %>% 
  filter(media == max(media)) %>% 
  inner_join(PBS)
Joining with `by = join_by(Concession, Type, ATC1, ATC2)`
Warning in inner_join(., PBS): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 1 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
# A tibble: 204 × 11
   Concession  Type  ATC1  ATC2   media desv_…¹    Month ATC1_…² ATC2_…³ Scripts
   <chr>       <chr> <chr> <chr>  <dbl>   <dbl>    <mth> <chr>   <chr>     <dbl>
 1 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Jul Cardio… SERUML…   95391
 2 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Aug Cardio… SERUML…   84923
 3 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Sep Cardio… SERUML…   85597
 4 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Oct Cardio… SERUML…   91486
 5 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Nov Cardio… SERUML…   84980
 6 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Dec Cardio… SERUML…   92610
 7 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Jan Cardio… SERUML…   74973
 8 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Feb Cardio… SERUML…   82883
 9 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Mar Cardio… SERUML…   97347
10 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Apr Cardio… SERUML…  104652
# … with 194 more rows, 1 more variable: Cost <dbl>, and abbreviated variable
#   names ¹​desv_std, ²​ATC1_desc, ³​ATC2_desc
pbs_features %>% 
  filter(media == max(media)) %>% 
  right_join(PBS)
Joining with `by = join_by(Concession, Type, ATC1, ATC2)`
Warning in right_join(., PBS): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 1 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
# A tibble: 67,596 × 11
   Concession  Type  ATC1  ATC2   media desv_…¹    Month ATC1_…² ATC2_…³ Scripts
   <chr>       <chr> <chr> <chr>  <dbl>   <dbl>    <mth> <chr>   <chr>     <dbl>
 1 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Jul Cardio… SERUML…   95391
 2 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Aug Cardio… SERUML…   84923
 3 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Sep Cardio… SERUML…   85597
 4 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Oct Cardio… SERUML…   91486
 5 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Nov Cardio… SERUML…   84980
 6 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Dec Cardio… SERUML…   92610
 7 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Jan Cardio… SERUML…   74973
 8 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Feb Cardio… SERUML…   82883
 9 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Mar Cardio… SERUML…   97347
10 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Apr Cardio… SERUML…  104652
# … with 67,586 more rows, 1 more variable: Cost <dbl>, and abbreviated
#   variable names ¹​desv_std, ²​ATC1_desc, ³​ATC2_desc
pbs_features %>% 
  filter(media == max(media)) %>% 
  full_join(PBS)
Joining with `by = join_by(Concession, Type, ATC1, ATC2)`
Warning in full_join(., PBS): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 1 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
# A tibble: 67,596 × 11
   Concession  Type  ATC1  ATC2   media desv_…¹    Month ATC1_…² ATC2_…³ Scripts
   <chr>       <chr> <chr> <chr>  <dbl>   <dbl>    <mth> <chr>   <chr>     <dbl>
 1 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Jul Cardio… SERUML…   95391
 2 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Aug Cardio… SERUML…   84923
 3 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Sep Cardio… SERUML…   85597
 4 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Oct Cardio… SERUML…   91486
 5 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Nov Cardio… SERUML…   84980
 6 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Dec Cardio… SERUML…   92610
 7 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Jan Cardio… SERUML…   74973
 8 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Feb Cardio… SERUML…   82883
 9 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Mar Cardio… SERUML…   97347
10 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Apr Cardio… SERUML…  104652
# … with 67,586 more rows, 1 more variable: Cost <dbl>, and abbreviated
#   variable names ¹​desv_std, ²​ATC1_desc, ³​ATC2_desc
pbs_joined <- pbs_features %>% 
  filter(media == max(media)) %>% 
  left_join(PBS)
Joining with `by = join_by(Concession, Type, ATC1, ATC2)`
Warning in left_join(., PBS): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 1 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
pbs_joined
# A tibble: 204 × 11
   Concession  Type  ATC1  ATC2   media desv_…¹    Month ATC1_…² ATC2_…³ Scripts
   <chr>       <chr> <chr> <chr>  <dbl>   <dbl>    <mth> <chr>   <chr>     <dbl>
 1 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Jul Cardio… SERUML…   95391
 2 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Aug Cardio… SERUML…   84923
 3 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Sep Cardio… SERUML…   85597
 4 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Oct Cardio… SERUML…   91486
 5 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Nov Cardio… SERUML…   84980
 6 Concession… Co-p… C     C10   2.48e7  1.84e7 1991 Dec Cardio… SERUML…   92610
 7 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Jan Cardio… SERUML…   74973
 8 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Feb Cardio… SERUML…   82883
 9 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Mar Cardio… SERUML…   97347
10 Concession… Co-p… C     C10   2.48e7  1.84e7 1992 Apr Cardio… SERUML…  104652
# … with 194 more rows, 1 more variable: Cost <dbl>, and abbreviated variable
#   names ¹​desv_std, ²​ATC1_desc, ³​ATC2_desc
pbs_joined %>% 
  ggplot(aes(x = Month, y = Cost)) +
  geom_line()

pbs_joined %>% 
  as_tsibble(index = Month, key = Concession:ATC2) %>% 
  autoplot(Cost)

pbs_features %>% 
  filter(desv_std == min(desv_std))
# A tibble: 2 × 6
  Concession Type        ATC1  ATC2  media desv_std
  <chr>      <chr>       <chr> <chr> <dbl>    <dbl>
1 General    Co-payments R     R         0        0
2 General    Co-payments S     S         0        0

Section 4.6, Exercise 3

Code and Comments:

PBS_feat <- PBS %>%
  features(Cost, feature_set(pkgs = "feasts"))
Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
max(.period, : NA/Inf replaced by maximum positive value

Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
max(.period, : NA/Inf replaced by maximum positive value

Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
max(.period, : NA/Inf replaced by maximum positive value

Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
max(.period, : NA/Inf replaced by maximum positive value
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action,  : 
  zero-variance series
Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action,  : 
  zero-variance series
Warning: `n_flat_spots()` was deprecated in feasts 0.1.5.
ℹ Please use `longest_flat_spot()` instead.
ℹ The deprecated feature was likely used in the fabletools package.
  Please report the issue at <]8;;https://github.com/tidyverts/fabletools/issueshttps://github.com/tidyverts/fabletools/issues]8;;>.
New names:
• `` -> `...26`
Warning: 2 errors (1 unique) encountered for feature 7
[2] subscript out of bounds
PBS_feat
# A tibble: 336 × 53
   Concession  Type  ATC1  ATC2  trend…¹ seaso…² seaso…³ seaso…⁴ spiki…⁵ linea…⁶
   <chr>       <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
 1 Concession… Co-p… A     A01     0.837   0.899       9       7 1.44e10 -4.88e4
 2 Concession… Co-p… A     A02     0.973   0.937      11       6 1.25e20  8.51e7
 3 Concession… Co-p… A     A03     0.978   0.845      11       7 4.58e14 -2.24e6
 4 Concession… Co-p… A     A04     0.944   0.843       9       6 1.24e14  1.67e6
 5 Concession… Co-p… A     A05     0.960   0.870      11       6 8.95e12  6.10e5
 6 Concession… Co-p… A     A06     0.960   0.951      11       6 3.82e13  1.53e6
 7 Concession… Co-p… A     A07     0.971   0.910      11       6 9.77e14  4.10e6
 8 Concession… Co-p… A     A09     0.942   0.891      11       6 1.31e13  9.19e5
 9 Concession… Co-p… A     A10     0.975   0.917      11       6 9.97e18  3.83e7
10 Concession… Co-p… A     A11     0.982   0.898      11       7 2.82e14  3.71e5
# … with 326 more rows, 43 more variables: curvature <dbl>, stl_e_acf1 <dbl>,
#   stl_e_acf10 <dbl>, acf1 <dbl>, acf10 <dbl>, diff1_acf1 <dbl>,
#   diff1_acf10 <dbl>, diff2_acf1 <dbl>, diff2_acf10 <dbl>, season_acf1 <dbl>,
#   pacf5 <dbl>, diff1_pacf5 <dbl>, diff2_pacf5 <dbl>, season_pacf <dbl>,
#   zero_run_mean <dbl>, nonzero_squared_cv <dbl>, zero_start_prop <dbl>,
#   zero_end_prop <dbl>, lambda_guerrero <dbl>, ...26 <dbl>, kpss_stat <dbl>,
#   kpss_pvalue <dbl>, pp_stat <dbl>, pp_pvalue <dbl>, ndiffs <int>, …
PBS_prcomp <- PBS_feat %>%
  select(-Concession, -Type, -ATC1, -ATC2)
 # prcomp(scale = TRUE) %>%
 # augment(PBS_feat)

PBS_prcomp
# A tibble: 336 × 49
   trend_stre…¹ seaso…² seaso…³ seaso…⁴ spiki…⁵ linea…⁶ curva…⁷ stl_e_…⁸ stl_e…⁹
          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>
 1        0.837   0.899       9       7 1.44e10 -4.88e4 -8.79e4 -0.00788   0.182
 2        0.973   0.937      11       6 1.25e20  8.51e7 -1.31e7 -0.0597    0.211
 3        0.978   0.845      11       7 4.58e14 -2.24e6 -3.02e6  0.579     1.21 
 4        0.944   0.843       9       6 1.24e14  1.67e6 -1.54e5 -0.208     0.135
 5        0.960   0.870      11       6 8.95e12  6.10e5 -4.18e4  0.0264    0.317
 6        0.960   0.951      11       6 3.82e13  1.53e6 -5.75e5 -0.159     0.212
 7        0.971   0.910      11       6 9.77e14  4.10e6 -3.34e4 -0.274     0.218
 8        0.942   0.891      11       6 1.31e13  9.19e5 -3.66e5 -0.390     0.265
 9        0.975   0.917      11       6 9.97e18  3.83e7  6.29e6 -0.00956   0.269
10        0.982   0.898      11       7 2.82e14  3.71e5 -4.55e6  0.318     0.819
# … with 326 more rows, 40 more variables: acf1 <dbl>, acf10 <dbl>,
#   diff1_acf1 <dbl>, diff1_acf10 <dbl>, diff2_acf1 <dbl>, diff2_acf10 <dbl>,
#   season_acf1 <dbl>, pacf5 <dbl>, diff1_pacf5 <dbl>, diff2_pacf5 <dbl>,
#   season_pacf <dbl>, zero_run_mean <dbl>, nonzero_squared_cv <dbl>,
#   zero_start_prop <dbl>, zero_end_prop <dbl>, lambda_guerrero <dbl>,
#   ...26 <dbl>, kpss_stat <dbl>, kpss_pvalue <dbl>, pp_stat <dbl>,
#   pp_pvalue <dbl>, ndiffs <int>, nsdiffs <int>, bp_stat <dbl>, …