library(nowcasting)
library(timetk)
library(tidyverse)
library(lubridate)
library(tseries)ilustrasi DFM untuk PDRB NTB - Bagus Sartono
Package yang diperlukan
Sebelum memulai, kita load/panggil terlebih dahulu beberapa package yang akan digunakan pada dokumen ini. Jika ada package yang tidak dapat dipanggil, perlu didahului dengan proses instalasi package sebelumnya.
Import Data
Data telah disiapkan pada file excel dengan nama “Data_PDRB_siap.xlsx”. File ini berisi 9 kolom yaitu:
- date
- ADHK (PDRB ADHK) kuartalan, dan
- 7 variabel makro bulanan
dari tahun 2015 hingga 2023.
dengan alasan tertentu, data 2015-2017 selanjutnya tidak disertakan dalam analisis.
pdrb <- readxl::read_excel("Data_PDRB_siap.xlsx")
pdrb <- pdrb[-(1:36),]
glimpse(pdrb)Rows: 72
Columns: 9
$ date <chr> "2018-01-01", "2018-02-01", "2018-03-01", "2018-04-0…
$ ADHK <dbl> NA, NA, 21902.12, NA, NA, 22931.43, NA, NA, 22560.13…
$ prod_tbg <dbl> 31843.87, 23620.41, 31383.90, 33483.54, 28503.74, 20…
$ kredit_konstruksi <dbl> 7.108428e+11, 7.336657e+11, 7.744189e+11, 8.552797e+…
$ ekspor <dbl> 69244995, 352323, 43910678, 48731150, 44588299, 7822…
$ pmi_jepang <dbl> 54.8, 54.1, 53.1, 53.8, 52.8, 53.0, 52.3, 52.5, 52.5…
$ ihk <dbl> 98.75617, 99.08186, 98.90139, 99.38930, 99.07820, 10…
$ soi <dbl> 6.9, -1.2, 10.5, -2.6, 2.1, 5.5, 1.6, -5.8, -10.0, 3…
$ ikk <dbl> 123.08, 110.25, 111.00, 113.17, 109.08, 106.92, 106.…
Ketika data diimport menjadi R-dataframe, kolom “date” terbaca sebagai variabel dengan tipe text. Untuk ini pada program di bawah ini dilakukan penyesuaian agar “date” diperlakukan oleh R sebagai data dengan format yang sesuai.
selanjutnya variabel2 PDRB, Ekspor, Prod TBG, dan Kredit Konstrukksi di transformasi logaritma.
pdrb <- pdrb %>% mutate(date=as_date(date))
pdrb$ADHK = log(pdrb$ADHK)
pdrb$prod_tbg = log(pdrb$prod_tbg)
pdrb$kredit_konstruksi = log(pdrb$kredit_konstruksi)
pdrb$ekspor = log(pdrb$ekspor)
glimpse(pdrb)Rows: 72
Columns: 9
$ date <date> 2018-01-01, 2018-02-01, 2018-03-01, 2018-04-01, 201…
$ ADHK <dbl> NA, NA, 9.994339, NA, NA, 10.040264, NA, NA, 10.0239…
$ prod_tbg <dbl> 10.368600, 10.069866, 10.354050, 10.418809, 10.25779…
$ kredit_konstruksi <dbl> 27.28972, 27.32132, 27.37538, 27.47469, 27.63365, 27…
$ ekspor <dbl> 18.05316, 12.77230, 17.59767, 17.70183, 17.61298, 18…
$ pmi_jepang <dbl> 54.8, 54.1, 53.1, 53.8, 52.8, 53.0, 52.3, 52.5, 52.5…
$ ihk <dbl> 98.75617, 99.08186, 98.90139, 99.38930, 99.07820, 10…
$ soi <dbl> 6.9, -1.2, 10.5, -2.6, 2.1, 5.5, 1.6, -5.8, -10.0, 3…
$ ikk <dbl> 123.08, 110.25, 111.00, 113.17, 109.08, 106.92, 106.…
Plot timeseries dari variabel dalam data
Berikut ini adalah perintah untuk menampilkan plot time series dari data PDRB Provinsi Nusa Tenggara Barat. Karena data PDRB berupa kuartalan, yang dalam dataframe loncat setiap 3 bulan sekali, ada proses pembuangan data NA (not available) sebelum menggambar plot-nya.
pdrb %>%
na.omit() %>%
plot_time_series(
.date_var = date,
.value = ADHK,
.title = "ADHK Tahun 2018 ke 2023",
.smooth = FALSE,
.interactive = FALSE)Sementara itu, di bawah ini adalah perintah untuk memperoleh plot time series dari 7 variabel makroekonomi bulanan, yang nantinya akan dipergunakan sebagai variabel prediktor teramati dalam model DFM.
plot_multi_time_series=function(data,.var,...){
data %>%
select(date,all_of(.var)) %>%
na.omit() %>%
pivot_longer(cols = !date,names_to = "Variable",
values_to ="value") %>%
plot_time_series(.date_var = date,
.value = value,
.facet_vars = Variable,
...)
}x_var_names <- colnames(pdrb %>% select(-date,
-ADHK)
)
plot_multi_time_series(data = pdrb,
.var = x_var_names,
.facet_ncol = 3,
.smooth = FALSE,
.title = "Indikator Ekonomi Tahun 2018 ke 2023",
.interactive = FALSE)Memeriksa kestationeran data
Uji ADF
adf_test_df <- function(data, alpha = 0.05) {
data <- data %>%
select(where(is.numeric))
data <- recipes::recipe(~.,data=data) %>%
step_ts_impute(everything(),
period = 1,
lambda = NULL) %>%
recipes::prep() %>%
recipes::bake(new_data=NULL)
test_results <- map_dfr(data, function(x) {
data.frame(
Dicky_Fuller = suppressWarnings(adf.test(x,
alternative = "stationary")$statistic),
p_value = suppressWarnings(adf.test(x,
alternative = "stationary")$p.value)
)
})
rownames(test_results) = NULL
cbind("Variables" = colnames(data), test_results) %>%
mutate(significance = ifelse(p_value > 0.05, "no", "yes"))
}adf_test_df(pdrb) Variables Dicky_Fuller p_value significance
1 ADHK -3.0313375 0.15540002 no
2 prod_tbg -3.2964245 0.07929645 no
3 kredit_konstruksi -4.6892131 0.01000000 yes
4 ekspor -2.8986673 0.20941886 no
5 pmi_jepang -1.8665909 0.62964540 no
6 ihk -0.9849859 0.93400220 no
7 soi -1.6429046 0.72072288 no
8 ikk -1.7239816 0.68771108 no
adf_test_df(pdrb) %>%
filter(significance=="yes") Variables Dicky_Fuller p_value significance
1 kredit_konstruksi -4.689213 0.01 yes
nama_sig <- adf_test_df(pdrb) %>%
filter(significance=="yes") %>% pull(Variables)
nama_sig[1] "kredit_konstruksi"
Transformasi
Prediktor
Tahapan ini menyiapkan data variabel prediktor menggunakan fungsi Bpanel sekaligus melakukan tranformasi differencing (menggunakan kode 2 pada fungsi tersebut.)
base <- pdrb %>%
select(-ADHK) %>%
tk_ts(start = 2018,
frequency = 12)Warning: Non-numeric columns being dropped: date
#nama variabel prediktor
x_var_names [1] "prod_tbg" "kredit_konstruksi" "ekspor"
[4] "pmi_jepang" "ihk" "soi"
[7] "ikk"
trans <- rep(2,length(x_var_names))
trans[1] 2 2 2 2 2 2 2
x_var <- Bpanel(base = base,
trans = trans,
NA.replace = T,h = 12)NULL
Variabel Respon
Transformasi differencing juga dilakukan pada PDRB untuk mengubahnya menjadi variabel growth.
dADHK <- pdrb %>%
select(date,ADHK) %>%
tk_augment_differences(.value = "ADHK",
.differences = 1,
.lags = 3,
.names = "dADHK",
.log = TRUE)Uji ADF pada data transformasi
x_var %>%
tk_tbl() %>%
adf_test_df() Variables Dicky_Fuller p_value significance
1 prod_tbg -3.886558 0.01884619 yes
2 kredit_konstruksi -5.088322 0.01000000 yes
3 ekspor -5.007223 0.01000000 yes
4 pmi_jepang -3.739818 0.02621884 yes
5 ihk -3.717787 0.02813924 yes
6 soi -3.321647 0.07368857 no
7 ikk -4.579145 0.01000000 yes
x_var %>%
tk_tbl() %>%
adf_test_df() %>%
filter(significance=="no") Variables Dicky_Fuller p_value significance
1 soi -3.321647 0.07368857 no
dADHK %>%
adf_test_df() Variables Dicky_Fuller p_value significance
1 ADHK -3.031338 0.1554 no
2 dADHK -5.195836 0.0100 yes
Transformasi tahap 2
Karena soi masih tidak signifikan kita transformasi lagi dengan difference
pdrb2 <- pdrb %>%
tk_augment_differences(.value = soi,
.differences = 1,
.names = "soi",
.log = FALSE)base <- pdrb2 %>%
select(-ADHK) %>%
tk_ts(start = 2018,
frequency = 12)Warning: Non-numeric columns being dropped: date
#nama variabel prediktor
x_var_names [1] "prod_tbg" "kredit_konstruksi" "ekspor"
[4] "pmi_jepang" "ihk" "soi"
[7] "ikk"
trans <- rep(2,length(x_var_names))
x_var <- Bpanel(base = base,
trans = trans,
NA.replace = T,h = 12)NULL
x_var %>%
tk_tbl() %>%
adf_test_df() %>%
filter(significance=="no") Variables Dicky_Fuller p_value significance
1 soi -3.227867 0.08897218 no
Menentukan Banyaknya factor
ICR1 <- ICfactors(x = x_var, type = 1)12 rows out of 84 were deleted due to NA observations.
ICR2 <- ICfactors(x = x_var, type = 2)12 rows out of 84 were deleted due to NA observations.
ICR3 <- ICfactors(x = x_var, type = 3)12 rows out of 84 were deleted due to NA observations.
Menentukan banyaknya Shock
ICQ1 <- ICshocks(x = x_var,
r = 5,
p = 3)12 rows out of 84 were deleted due to NA observations.
ICQ1$q_star
[1] 2
$p
[1] 3
Nowcast using DFM
ADHK_ts <- dADHK %>%
select(date,dADHK) %>%
tk_ts(start = c(2018,1),
frequency = 12)Warning: Non-numeric columns being dropped: date
data_mod <- cbind(dADHK=ADHK_ts, x_var)
str(data_mod) Time-Series [1:84, 1:8] from 2018 to 2025: NA NA NA NA NA ...
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:8] "dADHK" "x_var.prod_tbg" "x_var.kredit_konstruksi" "x_var.ekspor" ...
pilih 5 karena kalau 7 sama dengan variable x nya jadi kyk ga ada reduksi dimensi. Di sisi lain dia juga error.
frequency <- c(4, rep(12,
ncol(data_mod) -1)
)
dfm_est <- nowcast(formula = dADHK~., data = data_mod,
r = 5,
q = 3 ,
p = 3,
method = "2s_agg",
frequency = frequency)summary(dfm_est$reg)
Call:
stats::lm(formula = Y ~ ., data = Balanced_panel)
Residuals:
Min 1Q Median 3Q Max
-0.0032768 -0.0011008 0.0002469 0.0012336 0.0039939
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0009214 0.0004271 2.158 0.045564 *
Factor1 -0.0007736 0.0002500 -3.094 0.006591 **
Factor2 0.0004700 0.0002705 1.737 0.100392
Factor3 0.0006838 0.0001736 3.939 0.001058 **
Factor4 -0.0010017 0.0002729 -3.670 0.001896 **
Factor5 0.0045065 0.0010411 4.329 0.000456 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.002044 on 17 degrees of freedom
Multiple R-squared: 0.6398, Adjusted R-squared: 0.5339
F-statistic: 6.04 on 5 and 17 DF, p-value: 0.002171
nowcast.plot(dfm_est, type = "fcst")nowcast.plot(dfm_est, type = "factors")nowcast.plot(dfm_est, type = "eigenvalues")nowcast.plot(dfm_est, type = "eigenvectors")get_first_y_value <- pdrb %>%
na.omit() %>%
slice(1) %>%
pull(ADHK)inv_diff_y <- function(dfm,first_y_value){
date_df_out <- dfm$yfcst %>%
tk_tbl(rename_index = "date") %>%
filter(!is.na(out)) %>%
select(date)
date_df_in <- dfm$yfcst %>%
tk_tbl(rename_index = "date") %>%
select(date) %>%
anti_join(date_df_out)
df <- rbind(cbind(date_df_in,
data.frame(prediction = diff_inv_vec(dfm$yfcst[,1],
initial_values = first_y_value,
difference = 1,
log = TRUE),name="actual")),
cbind(date_df_in,
data.frame(prediction =
diff_inv_vec(dfm$yfcst[,2],
initial_values = first_y_value,
difference = 1,
log = TRUE),name="predict"))
)
first_pred <- df %>% slice_tail(n=1) %>% pull(prediction)
rbind(df,
cbind(date_df_out,
data.frame(prediction =
diff_inv_vec(dfm$yfcst[,3],
initial_values = first_pred,
difference = 1,
log = TRUE)[-1],name="forecast")
)
)
}
get_y <- function(dfm){
date_df_out <- dfm$yfcst %>%
tk_tbl(rename_index = "date") %>%
filter(!is.na(out)) %>%
select(date)
date_df_in <- dfm$yfcst %>%
tk_tbl(rename_index = "date") %>%
select(date) %>%
anti_join(date_df_out)
df <- rbind(cbind(date_df_in,
data.frame(prediction = dfm$yfcst[,1],name="actual") %>%
na.omit()),
cbind(date_df_in,
data.frame(prediction = c(dfm$yfcst[1,1],dfm$yfcst[,2]),name="predict") %>%
na.omit())
)
rbind(df,
cbind(date_df_out,
data.frame(prediction =dfm$yfcst[,3],
name="forecast") %>%
na.omit()
)
)
}df_y_res <- inv_diff_y(dfm_est,get_first_y_value)Joining with `by = join_by(date)`
df_y_res date prediction name
1 2018 Q1 9.994339 actual
2 2018 Q2 10.040264 actual
3 2018 Q3 10.023940 actual
4 2018 Q4 10.041310 actual
5 2019 Q1 10.011367 actual
6 2019 Q2 10.060659 actual
7 2019 Q3 10.085647 actual
8 2019 Q4 10.093865 actual
9 2020 Q1 10.040878 actual
10 2020 Q2 10.047955 actual
11 2020 Q3 10.075979 actual
12 2020 Q4 10.063463 actual
13 2021 Q1 10.029015 actual
14 2021 Q2 10.094570 actual
15 2021 Q3 10.099906 actual
16 2021 Q4 10.094563 actual
17 2022 Q1 10.103503 actual
18 2022 Q2 10.152749 actual
19 2022 Q3 10.168530 actual
20 2022 Q4 10.162552 actual
21 2023 Q1 10.138414 actual
22 2023 Q2 10.137271 actual
23 2023 Q3 10.184158 actual
24 2023 Q4 10.198488 actual
25 2018 Q1 9.994339 predict
26 2018 Q2 10.023518 predict
27 2018 Q3 10.040067 predict
28 2018 Q4 10.062187 predict
29 2019 Q1 10.023989 predict
30 2019 Q2 10.049306 predict
31 2019 Q3 10.074579 predict
32 2019 Q4 10.091685 predict
33 2020 Q1 10.052480 predict
34 2020 Q2 10.050707 predict
35 2020 Q3 10.062125 predict
36 2020 Q4 10.078845 predict
37 2021 Q1 10.062233 predict
38 2021 Q2 10.087636 predict
39 2021 Q3 10.102824 predict
40 2021 Q4 10.125387 predict
41 2022 Q1 10.127830 predict
42 2022 Q2 10.173765 predict
43 2022 Q3 10.174614 predict
44 2022 Q4 10.180856 predict
45 2023 Q1 10.154167 predict
46 2023 Q2 10.137243 predict
47 2023 Q3 10.194323 predict
48 2023 Q4 10.198488 predict
49 2024 Q1 10.195843 forecast
50 2024 Q2 10.205576 forecast
51 2024 Q3 10.215468 forecast
52 2024 Q4 10.225164 forecast
plot_time_series(df_y_res,
.value = prediction,
.date_var = date,
.smooth = FALSE,
.color_var = name)