sidrar, você deve pegar os números-índices adequados da produção industrial geral, do volume do Varejo Ampliado e do volume dos Serviços. Você deve (i) criar as variações acumuladas em 12 meses das três pesquisas e plotar no mesmo gráfico de linhas com datas a partir de janeiro de 2014; (ii) criar um mesmo gráfico de barras com datas a partir de dezembro de 2018 com as variações na margem das três pesquisas.library(tidyverse)
library(sidrar)
library(lubridate)
library(tstools)
library(scales)
library(readxl)
library(PerformanceAnalytics)
library(zoo)
library(BETS)
# Série sem ajuste sazonal
pim = get_sidra(api='/t/3653/n1/all/v/3135/p/all/c544/129314/d/v3135%201')
# volume do varejo apliado
pmc_VA = get_sidra(api = '/t/3417/n1/all/v/1186/p/all/c11046/40311/d/v1186%201' )
# volume dos serviços
pms <- get_sidra(api = '/t/6443/n1/all/v/8677/p/all/c11046/40311/c12355/107071/d/v8677%201' )
# Variação acumulada em 12 meses
pim = pim %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m'))%>%
select(date, Valor) %>%
mutate(var_anual = acum_i(Valor,12))
tail(pim)
pmc_VA= pmc_VA %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m'))%>%
select(date, Valor) %>%
mutate(var_anual = acum_i(Valor,12))
tail(pmc_VA)
pms= pms %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m'))%>%
select(date, Valor) %>%
mutate(var_anual = acum_i(Valor,12))
tail(pms)
gpim <- pim %>%
dplyr::filter(date >= "2014-01-01")
gpmc <- pmc_VA %>%
dplyr::filter(date >= "2014-01-01")
gpms <- pms%>%
dplyr::filter(date>="2014-01-01")
names <- c("Date", "Produção Industrial Geral", "Volume Varejo Ampliado", "Volume Serviços")
dataplot <- inner_join(gpim, gpmc, by = "date")%>%
inner_join(gpms, by = "date")%>%
select(date, contains("var_anual"))%>%
`colnames<-`(names)%>%
gather(Setores, `Variação Acumulada em 12 meses`, -Date)%>%
group_by(Setores)
ggplot(dataplot)+
geom_line(aes(x = Date, y = `Variação Acumulada em 12 meses`, color = Setores))+
scale_x_date(date_breaks = "3 month", labels = date_format("%b/%Y")) +
theme(axis.text.x=element_text(angle=60, hjust=1), legend.position = "bottom",
legend.title = element_blank(),
strip.text = element_text(size=10, face='bold'),
plot.title = element_text(size=10, face='bold'))+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(title='Variação acumulada em 12 meses por setores',
caption='Fonte: Elaboração própria com dados do IBGE')
# Séries com ajuste sazonal
# produção industrial geral
pim_sa = get_sidra(api='/t/3653/n1/all/v/3134/p/all/c544/129314/d/v3134%201')
# volume do varejo apliado
pmc_sa = get_sidra(api = '/t/3417/n1/all/v/1186/p/all/c11046/40312/d/v1186%201' )
# volume dos serviços
pms_sa <- get_sidra(api = '/t/6443/n1/all/v/8677/p/all/c11046/40312/c12355/107071/d/v8677%201' )
#Variação na margem
pim_sa =
pim_sa %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m')) %>%
select(date, Valor) %>%
mutate(var_margem = (Valor/lag(Valor,1)-1)*100)
tail(pim_sa)
pmc_sa =
pmc_sa %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m')) %>%
select(date, Valor) %>%
mutate(var_margem = (Valor/lag(Valor,1)-1)*100)
tail(pmc_sa)
pms_sa =
pms_sa %>%
mutate(date = parse_date(`Mês (Código)`, format='%Y%m')) %>%
select(date, Valor) %>%
mutate(var_margem = (Valor/lag(Valor,1)-1)*100)
tail(pms_sa)
gpim_sa <- pim_sa%>%
filter(date >= "2018-12-01")
gpmc_sa <- pmc_sa%>%
filter(date >= "2018-12-01")
gpms_sa <- pms_sa%>%
filter(date >= "2018-12-01")
names <- c("Date", "Produção Industrial Geral", "Volume Varejo Ampliado", "Volume Serviços")
dataplot <- inner_join(gpim_sa, gpmc_sa, by = "date")%>%
inner_join(gpms_sa, by = "date")%>%
select(date, contains("var_margem"))%>%
`colnames<-`(names)%>%
gather(Setores, `Variação na margem`, -Date)%>%
group_by(Setores)
ggplot(dataplot, aes(x = Date, y = `Variação na margem`, colour = Setores))+
geom_bar(aes(colour = Setores, fill = Setores), stat='identity', position = 'dodge')+
scale_x_date(date_breaks = "1 month", labels = date_format("%b/%Y")) +
theme(axis.text.x=element_text(angle=60, hjust=1), legend.position = "bottom",
legend.title = element_blank(),
strip.text = element_text(size=10, face='bold'),
plot.title = element_text(size=10, face='bold'))+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(title='Variação na margem por setores',
caption='Fonte: Elaboração própria com dados do IBGE')
## Fazer download dos dados
url = 'http://www.anfavea.com.br/docs/SeriesTemporais_Autoveiculos.xlsm'
download.file(url, destfile = 'veiculos.xlsm', mode='wb')
data = read_excel('veiculos.xlsm', col_types = c('date',
rep('numeric', 25)),
skip=4)
Para a análise de correlação usarei a variação anual em 12 meses por ser uma série livre de sazonalidade (mais suave).
## Tratamento dos dados
data = data[!rowSums(data[,-1])==0,] %>%
select("...1", "Produção...5") %>%
rename(date = "...1", veiculos = "Produção...5") %>%
mutate(veiculos = veiculos/1000) %>%
mutate(margem = (veiculos/lag(veiculos,1)-1)*100) %>%
mutate(interanual = (veiculos/lag(veiculos,12)-1)*100) %>%
mutate(anual = acum_i(veiculos,12))
dataplot = pim_sa%>%
mutate(anual = acum_i(Valor,12))%>%
inner_join(data, by = "date")%>%
filter(date >= "2003-12-01")%>%
select(date, anual.x, anual.y)%>%
rename(`Variação Anual Indústria (%)` = "anual.x", `Variação Anual Veículos (%)` = "anual.y" )
chart.Correlation(dataplot[,-1], histogram=TRUE, pch=15)
data_long =
dataplot %>%
gather(variavel, variação, -date)%>%
mutate(date = as.Date(date, format='%Y%m'))
ggplot(data_long, aes(x = date, y = variação, colour = variavel))+
geom_line()+
scale_x_date(date_breaks = "4 month", labels = date_format("%b/%Y")) +
theme(axis.text.x=element_text(angle=60, hjust=1), legend.position = "bottom",
legend.title = element_blank(),
strip.text = element_text(size=10, face='bold'),
plot.title = element_text(size=10, face='bold'))+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(title='Variação acumulada em 12 meses da produção de Veículos e Indústria',
caption='Fonte: Elaboração própria com dados do IBGE', x = "")
# Coleta de dados
url = 'https://apicatalog.mziq.com/filemanager/v2/d/4d1ebe73-b068-4443-992a-3d72d573238c/3e864198-0b72-c970-1771-80cd8c338a30?origin=2'
download.file(url, destfile='icva.xlsx', mode='wb')
icva = read_excel('icva.xlsx')
colnames(icva) = c('date', 'nominal', 'nominal_sa', 'real', 'real_sa')
# Visualização de dados
icva_long =
icva %>%
gather(metrica, valor, -date)%>%
mutate(date = as.Date(date, format='%Y%m'))
ggplot(icva_long, aes(x=date, y=valor*100, colour=metrica))+
geom_line()+
theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'top')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
scale_x_datetime(breaks = pretty_breaks(n=8))+
labs(x='', y='%',
title='Índice Cielo do Varejo Ampliado',
caption='Fonte: Elaboração própria com dados da Cielo')+
scale_x_date(date_breaks = "3 month", labels = date_format("%b/%Y"))
tail(icva)
icva <- icva%>%
mutate(nominal = nominal*100, nominal_sa = nominal_sa*100,
real = real*100, real_sa = real_sa*100)
dataplot <- inner_join(pmc_VA, icva, by = "date")%>%
rename(`PMC Acum. 12 meses`= "var_anual", `ICVA real` = "real",
`ICVA real ajustado` = "real_sa", `ICVA nominal` = "nominal",
`ICVA nominal ajustado`= "nominal_sa")
chart.Correlation(dataplot[,c("PMC Acum. 12 meses","ICVA real")], histogram=TRUE, pch=15)
chart.Correlation(dataplot[,c("PMC Acum. 12 meses","ICVA real ajustado")], histogram=TRUE, pch=15)
data_long <- dataplot%>%
gather(metrica, valor, -date)%>%
mutate(date = as.Date(date, format='%Y%m'))%>%
filter(metrica != "Valor")
ggplot(data_long, aes(x=date, y=valor, colour=metrica))+
geom_line()+
theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'top')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
scale_x_datetime(breaks = pretty_breaks(n=8))+
labs(x='', y='%',
title='Índice Cielo do Varejo Ampliado e Pesquisa Mensal do Comércio (Varejo Ampliado) anual',
caption='Fonte: Elaboração própria com dados da Cielo')+
scale_x_date(date_breaks = "3 month", labels = date_format("%b/%Y"))
## Exercício com dados acumulados em 12 meses
ibc <- BETSget(24364)
pib <- BETSget(22109)
autoplot(ibc, series = "ibc")+
autolayer(pib)+ylab("")
#soma acumulada em 12 meses
va12_ibc <- acum_i(ibc,12)%>%
ts(start = c(2003,1), frequency = 12)%>%
window(start = c(2005,1))
#extraindo os trimestres
trim <- seq(3,185,3)
ibc_trim <- va12_ibc[trim]%>%
ts(start = c(2005,1), frequency = 4)
head(va12_ibc,4)
## Jan Feb Mar Apr
## 2005 6.426143 6.686168 6.707182 6.589536
#ibc soma acumulada trimestral
head(ibc_trim,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2005 6.707182 6.076662 4.576001 3.440101
tail(ibc_trim,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2019 1.3733223 0.9509893 0.7913726
## 2020 0.3730801
va4_pib <- acum_i(pib,3)%>%
ts(start = c(1996,1), frequency = 4)%>%
window(start = c(2005,1))
head(va4_pib,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2005 4.391185 3.498945 2.346955 1.995039
tail(va4_pib,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2020 0.009800463 -3.974932157 -5.608587236 -4.762558062
autoplot(ibc_trim, series = "ibc")+
autolayer(va4_pib, series = "pib")+ylab("")+
ggtitle("IBC (Acumulado 12 meses trimestralizado) vs \r\nPIB (acumulado trimestral)")+
scale_x_continuous(breaks = seq(from = 2005, to = 2020, by = 1))+ theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'bottom')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(x='', y='%',
caption='Fonte: Elaboração própria com dados do Banco Central')
O ibc possui dados para o primeiro trimestre de 2020, podemos usá-lo para prever o pib.
##
datareg <- ts.intersect(ibc_trim, va4_pib)
colnames(datareg) <- c("ibc", "pib")
tail(time(ibc_trim))
## Qtr1 Qtr2 Qtr3 Qtr4
## 2018 2018.75
## 2019 2019.00 2019.25 2019.50 2019.75
## 2020 2020.00
library(forecast)
mod <- auto.arima(datareg[,"pib"], xreg = datareg[,"ibc"])
summary(mod)
## Series: datareg[, "pib"]
## Regression with ARIMA(2,0,2)(0,0,1)[4] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 intercept xreg
## 0.2377 -0.3701 1.0172 0.8048 -0.7509 0.4007 0.6689
## s.e. 0.1606 0.1729 0.1231 0.1118 0.1678 0.0636 0.0260
##
## sigma^2 estimated as 0.2362: log likelihood=-41.27
## AIC=98.54 AICc=101.31 BIC=115.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02221659 0.4572289 0.3748722 -86.98351 109.5856 0.167814
## ACF1
## Training set 0.02875005
checkresiduals(mod)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,2)(0,0,1)[4] errors
## Q* = 5.8787, df = 3, p-value = 0.1177
##
## Model df: 7. Total lags used: 10
mod <- tslm(pib ~ ibc + trend + season, data = datareg)
newdata <- data.frame(ibc = rep(0.3302, 4))
summary(mod)
##
## Call:
## tslm(formula = pib ~ ibc + trend + season, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0633 -0.4144 -0.0500 0.2896 3.8241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.238169 0.441261 0.540 0.592
## ibc 0.666341 0.048888 13.630 <2e-16 ***
## trend 0.003976 0.009414 0.422 0.674
## season2 -0.041476 0.368745 -0.112 0.911
## season3 0.012540 0.368675 0.034 0.973
## season4 0.078391 0.368747 0.213 0.832
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.026 on 55 degrees of freedom
## Multiple R-squared: 0.8381, Adjusted R-squared: 0.8233
## F-statistic: 56.92 on 5 and 55 DF, p-value: < 2.2e-16
fcast <- forecast(mod, newdata = newdata)
autoplot(ibc_trim, series = "ibc")+
autolayer(va4_pib, series = "pib")+
autolayer(fcast, PI = TRUE, series = "previsão pib")+
ylab("")+
ggtitle("IBC (Acumulado 12 meses trimestralizado) vs \r\nPIB (acumulado trimestral)")+
theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'bottom')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(x='', y='%',
caption='Fonte: Elaboração própria com dados do Banco Central')
summary(fcast)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = pib ~ ibc + trend + season, data = datareg)
##
## Coefficients:
## (Intercept) ibc trend season2 season3 season4
## 0.238169 0.666341 0.003976 -0.041476 0.012540 0.078391
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.458249e-17 0.9739814 0.6429559 -134.4718 170.9626 0.2878234
## ACF1
## Training set 0.667336
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 Q2 0.6632191 -0.7515564 2.077995 -1.522579 2.849017
## 2020 Q3 0.7212105 -0.6943339 2.136755 -1.465775 2.908196
## 2020 Q4 0.7910370 -0.6252194 2.207293 -1.397049 2.979123
## 2021 Q1 0.7166222 -0.7015862 2.134831 -1.474479 2.907724
fcast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 Q2 0.6632191 -0.7515564 2.077995 -1.522579 2.849017
## 2020 Q3 0.7212105 -0.6943339 2.136755 -1.465775 2.908196
## 2020 Q4 0.7910370 -0.6252194 2.207293 -1.397049 2.979123
## 2021 Q1 0.7166222 -0.7015862 2.134831 -1.474479 2.907724
## Exercício com dados na margem
#margem ibc
margem_ibc <- diff(ibc)%>%
window(start = c(2004,1))
tail(margem_ibc,4)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2020 1.18 1.02 0.98
## 2021 1.44
#completar serie do ibc até final do ano
ibc_ar <- auto.arima(margem_ibc)
summary(ibc_ar)
## Series: margem_ibc
## ARIMA(0,0,1) with zero mean
##
## Coefficients:
## ma1
## 0.2712
## s.e. 0.0638
##
## sigma^2 estimated as 2.375: log likelihood=-379.07
## AIC=762.15 AICc=762.21 BIC=768.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1499679 1.537275 0.9634807 97.80961 146.6211 0.7659901
## ACF1
## Training set -0.003363143
checkresiduals(ibc_ar)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1) with zero mean
## Q* = 12.32, df = 23, p-value = 0.9652
##
## Model df: 1. Total lags used: 24
ibc_fcast <- forecast(ibc_ar, h = 7)
margem_ibc <- append(margem_ibc, ibc_fcast$mean)%>%
ts(start = c(2004,1), frequency = 12)
tail(margem_ibc)
## Mar Apr May Jun Jul Aug
## 2021 0 0 0 0 0 0
#ibc soma acumulada trimestral
margem_ibc_trim <- aggregate(margem_ibc, mean, nfrequency = 4)
tail(margem_ibc_trim,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2020 2.533333 1.060000
## 2021 0.591770 0.000000
margem_pib_trim <- diff(pib)%>%
window(start = c(2004,1))
tail(margem_pib_trim,4)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2020 -3.53 -15.47 11.68 5.19
autoplot(margem_ibc_trim, series = "ibc")+
autolayer(margem_pib_trim, series = "pib")+ylab("")+
ggtitle("IBC (margem trimestral) vs \r\nPIB (margem trimestral)")+
theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'bottom')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(x='', y='%',
caption='Fonte: Elaboração própria com dados do Banco Central')
O ibc possui dados para o primeiro trimestre de 2020, podemos usá-lo para prever o pib.
##
datareg <- ts.intersect(margem_ibc_trim, margem_pib_trim)
colnames(datareg) <- c("ibc", "pib")
library(forecast)
mod <- auto.arima(datareg[,"pib"], xreg = datareg[,"ibc"])
checkresiduals(mod)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,1)(0,0,1)[4] errors
## Q* = 3.569, df = 4, p-value = 0.4675
##
## Model df: 4. Total lags used: 8
summary(mod)
## Series: datareg[, "pib"]
## Regression with ARIMA(0,0,1)(0,0,1)[4] errors
##
## Coefficients:
## ma1 sma1 intercept xreg
## -0.7472 0.3184 0.2013 3.0447
## s.e. 0.1232 0.1807 0.0773 0.2013
##
## sigma^2 estimated as 3.246: log likelihood=-134.98
## AIC=279.97 AICc=280.94 BIC=291.06
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02703593 1.747795 1.136668 -0.3067075 95.70051 0.4892513
## ACF1
## Training set 0.06181806
mod <- tslm(pib ~ ibc + season, data = datareg)
newdata <- data.frame(ibc = tail(margem_ibc_trim, 4))
summary(mod)
##
## Call:
## tslm(formula = pib ~ ibc + season, data = datareg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.8526 -1.0601 -0.0033 1.3294 4.8451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2264 0.5360 0.422 0.674
## ibc 2.5033 0.3076 8.138 2.05e-11 ***
## season2 -0.6145 0.7557 -0.813 0.419
## season3 0.2669 0.7587 0.352 0.726
## season4 0.5819 0.7567 0.769 0.445
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.202 on 63 degrees of freedom
## Multiple R-squared: 0.5237, Adjusted R-squared: 0.4934
## F-statistic: 17.31 on 4 and 63 DF, p-value: 1.251e-09
fcast <- forecast(mod, newdata = newdata)
autoplot(margem_ibc_trim, series = "ibc")+
autolayer(margem_pib_trim, series = "pib")+
autolayer(fcast, PI = TRUE, series = "previsão")+
ylab("")+
ggtitle("IBC (margem trimestral) vs \r\nPIB (margem trimestral)")+
scale_x_continuous(breaks = seq(from = 2005, to = 2020, by = 1))+ theme(legend.title = element_blank(),axis.text.x=element_text(angle=60, hjust=1),
legend.position = 'bottom')+
geom_hline(yintercept=0, colour='black', linetype='dashed')+
labs(x='', y='%',
caption='Fonte: Elaboração própria com dados do Banco Central')
summary(fcast)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = pib ~ ibc + season, data = datareg)
##
## Coefficients:
## (Intercept) ibc season2 season3 season4
## 0.2264 2.5033 -0.6145 0.2669 0.5819
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.121847e-17 2.119768 1.406063 123.6545 179.3539 0.6052056
## ACF1
## Training set -0.426495
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2021 Q1 6.5679856 3.4827260 9.653245 1.807553 11.328418
## 2021 Q2 2.2653148 -0.6889968 5.219626 -2.293070 6.823700
## 2021 Q3 1.9746385 -0.9615581 4.910835 -2.555796 6.505073
## 2021 Q4 0.8082391 -2.1266999 3.743178 -3.720255 5.336733
#Queda prevista para o pib em 2020
x <- colMeans(as_tibble(fcast))
y <- matrix(x,5,1)
rownames(y)<-names(x)
y
## [,1]
## Point Forecast 2.90404451
## Lo 80 -0.07363219
## Hi 80 5.88172120
## Lo 95 -1.69039183
## Hi 95 7.49848085
ggsave("prev_pib.jpeg")