Contexto

Esse .Rmd vai mostrar como algumas ilustrações em relação ao setor de alimentos e bebidas. Essa parte está diposponível no Rpubs porque é feita com a parte da RAIS que não é identificada.

Pacotes

library(haven)
library(ggplot2)
library(RColorBrewer)
library(sp)
library(sf)
library(ggspatial)
library(dplyr)
library(psych)
library(ggthemes)
library(fixest)
library(modelsummary)
library(broom)
library(did)
library(scales)
library(reshape)
library(vtable)
library(basedosdados)
library(DBI)
library(bigrquery)
library(data.table)
library(ggokabeito)
library(thematic)
library(readr)
library(stringr)
library(tidyr)
library(readxl)

Como é o setor na RAIS?

Credenciais

Fizemos o download das informações pelo diretório “Base dos Dados” (https://basedosdados.org/), de forma que vocês precisam de credenciais (gratuítas) para fazer o mesmo procedimento.

As instruções básicas para o carregamento das credenciais podem ser encontradas no site do basedosdados.org(https://basedosdados.github.io/mais/access_data_bq/#primeiros-passos).

Por isso que toda vez que a gente for fazer o download das informações brutas, vai estar indicado como “não rodar” e posteriormente terá o arquivo resultado do rotina que está especificado como “não rodar”. Isso é feito por dois motivos: i) se toda vez fizermos os downloads novamente. Isso pode gerar custos no Google Cloud; ii) É interessante ter a memória de como a base de dados foi gerada (mesmo que a de um gráfico simples) para posterior conferência e/ou modificação.

Credenciais (não rodar)

  • Incluir suas credenciais
set_billing_id("356922918080")
bq_auth()

con <- dbConnect(
  bigrquery::bigquery(),
  billing = "356922918080",
  project = "agrteste2"
)

#leonardocardoso005@gmail.com

Mudar para o UFV

set_billing_id("1055074884926")
bq_auth()

con <- dbConnect(
  bigrquery::bigquery(),
  billing = "1055074884926",
  project = "My First Project"
)

#leonardocardoso@ufv.br

Quantidade de empregos

A gente vai contar o número de vínculos ativos do setor de bebidas e alimentos (subsetor 13 do IBGE)

empregos1<-basedosdados::read_sql('SELECT ano, SUM(quantidade_vinculos_clt) as total_clt
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE subsetor_ibge = "13"
GROUP BY ano
ORDER BY ano
                                ')


dicionario<-basedosdados::read_sql('SELECT * 
FROM `basedosdados.br_me_rais.dicionario`
                                ')

Olhar para Cruz das Almas - BA só para entender como são os dados pré-2014.

Não vale a pena baixar ela inteira. Nem sei se isso é viável, a base dos vínculos da RAIS, por exemplo, tem aproximadamente 380GB (1985-2023).

#2909802
cruz<-basedosdados::read_sql('SELECT * 
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE id_municipio = "2909802"
                                ')

cnae1 <- basedosdados::read_sql('
  SELECT * 
  FROM `basedosdados.br_bd_diretorios_brasil.cnae_1` 
  ')

cnae2 <- basedosdados::read_sql('
  SELECT * 
  FROM `basedosdados.br_bd_diretorios_brasil.cnae_2` 
  ')

f1 <- cnae1 %>%
  dplyr::filter(descricao_divisao == "Fabricação de produtos alimentícios e bebidas") %>%
  dplyr::pull(cnae_1)

f2<- cnae2 %>%
  dplyr::filter(divisao %in% c("10", "11")) %>%
  dplyr::pull(classe)

f2

Pré-2014 a variável não é o subsetor do ibge, mas é o subsetor_ibge = “ALIM E BEB”

Agora sim.

e1 <- basedosdados::read_sql('
  SELECT ano, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE subsetor_ibge = "13"
    OR subsetor_ibge = "ALIM E BEB"
    OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                  "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                  "15814", "15920", "15946", "15954", "15911", "15938")
    OR cnae_2 IN ("10112", "10112", "10112", "10112", "10112", "10121", "10121", "10121", "10121", "10139", "10139", "10201", "10201", "10317", "10325", "10325", "10333", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10619", "10627", "10635", "10643", "10651", "10651", "10651", "10660", "10694", "10716", "10724", "10724", "10813", "10813", "10821", "10911", "10911", "10911", "10929", "10937", "10937", "10945", "10953", "10961", "10996", "10996", "10996", "10996", "10996", "10996", "10996", "11119", "11119", "11127", "11135", "11135", "11216", "11224", "11224", "11224", "11224", "11224")
    GROUP BY ano
  ORDER BY ano
')


save(e1, file="e1.Rda")

Todos os empregos da RAIS

e2 <- basedosdados::read_sql('
  SELECT ano, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
    GROUP BY ano
  ORDER BY ano
')



#industria toda
e10 <- basedosdados::read_sql('
  SELECT 
    ano, 
    SUM(quantidade_vinculos_ativos) AS vinculos_ativos
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE subsetor_ibge IN ("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "14")
  GROUP BY ano
  ORDER BY ano
')


save(e2, file="e2.Rda")
save(e10, file="e10.Rda")
load("e1.Rda")
load("e2.Rda")
load("e10.Rda")

e10$todaindustriamenosalimentos<-as.numeric(e10$vinculos_ativos)

et<-merge(e1, e2, by="ano")
et2<-merge(et, e10, by="ano")

colnames(et)<-c("ano", "bebidas", "todos")

et$share_beb <- 100*(et$bebidas/et$todos)


g1 <- ggplot(e1, aes(x = ano, y = vinculos_ativos / 1000000)) +
  geom_line(color = "darkred") + # Adicionar linhas
  geom_point(color = "darkred") + # Adicionar pontos
  labs(title = "",
       x = "",
       y = "Vínculos (milhões)") +
  scale_y_continuous(labels = label_comma(decimal.mark = ",")) + # Alterar separador decimal
  theme_minimal()

g1
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing

ggsave(file = "alimentos_bebidas_1.png", plot = g1, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
g2 <- ggplot(et, aes(x = ano, y = share_beb)) +
  geom_line(color = "darkblue") + # Adicionar linhas
  geom_point(color = "darkblue") + # Adicionar pontos
  labs(title = "",
       x = "",
       y = "Participação (%)") +
  scale_y_continuous(labels = label_comma(decimal.mark = ",")) + # Alterar separador decimal
  theme_minimal()

g2
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing

ggsave(file = "alimentos_bebidas_2.png", plot = g2, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
et2$ind<-100*(et2$todaindustriamenosalimentos/et2$vinculos_ativos.y)
cruz12<-basedosdados::read_sql('SELECT * 
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE id_municipio = "2909802"
                                ')

hh1 <- basedosdados::read_sql('
  SELECT ano, 
         COUNT(vinculo_ativo_3112) AS vinc_dez, 
         AVG(CAST(idade AS FLOAT64)) AS idade, 
         AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria, 
         AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm, 
         AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
  FROM `basedosdados.br_me_rais.microdados_vinculos`
  WHERE (
        subsetor_ibge = "13"
        OR subsetor_ibge = "ALIM E BEB"
        OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                      "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                      "15814", "15920", "15946", "15954", "15911", "15938")
        OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
  )
  AND vinculo_ativo_3112 = "1"
  GROUP BY ano
  ORDER BY ano
')



save(hh1, file="hh1.Rda")




hh2 <- basedosdados::read_sql('
  SELECT ano, 
         COUNT(vinculo_ativo_3112) AS vinc_dez, 
         AVG(CAST(idade AS FLOAT64)) AS idade, 
         AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria, 
         AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm, 
         AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
  FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE vinculo_ativo_3112 = "1"
  GROUP BY ano
  ORDER BY ano
')
save(hh2, file="hh2.Rda")




hh3 <- basedosdados::read_sql('
  SELECT ano, 
         AVG(CAST(valor_remuneracao_media AS FLOAT64)) AS smedioali
  FROM `basedosdados.br_me_rais.microdados_vinculos`
  WHERE (
        subsetor_ibge = "13"
        OR subsetor_ibge = "ALIM E BEB"
        OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                      "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                      "15814", "15920", "15946", "15954", "15911", "15938")
        OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
  )
  AND vinculo_ativo_3112 = "1"
  GROUP BY ano
  ORDER BY ano
')



save(hh3, file="hh3.Rda")

hh4 <- basedosdados::read_sql('
  SELECT ano, 
         AVG(CAST(valor_remuneracao_media AS FLOAT64)) AS smediotd
  FROM `basedosdados.br_me_rais.microdados_vinculos`
  WHERE vinculo_ativo_3112 = "1"
  GROUP BY ano
  ORDER BY ano
')



save(hh4, file="hh4.Rda")
load("hh1.Rda")
load("hh2.Rda")

load("hh3.Rda")
load("hh4.Rda")
load("hh3.Rda")
load("hh4.Rda")

salario<-merge(hh3, hh4, by="ano")

salario
##     ano smedioali  smediotd
## 1  1985        NA        NA
## 2  1986        NA        NA
## 3  1987        NA        NA
## 4  1988        NA        NA
## 5  1989        NA        NA
## 6  1990        NA        NA
## 7  1991        NA        NA
## 8  1992        NA        NA
## 9  1993        NA        NA
## 10 1994        NA        NA
## 11 1995        NA        NA
## 12 1996        NA        NA
## 13 1997        NA        NA
## 14 1998        NA        NA
## 15 1999  523.8144  686.9854
## 16 2000  546.4354  730.5225
## 17 2001  591.8017  792.3699
## 18 2002  614.2061  846.9454
## 19 2003  696.9010  925.0861
## 20 2004  764.8081 1004.5081
## 21 2005  814.1012 1078.9249
## 22 2006  879.9996 1169.5767
## 23 2007  938.4194 1240.6242
## 24 2008 1024.7787 1357.3305
## 25 2009 1094.7590 1461.2061
## 26 2010 1217.2269 1588.2030
## 27 2011 1343.9727 1733.2093
## 28 2012 1494.2865 1903.0893
## 29 2013 1643.0426 2073.4463
## 30 2014 1773.7251 2252.2049
## 31 2015 1828.3958 2450.5900
## 32 2016 1995.0955 2635.3158
## 33 2017 2087.1590 2776.9073
## 34 2018 2118.5894 2858.0369
## 35 2019 2153.8017 2900.5003
## 36 2020 2184.6567 2924.3224
## 37 2021 2320.9606 3081.3401
## 38 2022 2952.9671 3861.0873
## 39 2023 2684.9804 3033.0976
salario<-salario %>% filter(ano>1999)

g3 <- ggplot(salario, aes(x = ano)) +
  geom_line(aes(y = smedioali, color = "Alimentos e Bebidas"), size = 0.7) +
  geom_point(aes(y = smedioali, color = "Alimentos e Bebidas"), size = 0.7) +
  geom_line(aes(y = smediotd, color = "Todos"), size = 0.7) +
  geom_point(aes(y = smediotd, color = "Todos"), size = 0.7) +
  labs(title = "",
       x = "",
       y = "Salário Nominal (BRL)",
       color = "") +
  scale_color_manual(values = c("Alimentos e Bebidas" = "darkblue", 
                                "Todos" = "grey50")) +
  theme_minimal() +
  theme(legend.position = "bottom")  # Posiciona a legenda na parte inferior
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Exibir o gráfico
g3
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.

ggsave(file = "alimentos_bebidas_3.png", plot = g3, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
salario$salario_share<-salario$smedioali/salario$smediotd

Apenas a Região Nordeste

ne1 <- basedosdados::read_sql('
  SELECT ano, 
         COUNT(vinculo_ativo_3112) AS vinc_dez, 
         AVG(CAST(idade AS FLOAT64)) AS idade, 
         AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria, 
         AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm, 
         AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
  FROM `basedosdados.br_me_rais.microdados_vinculos`
  WHERE (
        subsetor_ibge = "13"
        OR subsetor_ibge = "ALIM E BEB"
        OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                      "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                      "15814", "15920", "15946", "15954", "15911", "15938")
        OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
  )
  AND vinculo_ativo_3112 = "1"
  AND sigla_uf IN ("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE")-- Apenas estados do Nordeste
  GROUP BY ano
  ORDER BY ano
')

save(ne1, file="ne1.Rda")
ne1c <- basedosdados::read_sql('
  SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE (
    subsetor_ibge = "13"
    OR subsetor_ibge = "ALIM E BEB"
    OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                  "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                  "15814", "15920", "15946", "15954", "15911", "15938")
    OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", 
                  "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", 
                  "10996", "11119", "11127", "11135", "11216", "11224")
  )
  AND ano = 2022
  GROUP BY id_municipio
  ORDER BY id_municipio
')

save(ne1c, file="ne1c.Rda")




ne2c <- basedosdados::read_sql('
  SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_brasil
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE ano = 2022
  GROUP BY id_municipio
  ORDER BY id_municipio
')

save(ne2c, file="ne2c.Rda")
load("ne1c.Rda")

load("ne2c.Rda")
rel<-merge(ne1c, ne2c, by="id_municipio")

rel$share <- ifelse(rel$vinculos_ativos == 0, 0, 100 * (rel$vinculos_ativos / rel$vinculos_ativos_brasil))
mean_share <- mean(rel$share, na.rm = TRUE)

# Plot the density with the mean marked
g5<-ggplot(rel, aes(x = share)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(aes(xintercept = mean_share), color = "black", linetype = "dashed", size = 0.5) +
  labs(title = "",
       x = "(%)",
       y = "Densidade") +
  annotate("text", x = mean_share + 8, y = 0.12, 
           label = paste("Média:", formatC(mean_share, format = "f", big.mark = ".", decimal.mark = ",", digits = 1)),
           color = "black", vjust = -1) +
  scale_y_continuous(labels = scales::label_number(decimal.mark = ",")) +
  theme_minimal()

g5 

ggsave(file = "alimentos_bebidas_5.png", plot = g5, width = 6.472, height = 4)
count_share_above_25 <- sum(rel$share > 25, na.rm = TRUE)
count_share_above_25
## [1] 226
count_share_above_10 <- sum(rel$share > 10, na.rm = TRUE)
count_share_above_10
## [1] 678
proportion_share_between_2_and_7 <- mean(rel$share >= 2 & rel$share <= 7, na.rm = TRUE)
proportion_share_between_2_and_7
## [1] 0.2875844

Mapa

# Ler o shapefile dos municípios
shpMun <- st_read("G:/Meu Drive/AGR/BR_Municipios_2021.shp")
## Reading layer `BR_Municipios_2021' from data source 
##   `G:\Meu Drive\AGR\BR_Municipios_2021.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5572 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.99045 ymin: -33.75118 xmax: -28.84764 ymax: 5.271841
## Geodetic CRS:  SIRGAS 2000
# Ler o shapefile dos estados
shpUFs <- st_read("G:/Meu Drive/AGR/BR_UF_2021.shp")
## Reading layer `BR_UF_2021' from data source `G:\Meu Drive\AGR\BR_UF_2021.shp' using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.99045 ymin: -33.75118 xmax: -28.84764 ymax: 5.271841
## Geodetic CRS:  SIRGAS 2000
# Ajustar o nome da coluna para os municípios
mapaMun <- shpMun %>%
  st_transform(crs = st_crs(shpMun)) %>%
  mutate(CD_MUN = as.character(CD_MUN))  # Ajuste conforme o nome real da coluna

# Ajustar o nome da coluna para os estados
mapaUF <- shpUFs %>%
  st_transform(crs = st_crs(shpUFs)) %>%
  mutate(CD_UF = as.character(CD_UF))  # Ajuste conforme o nome real da coluna
mapaMun$id <- mapaMun$CD_MUN
rel$id<-rel$id_municipio
basemapa2<-merge(rel, mapaMun, by="id")


library(sf)

# Verifique se basemapa2 e shpUFs são objetos sf
class(basemapa2)
## [1] "data.frame"
class(shpUFs)
## [1] "sf"         "data.frame"
basemapa2 <- st_as_sf(basemapa2)
basemapa2 <- basemapa2 %>%
  mutate(intensidade = factor(case_when(
    share < 2 ~ "Menor do que 2%",
    share >= 2 & share < 4 ~ "Entre 2 e 4%",
    share >= 4 & share < 8 ~ "Entre 4 e 8%",
    share >= 8  ~ "Maior do que 8%"
  ), levels = c("Menor do que 2%", "Entre 2 e 4%", "Entre 4 e 8%", "Maior do que 8%")))




mapa1 <- ggplot(data = basemapa2) +
  geom_sf(aes(fill = intensidade), color = NA) +
  geom_sf(data = shpUFs, fill = NA, color = "black", size = 0.5) +
  scale_fill_viridis_d(name = "", option = "viridis", direction = 1) +  # Use viridis_d for discrete data
  labs(title = "", subtitle = "", fill = "intensidade") +
  theme_minimal() +
  theme(legend.title = element_text(), 
        legend.position = "bottom",
        legend.box.margin = margin(0, 0, 0, 0)) +
  coord_sf(expand = FALSE)

# Visualizar o mapa
mapa1

ggsave(file = "alimentos_bebidas_6.png", plot = mapa1, width = 6.472, height = 4)

Fazer por estado também

est1 <- basedosdados::read_sql('
  SELECT sigla_uf, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE (
    subsetor_ibge = "13"
    OR subsetor_ibge = "ALIM E BEB"
    OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539", 
                  "15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865", 
                  "15814", "15920", "15946", "15954", "15911", "15938")
    OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", 
                  "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", 
                  "10996", "11119", "11127", "11135", "11216", "11224")
  )
  AND ano = 2022
  GROUP BY sigla_uf
  ORDER BY sigla_uf
')

save(est1, file="est1.Rda")




est2 <- basedosdados::read_sql('
  SELECT sigla_uf, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_brasil
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE ano = 2022
  GROUP BY sigla_uf
  ORDER BY sigla_uf
')

save(est2, file="est2.Rda")
load("est1.Rda")
load("est2.Rda")

est3<-merge(est1, est2, by="sigla_uf")

est3$share<-100*(est3$vinculos_ativos/est3$vinculos_ativos_brasil)

Correlação entre empregos na agricultura e empregos na Indústria de Alimentos

#um já está pronto que é 
load("ne1c.Rda")

# o outro é 

xx1 <- basedosdados::read_sql('
  SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_agricultura
  FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
  WHERE subsetor_ibge = "25"
      AND ano = 2022
  GROUP BY id_municipio
  ORDER BY id_municipio
')

save(xx1, file="xx1.Rda")
load("ne1c.Rda")
load("xx1.Rda")
load("ne2c.Rda")

xx2<-merge(xx1, ne1c, by="id_municipio")
xx3<-merge(xx2, ne2c, by="id_municipio")

p<-xx3

p$share_agro<-p$vinculos_ativos_agricultura/p$vinculos_ativos_brasil
p$share_ali<-p$vinculos_ativos/p$vinculos_ativos_brasil



# Criar o gráfico
fig10<-ggplot(p, aes(x = log(share_ali+0.001), y = log(share_agro+0.001))) +
  geom_point(color = "grey70") +                               # Adiciona os pontos
  geom_smooth(method = "lm", color = "black", se = FALSE) +    # Adiciona a linha de tendência
  labs(x = "Empregos na Indústria de Alimentos (%)",
       y = "Empregos na Agropecuária (%)",
       title = "") +
  theme_minimal()


print(fig10)
## `geom_smooth()` using formula = 'y ~ x'

delimitacao <- read_dta("delimitacao.dta")
delimitacao$id_municipio<-delimitacao$v2017
delimitacao<-delimitacao %>% select(id_municipio)



p$semiarido <- ifelse(p$id_municipio %in% delimitacao$id_municipio, 1, 0)
p2<-p %>% filter(vinculos_ativos>=10)

fig12 <- ggplot(p2, aes(x = log(share_ali + 0.001), y = log(share_agro + 0.001))) +
  geom_point(aes(color = factor(semiarido)), alpha = 0.7) +     # Pontos coloridos por semiarido
  geom_smooth(data = p2[p2$semiarido == 1, ], aes(color = factor(1)), method = "lm", se = FALSE) + # Linha para semiarido=1
  geom_smooth(data = p2[p2$semiarido == 0, ], aes(color = factor(0)), method = "lm", se = FALSE) + # Linha para semiarido=0
  scale_color_manual(values = c("1" = "darkblue", "0" = "grey70"),    # Ajusta as cores: azul para semiarido=1
                     name = "",
                     labels = c("Não Semiárido", "Semiárido")) +  # Ajusta os rótulos da legenda
  labs(x = "Indústria de Alimentos (log(share + 0,001))",
       y = "Agropecuária (log(share + 0,001))",
       title = "") +
  theme_minimal() +
  theme(legend.position = "bottom")                              # Posiciona a legenda na parte inferior

fig12
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file = "alimentos_bebidas_12.png", plot = fig12, width = 6.472, height = 4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Sidra

#https://apisidra.ibge.gov.br/values/k/1456165926


library(sidrar)
tt =
'/k/1456165926' %>%
get_sidra(api=.) 
## All others arguments are desconsidered when 'api' is informed
tt2 <- tt %>% 
  group_by(Ano) %>% 
  summarise(Valor_total_bilhoes = sum(Valor, na.rm = TRUE) / 1e6)

Sidra 2

#https://apisidra.ibge.gov.br/values/t/1841/n1/all/v/630,631/p/all/c12762/116910,116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897


library(sidrar)
zz1 =
'/t/1841/n1/all/v/630,631/p/all/c12762/116910,116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897' %>%
get_sidra(api=.) 
## All others arguments are desconsidered when 'api' is informed

Sidra 3 (Valor Bruto da Produção Industrial

#https://apisidra.ibge.gov.br/values/t/1842/n1/all/v/810/p/all/c12762/116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897

gg1 = 
'/t/1842/n1/all/v/810/p/all/c12762/116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897' %>%
get_sidra(api=.) 
## All others arguments are desconsidered when 'api' is informed
save(gg1, file="gg1.Rda")
load("gg1.Rda")
vb <- gg1 %>% 
  filter(`Classificação Nacional de Atividades Econômicas (CNAE 2.0) (Código)` %in% c(116911, 116952))
ggplot(vb, aes(x = as.numeric(Ano), y = as.numeric(Valor/1000000), fill = `Classificação Nacional de Atividades Econômicas (CNAE 2.0)`)) +
  geom_bar(stat = "identity") +  # Barras empilhadas
  labs(x = "", y = "VBP Industrial (BRL, bilhões)", fill = "") +  # Títulos dos eixos e legenda
  theme_minimal() +  # Tema limpo
  theme(
    legend.position = "bottom",  # Coloca a legenda na parte inferior
    legend.title = element_text(size = 10),  # Tamanho do título da legenda
    legend.text = element_text(size = 8)     # Tamanho do texto da legenda
  )

vb$`Classificação Nacional de Atividades Econômicas (CNAE 2.0)` <- str_remove(
  vb$`Classificação Nacional de Atividades Econômicas (CNAE 2.0)`,
  "^\\d+\\s"  # Regex para capturar números no início seguidos por espaço
)

g18<- ggplot(vb, aes(x = as.numeric(Ano), y = as.numeric(Valor/1000000), fill = `Classificação Nacional de Atividades Econômicas (CNAE 2.0)`)) +
  geom_bar(stat = "identity") +  # Barras empilhadas
  geom_text(
    aes(label = round(Valor / 1000000, 0)),  # Rótulo arredondado
    position = position_stack(vjust = 0.5),  # Posiciona o texto no centro das barras
    size = 3                                # Tamanho do texto
  ) +
  labs(x = "", y = "VBP Industrial (BRL, bilhões)", fill = "") +  # Títulos dos eixos e legenda
  theme_minimal() +  # Tema limpo
  theme(
    legend.position = "bottom",  # Coloca a legenda na parte inferior
    legend.title = element_text(size = 10),  # Tamanho do título da legenda
    legend.text = element_text(size = 8)     # Tamanho do texto da legenda
  )

ggsave(file = "alimentos_bebidas_18.png", plot = g18, width = 6.472, height = 4)

g18

#655: https://apisidra.ibge.gov.br/values/t/655/n1/all/n7/all/v/all/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7178,7179,7180,7204/d/v63%202

#2938: https://apisidra.ibge.gov.br/values/t/2938/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7180,7204/d/v63%202

#1419: https://apisidra.ibge.gov.br/values/t/1419/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7204,12222,41128/d/v63%202

#7060: https://apisidra.ibge.gov.br/values/t/7060/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7200,7204,12222,47617/d/v63%202


b1 =
'/t/655/n1/all/n7/all/v/all/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7178,7179,7180,7204/d/v63%202' %>%
get_sidra(api=.)

b2 =
'/t/2938/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7180,7204/d/v63%202' %>%
get_sidra(api=.)

b3 =
'/t/1419/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7204,12222,41128/d/v63%202' %>%
get_sidra(api=.)

b4 =
'/t/7060/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7200,7204,12222,47617/d/v63%202' %>%
get_sidra(api=.)


td1 <- rbind(b1, b2, b3, b4)
colnames(td1)<-c("", "", "", "", "valor", "rm_cod", "rm_nome", "", "", "data", "data_nome", "sub_cod", "sub_nome")
td2<-td1%>%
  select("valor", "rm_cod", "rm_nome", "data", "data_nome", "sub_cod", "sub_nome")


td2$year <- substr(td2$data, 1, 4)
td2$year <- as.numeric(td2$year)
td2$month <- substr(td2$data, nchar(td2$data) - 1, nchar(td2$data))
td2$month <- as.numeric(td2$month)
td2$time <- as.Date(paste(td2$year, td2$month, "01", sep = "-"))

td3<-td2 %>% 
pivot_wider(id_cols = c("time", "data", "sub_cod", "sub_nome", "year", "month"), names_from = c("rm_cod"), values_from = "valor")

td4<-td2 %>% 
pivot_wider(id_cols = c("time", "data", "year", "month"), names_from = c("sub_nome", "rm_cod"), values_from = "valor")

c2 <- td4 %>%
  arrange(time)

#c2$test<-cumprod(1 + c2$`Índice geral_1`/100)

calculate_cumulative_product <- function(x) {
  result <- cumprod(1 + x/100)
  return(result)
}

exclude_columns <- c("time", "data", "year", "month")


c2[setdiff(names(c2), exclude_columns)] <- lapply(
  c2[setdiff(names(c2), exclude_columns)],
  calculate_cumulative_product
)

c3 <- c2 %>%
  mutate(across(-c("time", "data", "year", "month"), ~replace(., row_number() == 1, 1)))

base_precos<-c3

 
save(base_precos, file="base_precos.Rda")
load("base_precos.Rda")

base2 <- base_precos%>%
  dplyr:::select(-data, -year, -month)

base3<- base2 %>%
pivot_longer(!time, names_to = "id", values_to = "valor")

base3 <- base3 %>%
  separate(id, into = c("part1", "rm_id"), sep = "_")



base4<-base3 %>% 
pivot_wider(id_cols = c("time", "rm_id"), names_from = c("part1"), values_from = "valor")

colnames(base4)[3] <- "geral"
colnames(base4)[4] <- "ali"
colnames(base4)[5] <- "ali_dom"
colnames(base4)[6] <- "cereais"
colnames(base4)[6] <- "cereais"
colnames(base4)[7] <- "arroz"
colnames(base4)[8] <- "f1"
colnames(base4)[9] <- "f2"
colnames(base4)[10] <- "f3"
colnames(base4)[11] <- "f4"
colnames(base4)[12] <- "f5"
colnames(base4)[13] <- "f6"
colnames(base4)[14] <- "mandioca"
colnames(base4)[15] <- "f7"
colnames(base4)[16] <- "f8"
colnames(base4)[17] <- "tuberc"
colnames(base4)[18] <- "f9"


base4$feijoes <- rowMeans(base4[, c("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9")], na.rm = TRUE)

base4$arroz_feijao <- rowMeans(base4[, c("feijoes","arroz")], na.rm = TRUE)



baseg1<-base4%>% 
filter(rm_id==1)%>%
  dplyr:::select("time", "geral", "ali", "feijoes", "arroz")

g1_longer <- baseg1 %>%
  pivot_longer(cols = c(geral, ali, feijoes, arroz), names_to = "var", values_to = "value")


g1<-ggplot(g1_longer, aes(x = time, y = 100*value, color = var)) +
  geom_line() +
    geom_line() +
  labs(title = "",
       y = "Indices",
       x = "") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 year", date_labels = "%b-%Y") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), 
        legend.position = "bottom", legend.title = element_blank()) 

g1

baseg2<-base4%>% 
filter(rm_id==1)%>%
  dplyr:::select("time", "geral", "ali")

g2_longer2 <- baseg1 %>%
  pivot_longer(cols = c(geral, ali), names_to = "var", values_to = "value")
infgeral<-100*((4.34-3.49)/3.49)
infalimentos<-100*((5.84-4.25)/4.25)

infgeral
## [1] 24.3553
infalimentos
## [1] 37.41176
g222 <- ggplot(g2_longer2, aes(x = time, y = 100 * value, color = var)) +
  geom_line(size = 0.9) +
  labs(
    title = "",
    y = "Índices",
    x = ""
  ) +
  theme_minimal() +
scale_x_date(
    date_breaks = "2 year", 
    date_labels = "%Y", 
    limits = c(as.Date("2000-01-01"), as.Date("2024-12-31"))
  ) +  scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    legend.position = "bottom", 
    legend.title = element_blank()
  ) +
  scale_color_manual(
    labels = c("Alimentos", "Geral"),
    values = c("black", "grey")
  ) +
  # Adiciona a área sombreada
  geom_rect(
    aes(
      xmin = as.Date("2020-03-01"),
      xmax = as.Date("2023-05-31"),
      ymin = -Inf,
      ymax = 600
    ),
    fill = "orange",
    alpha = 0.003,
    inherit.aes = FALSE
  ) +
  # Adiciona anotações
  annotate(
    "text",
    x = as.Date("2020-01-01"), y = 300,
    label = "Geral = +24%",
    color = "grey20",
    size =2.8,
    hjust = 0
  ) +
  annotate(
    "text",
    x = as.Date("2020-01-01"), y = 650,
    label = "Alimentos = +37%",
    color = "black",
    size = 2.8,
    hjust = 0
  )

g222
## Warning in geom_rect(aes(xmin = as.Date("2020-03-01"), xmax = as.Date("2023-05-31"), : All aesthetics have length 1, but the data has 606 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggsave(file="g222.png", plot=g222,  width = 6.472, height = 4)
## Warning in geom_rect(aes(xmin = as.Date("2020-03-01"), xmax = as.Date("2023-05-31"), : All aesthetics have length 1, but the data has 606 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).