Esse .Rmd vai mostrar como algumas ilustrações em relação ao setor de alimentos e bebidas. Essa parte está diposponível no Rpubs porque é feita com a parte da RAIS que não é identificada.
library(haven)
library(ggplot2)
library(RColorBrewer)
library(sp)
library(sf)
library(ggspatial)
library(dplyr)
library(psych)
library(ggthemes)
library(fixest)
library(modelsummary)
library(broom)
library(did)
library(scales)
library(reshape)
library(vtable)
library(basedosdados)
library(DBI)
library(bigrquery)
library(data.table)
library(ggokabeito)
library(thematic)
library(readr)
library(stringr)
library(tidyr)
library(readxl)
Empregos gerados
Salários
Comparação com outros setores importantes
Fizemos o download das informações pelo diretório “Base dos Dados” (https://basedosdados.org/), de forma que vocês precisam de credenciais (gratuítas) para fazer o mesmo procedimento.
As instruções básicas para o carregamento das credenciais podem ser encontradas no site do basedosdados.org(https://basedosdados.github.io/mais/access_data_bq/#primeiros-passos).
Por isso que toda vez que a gente for fazer o download das informações brutas, vai estar indicado como “não rodar” e posteriormente terá o arquivo resultado do rotina que está especificado como “não rodar”. Isso é feito por dois motivos: i) se toda vez fizermos os downloads novamente. Isso pode gerar custos no Google Cloud; ii) É interessante ter a memória de como a base de dados foi gerada (mesmo que a de um gráfico simples) para posterior conferência e/ou modificação.
set_billing_id("356922918080")
bq_auth()
con <- dbConnect(
bigrquery::bigquery(),
billing = "356922918080",
project = "agrteste2"
)
#leonardocardoso005@gmail.com
Mudar para o UFV
set_billing_id("1055074884926")
bq_auth()
con <- dbConnect(
bigrquery::bigquery(),
billing = "1055074884926",
project = "My First Project"
)
#leonardocardoso@ufv.br
A gente vai contar o número de vínculos ativos do setor de bebidas e alimentos (subsetor 13 do IBGE)
empregos1<-basedosdados::read_sql('SELECT ano, SUM(quantidade_vinculos_clt) as total_clt
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE subsetor_ibge = "13"
GROUP BY ano
ORDER BY ano
')
dicionario<-basedosdados::read_sql('SELECT *
FROM `basedosdados.br_me_rais.dicionario`
')
Olhar para Cruz das Almas - BA só para entender como são os dados pré-2014.
Não vale a pena baixar ela inteira. Nem sei se isso é viável, a base dos vínculos da RAIS, por exemplo, tem aproximadamente 380GB (1985-2023).
#2909802
cruz<-basedosdados::read_sql('SELECT *
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE id_municipio = "2909802"
')
cnae1 <- basedosdados::read_sql('
SELECT *
FROM `basedosdados.br_bd_diretorios_brasil.cnae_1`
')
cnae2 <- basedosdados::read_sql('
SELECT *
FROM `basedosdados.br_bd_diretorios_brasil.cnae_2`
')
f1 <- cnae1 %>%
dplyr::filter(descricao_divisao == "Fabricação de produtos alimentícios e bebidas") %>%
dplyr::pull(cnae_1)
f2<- cnae2 %>%
dplyr::filter(divisao %in% c("10", "11")) %>%
dplyr::pull(classe)
f2
Pré-2014 a variável não é o subsetor do ibge, mas é o subsetor_ibge = “ALIM E BEB”
Agora sim.
e1 <- basedosdados::read_sql('
SELECT ano, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10112", "10112", "10112", "10112", "10121", "10121", "10121", "10121", "10139", "10139", "10201", "10201", "10317", "10325", "10325", "10333", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10619", "10627", "10635", "10643", "10651", "10651", "10651", "10660", "10694", "10716", "10724", "10724", "10813", "10813", "10821", "10911", "10911", "10911", "10929", "10937", "10937", "10945", "10953", "10961", "10996", "10996", "10996", "10996", "10996", "10996", "10996", "11119", "11119", "11127", "11135", "11135", "11216", "11224", "11224", "11224", "11224", "11224")
GROUP BY ano
ORDER BY ano
')
save(e1, file="e1.Rda")
Todos os empregos da RAIS
e2 <- basedosdados::read_sql('
SELECT ano, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
GROUP BY ano
ORDER BY ano
')
#industria toda
e10 <- basedosdados::read_sql('
SELECT
ano,
SUM(quantidade_vinculos_ativos) AS vinculos_ativos
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE subsetor_ibge IN ("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "14")
GROUP BY ano
ORDER BY ano
')
save(e2, file="e2.Rda")
save(e10, file="e10.Rda")
load("e1.Rda")
load("e2.Rda")
load("e10.Rda")
e10$todaindustriamenosalimentos<-as.numeric(e10$vinculos_ativos)
et<-merge(e1, e2, by="ano")
et2<-merge(et, e10, by="ano")
colnames(et)<-c("ano", "bebidas", "todos")
et$share_beb <- 100*(et$bebidas/et$todos)
g1 <- ggplot(e1, aes(x = ano, y = vinculos_ativos / 1000000)) +
geom_line(color = "darkred") + # Adicionar linhas
geom_point(color = "darkred") + # Adicionar pontos
labs(title = "",
x = "",
y = "Vínculos (milhões)") +
scale_y_continuous(labels = label_comma(decimal.mark = ",")) + # Alterar separador decimal
theme_minimal()
g1
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
ggsave(file = "alimentos_bebidas_1.png", plot = g1, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
g2 <- ggplot(et, aes(x = ano, y = share_beb)) +
geom_line(color = "darkblue") + # Adicionar linhas
geom_point(color = "darkblue") + # Adicionar pontos
labs(title = "",
x = "",
y = "Participação (%)") +
scale_y_continuous(labels = label_comma(decimal.mark = ",")) + # Alterar separador decimal
theme_minimal()
g2
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
ggsave(file = "alimentos_bebidas_2.png", plot = g2, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
## Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, :
## 'big.mark' and 'decimal.mark' are both ',', which could be confusing
et2$ind<-100*(et2$todaindustriamenosalimentos/et2$vinculos_ativos.y)
cruz12<-basedosdados::read_sql('SELECT *
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE id_municipio = "2909802"
')
hh1 <- basedosdados::read_sql('
SELECT ano,
COUNT(vinculo_ativo_3112) AS vinc_dez,
AVG(CAST(idade AS FLOAT64)) AS idade,
AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria,
AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm,
AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE (
subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
)
AND vinculo_ativo_3112 = "1"
GROUP BY ano
ORDER BY ano
')
save(hh1, file="hh1.Rda")
hh2 <- basedosdados::read_sql('
SELECT ano,
COUNT(vinculo_ativo_3112) AS vinc_dez,
AVG(CAST(idade AS FLOAT64)) AS idade,
AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria,
AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm,
AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE vinculo_ativo_3112 = "1"
GROUP BY ano
ORDER BY ano
')
save(hh2, file="hh2.Rda")
hh3 <- basedosdados::read_sql('
SELECT ano,
AVG(CAST(valor_remuneracao_media AS FLOAT64)) AS smedioali
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE (
subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
)
AND vinculo_ativo_3112 = "1"
GROUP BY ano
ORDER BY ano
')
save(hh3, file="hh3.Rda")
hh4 <- basedosdados::read_sql('
SELECT ano,
AVG(CAST(valor_remuneracao_media AS FLOAT64)) AS smediotd
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE vinculo_ativo_3112 = "1"
GROUP BY ano
ORDER BY ano
')
save(hh4, file="hh4.Rda")
load("hh1.Rda")
load("hh2.Rda")
load("hh3.Rda")
load("hh4.Rda")
load("hh3.Rda")
load("hh4.Rda")
salario<-merge(hh3, hh4, by="ano")
salario
## ano smedioali smediotd
## 1 1985 NA NA
## 2 1986 NA NA
## 3 1987 NA NA
## 4 1988 NA NA
## 5 1989 NA NA
## 6 1990 NA NA
## 7 1991 NA NA
## 8 1992 NA NA
## 9 1993 NA NA
## 10 1994 NA NA
## 11 1995 NA NA
## 12 1996 NA NA
## 13 1997 NA NA
## 14 1998 NA NA
## 15 1999 523.8144 686.9854
## 16 2000 546.4354 730.5225
## 17 2001 591.8017 792.3699
## 18 2002 614.2061 846.9454
## 19 2003 696.9010 925.0861
## 20 2004 764.8081 1004.5081
## 21 2005 814.1012 1078.9249
## 22 2006 879.9996 1169.5767
## 23 2007 938.4194 1240.6242
## 24 2008 1024.7787 1357.3305
## 25 2009 1094.7590 1461.2061
## 26 2010 1217.2269 1588.2030
## 27 2011 1343.9727 1733.2093
## 28 2012 1494.2865 1903.0893
## 29 2013 1643.0426 2073.4463
## 30 2014 1773.7251 2252.2049
## 31 2015 1828.3958 2450.5900
## 32 2016 1995.0955 2635.3158
## 33 2017 2087.1590 2776.9073
## 34 2018 2118.5894 2858.0369
## 35 2019 2153.8017 2900.5003
## 36 2020 2184.6567 2924.3224
## 37 2021 2320.9606 3081.3401
## 38 2022 2952.9671 3861.0873
## 39 2023 2684.9804 3033.0976
salario<-salario %>% filter(ano>1999)
g3 <- ggplot(salario, aes(x = ano)) +
geom_line(aes(y = smedioali, color = "Alimentos e Bebidas"), size = 0.7) +
geom_point(aes(y = smedioali, color = "Alimentos e Bebidas"), size = 0.7) +
geom_line(aes(y = smediotd, color = "Todos"), size = 0.7) +
geom_point(aes(y = smediotd, color = "Todos"), size = 0.7) +
labs(title = "",
x = "",
y = "Salário Nominal (BRL)",
color = "") +
scale_color_manual(values = c("Alimentos e Bebidas" = "darkblue",
"Todos" = "grey50")) +
theme_minimal() +
theme(legend.position = "bottom") # Posiciona a legenda na parte inferior
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Exibir o gráfico
g3
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
ggsave(file = "alimentos_bebidas_3.png", plot = g3, width = 6.472, height = 4)
## Don't know how to automatically pick scale for object of type <integer64>.
## Defaulting to continuous.
salario$salario_share<-salario$smedioali/salario$smediotd
Apenas a Região Nordeste
ne1 <- basedosdados::read_sql('
SELECT ano,
COUNT(vinculo_ativo_3112) AS vinc_dez,
AVG(CAST(idade AS FLOAT64)) AS idade,
AVG(CAST(faixa_etaria AS FLOAT64)) AS faixa_etaria,
AVG(CAST(faixa_remuneracao_media_sm AS FLOAT64)) AS faixa_remuneracao_media_sm,
AVG(CAST(valor_remuneracao_media_sm AS FLOAT64)) AS valor_remuneracao_media_sm
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE (
subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627", "10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961", "10996", "11119", "11127", "11135", "11216", "11224")
)
AND vinculo_ativo_3112 = "1"
AND sigla_uf IN ("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE")-- Apenas estados do Nordeste
GROUP BY ano
ORDER BY ano
')
save(ne1, file="ne1.Rda")
ne1c <- basedosdados::read_sql('
SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE (
subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627",
"10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961",
"10996", "11119", "11127", "11135", "11216", "11224")
)
AND ano = 2022
GROUP BY id_municipio
ORDER BY id_municipio
')
save(ne1c, file="ne1c.Rda")
ne2c <- basedosdados::read_sql('
SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_brasil
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE ano = 2022
GROUP BY id_municipio
ORDER BY id_municipio
')
save(ne2c, file="ne2c.Rda")
load("ne1c.Rda")
load("ne2c.Rda")
rel<-merge(ne1c, ne2c, by="id_municipio")
rel$share <- ifelse(rel$vinculos_ativos == 0, 0, 100 * (rel$vinculos_ativos / rel$vinculos_ativos_brasil))
mean_share <- mean(rel$share, na.rm = TRUE)
# Plot the density with the mean marked
g5<-ggplot(rel, aes(x = share)) +
geom_density(fill = "blue", alpha = 0.5) +
geom_vline(aes(xintercept = mean_share), color = "black", linetype = "dashed", size = 0.5) +
labs(title = "",
x = "(%)",
y = "Densidade") +
annotate("text", x = mean_share + 8, y = 0.12,
label = paste("Média:", formatC(mean_share, format = "f", big.mark = ".", decimal.mark = ",", digits = 1)),
color = "black", vjust = -1) +
scale_y_continuous(labels = scales::label_number(decimal.mark = ",")) +
theme_minimal()
g5
ggsave(file = "alimentos_bebidas_5.png", plot = g5, width = 6.472, height = 4)
count_share_above_25 <- sum(rel$share > 25, na.rm = TRUE)
count_share_above_25
## [1] 226
count_share_above_10 <- sum(rel$share > 10, na.rm = TRUE)
count_share_above_10
## [1] 678
proportion_share_between_2_and_7 <- mean(rel$share >= 2 & rel$share <= 7, na.rm = TRUE)
proportion_share_between_2_and_7
## [1] 0.2875844
Mapa
# Ler o shapefile dos municípios
shpMun <- st_read("G:/Meu Drive/AGR/BR_Municipios_2021.shp")
## Reading layer `BR_Municipios_2021' from data source
## `G:\Meu Drive\AGR\BR_Municipios_2021.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5572 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -73.99045 ymin: -33.75118 xmax: -28.84764 ymax: 5.271841
## Geodetic CRS: SIRGAS 2000
# Ler o shapefile dos estados
shpUFs <- st_read("G:/Meu Drive/AGR/BR_UF_2021.shp")
## Reading layer `BR_UF_2021' from data source `G:\Meu Drive\AGR\BR_UF_2021.shp' using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -73.99045 ymin: -33.75118 xmax: -28.84764 ymax: 5.271841
## Geodetic CRS: SIRGAS 2000
# Ajustar o nome da coluna para os municípios
mapaMun <- shpMun %>%
st_transform(crs = st_crs(shpMun)) %>%
mutate(CD_MUN = as.character(CD_MUN)) # Ajuste conforme o nome real da coluna
# Ajustar o nome da coluna para os estados
mapaUF <- shpUFs %>%
st_transform(crs = st_crs(shpUFs)) %>%
mutate(CD_UF = as.character(CD_UF)) # Ajuste conforme o nome real da coluna
mapaMun$id <- mapaMun$CD_MUN
rel$id<-rel$id_municipio
basemapa2<-merge(rel, mapaMun, by="id")
library(sf)
# Verifique se basemapa2 e shpUFs são objetos sf
class(basemapa2)
## [1] "data.frame"
class(shpUFs)
## [1] "sf" "data.frame"
basemapa2 <- st_as_sf(basemapa2)
basemapa2 <- basemapa2 %>%
mutate(intensidade = factor(case_when(
share < 2 ~ "Menor do que 2%",
share >= 2 & share < 4 ~ "Entre 2 e 4%",
share >= 4 & share < 8 ~ "Entre 4 e 8%",
share >= 8 ~ "Maior do que 8%"
), levels = c("Menor do que 2%", "Entre 2 e 4%", "Entre 4 e 8%", "Maior do que 8%")))
mapa1 <- ggplot(data = basemapa2) +
geom_sf(aes(fill = intensidade), color = NA) +
geom_sf(data = shpUFs, fill = NA, color = "black", size = 0.5) +
scale_fill_viridis_d(name = "", option = "viridis", direction = 1) + # Use viridis_d for discrete data
labs(title = "", subtitle = "", fill = "intensidade") +
theme_minimal() +
theme(legend.title = element_text(),
legend.position = "bottom",
legend.box.margin = margin(0, 0, 0, 0)) +
coord_sf(expand = FALSE)
# Visualizar o mapa
mapa1
ggsave(file = "alimentos_bebidas_6.png", plot = mapa1, width = 6.472, height = 4)
Fazer por estado também
est1 <- basedosdados::read_sql('
SELECT sigla_uf, SUM(quantidade_vinculos_ativos) AS vinculos_ativos
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE (
subsetor_ibge = "13"
OR subsetor_ibge = "ALIM E BEB"
OR cnae_1 IN ("15130", "15148", "15113", "15121", "15210", "15229", "15237", "15318", "15326", "15334", "15431", "15423", "15415", "15598", "15539",
"15563", "15520", "15512", "15555", "15547", "15610", "15628", "15717", "15725", "15849", "15857", "15890", "15822", "15830", "15865",
"15814", "15920", "15946", "15954", "15911", "15938")
OR cnae_2 IN ("10112", "10121", "10139", "10201", "10317", "10325", "10333", "10414", "10422", "10431", "10511", "10520", "10538", "10619", "10627",
"10635", "10643", "10651", "10660", "10694", "10716", "10724", "10813", "10821", "10911", "10929", "10937", "10945", "10953", "10961",
"10996", "11119", "11127", "11135", "11216", "11224")
)
AND ano = 2022
GROUP BY sigla_uf
ORDER BY sigla_uf
')
save(est1, file="est1.Rda")
est2 <- basedosdados::read_sql('
SELECT sigla_uf, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_brasil
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE ano = 2022
GROUP BY sigla_uf
ORDER BY sigla_uf
')
save(est2, file="est2.Rda")
load("est1.Rda")
load("est2.Rda")
est3<-merge(est1, est2, by="sigla_uf")
est3$share<-100*(est3$vinculos_ativos/est3$vinculos_ativos_brasil)
Correlação entre empregos na agricultura e empregos na Indústria de Alimentos
#um já está pronto que é
load("ne1c.Rda")
# o outro é
xx1 <- basedosdados::read_sql('
SELECT id_municipio, SUM(quantidade_vinculos_ativos) AS vinculos_ativos_agricultura
FROM `basedosdados.br_me_rais.microdados_estabelecimentos`
WHERE subsetor_ibge = "25"
AND ano = 2022
GROUP BY id_municipio
ORDER BY id_municipio
')
save(xx1, file="xx1.Rda")
load("ne1c.Rda")
load("xx1.Rda")
load("ne2c.Rda")
xx2<-merge(xx1, ne1c, by="id_municipio")
xx3<-merge(xx2, ne2c, by="id_municipio")
p<-xx3
p$share_agro<-p$vinculos_ativos_agricultura/p$vinculos_ativos_brasil
p$share_ali<-p$vinculos_ativos/p$vinculos_ativos_brasil
# Criar o gráfico
fig10<-ggplot(p, aes(x = log(share_ali+0.001), y = log(share_agro+0.001))) +
geom_point(color = "grey70") + # Adiciona os pontos
geom_smooth(method = "lm", color = "black", se = FALSE) + # Adiciona a linha de tendência
labs(x = "Empregos na Indústria de Alimentos (%)",
y = "Empregos na Agropecuária (%)",
title = "") +
theme_minimal()
print(fig10)
## `geom_smooth()` using formula = 'y ~ x'
delimitacao <- read_dta("delimitacao.dta")
delimitacao$id_municipio<-delimitacao$v2017
delimitacao<-delimitacao %>% select(id_municipio)
p$semiarido <- ifelse(p$id_municipio %in% delimitacao$id_municipio, 1, 0)
p2<-p %>% filter(vinculos_ativos>=10)
fig12 <- ggplot(p2, aes(x = log(share_ali + 0.001), y = log(share_agro + 0.001))) +
geom_point(aes(color = factor(semiarido)), alpha = 0.7) + # Pontos coloridos por semiarido
geom_smooth(data = p2[p2$semiarido == 1, ], aes(color = factor(1)), method = "lm", se = FALSE) + # Linha para semiarido=1
geom_smooth(data = p2[p2$semiarido == 0, ], aes(color = factor(0)), method = "lm", se = FALSE) + # Linha para semiarido=0
scale_color_manual(values = c("1" = "darkblue", "0" = "grey70"), # Ajusta as cores: azul para semiarido=1
name = "",
labels = c("Não Semiárido", "Semiárido")) + # Ajusta os rótulos da legenda
labs(x = "Indústria de Alimentos (log(share + 0,001))",
y = "Agropecuária (log(share + 0,001))",
title = "") +
theme_minimal() +
theme(legend.position = "bottom") # Posiciona a legenda na parte inferior
fig12
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave(file = "alimentos_bebidas_12.png", plot = fig12, width = 6.472, height = 4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Sidra
#https://apisidra.ibge.gov.br/values/k/1456165926
library(sidrar)
tt =
'/k/1456165926' %>%
get_sidra(api=.)
## All others arguments are desconsidered when 'api' is informed
tt2 <- tt %>%
group_by(Ano) %>%
summarise(Valor_total_bilhoes = sum(Valor, na.rm = TRUE) / 1e6)
Sidra 2
#https://apisidra.ibge.gov.br/values/t/1841/n1/all/v/630,631/p/all/c12762/116910,116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897
library(sidrar)
zz1 =
'/t/1841/n1/all/v/630,631/p/all/c12762/116910,116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897' %>%
get_sidra(api=.)
## All others arguments are desconsidered when 'api' is informed
Sidra 3 (Valor Bruto da Produção Industrial
#https://apisidra.ibge.gov.br/values/t/1842/n1/all/v/810/p/all/c12762/116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897
gg1 =
'/t/1842/n1/all/v/810/p/all/c12762/116911,116912,116916,116918,116922,116926,116930,116938,116941,116944,116952,116953,116957,117897' %>%
get_sidra(api=.)
## All others arguments are desconsidered when 'api' is informed
save(gg1, file="gg1.Rda")
load("gg1.Rda")
vb <- gg1 %>%
filter(`Classificação Nacional de Atividades Econômicas (CNAE 2.0) (Código)` %in% c(116911, 116952))
ggplot(vb, aes(x = as.numeric(Ano), y = as.numeric(Valor/1000000), fill = `Classificação Nacional de Atividades Econômicas (CNAE 2.0)`)) +
geom_bar(stat = "identity") + # Barras empilhadas
labs(x = "", y = "VBP Industrial (BRL, bilhões)", fill = "") + # Títulos dos eixos e legenda
theme_minimal() + # Tema limpo
theme(
legend.position = "bottom", # Coloca a legenda na parte inferior
legend.title = element_text(size = 10), # Tamanho do título da legenda
legend.text = element_text(size = 8) # Tamanho do texto da legenda
)
vb$`Classificação Nacional de Atividades Econômicas (CNAE 2.0)` <- str_remove(
vb$`Classificação Nacional de Atividades Econômicas (CNAE 2.0)`,
"^\\d+\\s" # Regex para capturar números no início seguidos por espaço
)
g18<- ggplot(vb, aes(x = as.numeric(Ano), y = as.numeric(Valor/1000000), fill = `Classificação Nacional de Atividades Econômicas (CNAE 2.0)`)) +
geom_bar(stat = "identity") + # Barras empilhadas
geom_text(
aes(label = round(Valor / 1000000, 0)), # Rótulo arredondado
position = position_stack(vjust = 0.5), # Posiciona o texto no centro das barras
size = 3 # Tamanho do texto
) +
labs(x = "", y = "VBP Industrial (BRL, bilhões)", fill = "") + # Títulos dos eixos e legenda
theme_minimal() + # Tema limpo
theme(
legend.position = "bottom", # Coloca a legenda na parte inferior
legend.title = element_text(size = 10), # Tamanho do título da legenda
legend.text = element_text(size = 8) # Tamanho do texto da legenda
)
ggsave(file = "alimentos_bebidas_18.png", plot = g18, width = 6.472, height = 4)
g18
#655: https://apisidra.ibge.gov.br/values/t/655/n1/all/n7/all/v/all/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7178,7179,7180,7204/d/v63%202
#2938: https://apisidra.ibge.gov.br/values/t/2938/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7180,7204/d/v63%202
#1419: https://apisidra.ibge.gov.br/values/t/1419/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7204,12222,41128/d/v63%202
#7060: https://apisidra.ibge.gov.br/values/t/7060/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7200,7204,12222,47617/d/v63%202
b1 =
'/t/655/n1/all/n7/all/v/all/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7178,7179,7180,7204/d/v63%202' %>%
get_sidra(api=.)
b2 =
'/t/2938/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7180,7204/d/v63%202' %>%
get_sidra(api=.)
b3 =
'/t/1419/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7177,7204,12222,41128/d/v63%202' %>%
get_sidra(api=.)
b4 =
'/t/7060/n1/all/n7/all/v/63/p/all/c315/7169,7170,7171,7172,7173,7175,7176,7200,7204,12222,47617/d/v63%202' %>%
get_sidra(api=.)
td1 <- rbind(b1, b2, b3, b4)
colnames(td1)<-c("", "", "", "", "valor", "rm_cod", "rm_nome", "", "", "data", "data_nome", "sub_cod", "sub_nome")
td2<-td1%>%
select("valor", "rm_cod", "rm_nome", "data", "data_nome", "sub_cod", "sub_nome")
td2$year <- substr(td2$data, 1, 4)
td2$year <- as.numeric(td2$year)
td2$month <- substr(td2$data, nchar(td2$data) - 1, nchar(td2$data))
td2$month <- as.numeric(td2$month)
td2$time <- as.Date(paste(td2$year, td2$month, "01", sep = "-"))
td3<-td2 %>%
pivot_wider(id_cols = c("time", "data", "sub_cod", "sub_nome", "year", "month"), names_from = c("rm_cod"), values_from = "valor")
td4<-td2 %>%
pivot_wider(id_cols = c("time", "data", "year", "month"), names_from = c("sub_nome", "rm_cod"), values_from = "valor")
c2 <- td4 %>%
arrange(time)
#c2$test<-cumprod(1 + c2$`Índice geral_1`/100)
calculate_cumulative_product <- function(x) {
result <- cumprod(1 + x/100)
return(result)
}
exclude_columns <- c("time", "data", "year", "month")
c2[setdiff(names(c2), exclude_columns)] <- lapply(
c2[setdiff(names(c2), exclude_columns)],
calculate_cumulative_product
)
c3 <- c2 %>%
mutate(across(-c("time", "data", "year", "month"), ~replace(., row_number() == 1, 1)))
base_precos<-c3
save(base_precos, file="base_precos.Rda")
load("base_precos.Rda")
base2 <- base_precos%>%
dplyr:::select(-data, -year, -month)
base3<- base2 %>%
pivot_longer(!time, names_to = "id", values_to = "valor")
base3 <- base3 %>%
separate(id, into = c("part1", "rm_id"), sep = "_")
base4<-base3 %>%
pivot_wider(id_cols = c("time", "rm_id"), names_from = c("part1"), values_from = "valor")
colnames(base4)[3] <- "geral"
colnames(base4)[4] <- "ali"
colnames(base4)[5] <- "ali_dom"
colnames(base4)[6] <- "cereais"
colnames(base4)[6] <- "cereais"
colnames(base4)[7] <- "arroz"
colnames(base4)[8] <- "f1"
colnames(base4)[9] <- "f2"
colnames(base4)[10] <- "f3"
colnames(base4)[11] <- "f4"
colnames(base4)[12] <- "f5"
colnames(base4)[13] <- "f6"
colnames(base4)[14] <- "mandioca"
colnames(base4)[15] <- "f7"
colnames(base4)[16] <- "f8"
colnames(base4)[17] <- "tuberc"
colnames(base4)[18] <- "f9"
base4$feijoes <- rowMeans(base4[, c("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9")], na.rm = TRUE)
base4$arroz_feijao <- rowMeans(base4[, c("feijoes","arroz")], na.rm = TRUE)
baseg1<-base4%>%
filter(rm_id==1)%>%
dplyr:::select("time", "geral", "ali", "feijoes", "arroz")
g1_longer <- baseg1 %>%
pivot_longer(cols = c(geral, ali, feijoes, arroz), names_to = "var", values_to = "value")
g1<-ggplot(g1_longer, aes(x = time, y = 100*value, color = var)) +
geom_line() +
geom_line() +
labs(title = "",
y = "Indices",
x = "") +
theme_minimal() +
scale_x_date(date_breaks = "1 year", date_labels = "%b-%Y") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
legend.position = "bottom", legend.title = element_blank())
g1
baseg2<-base4%>%
filter(rm_id==1)%>%
dplyr:::select("time", "geral", "ali")
g2_longer2 <- baseg1 %>%
pivot_longer(cols = c(geral, ali), names_to = "var", values_to = "value")
infgeral<-100*((4.34-3.49)/3.49)
infalimentos<-100*((5.84-4.25)/4.25)
infgeral
## [1] 24.3553
infalimentos
## [1] 37.41176
g222 <- ggplot(g2_longer2, aes(x = time, y = 100 * value, color = var)) +
geom_line(size = 0.9) +
labs(
title = "",
y = "Índices",
x = ""
) +
theme_minimal() +
scale_x_date(
date_breaks = "2 year",
date_labels = "%Y",
limits = c(as.Date("2000-01-01"), as.Date("2024-12-31"))
) + scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
theme(
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
legend.position = "bottom",
legend.title = element_blank()
) +
scale_color_manual(
labels = c("Alimentos", "Geral"),
values = c("black", "grey")
) +
# Adiciona a área sombreada
geom_rect(
aes(
xmin = as.Date("2020-03-01"),
xmax = as.Date("2023-05-31"),
ymin = -Inf,
ymax = 600
),
fill = "orange",
alpha = 0.003,
inherit.aes = FALSE
) +
# Adiciona anotações
annotate(
"text",
x = as.Date("2020-01-01"), y = 300,
label = "Geral = +24%",
color = "grey20",
size =2.8,
hjust = 0
) +
annotate(
"text",
x = as.Date("2020-01-01"), y = 650,
label = "Alimentos = +37%",
color = "black",
size = 2.8,
hjust = 0
)
g222
## Warning in geom_rect(aes(xmin = as.Date("2020-03-01"), xmax = as.Date("2023-05-31"), : All aesthetics have length 1, but the data has 606 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave(file="g222.png", plot=g222, width = 6.472, height = 4)
## Warning in geom_rect(aes(xmin = as.Date("2020-03-01"), xmax = as.Date("2023-05-31"), : All aesthetics have length 1, but the data has 606 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).