Alvaro Krause
2024-04-10
setwd("C:/Users/akrause/OneDrive - Instituto Nacional de Estadisticas/SAE/MODELO 1/DATA")
# Voy a chequear algunos cálculos del proyecto SAE MODELO 1
# Primero
library(survey)## Loading required package: grid
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.2.3
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'srvyr' was built under R version 4.2.3
##
## Attaching package: 'srvyr'
##
## The following object is masked from 'package:stats':
##
## filter
## Warning: package 'TeachingSampling' was built under R version 4.2.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 4.2.3
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
## Warning: package 'haven' was built under R version 4.2.3
## Warning: package 'bayesplot' was built under R version 4.2.3
## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
## Warning: package 'patchwork' was built under R version 4.2.3
## Warning: package 'rstan' was built under R version 4.2.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.2.3
##
## rstan version 2.32.6 (Stan version 2.32.2)
##
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
##
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
##
## Attaching package: 'rstan'
##
## The following object is masked from 'package:magrittr':
##
## extract
##
## The following object is masked from 'package:tidyr':
##
## extract
## Warning: package 'posterior' was built under R version 4.2.3
## This is posterior version 1.5.0
##
## Attaching package: 'posterior'
##
## The following objects are masked from 'package:rstan':
##
## ess_bulk, ess_tail
##
## The following object is masked from 'package:bayesplot':
##
## rhat
##
## The following objects are masked from 'package:stats':
##
## mad, sd, var
##
## The following objects are masked from 'package:base':
##
## %in%, match
## Warning: package 'ggspatial' was built under R version 4.2.3
## Warning: package 'ggrepel' was built under R version 4.2.3
## Warning: package 'ggthemes' was built under R version 4.2.3
## Warning: package 'chilemapas' was built under R version 4.2.3
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.2.3
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
## La documentacion del paquete y ejemplos de uso se encuentran en https://pacha.dev/chilemapas/.
## Visita https://buymeacoffee.com/pacha/ si deseas donar para contribuir al desarrollo de este software.
## Warning: package 'readxl' was built under R version 4.2.3
ano_2021 <- read_dta("ano-2021.dta")
# Codigo de 346 comunas
CODIGOS <- read_excel("CUT_2018_v04.xls") [,c(6:7)] %>%
rename(codigo = `Código Comuna 2018`) %>%
mutate(codigo = as.numeric(codigo))
# Cálculos SIE
encuesta <- ano_2021
length_upm <- max(nchar(encuesta[["conglomerado"]]))
length_estrato <- max(nchar(encuesta[["estrato"]]))
# Ajuste variables a utilizar
encuesta <-
encuesta %>%
transmute(
dam = as_factor(region,levels = "values"),
dam = str_pad(string = dam, width = 2, pad = "0"),
dam2 = as_factor(r_p_c, levels = "values"),
dam2 = str_pad(string = dam2, width = 5, pad = "0"),
nombre_dam = as_factor(region,levels = "labels"),
nombre_dam2 = as_factor(r_p_c,levels = "labels"),
upm = str_pad(string = conglomerado, width = length_upm, pad = "0"),
estrato = str_pad(string = estrato, width = length_estrato , pad = "0"),
fep = fact_anual,
empleo = activ
)
id_dominio <- "dam2"
# Se define diseño de muestreo
options(survey.lonely.psu= 'adjust' )
diseno <- encuesta %>%
as_survey_design(
strata = estrato,
ids = upm,
weights = fep,
nest=T
)
rm(encuesta)
# Calculo de indicadores
indicador_dam <-
diseno %>% group_by_at(id_dominio) %>%
filter(empleo %in% c(1:3)) %>%
summarise(
n_ocupado = unweighted(sum(empleo == 1)),
n_desocupado = unweighted(sum(empleo == 2)),
n_inactivo = unweighted(sum(empleo == 3)),
Ocupado = survey_mean(empleo == 1,
vartype = c("se", "var"),
deff = T
),
Desocupado = survey_mean(empleo == 2,
vartype = c("se", "var"),
deff = T
),
Inactivo = survey_mean(empleo == 3,
vartype = c("se", "var"),
deff = T
)
)
# Cálculos DET
# Población en edad de trabajar
ano_2021 %<>% mutate(pet= ifelse(edad>14,1,0))
# Ocupados
ano_2021 %<>% mutate(oc= ifelse((cae_general %in% c(1:3)),1,0))
# Descupados
ano_2021$des <- ifelse(between(ano_2021$cae_especifico,8,9),1,0)
# Fuerza de trabajo
ano_2021 %<>% mutate(ft = ifelse(oc== 1 | des == 1,1,0))
# Fuera de la Fuerza de trabajo
ano_2021 %<>% mutate(fft = ifelse((oc== 0 & des == 0) & pet==1,1,0))
# DISEÑO MUESTRALCOMPLEJO ####
DC <- svydesign(id = ~conglomerado,
weights = ~fact_anual,#Factores de expansión
strata = ~estrato,
data = ano_2021) # Base de datos
options(survey.lonely.psu = "certainty")
# Desocupados sobre población en edad de trabajar
pet<-subset(DC,pet==1)
despet <- svyby(~des, pet, by =~r_p_c, svymean, deff=TRUE, na.rm=T)
despet %<>% mutate(cvdes = as.numeric(se/des)*100)
gl <- filter(ano_2021, pet == 1) %>% group_by(r_p_c) %>% summarise(gl = n_distinct(conglomerado) - n_distinct(estrato))
despet <- left_join(despet, gl)## Joining with `by = join_by(r_p_c)`
ndes <- ano_2021 %>%
group_by(r_p_c) %>%
summarise(ndes = sum(pet))
despet <- left_join(despet, ndes)## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
despet %<>% mutate(l_gorro = log(estimacion/(1-estimacion)))
despet %<>% mutate(var_l = (se^2)/((estimacion^2) * (1-estimacion)^2))
despet %<>% mutate(l1_gorro = l_gorro - (t*sqrt(var_l)))
despet %<>% mutate(l2_gorro = l_gorro + (t*sqrt(var_l)))
despet %<>% mutate(li = as.numeric(exp(l1_gorro)/(1+exp(l1_gorro))*100))
despet %<>% mutate(ls = as.numeric(exp(l2_gorro)/(1+exp(l2_gorro))*100))
despet %<>% mutate(flag = ifelse((ndes<60|gl<9),"No fiable",ifelse((ndes>=60&gl>=9&((estimacion<0.5&((estimacion^(2/3))/9<se))|(estimacion>=0.5&(((1-estimacion)^(2/3))/9<se)))),
"Poco fiable","Fiable")))
despet %<>%
rename(codigo = r_p_c)
# Agregando comunas sin muestra
despet <- left_join(CODIGOS, despet)## Joining with `by = join_by(codigo)`
despet %<>% mutate(flag = ifelse(is.na(flag), "Sin muestra",flag))
calidaddespet <- addmargins(table(despet$flag))DET
## codigo estimacion se DEff.des
## Min. : 1101 Min. :0.00000 Min. :0.000000 Min. :0.000
## 1st Qu.: 6109 1st Qu.:0.02414 1st Qu.:0.005824 1st Qu.:1.091
## Median : 8314 Median :0.03938 Median :0.009179 Median :1.777
## Mean : 9035 Mean :0.03911 Mean :0.009810 Mean :1.888
## 3rd Qu.:13103 3rd Qu.:0.05380 3rd Qu.:0.012784 3rd Qu.:2.486
## Max. :16305 Max. :0.10973 Max. :0.035532 Max. :6.715
## NA's :23 NA's :23 NA's :36
SDIE
## dam2 Desocupado Desocupado_se Desocupado_deff
## Length:323 Min. :0.00000 Min. :0.000000 Min. :0.000
## Class :character 1st Qu.:0.02414 1st Qu.:0.005824 1st Qu.:1.091
## Mode :character Median :0.03938 Median :0.009179 Median :1.777
## Mean :0.03911 Mean :0.009814 Mean :1.888
## 3rd Qu.:0.05380 3rd Qu.:0.012784 3rd Qu.:2.486
## Max. :0.10972 Max. :0.035532 Max. :6.715
## NA's :13
DET
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## codigo estimacion se DEff.oc
## Min. : 1101 Min. :0.07799 Min. :0.00000 Min. : 0.000
## 1st Qu.: 6110 1st Qu.:0.44997 1st Qu.:0.02110 1st Qu.: 1.538
## Median : 8309 Median :0.49335 Median :0.03063 Median : 2.648
## Mean : 9013 Mean :0.49529 Mean :0.03346 Mean : 2.962
## 3rd Qu.:13106 3rd Qu.:0.54022 3rd Qu.:0.04050 3rd Qu.: 3.945
## Max. :16305 Max. :0.79703 Max. :0.15068 Max. :12.887
## Joining with `by = join_by(codigo)`
SDIE
## dam2 Ocupado Ocupado_se Ocupado_deff
## Length:323 Min. :0.07799 Min. :0.00000 Min. : 0.000
## Class :character 1st Qu.:0.44997 1st Qu.:0.02110 1st Qu.: 1.538
## Mode :character Median :0.49335 Median :0.03063 Median : 2.652
## Mean :0.49529 Mean :0.03348 Mean : 2.965
## 3rd Qu.:0.54022 3rd Qu.:0.04050 3rd Qu.: 3.945
## Max. :0.79703 Max. :0.15068 Max. :12.887
DET
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## codigo estimacion se DEff.fft
## Min. : 1101 Min. :0.2030 Min. :0.00000 Min. : 0.000
## 1st Qu.: 6110 1st Qu.:0.4124 1st Qu.:0.02077 1st Qu.: 1.530
## Median : 8309 Median :0.4617 Median :0.02995 Median : 2.678
## Mean : 9013 Mean :0.4656 Mean :0.03310 Mean : 2.946
## 3rd Qu.:13106 3rd Qu.:0.5197 3rd Qu.:0.04144 3rd Qu.: 3.884
## Max. :16305 Max. :0.9220 Max. :0.13956 Max. :12.992
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
* El criterio de calidad de la encuesta establece 3 categorías: "Fiable", "Poco Fiable" y "No Fiable". La categoría "Sin muestra" corresponde a comunas que no ingresaron a la muestra en el período. La categoría "Sin casos" corresponde a comunas que si ingresaron a la muestra pero que no registraron casos de personas desocupadas. En los indicadores Porcentaje de desocupados y tasa de desocupación, estas comunas se clasifican como "No Fiable".
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
* El criterio de calidad de la encuesta establece 3 categorías: "Fiable", "Poco Fiable" y "No Fiable". La categoría "Sin muestra" corresponde a comunas que no ingresaron a la muestra en el período.”
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
* El criterio de calidad de la encuesta establece 3 categorías: "Fiable", "Poco Fiable" y "No Fiable". La categoría "Sin muestra" corresponde a comunas que no ingresaron a la muestra en el período. La categoría "Sin casos" corresponde a comunas que si ingresaron a la muestra pero que no registraron casos de personas fuera de la fuerza de trabajo. En el indicador porcentaje de personas fuera de la fuerza de trabajo, estas comunas se clasifican como "No Fiable".
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Joining with `by = join_by(codigo, `Nombre Comuna`)`
## Joining with `by = join_by(codigo, `Nombre Comuna`)`
## # A tibble: 23 × 3
## codigo `Nombre Comuna` Muestra
## <dbl+lbl> <chr> <chr>
## 1 1402 [Camiña] Camiña Sin muestra
## 2 2202 [Ollagüe] Ollagüe Sin muestra
## 3 2302 [María Elena] María Elena Sin muestra
## 4 5104 [Juan Fernández] Juan Fernández Sin muestra
## 5 5201 [Isla de Pascua] Isla de Pascua Sin muestra
## 6 5405 [Zapallar] Zapallar Sin muestra
## 7 7309 [Vichuquén] Vichuquén Sin muestra
## 8 9209 [Renaico] Renaico Sin muestra
## 9 10103 [Cochamó] Cochamó Sin muestra
## 10 10401 [Chaitén] Chaitén Sin muestra
## # ℹ 13 more rows
## # A tibble: 13 × 5
## codigo `Nombre Comuna` Ocupados Desocupados Fuera de la fuerza d…¹
## <dbl+lbl> <chr> <chr> <chr> <chr>
## 1 1403 [Colchane] Colchane <NA> Sin casos <NA>
## 2 5604 [El Quisco] El Quisco <NA> Sin casos <NA>
## 3 8109 [Santa Jua… Santa Juana <NA> Sin casos <NA>
## 4 8204 [Contulmo] Contulmo <NA> Sin casos <NA>
## 5 8314 [Alto Biob… Alto Biobío <NA> Sin casos <NA>
## 6 9104 [Curarrehu… Curarrehue <NA> Sin casos <NA>
## 7 10204 [Curaco de… Curaco de Vélez <NA> Sin casos <NA>
## 8 10206 [Puqueldón] Puqueldón <NA> Sin casos <NA>
## 9 12103 [Río Verde] Río Verde <NA> Sin casos <NA>
## 10 12201 [Cabo de H… Cabo de Hornos <NA> Sin casos <NA>
## 11 13502 [Alhué] Alhué <NA> Sin casos <NA>
## 12 15102 [Camarones] Camarones <NA> Sin casos <NA>
## 13 16205 [Portezuel… Portezuelo <NA> Sin casos <NA>
## # ℹ abbreviated name: ¹`Fuera de la fuerza de trabajo`