setwd("C:/Users/akrause/OneDrive - Instituto Nacional de Estadisticas/SAE/MODELO 1/DATA")

# Voy a chequear algunos cálculos del proyecto SAE MODELO 1

# Primero
library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.2.3

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## Warning: package 'ggplot2' was built under R version 4.2.3

## Warning: package 'tibble' was built under R version 4.2.3

## Warning: package 'tidyr' was built under R version 4.2.3

## Warning: package 'readr' was built under R version 4.2.3

## Warning: package 'purrr' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## Warning: package 'stringr' was built under R version 4.2.3

## Warning: package 'forcats' was built under R version 4.2.3

## Warning: package 'lubridate' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(srvyr)

## Warning: package 'srvyr' was built under R version 4.2.3

## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(TeachingSampling)

## Warning: package 'TeachingSampling' was built under R version 4.2.3

## Loading required package: magrittr

## Warning: package 'magrittr' was built under R version 4.2.3

## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

library(haven)

## Warning: package 'haven' was built under R version 4.2.3

library(bayesplot)

## Warning: package 'bayesplot' was built under R version 4.2.3

## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting

library(patchwork)

## Warning: package 'patchwork' was built under R version 4.2.3

library(stringr)
library(rstan)

## Warning: package 'rstan' was built under R version 4.2.3

## Loading required package: StanHeaders

## Warning: package 'StanHeaders' was built under R version 4.2.3

## 
## rstan version 2.32.6 (Stan version 2.32.2)
## 
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## 
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
## 
## Attaching package: 'rstan'
## 
## The following object is masked from 'package:magrittr':
## 
##     extract
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

library(posterior)

## Warning: package 'posterior' was built under R version 4.2.3

## This is posterior version 1.5.0
## 
## Attaching package: 'posterior'
## 
## The following objects are masked from 'package:rstan':
## 
##     ess_bulk, ess_tail
## 
## The following object is masked from 'package:bayesplot':
## 
##     rhat
## 
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %in%, match

library(ggspatial)

## Warning: package 'ggspatial' was built under R version 4.2.3

library(ggrepel)

## Warning: package 'ggrepel' was built under R version 4.2.3

library(magrittr)
library(ggthemes)

## Warning: package 'ggthemes' was built under R version 4.2.3

library(chilemapas)

## Warning: package 'chilemapas' was built under R version 4.2.3

## Loading required package: sf

## Warning: package 'sf' was built under R version 4.2.3

## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
## La documentacion del paquete y ejemplos de uso se encuentran en https://pacha.dev/chilemapas/.
## Visita https://buymeacoffee.com/pacha/ si deseas donar para contribuir al desarrollo de este software.

library(readxl)

## Warning: package 'readxl' was built under R version 4.2.3

ano_2021 <- read_dta("ano-2021.dta")

# Codigo de 346 comunas
CODIGOS <- read_excel("CUT_2018_v04.xls") [,c(6:7)] %>% 
   rename(codigo = `Código Comuna 2018`) %>%
  mutate(codigo = as.numeric(codigo))

# Cálculos SIE

encuesta <- ano_2021

length_upm <- max(nchar(encuesta[["conglomerado"]]))
length_estrato <- max(nchar(encuesta[["estrato"]]))

# Ajuste variables a utilizar 

encuesta <-
  encuesta %>%
  transmute(
    dam = as_factor(region,levels  = "values"),
    dam = str_pad(string = dam, width = 2, pad = "0"),
    
    dam2 = as_factor(r_p_c, levels  = "values"),
    dam2 = str_pad(string = dam2, width = 5, pad = "0"),
    
    nombre_dam = as_factor(region,levels  = "labels"),
    nombre_dam2 = as_factor(r_p_c,levels  = "labels"),
    
    upm = str_pad(string = conglomerado, width = length_upm, pad = "0"),
    estrato = str_pad(string = estrato, width = length_estrato , pad = "0"),
    fep = fact_anual, 
    empleo = activ
  )
id_dominio <- "dam2"

# Se define diseño de muestreo

options(survey.lonely.psu= 'adjust' )
diseno <- encuesta %>%
  as_survey_design(
    strata = estrato,
    ids = upm,
    weights = fep,
    nest=T
  )
rm(encuesta)
# Calculo de indicadores 

indicador_dam <-
  diseno %>% group_by_at(id_dominio) %>% 
  filter(empleo %in% c(1:3)) %>%
  summarise(
    n_ocupado = unweighted(sum(empleo == 1)),
    n_desocupado = unweighted(sum(empleo == 2)),
    n_inactivo = unweighted(sum(empleo == 3)),
    Ocupado = survey_mean(empleo == 1,
                          vartype = c("se",  "var"),
                          deff = T
    ),
    Desocupado = survey_mean(empleo == 2,
                             vartype = c("se",  "var"),
                             deff = T
    ),
    Inactivo = survey_mean(empleo == 3,
                           vartype = c("se",  "var"),
                           deff = T
    )
  )

# Cálculos DET

# Población en edad de trabajar
ano_2021 %<>% mutate(pet= ifelse(edad>14,1,0))

# Ocupados
ano_2021 %<>% mutate(oc= ifelse((cae_general %in% c(1:3)),1,0))

# Descupados
ano_2021$des <- ifelse(between(ano_2021$cae_especifico,8,9),1,0)
# Fuerza de trabajo
ano_2021 %<>% mutate(ft = ifelse(oc== 1 | des == 1,1,0))

# Fuera de la Fuerza de trabajo
ano_2021 %<>% mutate(fft = ifelse((oc== 0 & des == 0) & pet==1,1,0))


# DISEÑO MUESTRALCOMPLEJO ####
DC <- svydesign(id = ~conglomerado,
weights = ~fact_anual,#Factores de expansión
strata = ~estrato,
data = ano_2021) # Base de datos
options(survey.lonely.psu = "certainty")



# Desocupados sobre población en edad de trabajar
pet<-subset(DC,pet==1)
despet <- svyby(~des,  pet, by =~r_p_c, svymean, deff=TRUE, na.rm=T)
despet %<>% mutate(cvdes = as.numeric(se/des)*100)
gl <-   filter(ano_2021, pet == 1) %>% group_by(r_p_c)  %>% summarise(gl = n_distinct(conglomerado) - n_distinct(estrato))
despet <- left_join(despet, gl)

## Joining with `by = join_by(r_p_c)`

ndes <- ano_2021 %>% 
  group_by(r_p_c) %>% 
  summarise(ndes = sum(pet))
despet <- left_join(despet, ndes)

## Joining with `by = join_by(r_p_c)`

despet %<>% 
  rename(estimacion = des)
despet %<>% mutate(t = qt(p = 0.975,  gl, lower.tail = TRUE))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

despet %<>% mutate(l_gorro = log(estimacion/(1-estimacion)))
despet %<>% mutate(var_l = (se^2)/((estimacion^2) * (1-estimacion)^2))
despet %<>% mutate(l1_gorro = l_gorro - (t*sqrt(var_l)))
despet %<>% mutate(l2_gorro = l_gorro + (t*sqrt(var_l)))
despet %<>% mutate(li = as.numeric(exp(l1_gorro)/(1+exp(l1_gorro))*100))
despet %<>% mutate(ls = as.numeric(exp(l2_gorro)/(1+exp(l2_gorro))*100))
despet %<>% mutate(flag = ifelse((ndes<60|gl<9),"No fiable",ifelse((ndes>=60&gl>=9&((estimacion<0.5&((estimacion^(2/3))/9<se))|(estimacion>=0.5&(((1-estimacion)^(2/3))/9<se)))),
"Poco fiable","Fiable")))

despet %<>% 
  rename(codigo = r_p_c)


# Agregando comunas sin muestra
despet <- left_join(CODIGOS, despet)

## Joining with `by = join_by(codigo)`

despet %<>% mutate(flag = ifelse(is.na(flag), "Sin muestra",flag))


calidaddespet <- addmargins(table(despet$flag))

Desocupados

DET

##      codigo        estimacion            se              DEff.des    
##  Min.   : 1101   Min.   :0.00000   Min.   :0.000000   Min.   :0.000  
##  1st Qu.: 6109   1st Qu.:0.02414   1st Qu.:0.005824   1st Qu.:1.091  
##  Median : 8314   Median :0.03938   Median :0.009179   Median :1.777  
##  Mean   : 9035   Mean   :0.03911   Mean   :0.009810   Mean   :1.888  
##  3rd Qu.:13103   3rd Qu.:0.05380   3rd Qu.:0.012784   3rd Qu.:2.486  
##  Max.   :16305   Max.   :0.10973   Max.   :0.035532   Max.   :6.715  
##                  NA's   :23        NA's   :23         NA's   :36

SDIE

##      dam2             Desocupado      Desocupado_se      Desocupado_deff
##  Length:323         Min.   :0.00000   Min.   :0.000000   Min.   :0.000  
##  Class :character   1st Qu.:0.02414   1st Qu.:0.005824   1st Qu.:1.091  
##  Mode  :character   Median :0.03938   Median :0.009179   Median :1.777  
##                     Mean   :0.03911   Mean   :0.009814   Mean   :1.888  
##                     3rd Qu.:0.05380   3rd Qu.:0.012784   3rd Qu.:2.486  
##                     Max.   :0.10972   Max.   :0.035532   Max.   :6.715  
##                                                          NA's   :13

Ocupados

DET

## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

##      codigo        estimacion            se             DEff.oc      
##  Min.   : 1101   Min.   :0.07799   Min.   :0.00000   Min.   : 0.000  
##  1st Qu.: 6110   1st Qu.:0.44997   1st Qu.:0.02110   1st Qu.: 1.538  
##  Median : 8309   Median :0.49335   Median :0.03063   Median : 2.648  
##  Mean   : 9013   Mean   :0.49529   Mean   :0.03346   Mean   : 2.962  
##  3rd Qu.:13106   3rd Qu.:0.54022   3rd Qu.:0.04050   3rd Qu.: 3.945  
##  Max.   :16305   Max.   :0.79703   Max.   :0.15068   Max.   :12.887

## Joining with `by = join_by(codigo)`

SDIE

##      dam2              Ocupado          Ocupado_se       Ocupado_deff   
##  Length:323         Min.   :0.07799   Min.   :0.00000   Min.   : 0.000  
##  Class :character   1st Qu.:0.44997   1st Qu.:0.02110   1st Qu.: 1.538  
##  Mode  :character   Median :0.49335   Median :0.03063   Median : 2.652  
##                     Mean   :0.49529   Mean   :0.03348   Mean   : 2.965  
##                     3rd Qu.:0.54022   3rd Qu.:0.04050   3rd Qu.: 3.945  
##                     Max.   :0.79703   Max.   :0.15068   Max.   :12.887

Personas fuera de la fuerza de trabajo

DET

## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

##      codigo        estimacion           se             DEff.fft     
##  Min.   : 1101   Min.   :0.2030   Min.   :0.00000   Min.   : 0.000  
##  1st Qu.: 6110   1st Qu.:0.4124   1st Qu.:0.02077   1st Qu.: 1.530  
##  Median : 8309   Median :0.4617   Median :0.02995   Median : 2.678  
##  Mean   : 9013   Mean   :0.4656   Mean   :0.03310   Mean   : 2.946  
##  3rd Qu.:13106   3rd Qu.:0.5197   3rd Qu.:0.04144   3rd Qu.: 3.884  
##  Max.   :16305   Max.   :0.9220   Max.   :0.13956   Max.   :12.992

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`

Evaluación de calidad

## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(r_p_c)`
## Joining with `by = join_by(r_p_c)`

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `t = qt(p = 0.975, df = gl, lower.tail = TRUE)`.
## Caused by warning in `qt()`:
## ! NaNs produced

## Joining with `by = join_by(codigo)`
## Joining with `by = join_by(codigo)`

Indicadores de desocupación

## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).

* El criterio de calidad de la encuesta establece 3 categorías: "Fiable", "Poco Fiable" y "No Fiable". La categoría "Sin muestra" corresponde a comunas que no ingresaron a la muestra en el período. La categoría "Sin casos" corresponde a comunas que si ingresaron a la muestra pero que no registraron casos de personas desocupadas. En los indicadores Porcentaje de desocupados y tasa de desocupación, estas comunas se clasifican como "No Fiable".

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_summary()`).

## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_summary()`).

## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).

Indicadores de ocupación

## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_bar()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

Indicadores de personas fuera de la fuerza de trabajo

## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_bar()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

* El criterio de calidad de la encuesta establece 3 categorías: "Fiable", "Poco Fiable" y "No Fiable". La categoría "Sin muestra" corresponde a comunas que no ingresaron a la muestra en el período. La categoría "Sin casos" corresponde a comunas que si ingresaron a la muestra pero que no registraron casos de personas fuera de la fuerza de trabajo. En el indicador porcentaje de personas fuera de la fuerza de trabajo, estas comunas se clasifican como "No Fiable".

## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

## Joining with `by = join_by(codigo, `Nombre Comuna`)`
## Joining with `by = join_by(codigo, `Nombre Comuna`)`

## # A tibble: 23 × 3
##    codigo                 `Nombre Comuna` Muestra    
##    <dbl+lbl>              <chr>           <chr>      
##  1  1402 [Camiña]         Camiña          Sin muestra
##  2  2202 [Ollagüe]        Ollagüe         Sin muestra
##  3  2302 [María Elena]    María Elena     Sin muestra
##  4  5104 [Juan Fernández] Juan Fernández  Sin muestra
##  5  5201 [Isla de Pascua] Isla de Pascua  Sin muestra
##  6  5405 [Zapallar]       Zapallar        Sin muestra
##  7  7309 [Vichuquén]      Vichuquén       Sin muestra
##  8  9209 [Renaico]        Renaico         Sin muestra
##  9 10103 [Cochamó]        Cochamó         Sin muestra
## 10 10401 [Chaitén]        Chaitén         Sin muestra
## # ℹ 13 more rows

## # A tibble: 13 × 5
##    codigo            `Nombre Comuna` Ocupados Desocupados Fuera de la fuerza d…¹
##    <dbl+lbl>         <chr>           <chr>    <chr>       <chr>                 
##  1  1403 [Colchane]  Colchane        <NA>     Sin casos   <NA>                  
##  2  5604 [El Quisco] El Quisco       <NA>     Sin casos   <NA>                  
##  3  8109 [Santa Jua… Santa Juana     <NA>     Sin casos   <NA>                  
##  4  8204 [Contulmo]  Contulmo        <NA>     Sin casos   <NA>                  
##  5  8314 [Alto Biob… Alto Biobío     <NA>     Sin casos   <NA>                  
##  6  9104 [Curarrehu… Curarrehue      <NA>     Sin casos   <NA>                  
##  7 10204 [Curaco de… Curaco de Vélez <NA>     Sin casos   <NA>                  
##  8 10206 [Puqueldón] Puqueldón       <NA>     Sin casos   <NA>                  
##  9 12103 [Río Verde] Río Verde       <NA>     Sin casos   <NA>                  
## 10 12201 [Cabo de H… Cabo de Hornos  <NA>     Sin casos   <NA>                  
## 11 13502 [Alhué]     Alhué           <NA>     Sin casos   <NA>                  
## 12 15102 [Camarones] Camarones       <NA>     Sin casos   <NA>                  
## 13 16205 [Portezuel… Portezuelo      <NA>     Sin casos   <NA>                  
## # ℹ abbreviated name: ¹`Fuera de la fuerza de trabajo`

Calidad 2021