1 Creación de Objetos

1.1 Escalares

Es la representación más simple de un número real ($\mathbb{R}^1$)

# Two numeric scalars; the first one is echoed to the console.
escalar1 <- 3
print(escalar1)
## [1] 3
escalar2 <- 4

1.2 Operaciones

El motor de programación R puede servir como calculadora.

# Sum of the two scalars defined earlier. The original repeated this exact
# assignment three times; one assignment produces the same value.
escalar3 <- escalar1 + escalar2

# Further arithmetic on the result: a product, then a division.
escalar4 <- escalar3 * escalar2
escalar5 <- escalar4 / escalar1
escalar5
## [1] 9.333333
summary(escalar5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   9.333   9.333   9.333   9.333   9.333   9.333
table(escalar5)
## escalar5
## 9.33333333333333 
##                1

1.3 Vectores

# Three example vectors: two numeric and one character.
vect1 <- c(2, 1.5, 4)  # numeric vector
vect2 <- c("Juan", "Pedro", "Sara")
vect3 <- c(5, 6, 2)

# Scalar-by-vector product: the scalar multiplies every element of vect1.
prd1 <- escalar4 * vect1

1.3.1 Ubicación

vect1[2]
## [1] 1.5
vect2[3]
## [1] "Sara"
# Positional indexing examples.
objeto1 <- vect3[1]            # first element

objeto2 <- vect2[c(2, 3)]      # elements 2 and 3

objeto3 <- vect2[seq(1, 2)]    # elements 1 and 2

# Two equivalent ways of keeping elements 1 and 3: pick them, or drop the 2nd.
objeto4 <- vect2[c(1, 3)]
objeto4_f2 <- vect2[-2]

1.3.2 Matrices

# Three numeric vectors of equal length used as building blocks.
vector1 <- c(1, 2, 3)
vector2 <- c(4, 5, 6)
vector3 <- c(7, 8, 9)

# cbind() places each vector in its own column; column names come from the
# vector names.
matrizA <- cbind(vector1, vector2, vector3)
print(matrizA)
##      vector1 vector2 vector3
## [1,]       1       4       7
## [2,]       2       5       8
## [3,]       3       6       9
# rbind() stacks each vector as a row; row names come from the vector names.
matrizB <- rbind(vector1, vector2, vector3)
print(matrizB)
##         [,1] [,2] [,3]
## vector1    1    2    3
## vector2    4    5    6
## vector3    7    8    9

1.3.2.1 bases de datos (data frames)

base1 <- as.data.frame(matrizA)
base2 <- as.data.frame(matrizB)

1.3.2.2 Ingresar a objetos en bases de datos (tomar variables con $)

base1$vector1
## [1] 1 2 3

1.3.2.3 Cambiar de nombre a variables

nombre_vectores <- c("variable1","variable2","variable3")

names(base1) <- nombre_vectores

2 Cargar bases de datos internas

2.1 Filtros

2.2 Borramos todo

rm(list = ls())
base <- mtcars

2.3 Filtros

2.4 Vehiculos con mas de 4 cilindros

2.5 Forma (subset)

# subset() evaluates the condition inside the data frame, so the column can be
# named directly.
base1 <- subset(base, subset = cyl > 4)

2.6 Forma (matricial)

# Matrix-style indexing: a logical row mask, all columns kept.
base1_f2 <- base[base[["cyl"]] > 4, ]

2.7 Forma (dplyr)

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Same row filter written as a direct dplyr call.
base1_f3 <- filter(base, cyl > 4)

3 Modelo desempleo

rm(list = ls())
getwd()
## [1] "C:/Users/59398/Downloads/Proyecto Prueba/Insumos"
# The data file is read relative to the working directory. The original also
# called setwd() with an absolute, user-specific path, which fails on any other
# machine; open the project (or knit) from the folder holding the CSV instead.
base <- read.csv("datos_ecuador - datos_ecuador.csv")

3.1 Modelo: Desempleo = B1 + B2 * PIB + u

options(scipen = 999)
names(base)
## [1] "anio"                     "trim"                    
## [3] "RiesgoPais"               "PIB_MillonesUSD"         
## [5] "TasaDesempleo_Porcentaje" "IED_MillonesUSD"
# The same simple regression fitted two ways: with a `data` argument, and with
# columns referenced through `base$`. Only the coefficient labels differ.
modelo_desempleo <- lm(
  formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD,
  data = base
)
modelo_desempleo2 <- lm(
  formula = base$TasaDesempleo_Porcentaje ~ base$PIB_MillonesUSD
)



summary(modelo_desempleo)
## 
## Call:
## lm(formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6369 -0.8933 -0.1452  0.4323  7.6118 
## 
## Coefficients:
##                    Estimate  Std. Error t value          Pr(>|t|)    
## (Intercept)      8.37282462  0.92082867   9.093 0.000000000000595 ***
## PIB_MillonesUSD -0.00010715  0.00003556  -3.014           0.00376 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.377 on 61 degrees of freedom
## Multiple R-squared:  0.1296, Adjusted R-squared:  0.1153 
## F-statistic: 9.081 on 1 and 61 DF,  p-value: 0.003758
summary(modelo_desempleo2)
## 
## Call:
## lm(formula = base$TasaDesempleo_Porcentaje ~ base$PIB_MillonesUSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6369 -0.8933 -0.1452  0.4323  7.6118 
## 
## Coefficients:
##                         Estimate  Std. Error t value          Pr(>|t|)    
## (Intercept)           8.37282462  0.92082867   9.093 0.000000000000595 ***
## base$PIB_MillonesUSD -0.00010715  0.00003556  -3.014           0.00376 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.377 on 61 degrees of freedom
## Multiple R-squared:  0.1296, Adjusted R-squared:  0.1153 
## F-statistic: 9.081 on 1 and 61 DF,  p-value: 0.003758

3.2 Modelo: IED = B1 + B2 * RP + u

options(scipen = 999)
names(base)
## [1] "anio"                     "trim"                    
## [3] "RiesgoPais"               "PIB_MillonesUSD"         
## [5] "TasaDesempleo_Porcentaje" "IED_MillonesUSD"
# Regress IED_MillonesUSD on RiesgoPais and print the fit summary.
model_ied <- lm(formula = IED_MillonesUSD ~ RiesgoPais, data = base)
print(summary(model_ied))
## 
## Call:
## lm(formula = IED_MillonesUSD ~ RiesgoPais, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -317.37  -55.76  -16.82   54.14  279.09 
## 
## Coefficients:
##              Estimate Std. Error t value       Pr(>|t|)    
## (Intercept) 229.89653   29.74552   7.729 0.000000000129 ***
## RiesgoPais   -0.04321    0.02458  -1.758         0.0838 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 107.9 on 61 degrees of freedom
## Multiple R-squared:  0.04821,    Adjusted R-squared:  0.0326 
## F-statistic: 3.089 on 1 and 61 DF,  p-value: 0.08382

3.3 Creamos el Riesgo pais Rezagado

library(dplyr)
# Build the lagged regressors used by the lag models.
# dplyr::lag() is named explicitly so the row-shifting version is always used,
# whatever other attached packages export under the name `lag`.
# Fix: rp_rezagado1 was computed with n = 2, making it an exact copy of
# rp_rezagado2; the one-period lag needs n = 1. Model output printed elsewhere
# in this document was produced before this fix and changes on re-run.
base <- base %>%
  mutate(
    ied_rezagado1 = dplyr::lag(IED_MillonesUSD, n = 1),
    rp_rezagado1 = dplyr::lag(RiesgoPais, n = 1),
    rp_rezagado2 = dplyr::lag(RiesgoPais, n = 2)
  )

4 Modelo Rezagado

names(base)
## [1] "anio"                     "trim"                    
## [3] "RiesgoPais"               "PIB_MillonesUSD"         
## [5] "TasaDesempleo_Porcentaje" "IED_MillonesUSD"         
## [7] "ied_rezagado1"            "rp_rezagado1"            
## [9] "rp_rezagado2"
model_ied_lag1<-lm(IED_MillonesUSD~rp_rezagado1, data = base)
summary(model_ied_lag1)
## 
## Call:
## lm(formula = IED_MillonesUSD ~ rp_rezagado1, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -320.59  -53.15  -14.34   49.29  266.92 
## 
## Coefficients:
##               Estimate Std. Error t value      Pr(>|t|)    
## (Intercept)  214.06880   30.81112   6.948 0.00000000332 ***
## rp_rezagado1  -0.02707    0.02533  -1.069          0.29    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111 on 59 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01899,    Adjusted R-squared:  0.002358 
## F-statistic: 1.142 on 1 and 59 DF,  p-value: 0.2896
model_ied_lag2<-lm(IED_MillonesUSD~rp_rezagado2, data = base)
summary(model_ied_lag2)
## 
## Call:
## lm(formula = IED_MillonesUSD ~ rp_rezagado2, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -320.59  -53.15  -14.34   49.29  266.92 
## 
## Coefficients:
##               Estimate Std. Error t value      Pr(>|t|)    
## (Intercept)  214.06880   30.81112   6.948 0.00000000332 ***
## rp_rezagado2  -0.02707    0.02533  -1.069          0.29    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111 on 59 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01899,    Adjusted R-squared:  0.002358 
## F-statistic: 1.142 on 1 and 59 DF,  p-value: 0.2896

4.1 Filtramos la base de datos

##desde 2015 hasta 2023

# Keep the years 2015 through 2023 (both bounds inclusive), then refit the
# unemployment-on-GDP regression on the reduced sample.
base <- filter(base, anio >= 2015, anio <= 2023)
modelo_desempleo <- lm(
  formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD,
  data = base
)
print(summary(modelo_desempleo))
## 
## Call:
## lm(formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8356 -0.8271 -0.1033  0.1037  6.8451 
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)     12.6811501  2.7428198   4.623 0.0000526 ***
## PIB_MillonesUSD -0.0002485  0.0001002  -2.481    0.0182 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 34 degrees of freedom
## Multiple R-squared:  0.1533, Adjusted R-squared:  0.1284 
## F-statistic: 6.155 on 1 and 34 DF,  p-value: 0.01821
# Level-log specification: unemployment rate regressed on log(GDP).
modelo_ied <- lm(TasaDesempleo_Porcentaje ~ log(PIB_MillonesUSD), data = base)
# Fix: summarise the model fitted on the line above. The original summarised
# modelo_desempleo a second time, so the output printed right below still shows
# that earlier model until the document is re-rendered.
summary(modelo_ied)
## 
## Call:
## lm(formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD, data = base)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8356 -0.8271 -0.1033  0.1037  6.8451 
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)     12.6811501  2.7428198   4.623 0.0000526 ***
## PIB_MillonesUSD -0.0002485  0.0001002  -2.481    0.0182 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 34 degrees of freedom
## Multiple R-squared:  0.1533, Adjusted R-squared:  0.1284 
## F-statistic: 6.155 on 1 and 34 DF,  p-value: 0.01821
coef(modelo_ied)[2]
## log(PIB_MillonesUSD) 
##            -6.875101

5 Corrección de la prueba

rm(list = ls())
base <- read.csv('datos_ecuador - datos_ecuador.csv')

5.1 Filtro

5.2 B1 + B2*Ln(PIB) +u.

# Subsample with both bounds exclusive (anio strictly between 2015 and 2023).
# NOTE(review): the other year filters in this document use inclusive bounds
# (>= 2015, <= 2023) - confirm that the strict bounds are intended here.
base1 <- filter(base, anio > 2015, anio < 2023)

names(base1)
## [1] "anio"                     "trim"                    
## [3] "RiesgoPais"               "PIB_MillonesUSD"         
## [5] "TasaDesempleo_Porcentaje" "IED_MillonesUSD"
modelo1<-lm(TasaDesempleo_Porcentaje~log(PIB_MillonesUSD), data=base1)
summary(modelo1)
## 
## Call:
## lm(formula = TasaDesempleo_Porcentaje ~ log(PIB_MillonesUSD), 
##     data = base1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4957 -0.9472 -0.3826  0.3515  6.4448 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           105.698     42.899   2.464   0.0207 *
## log(PIB_MillonesUSD)   -9.759      4.205  -2.321   0.0284 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.559 on 26 degrees of freedom
## Multiple R-squared:  0.1716, Adjusted R-squared:  0.1398 
## F-statistic: 5.387 on 1 and 26 DF,  p-value: 0.0284

5.3 B1 + B2*PIB +u.

# Subsample with inclusive bounds (2015 through 2023).
base2 <- filter(base, anio >= 2015, anio <= 2023)

# Linear-in-levels specification on that subsample.
modelo2 <- lm(
  formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD,
  data = base2
)
print(summary(modelo2))
## 
## Call:
## lm(formula = TasaDesempleo_Porcentaje ~ PIB_MillonesUSD, data = base2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8356 -0.8271 -0.1033  0.1037  6.8451 
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)     12.6811501  2.7428198   4.623 0.0000526 ***
## PIB_MillonesUSD -0.0002485  0.0001002  -2.481    0.0182 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 34 degrees of freedom
## Multiple R-squared:  0.1533, Adjusted R-squared:  0.1284 
## F-statistic: 6.155 on 1 and 34 DF,  p-value: 0.01821

6 Manejo de Encuestas

rm(list = ls())
##Librerias
library(dplyr)
library(srvyr)
## Warning: package 'srvyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'srvyr'
## The following object is masked from 'package:stats':
## 
##     filter
library(rio)
## Warning: package 'rio' was built under R version 4.4.3
library(import)
## Warning: package 'import' was built under R version 4.4.3
## The import package should not be attached.
## Use "colon syntax" instead, e.g. import::from, or import:::from.

6.1 Cargamos la base de datos

personas <- import('1_BDD_ENS2018_f1_personas.dta')
table(personas$dcronica_2)
## 
##    0    1 
## 5591 2210
# Unweighted sample share of dcronica_2 == 1, computed from the data instead of
# from counts typed in by hand, so it stays correct if the file changes.
# table() leaves out NA by default, so the denominator is the non-missing count.
dci <- prop.table(table(personas$dcronica_2))[["1"]]

dci * 100
## [1] 28.3297

6.2 Trabajando con muestras Oficiales

# Declare the complex survey design.
dm <- personas %>%
  as_survey_design(
    ids = upm,         # primary sampling unit
    strata = estrato,  # stratum
    weights = fexp     # expansion factor (sampling weight)
  )
# With "certainty", a stratum holding a single PSU contributes nothing to the
# variance estimate instead of stopping the computation with an error.
options(survey.lonely.psu = "certainty")

# National estimate of the mean of dcronica_2 with its standard error and
# coefficient of variation, plus the count of non-missing observations.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
prev_nac_dci2 <- dm %>%
  summarise(
    survey_mean(dcronica_2, vartype = c("se", "cv"), na.rm = TRUE),
    n_muestra = sum(!is.na(dcronica_2))
  ) %>%
  mutate(dominio = "Nacional") %>%
  select(dominio, porcentaje = coef, se = `_se`, cv = `_cv`, n_muestra) %>%
  # Only the point estimate is converted to a percentage; se stays as computed.
  mutate(porcentaje = round(porcentaje * 100, digits = 1))

7 ENEMDU II TRIMESTRE 2025

rm(list = ls())
# Librerias
library(dplyr)
library(srvyr)
library(rio)

8 Cargamos la base de datos

# Two ways of reading the same semicolon-separated file.
# read.csv2() defaults to sep = ";" and dec = ","; read.csv() with sep = ";"
# still uses dec = ".".
# NOTE(review): if the file uses decimal commas the two results differ - confirm
# which one is wanted.
df <- read.csv2(file = "enemdu_persona_2025_II_trimestre.csv")
df2 <- read.csv(file = "enemdu_persona_2025_II_trimestre.csv", sep = ";")

8.1 Declaracion del disenio muestral

# Survey design declaration: primary sampling unit, stratum, expansion factor.
dm <- as_survey_design(
  df,
  ids = upm,
  strata = estrato,
  weights = fexp
)

8.1.0.1 Replicar indicadores

library(haven)
## Warning: package 'haven' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   4.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ srvyr::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(srvyr)

9 Tasa de Desempleo

# National unemployment rate as a survey ratio: condact in {7, 8} over condact
# in 1:8, restricted to p03 >= 15 (presumably age in years - TODO confirm).
tasa_desempleo_nac <- dm %>%
  filter(p03 >= 15) %>%
  summarise(
    tasa_desempleo = survey_ratio(
      numerator = condact %in% c(7, 8),
      denominator = condact %in% 1:8,
      vartype = c("se", "ci")
    )
  ) %>%
  # Fix: the original passed the whole expression as a quoted string, which only
  # added a constant text column, and it named a column (mean_desempleo) that
  # does not exist. Rescale the estimate to a percentage, as the other rate
  # blocks in this file do; se and CI bounds stay on the proportion scale.
  mutate(tasa_desempleo = round(tasa_desempleo * 100, digits = 1))

10 Tasa de Empleo Adecuado

# Re-declare the survey design (PSU, stratum, expansion factor).
dm <- as_survey_design(df, ids = upm, strata = estrato, weights = fexp)

# Survey ratio of condact == 1 over condact in 1:8, restricted to p03 >= 15.
# NOTE(review): only the point estimate is turned into a percentage; the _se,
# _low and _upp columns stay on the proportion scale - confirm this is intended.
tasa_empleo_adec <- dm %>%
  filter(p03 >= 15) %>%
  summarise(
    tasa_empleo_adecuado = survey_ratio(
      numerator = condact == 1,
      denominator = condact %in% 1:8,
      vartype = c("se", "ci")
    )
  ) %>%
  mutate(tasa_empleo_adecuado = round(100 * tasa_empleo_adecuado, digits = 1))
print(tasa_empleo_adec)
## # A tibble: 1 × 4
##   tasa_empleo_adecuado tasa_empleo_adecuado_se tasa_empleo_adecuado_low
##                  <dbl>                   <dbl>                    <dbl>
## 1                 36.2                  0.0113                    0.340
## # ℹ 1 more variable: tasa_empleo_adecuado_upp <dbl>

11 Tasa de empleo no remunerado

# Survey ratio of condact == 5 over condact in 1:8, restricted to p03 >= 15.
# The point estimate is expressed as a percentage rounded to one decimal.
tasa_empleo_norem <- dm %>%
  filter(p03 >= 15) %>%
  summarise(
    tasa_empleo_norem = survey_ratio(
      numerator = condact == 5,
      denominator = condact %in% 1:8,
      vartype = c("se", "ci")
    )
  ) %>%
  mutate(tasa_empleo_norem = round(100 * tasa_empleo_norem, digits = 1))

12 Librerias

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)

13 Cargar base de datos

base <- read_excel('pib_can_anual.xlsx')
## New names:
## • `` -> `...20`

13.1 Cambiar de nombres a variables

names(base)[6] <- 'agricultura'

13.2 Reemplazar los NA con 0

base[is.na(base)]<- 0

13.3 Ver cuántas observaciones tengo por provincia

table(base$prov)
## 
##            AZUAY          BOLÍVAR            CAÑAR           CARCHI 
##              150               70               70               60 
##       CHIMBORAZO         COTOPAXI           EL ORO       ESMERALDAS 
##              100               70              140               71 
##        GALÁPAGOS           GUAYAS         IMBABURA             LOJA 
##               30              250               60              160 
##         LOS RÍOS           MANABÍ  MORONA SANTIAGO             NAPO 
##              130              220              120               50 
##         ORELLANA          PASTAZA        PICHINCHA      SANTA ELENA 
##               40               40               80               30 
##    SANTO DOMINGO        SUCUMBÍOS       TUNGURAHUA ZAMORA CHINCHIPE 
##               19               70               90               90
base[is.na(base)]<- 0

13.4 Ver cuántas observaciones tengo por provincia

table(base$prov)
## 
##            AZUAY          BOLÍVAR            CAÑAR           CARCHI 
##              150               70               70               60 
##       CHIMBORAZO         COTOPAXI           EL ORO       ESMERALDAS 
##              100               70              140               71 
##        GALÁPAGOS           GUAYAS         IMBABURA             LOJA 
##               30              250               60              160 
##         LOS RÍOS           MANABÍ  MORONA SANTIAGO             NAPO 
##              130              220              120               50 
##         ORELLANA          PASTAZA        PICHINCHA      SANTA ELENA 
##               40               40               80               30 
##    SANTO DOMINGO        SUCUMBÍOS       TUNGURAHUA ZAMORA CHINCHIPE 
##               19               70               90               90

13.5 Nos quedamos con una provincia

# Keep one province per data set.
base_santo <- base %>%
  filter(dpa_prov == "23")
# Fix: province names are stored in upper case with accents (see the table()
# output of `prov`), so the original comparison against "manabi" matched no row
# and every later step ran on an empty data set.
base_manabi <- base %>%
  filter(prov == "MANABÍ")

13.6 Quedarme con las variables necesarias

names(base_manabi)
##  [1] "year"                                     
##  [2] "prov"                                     
##  [3] "dpa_prov"                                 
##  [4] "canton"                                   
##  [5] "dpa_can"                                  
##  [6] "agricultura"                              
##  [7] "Explotacion de minas y canteras"          
##  [8] "Manufactura\n\n"                          
##  [9] "Suministro de electricidad y de agua"     
## [10] "Construccion\n\n"                         
## [11] "comercio\n\n"                             
## [12] "Alojamiento y servicios de comida"        
## [13] "Transporte, informacion y comunicaciones" 
## [14] "Actividades financieras\n"                
## [15] "Actividades profesionales e inmobiliarias"
## [16] "Administracion publica \n"                
## [17] "Enseñanza\n\n"                            
## [18] "Salud\n\n"                                
## [19] "Otros servicios\n\n"                      
## [20] "...20"
base_manabi <- base_manabi %>%
  select(year, prov, dpa_prov, canton, dpa_can, agricultura, `Explotacion de minas y canteras`)

#Vista resumen de una variable

summary(base_manabi$agricultura)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 

#Valor limite 29351.06 ## Eliminar ultima variable

base_manabi <- base_manabi[-7]

13.7 Crear nueva variable dicotómica (mutate)

# Derived columns, created in a single mutate() call:
#   tipo_pib - text label, split on agricultura > 29351
#   tipo     - the same split coded as 1 / 0
#   periodo  - "pre" for year < 2016, "post" otherwise
base_manabi <- mutate(
  base_manabi,
  tipo_pib = ifelse(agricultura > 29351, "pib_alto", "pib_bajo"),
  tipo = ifelse(agricultura > 29351, 1, 0),
  periodo = ifelse(year < 2016, "pre", "post")
)

13.8 Dividimos las bases por periodos

# Split the data by the `periodo` flag.
base_manabi_pre <- filter(base_manabi, periodo == "pre")
base_manabi_post <- filter(base_manabi, periodo == "post")

13.9 Sumar el Pib de Agricultura por Canton

# Total of `agricultura` per canton, computed separately for each period.
b_m_pre_agg <- summarise(
  group_by(base_manabi_pre, canton),
  total_agricultura_pre = sum(agricultura)
)


b_m_post_agg <- summarise(
  group_by(base_manabi_post, canton),
  total_agricultura_post = sum(agricultura)
)