# Start from an empty workspace.
rm(list = ls())
# Working directory for this session (edit the path to match your machine).
setwd("/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Clases/04")
# If the packages are not installed on this machine, run these lines first:
# install.packages("foreign")
# install.packages("haven")
# install.packages("dplyr")
# Data import/export packages.
library("foreign")
library("haven")
# Data-frame manipulation package.
library("dplyr")

Abrir base de datos Casen 2020

# Read the Casen 2020 Stata (.dta) file into `Casen2020`.
Casen2020 <- haven::read_dta(
  file = "/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Datos/CASEN2020/Casen2020.dta"
)
# Alternative reader from the 'foreign' package (kept disabled):
# Casen2020 <- foreign::read.dta("/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Datos/CASEN2020/Casen2020.dta")

Chequeamos que se cargó correctamente.

# Number of rows and columns of the imported data set.
dim(Casen2020)
## [1] 185437    650
# Column (variable) names of the imported data set.
names(Casen2020)
##   [1] "folio"               "o"                   "id_persona"         
##   [4] "id_vivienda"         "region"              "provincia"          
##   [7] "comuna"              "zona"                "area"               
##  [10] "segmento"            "estrato"             "cod_upm"            
##  [13] "hogar"               "p6_p_con"            "expr"               
##  [16] "expp"                "expc"                "varstrat"           
##  [19] "varunit"             "fecha_entrev"        "metodologia_entrev" 
##  [22] "tot_hog"             "numviv"              "informante_idoneo"  
##  [25] "tel1"                "tel2"                "tel3"               
##  [28] "tel4"                "tel5"                "tel6"               
##  [31] "tel7"                "tel8"                "p0a"                
##  [34] "p0b"                 "p1"                  "p2"                 
##  [37] "p3"                  "p4"                  "p5"                 
##  [40] "p6"                  "p7"                  "p8"                 
##  [43] "id_persona_e"        "edad"                "sexo"               
##  [46] "pco1"                "tot_per"             "h5"                 
##  [49] "ecivil"              "h5_1"                "h5_2"               
##  [52] "nucleo"              "pco2"                "numper"             
##  [55] "n_ocupados"          "n_desocupados"       "n_inactivos"        
##  [58] "conyuge_jh"          "numnuc"              "men18c"             
##  [61] "may60c"              "tipohogar"           "e2"                 
##  [64] "e5b"                 "e6a"                 "e6b"                
##  [67] "asiste2"             "esc"                 "esc2"               
##  [70] "educ"                "o1"                  "o2"                 
##  [73] "o3"                  "o3b"                 "o4"                 
##  [76] "o6"                  "o7"                  "o7_esp"             
##  [79] "o9a"                 "o9b"                 "oficio4_08"         
##  [82] "oficio1_08"          "oficio4_88"          "oficio1_88"         
##  [85] "o15"                 "o16"                 "o17"                
##  [88] "o24"                 "rama4"               "rama1"              
##  [91] "rama4_rev3"          "rama1_rev3"          "o29"                
##  [94] "o30"                 "o31"                 "o32"                
##  [97] "o32_esp"             "o32b"                "o33a"               
## [100] "o33b"                "o34"                 "o35"                
## [103] "o36"                 "activ"               "activ2"             
## [106] "ocup_inf"            "y1_preg"             "y1"                 
## [109] "y2a_preg"            "y2_dias"             "y2b_preg"           
## [112] "y2_hrs"              "y3a_preg"            "y3b_preg"           
## [115] "y3c_preg"            "y3d_preg"            "y3e_preg"           
## [118] "y3f_preg"            "y3f_esp"             "y3a"                
## [121] "y3ap"                "y3b"                 "y3bp"               
## [124] "y3c"                 "y3cp"                "y3d"                
## [127] "y3dp"                "y3e"                 "y3ep"               
## [130] "y3f"                 "y3fp"                "y4a_preg"           
## [133] "y4b_preg"            "y4c_preg"            "y4d_preg"           
## [136] "y4d_esp"             "y4a"                 "y4b"                
## [139] "y4c"                 "y4d"                 "y5a_preg"           
## [142] "y5b_preg"            "y5c_preg"            "y5d_preg"           
## [145] "y5e_preg"            "y5f_preg"            "y5g_preg"           
## [148] "y5h_preg"            "y5i_preg"            "y5j_preg"           
## [151] "y5k_preg"            "y5l_preg"            "y5a"                
## [154] "y5b"                 "y5c"                 "y5d"                
## [157] "y5e"                 "y5f"                 "y5g"                
## [160] "y5h"                 "y5i"                 "y5j"                
## [163] "y5k"                 "y5l"                 "y6_preg"            
## [166] "y6"                  "y7_preg"             "y7"                 
## [169] "y8_preg"             "y8"                  "y9_preg"            
## [172] "y9"                  "y10_preg"            "y10"                
## [175] "y11_preg"            "y11"                 "y12a_preg"          
## [178] "y12a"                "y12b_preg"           "y12b"               
## [181] "y13a_preg"           "y13a"                "y13b_preg"          
## [184] "y13b"                "y13c_preg"           "y13c"               
## [187] "y14a_preg"           "y14a"                "y14b_preg"          
## [190] "y14b"                "y14c_preg"           "y14c"               
## [193] "y15a_preg"           "y15a"                "y15b_preg"          
## [196] "y15b"                "y15c_preg"           "y15c"               
## [199] "y16a_preg"           "y16a"                "y16b_preg"          
## [202] "y16b"                "y17_preg"            "y17"                
## [205] "y18a_preg"           "y18a"                "y18b_preg"          
## [208] "y18b"                "y18c_preg"           "y18c"               
## [211] "y18d_preg"           "y18d_esp"            "y18d"               
## [214] "y19"                 "y19t"                "y19n"               
## [217] "y20a"                "y20b"                "y20c"               
## [220] "y20d"                "y20e"                "y20amonto"          
## [223] "y20bmonto"           "y20cmonto"           "y20dmonto"          
## [226] "y20emonto"           "y22_preg"            "y22"                
## [229] "y22amonto"           "y22bmonto"           "y22cmonto"          
## [232] "y22dmonto"           "y23a_preg"           "y23a"               
## [235] "y23b"                "y23c"                "y23bmonto"          
## [238] "y23cmonto"           "y24_preg"            "y24"                
## [241] "y25a_preg"           "y25a_espontanea"     "y25amonto"          
## [244] "y25a"                "y25a1"               "y25a2"              
## [247] "y25a3"               "y25a4"               "y25a5"              
## [250] "y25a6"               "y25a7"               "y25a8"              
## [253] "y25a9"               "y25a10"              "y25a11"             
## [256] "y25b"                "y25bmonto"           "y25c"               
## [259] "y25cmonto"           "y25d"                "y25dmonto"          
## [262] "y25ep"               "y25e"                "y25fp"              
## [265] "y25f"                "y25g_preg"           "y25g"               
## [268] "y26a_preg"           "y26a_espontanea"     "y26amonto"          
## [271] "y26a"                "y26a1"               "y26a2"              
## [274] "y26a3"               "y26a4"               "y26a5"              
## [277] "y26a6"               "y26a7"               "y26a8"              
## [280] "y26a9"               "y26a10"              "y26a11"             
## [283] "y26b_preg"           "y26b_espontanea"     "y26bmonto"          
## [286] "y26b"                "y26b1"               "y26b2"              
## [289] "y26b3"               "y26b4"               "y26b5"              
## [292] "y26b6"               "y26b7"               "y26b8"              
## [295] "y26b9"               "y26b10"              "y26b11"             
## [298] "y26c"                "y26d_hog"            "y26d_preg"          
## [301] "y26d_pago"           "y26d_pago_monto1"    "y26d_pago_monto2"   
## [304] "y26d_pago_monto3"    "y26d_pago_monto4"    "y26d_pago_monto5"   
## [307] "y26d_pago_monto6"    "y26d_verificar_suma" "y26d_corregir_monto"
## [310] "y26d_total"          "y26d_integrantes"    "y26d_preg_1"        
## [313] "y26d_preg_2"         "y26d_preg_3"         "y26d_preg_4"        
## [316] "y26d_preg_5"         "y26d_preg_6"         "y26d_preg_7"        
## [319] "y26d_preg_8"         "y26d_preg_9"         "y26d_preg_10"       
## [322] "y26d_preg_11"        "y26d_preg_12"        "y26d_preg_13"       
## [325] "y26d_preg_14"        "y26d_preg_15"        "y26d_preg_16"       
## [328] "y26d_preg_17"        "y26d_preg_18"        "y26d_preg_19"       
## [331] "y26d_preg_20"        "y27_preg"            "y27"                
## [334] "y27_esp"             "y28_1a"              "y28_1b"             
## [337] "y28_1c"              "y28_1d"              "y28_1e"             
## [340] "y28_1f"              "y28_1g"              "y28_1h"             
## [343] "y28_1i"              "y28_1j"              "y28_1j_esp"         
## [346] "y28_1norecibe"       "y28_1nosabe"         "y28_2a"             
## [349] "y28_2amonto"         "y28_3a"              "y28_2b1"            
## [352] "y28_2b2"             "y28_3b"              "y28_4b"             
## [355] "y28_2c"              "y28_3c"              "y28_4c"             
## [358] "y28_2d"              "y28_2dmonto"         "y28_3d"             
## [361] "y28_2e1"             "y28_2e2"             "y28_3e"             
## [364] "y28_4e"              "y28_2f"              "y28_3f"             
## [367] "y28_4f"              "y28_2g"              "y28_3g"             
## [370] "y28_2h"              "y28_3h"              "y28_2i"             
## [373] "y28_2j"              "y28_3j"              "y0101"              
## [376] "y0301"               "y0302"               "y0303"              
## [379] "y0304"               "y0305"               "y0306"              
## [382] "y0401"               "y0402"               "y0403"              
## [385] "y0404"               "y0501"               "y0502"              
## [388] "y0503"               "y0504"               "y0505"              
## [391] "y0506"               "y0507"               "y0508"              
## [394] "y0509"               "y0510"               "y0511"              
## [397] "y0512"               "yosa"                "yosi"               
## [400] "y0701"               "y0801"               "y0901"              
## [403] "y1101"               "yre1"                "yama"               
## [406] "ymes"                "yfa1"                "yfa2"               
## [409] "ytro"                "yta1"                "yta2"               
## [412] "ydes"                "yah1"                "yah2"               
## [415] "yrut"                "yre2"                "yre3"               
## [418] "yac2"                "yids"                "ydon"               
## [421] "ydim"                "yotr"                "yfam"               
## [424] "y2001"               "y2002"               "y2003"              
## [427] "y2004"               "y2005"               "y2201"              
## [430] "y2202"               "y2203"               "y2204"              
## [433] "y2301"               "y2302"               "y2303"              
## [436] "y2401"               "y2501"               "y2502"              
## [439] "y2503"               "y2504"               "y2505"              
## [442] "y2506"               "y2507"               "y2601"              
## [445] "y2602"               "y2604"               "y2701"              
## [448] "y2801"               "y280201"             "y280202"            
## [451] "y2803"               "y2804"               "yinv01"             
## [454] "yinv02"              "yinv03"              "ymon"               
## [457] "yorf"                "yesp"                "yotp"               
## [460] "yaut"                "ysub1"               "ysub2"              
## [463] "ysub"                "ytot"                "y0101h"             
## [466] "y0301h"              "y0302h"              "y0303h"             
## [469] "y0304h"              "y0305h"              "y0306h"             
## [472] "y0401h"              "y0402h"              "y0403h"             
## [475] "y0404h"              "y0501h"              "y0502h"             
## [478] "y0503h"              "y0504h"              "y0505h"             
## [481] "y0506h"              "y0507h"              "y0508h"             
## [484] "y0509h"              "y0510h"              "y0511h"             
## [487] "y0512h"              "yosah"               "yosih"              
## [490] "y0701h"              "y0801h"              "y0901h"             
## [493] "y1101h"              "yre1h"               "yamah"              
## [496] "ymesh"               "yfa1h"               "yfa2h"              
## [499] "ytroh"               "yta1h"               "yta2h"              
## [502] "ydesh"               "yah1h"               "yah2h"              
## [505] "yruth"               "yre2h"               "yre3h"              
## [508] "yac2h"               "yidsh"               "ydonh"              
## [511] "ydimh"               "yotrh"               "yfamh"              
## [514] "y2001h"              "y2002h"              "y2003h"             
## [517] "y2004h"              "y2005h"              "y2201h"             
## [520] "y2202h"              "y2203h"              "y2204h"             
## [523] "y2301h"              "y2302h"              "y2303h"             
## [526] "y2401h"              "y2501h"              "y2502h"             
## [529] "y2503h"              "y2504h"              "y2505h"             
## [532] "y2506h"              "y2507h"              "y2601h"             
## [535] "y2602h"              "y2604h"              "y2701h"             
## [538] "y2801h"              "y280201h"            "y280202h"           
## [541] "y2803h"              "y2804h"              "yinv01h"            
## [544] "yinv02h"             "yinv03h"             "ymonh"              
## [547] "yorfh"               "yesph"               "yotph"              
## [550] "yauth"               "ysub1h"              "ysub2h"             
## [553] "ysubh"               "ymoneh"              "yaimh"              
## [556] "ytoth"               "y0101c"              "y0701c"             
## [559] "y280201c"            "y2803c"              "yautcor"            
## [562] "ytotcor"             "y0101ch"             "y0701ch"            
## [565] "y280201ch"           "y2803ch"             "yautcorh"           
## [568] "ymonecorh"           "yaimcorh"            "ytotcorh"           
## [571] "ytrabajocor"         "yoprcor"             "ytrabajocorh"       
## [574] "yoprcorh"            "ypchtrabcor"         "ypchautcor"         
## [577] "ypchtotcor"          "li"                  "lp"                 
## [580] "yoautcor"            "nae"                 "yae"                
## [583] "yoautcorh"           "pobreza"             "yae_sinte"          
## [586] "pobreza_sinte"       "dau"                 "dautr"              
## [589] "qaut"                "qautr"               "s2"                 
## [592] "s13"                 "s15"                 "s16"                
## [595] "s17"                 "s18"                 "s18_esp"            
## [598] "s28"                 "s28_esp"             "s29"                
## [601] "s30"                 "s30_esp"             "sist_salud"         
## [604] "r1b"                 "r1b_comuna_esp"      "r1b_c_cod"          
## [607] "r1b_pais_esp"        "r1b_p_cod"           "r2"                 
## [610] "r2_comuna_esp"       "r2_c_cod"            "r2_pais_esp"        
## [613] "r2_p_cod"            "r3"                  "r8a"                
## [616] "r8b"                 "r8c"                 "r8d"                
## [619] "r8e"                 "r8f"                 "r8g"                
## [622] "r8h"                 "inmigrante"          "etnia"              
## [625] "v1"                  "v1_casa"             "v1_depto"           
## [628] "v13"                 "v13_propia"          "v13_arrendada"      
## [631] "v13_cedida"          "v19_preg"            "v19"                
## [634] "v20"                 "v20_esp"             "v20_red"            
## [637] "v22"                 "v23"                 "v23_sistema"        
## [640] "v23_cajon"           "v27"                 "v28"                
## [643] "v29"                 "hacinamiento"        "nhog_viv"           
## [646] "indsan"              "ten_viv"             "ten_viv_f"          
## [649] "iae"                 "iai"

Seleccionar casos: sólo personas que vivan en la IV región de Coquimbo.

# Frequency table of cases per region code.
table(Casen2020[["region"]])
## 
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
##  8389  7599  7393  8161 18505 12350 12426 17676 11858 10513  4580  5087 39326 
##    14    15    16 
##  7997  7227  6350
# Keep only the rows whose region code equals 4.
Casen2020.IV <- dplyr::filter(Casen2020, region == 4)
# Dimensions of the regional subset.
dim(Casen2020.IV)
## [1] 8161  650
# Remove the full data set from the workspace.
rm(list = "Casen2020")

Unidad de análisis hogar (filtrar casos: jefes de hogar).

# Keep one row per household by retaining only the rows where p5 equals 1.
# NOTE(review): assumes p5 == 1 flags the household head -- TODO confirm
# against the Casen 2020 codebook.
Casen2020.IV <- dplyr::filter(Casen2020.IV, p5 == 1)
# Dimensions after the household-level filter.
dim(Casen2020.IV)
## [1] 4756  650

Ejemplo 1: intervalo de confianza de la media

  1. Necesitamos estimar el ingreso per cápita promedio en la región de Coquimbo, año 2020. Solamente tenemos los datos muestrales de Casen 2020.

  2. Para calcular intervalo de confianza de la media: \(\bar{X} \; \pm \; Z_{\alpha / 2} \; \frac{S_X}{\sqrt{n}}\).

  3. Donde:

# Sample mean of ypchtotcor, ignoring missing values.
media <- mean(Casen2020.IV[["ypchtotcor"]], na.rm = TRUE)
# Critical z values for the 95% and 99% confidence levels.
z1 <- 1.96
z2 <- 2.58
# Sample standard deviation of ypchtotcor, ignoring missing values.
sd <- sd(Casen2020.IV[["ypchtotcor"]], na.rm = TRUE)
# Sample size: number of non-missing observations.
n <- sum(!is.na(Casen2020.IV[["ypchtotcor"]]))
  1. Calcular error estándar (\(\frac{S_X}{\sqrt{n}}\)): \(\frac{304,795.4}{\sqrt{4,754}} \approx 4,420.575\)
# Standard error of the mean: the standard deviation stored in `sd`
# divided by the square root of the number of valid cases `n`.
se <- sd / sqrt(n)
  1. Calcular término error (\(Z_{\alpha / 2} \; \frac{S_X}{\sqrt{n}}\)).

Para un 95% de NC:

# Error term at the 95% confidence level: z1 times the standard error.
e1 <- se * z1
print(e1)
## [1] 8664.327

Para un 99% de NC:

# Error term at the 99% confidence level: z2 times the standard error.
e2 <- se * z2
print(e2)
## [1] 11405.08
  1. Calcular intervalo de confianza (\(\bar{X} \pm Z_{\alpha / 2} \frac{S_X}{\sqrt{n}}\)).

Para un 95% de NC:

# 95% confidence interval as a 1x2 matrix: lower limit, then upper limit.
ic95 <- matrix(c(media - e1, media + e1), nrow = 1, ncol = 2)
ic95
##          [,1]     [,2]
## [1,] 314120.7 331449.4

Para un 99% de NC:

# 99% confidence interval as a 1x2 matrix: lower limit, then upper limit.
ic99 <- matrix(c(media - e2, media + e2), nrow = 1, ncol = 2)
ic99
##        [,1]     [,2]
## [1,] 311380 334190.1
  1. Interpretar.

Ejemplo 2: intervalo de confianza de la proporción

  1. Necesitamos estimar el porcentaje de hogares en situación de pobreza en la región de Coquimbo, año 2020. Solamente tenemos los datos muestrales de Casen 2020.

  2. Para calcular intervalo de confianza de la proporción: \(p \; \pm \; Z_{\alpha / 2} \; \sqrt{\frac{pq}{n}}\).

  3. Donde:

# Sample size: number of cases with a non-missing value of pobreza.
n <- sum(!is.na(Casen2020.IV[["pobreza"]]))
# Sample proportion: share of valid cases whose pobreza code is not 3
# (presumably 3 = not poor; confirm against the Casen codebook).
p <- sum(Casen2020.IV[["pobreza"]] != 3, na.rm = TRUE) / n
# Critical z values for the 95% and 99% confidence levels.
z1 <- 1.96
z2 <- 2.58
# Variance term of the proportion: p * q, with q = 1 - p.
pq <- p * (1 - p)
  1. Calcular error estándar (\(\sqrt{\frac{pq}{n}}\)): \(\sqrt{\frac{0.12}{4,754}} = 0.005\)
# Standard error of the proportion: square root of pq divided by the
# number of valid cases `n`.
se <- sqrt(pq/n)
  1. Calcular término error (\(Z_{\alpha / 2} \; \sqrt{\frac{pq}{n}}\)).

Para un 95% de NC:

# Error term at the 95% confidence level: z1 times the standard error.
e1 <- se * z1
print(e1)
## [1] 0.009810315

Para un 99% de NC:

# Error term at the 99% confidence level: z2 times the standard error.
e2 <- se * z2
print(e2)
## [1] 0.01291358
  1. Calcular intervalo de confianza (\(p \pm Z_{\alpha / 2} \sqrt{\frac{pq}{n}}\)).

Para un 95% de NC:

# 95% confidence interval as a 1x2 matrix: lower limit, then upper limit.
ic95 <- matrix(c(p - e1, p + e1), nrow = 1, ncol = 2)
# Show the limits rounded to four decimal places.
round(ic95, digits = 4)
##        [,1]  [,2]
## [1,] 0.1284 0.148

Para un 99% de NC:

# 99% confidence interval as a 1x2 matrix: lower limit, then upper limit.
ic99 <- matrix(c(p - e2, p + e2), nrow = 1, ncol = 2)
# Show the limits rounded to four decimal places.
round(ic99, digits = 4)
##        [,1]   [,2]
## [1,] 0.1253 0.1511
  1. Interpretar.