# Start from an empty workspace: remove every object currently defined.
rm(list = ls())
# Set the working directory for the session (edit this path for your machine).
setwd(
  "/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Clases/04"
)
#Si su pc no tiene instalado los paquetes, primero ejecutar:
#install.packages("foreign")
#install.packages("haven")
#install.packages("dplyr")
library(foreign) #Cargar paquete de importación y exportación de datos
library(haven) #Cargar paquete de importación y exportación de datos
library(dplyr) #Cargar paquete de administración de bases de datos
# Import the Casen 2020 Stata (.dta) file with haven.
Casen2020 <- haven::read_dta(
  "/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Datos/CASEN2020/Casen2020.dta"
)
# Alternative reader from the foreign package (left disabled):
# Casen2020 <- foreign::read.dta("/home/daniel/Dropbox/U CENTRAL/03 2021/02 Segundo semestre 2021/Análisis de datos cuantitativos II/Datos/CASEN2020/Casen2020.dta")
Chequeamos que se cargó correctamente.
# Row and column counts of the imported data.
print(dim(Casen2020))
## [1] 185437 650
# Names of the columns (variables).
print(names(Casen2020))
## [1] "folio" "o" "id_persona"
## [4] "id_vivienda" "region" "provincia"
## [7] "comuna" "zona" "area"
## [10] "segmento" "estrato" "cod_upm"
## [13] "hogar" "p6_p_con" "expr"
## [16] "expp" "expc" "varstrat"
## [19] "varunit" "fecha_entrev" "metodologia_entrev"
## [22] "tot_hog" "numviv" "informante_idoneo"
## [25] "tel1" "tel2" "tel3"
## [28] "tel4" "tel5" "tel6"
## [31] "tel7" "tel8" "p0a"
## [34] "p0b" "p1" "p2"
## [37] "p3" "p4" "p5"
## [40] "p6" "p7" "p8"
## [43] "id_persona_e" "edad" "sexo"
## [46] "pco1" "tot_per" "h5"
## [49] "ecivil" "h5_1" "h5_2"
## [52] "nucleo" "pco2" "numper"
## [55] "n_ocupados" "n_desocupados" "n_inactivos"
## [58] "conyuge_jh" "numnuc" "men18c"
## [61] "may60c" "tipohogar" "e2"
## [64] "e5b" "e6a" "e6b"
## [67] "asiste2" "esc" "esc2"
## [70] "educ" "o1" "o2"
## [73] "o3" "o3b" "o4"
## [76] "o6" "o7" "o7_esp"
## [79] "o9a" "o9b" "oficio4_08"
## [82] "oficio1_08" "oficio4_88" "oficio1_88"
## [85] "o15" "o16" "o17"
## [88] "o24" "rama4" "rama1"
## [91] "rama4_rev3" "rama1_rev3" "o29"
## [94] "o30" "o31" "o32"
## [97] "o32_esp" "o32b" "o33a"
## [100] "o33b" "o34" "o35"
## [103] "o36" "activ" "activ2"
## [106] "ocup_inf" "y1_preg" "y1"
## [109] "y2a_preg" "y2_dias" "y2b_preg"
## [112] "y2_hrs" "y3a_preg" "y3b_preg"
## [115] "y3c_preg" "y3d_preg" "y3e_preg"
## [118] "y3f_preg" "y3f_esp" "y3a"
## [121] "y3ap" "y3b" "y3bp"
## [124] "y3c" "y3cp" "y3d"
## [127] "y3dp" "y3e" "y3ep"
## [130] "y3f" "y3fp" "y4a_preg"
## [133] "y4b_preg" "y4c_preg" "y4d_preg"
## [136] "y4d_esp" "y4a" "y4b"
## [139] "y4c" "y4d" "y5a_preg"
## [142] "y5b_preg" "y5c_preg" "y5d_preg"
## [145] "y5e_preg" "y5f_preg" "y5g_preg"
## [148] "y5h_preg" "y5i_preg" "y5j_preg"
## [151] "y5k_preg" "y5l_preg" "y5a"
## [154] "y5b" "y5c" "y5d"
## [157] "y5e" "y5f" "y5g"
## [160] "y5h" "y5i" "y5j"
## [163] "y5k" "y5l" "y6_preg"
## [166] "y6" "y7_preg" "y7"
## [169] "y8_preg" "y8" "y9_preg"
## [172] "y9" "y10_preg" "y10"
## [175] "y11_preg" "y11" "y12a_preg"
## [178] "y12a" "y12b_preg" "y12b"
## [181] "y13a_preg" "y13a" "y13b_preg"
## [184] "y13b" "y13c_preg" "y13c"
## [187] "y14a_preg" "y14a" "y14b_preg"
## [190] "y14b" "y14c_preg" "y14c"
## [193] "y15a_preg" "y15a" "y15b_preg"
## [196] "y15b" "y15c_preg" "y15c"
## [199] "y16a_preg" "y16a" "y16b_preg"
## [202] "y16b" "y17_preg" "y17"
## [205] "y18a_preg" "y18a" "y18b_preg"
## [208] "y18b" "y18c_preg" "y18c"
## [211] "y18d_preg" "y18d_esp" "y18d"
## [214] "y19" "y19t" "y19n"
## [217] "y20a" "y20b" "y20c"
## [220] "y20d" "y20e" "y20amonto"
## [223] "y20bmonto" "y20cmonto" "y20dmonto"
## [226] "y20emonto" "y22_preg" "y22"
## [229] "y22amonto" "y22bmonto" "y22cmonto"
## [232] "y22dmonto" "y23a_preg" "y23a"
## [235] "y23b" "y23c" "y23bmonto"
## [238] "y23cmonto" "y24_preg" "y24"
## [241] "y25a_preg" "y25a_espontanea" "y25amonto"
## [244] "y25a" "y25a1" "y25a2"
## [247] "y25a3" "y25a4" "y25a5"
## [250] "y25a6" "y25a7" "y25a8"
## [253] "y25a9" "y25a10" "y25a11"
## [256] "y25b" "y25bmonto" "y25c"
## [259] "y25cmonto" "y25d" "y25dmonto"
## [262] "y25ep" "y25e" "y25fp"
## [265] "y25f" "y25g_preg" "y25g"
## [268] "y26a_preg" "y26a_espontanea" "y26amonto"
## [271] "y26a" "y26a1" "y26a2"
## [274] "y26a3" "y26a4" "y26a5"
## [277] "y26a6" "y26a7" "y26a8"
## [280] "y26a9" "y26a10" "y26a11"
## [283] "y26b_preg" "y26b_espontanea" "y26bmonto"
## [286] "y26b" "y26b1" "y26b2"
## [289] "y26b3" "y26b4" "y26b5"
## [292] "y26b6" "y26b7" "y26b8"
## [295] "y26b9" "y26b10" "y26b11"
## [298] "y26c" "y26d_hog" "y26d_preg"
## [301] "y26d_pago" "y26d_pago_monto1" "y26d_pago_monto2"
## [304] "y26d_pago_monto3" "y26d_pago_monto4" "y26d_pago_monto5"
## [307] "y26d_pago_monto6" "y26d_verificar_suma" "y26d_corregir_monto"
## [310] "y26d_total" "y26d_integrantes" "y26d_preg_1"
## [313] "y26d_preg_2" "y26d_preg_3" "y26d_preg_4"
## [316] "y26d_preg_5" "y26d_preg_6" "y26d_preg_7"
## [319] "y26d_preg_8" "y26d_preg_9" "y26d_preg_10"
## [322] "y26d_preg_11" "y26d_preg_12" "y26d_preg_13"
## [325] "y26d_preg_14" "y26d_preg_15" "y26d_preg_16"
## [328] "y26d_preg_17" "y26d_preg_18" "y26d_preg_19"
## [331] "y26d_preg_20" "y27_preg" "y27"
## [334] "y27_esp" "y28_1a" "y28_1b"
## [337] "y28_1c" "y28_1d" "y28_1e"
## [340] "y28_1f" "y28_1g" "y28_1h"
## [343] "y28_1i" "y28_1j" "y28_1j_esp"
## [346] "y28_1norecibe" "y28_1nosabe" "y28_2a"
## [349] "y28_2amonto" "y28_3a" "y28_2b1"
## [352] "y28_2b2" "y28_3b" "y28_4b"
## [355] "y28_2c" "y28_3c" "y28_4c"
## [358] "y28_2d" "y28_2dmonto" "y28_3d"
## [361] "y28_2e1" "y28_2e2" "y28_3e"
## [364] "y28_4e" "y28_2f" "y28_3f"
## [367] "y28_4f" "y28_2g" "y28_3g"
## [370] "y28_2h" "y28_3h" "y28_2i"
## [373] "y28_2j" "y28_3j" "y0101"
## [376] "y0301" "y0302" "y0303"
## [379] "y0304" "y0305" "y0306"
## [382] "y0401" "y0402" "y0403"
## [385] "y0404" "y0501" "y0502"
## [388] "y0503" "y0504" "y0505"
## [391] "y0506" "y0507" "y0508"
## [394] "y0509" "y0510" "y0511"
## [397] "y0512" "yosa" "yosi"
## [400] "y0701" "y0801" "y0901"
## [403] "y1101" "yre1" "yama"
## [406] "ymes" "yfa1" "yfa2"
## [409] "ytro" "yta1" "yta2"
## [412] "ydes" "yah1" "yah2"
## [415] "yrut" "yre2" "yre3"
## [418] "yac2" "yids" "ydon"
## [421] "ydim" "yotr" "yfam"
## [424] "y2001" "y2002" "y2003"
## [427] "y2004" "y2005" "y2201"
## [430] "y2202" "y2203" "y2204"
## [433] "y2301" "y2302" "y2303"
## [436] "y2401" "y2501" "y2502"
## [439] "y2503" "y2504" "y2505"
## [442] "y2506" "y2507" "y2601"
## [445] "y2602" "y2604" "y2701"
## [448] "y2801" "y280201" "y280202"
## [451] "y2803" "y2804" "yinv01"
## [454] "yinv02" "yinv03" "ymon"
## [457] "yorf" "yesp" "yotp"
## [460] "yaut" "ysub1" "ysub2"
## [463] "ysub" "ytot" "y0101h"
## [466] "y0301h" "y0302h" "y0303h"
## [469] "y0304h" "y0305h" "y0306h"
## [472] "y0401h" "y0402h" "y0403h"
## [475] "y0404h" "y0501h" "y0502h"
## [478] "y0503h" "y0504h" "y0505h"
## [481] "y0506h" "y0507h" "y0508h"
## [484] "y0509h" "y0510h" "y0511h"
## [487] "y0512h" "yosah" "yosih"
## [490] "y0701h" "y0801h" "y0901h"
## [493] "y1101h" "yre1h" "yamah"
## [496] "ymesh" "yfa1h" "yfa2h"
## [499] "ytroh" "yta1h" "yta2h"
## [502] "ydesh" "yah1h" "yah2h"
## [505] "yruth" "yre2h" "yre3h"
## [508] "yac2h" "yidsh" "ydonh"
## [511] "ydimh" "yotrh" "yfamh"
## [514] "y2001h" "y2002h" "y2003h"
## [517] "y2004h" "y2005h" "y2201h"
## [520] "y2202h" "y2203h" "y2204h"
## [523] "y2301h" "y2302h" "y2303h"
## [526] "y2401h" "y2501h" "y2502h"
## [529] "y2503h" "y2504h" "y2505h"
## [532] "y2506h" "y2507h" "y2601h"
## [535] "y2602h" "y2604h" "y2701h"
## [538] "y2801h" "y280201h" "y280202h"
## [541] "y2803h" "y2804h" "yinv01h"
## [544] "yinv02h" "yinv03h" "ymonh"
## [547] "yorfh" "yesph" "yotph"
## [550] "yauth" "ysub1h" "ysub2h"
## [553] "ysubh" "ymoneh" "yaimh"
## [556] "ytoth" "y0101c" "y0701c"
## [559] "y280201c" "y2803c" "yautcor"
## [562] "ytotcor" "y0101ch" "y0701ch"
## [565] "y280201ch" "y2803ch" "yautcorh"
## [568] "ymonecorh" "yaimcorh" "ytotcorh"
## [571] "ytrabajocor" "yoprcor" "ytrabajocorh"
## [574] "yoprcorh" "ypchtrabcor" "ypchautcor"
## [577] "ypchtotcor" "li" "lp"
## [580] "yoautcor" "nae" "yae"
## [583] "yoautcorh" "pobreza" "yae_sinte"
## [586] "pobreza_sinte" "dau" "dautr"
## [589] "qaut" "qautr" "s2"
## [592] "s13" "s15" "s16"
## [595] "s17" "s18" "s18_esp"
## [598] "s28" "s28_esp" "s29"
## [601] "s30" "s30_esp" "sist_salud"
## [604] "r1b" "r1b_comuna_esp" "r1b_c_cod"
## [607] "r1b_pais_esp" "r1b_p_cod" "r2"
## [610] "r2_comuna_esp" "r2_c_cod" "r2_pais_esp"
## [613] "r2_p_cod" "r3" "r8a"
## [616] "r8b" "r8c" "r8d"
## [619] "r8e" "r8f" "r8g"
## [622] "r8h" "inmigrante" "etnia"
## [625] "v1" "v1_casa" "v1_depto"
## [628] "v13" "v13_propia" "v13_arrendada"
## [631] "v13_cedida" "v19_preg" "v19"
## [634] "v20" "v20_esp" "v20_red"
## [637] "v22" "v23" "v23_sistema"
## [640] "v23_cajon" "v27" "v28"
## [643] "v29" "hacinamiento" "nhog_viv"
## [646] "indsan" "ten_viv" "ten_viv_f"
## [649] "iae" "iai"
Seleccionar casos: sólo personas que vivan en la IV región de Coquimbo.
# Frequency table of cases per region code.
table(Casen2020[["region"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 8389 7599 7393 8161 18505 12350 12426 17676 11858 10513 4580 5087 39326
## 14 15 16
## 7997 7227 6350
# Keep only the rows whose region code is 4.
Casen2020.IV <- Casen2020 %>%
  dplyr::filter(region == 4)
# Row and column counts after filtering.
print(dim(Casen2020.IV))
## [1] 8161 650
# Drop the full data set; the code below only uses the regional subset.
rm(Casen2020)
Unidad de análisis hogar (filtrar casos: jefes de hogar).
# Keep only the rows where p5 equals 1.
# NOTE(review): the accompanying text describes this step as selecting
# household heads; confirm that p5 (rather than, e.g., pco1) is the variable
# that encodes the relationship to the household head.
Casen2020.IV <- Casen2020.IV %>%
  dplyr::filter(p5 == 1)
# Row and column counts after filtering.
print(dim(Casen2020.IV))
## [1] 4756 650
Necesitamos estimar el ingreso per cápita promedio en la región de Coquimbo, año 2020. Solamente tenemos los datos muestrales de Casen 2020.
Para calcular el intervalo de confianza de la media: \(\bar{X} \; \pm \; Z_{\alpha / 2} \; \frac{S_X}{\sqrt{n}}\).
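Donde \(\bar{X}\) es la media muestral, \(Z_{\alpha / 2}\) es el valor crítico de la distribución normal estándar para el nivel de confianza (NC) elegido, \(S_X\) es la desviación estándar muestral y \(n\) es el tamaño de la muestra (casos sin valores perdidos):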
# Inputs for the confidence interval of the mean of ypchtotcor.
# NOTE(review): these are unweighted sample statistics; the data set has
# columns named expr, varstrat and varunit that look like survey-design
# variables -- confirm whether a design-based estimate is intended.
media <- mean(Casen2020.IV$ypchtotcor, na.rm = TRUE) # sample mean
z1 <- 1.96 # critical z for 95% confidence (rounded)
z2 <- 2.58 # critical z for 99% confidence (rounded; qnorm(0.995) is about 2.576)
# Sample standard deviation; deliberately not stored as `sd` so the variable
# does not mask the stats::sd() function.
s_x <- sd(Casen2020.IV$ypchtotcor, na.rm = TRUE)
n <- sum(!is.na(Casen2020.IV$ypchtotcor)) # number of non-missing observations
se <- s_x / sqrt(n) # standard error of the mean
Margen de error para un 95% de NC:
# Margin of error at 95% confidence: critical value times the standard error.
e1 <- se * z1
print(e1)
## [1] 8664.327
Margen de error para un 99% de NC:
# Margin of error at 99% confidence: critical value times the standard error.
e2 <- se * z2
print(e2)
## [1] 11405.08
Intervalo de confianza para un 95% de NC:
# 95% confidence interval as a 1x2 matrix: lower limit first, upper limit second.
ic95 <- matrix(c(media - e1, media + e1), nrow = 1, ncol = 2)
print(ic95)
## [,1] [,2]
## [1,] 314120.7 331449.4
Intervalo de confianza para un 99% de NC:
# 99% confidence interval as a 1x2 matrix: lower limit first, upper limit second.
ic99 <- matrix(c(media - e2, media + e2), nrow = 1, ncol = 2)
print(ic99)
## [,1] [,2]
## [1,] 311380 334190.1
Necesitamos estimar el porcentaje de hogares en situación de pobreza en la región de Coquimbo, año 2020. Solamente tenemos los datos muestrales de Casen 2020.
Para calcular el intervalo de confianza de la proporción: \(p \; \pm \; Z_{\alpha / 2} \; \sqrt{\frac{pq}{n}}\).
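Donde \(p\) es la proporción muestral de hogares en situación de pobreza, \(q = 1 - p\), \(Z_{\alpha / 2}\) es el valor crítico de la distribución normal estándar para el nivel de confianza (NC) elegido y \(n\) es el tamaño de la muestra (casos sin valores perdidos):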
# Inputs for the confidence interval of a proportion based on `pobreza`.
# Count the non-missing observations once and reuse the count below.
n <- sum(!is.na(Casen2020.IV$pobreza))
# Sample proportion: share of non-missing cases whose code is anything but 3.
p <- sum(Casen2020.IV$pobreza != 3, na.rm = TRUE) / n
z1 <- 1.96 # critical z for 95% confidence (rounded)
z2 <- 2.58 # critical z for 99% confidence (rounded; qnorm(0.995) is about 2.576)
# p * q with q = 1 - p: the variance of a single 0/1 observation
# (a variance term, not a standard deviation).
pq <- p * (1 - p)
se <- sqrt(pq / n) # standard error of the sample proportion
Margen de error para un 95% de NC:
# Margin of error at 95% confidence: critical value times the standard error.
e1 <- se * z1
print(e1)
## [1] 0.009810315
Margen de error para un 99% de NC:
# Margin of error at 99% confidence: critical value times the standard error.
e2 <- se * z2
print(e2)
## [1] 0.01291358
Intervalo de confianza para un 95% de NC:
# 95% confidence interval for the proportion as a 1x2 matrix
# (lower limit, upper limit), displayed rounded to 4 decimals.
ic95 <- matrix(c(p - e1, p + e1), nrow = 1, ncol = 2)
print(round(ic95, digits = 4))
## [,1] [,2]
## [1,] 0.1284 0.148
Intervalo de confianza para un 99% de NC:
# 99% confidence interval for the proportion as a 1x2 matrix
# (lower limit, upper limit), displayed rounded to 4 decimals.
ic99 <- matrix(c(p - e2, p + e2), nrow = 1, ncol = 2)
print(round(ic99, digits = 4))
## [,1] [,2]
## [1,] 0.1253 0.1511