1 Nivel nacional URBANO (código 1)
1.1 Pregunta ESCOLARIDAD:
1.2 Las categorías de respuesta:
1 (0-21)
1.3 Generación de tabla de contingencia para la variable ESCOLARIDAD
# Cross-tabulate COMUNA against ESCOLARIDAD for urban records (AREA == 1) and
# reshape the counts into one row per commune, with one count column per
# schooling value (0 to 21) plus a zero-padded commune code column.
tabla_con_clave <- readRDS("censos/censo_personas_con_clave_17")
tabla_con_clave_u <- filter(tabla_con_clave, AREA == 1)
b <- tabla_con_clave_u$COMUNA
c <- tabla_con_clave_u$ESCOLARIDAD
cross_tab <- xtabs(~ unlist(b) + unlist(c))
tabla <- as.data.frame(cross_tab)
# Keep only the commune / schooling combinations that actually occur.
d <- tabla[!(tabla$Freq == 0), ]
d$anio <- "2017"

# Start from the communes that have schooling value 0 and left-join the counts
# of every other value. Each merge appends three columns, in this order:
# schooling value, Freq, anio. The steps below rely on that column order.
d_t <- filter(d, unlist.c. == 0)
for (i in 1:21) {
  d_i <- filter(d, unlist.c. == i)
  d_t <- merge(x = d_t, y = d_i, by = "unlist.b.", all.x = TRUE)
}

# Prefix each commune code with "0" and keep the trailing five characters, so
# four-digit codes gain a leading zero and five-digit codes are unchanged.
# Done on the whole vector at once, which also works for a zero-row table.
codigos <- d_t$unlist.b.
cadena <- paste0("0", codigos)
cadena <- substr(cadena, nchar(cadena) - 4, 6)
cadena <- as.data.frame(cadena)

# Replace the original key column (first) with the padded code (last).
comuna_corr <- cbind(d_t, cadena)
comuna_corr <- comuna_corr[, -1, drop = FALSE]
names(comuna_corr)[ncol(comuna_corr)] <- "código"

# Drop every third column: the repeated anio columns.
quitar <- seq(3, ncol(comuna_corr) - 1, 3)
comuna_corr <- comuna_corr[, -quitar, drop = FALSE]

# The remaining columns alternate schooling value / Freq. Label each Freq
# column with its schooling value; only the 22 positions in III are renamed.
III <- seq(2, 44, 2)
names(comuna_corr)[III] <- paste0(seq_along(III) - 1, "_Años")

# Drop the schooling-value columns, leaving the 22 counts and código.
quitar <- seq(1, ncol(comuna_corr) - 1, 2)
comuna_corr <- comuna_corr[, -quitar, drop = FALSE]
# Attach the expanded income to each commune code, export the result to Excel
# and render the summary statistics as a scrollable HTML table.
ingresos_expandidos_2017 <- readRDS("Ingresos_expandidos_urbano_17.rds")
df_2017_2 <- merge(
  x = comuna_corr,
  y = ingresos_expandidos_2017,
  by = "código",
  all.x = TRUE
)

# Drop the first column and the second-to-last column of the merged table.
union_final_urb <- df_2017_2[, -c(1, ncol(df_2017_2) - 1)]
write_xlsx(union_final_urb, "ESCOLARIDAD_urbano.xlsx")

data_sum <- summary(union_final_urb)
scroll_box(
  kable_paper(
    kable_styling(
      kbl(head(data_sum)),
      bootstrap_options = c("striped", "hover")
    )
  ),
  width = "100%",
  height = "500px"
)
0_Años | 1_Años | 2_Años | 3_Años | 4_Años | 5_Años | 6_Años | 7_Años | 8_Años | 9_Años | 10_Años | 11_Años | 12_Años | 13_Años | 14_Años | 15_Años | 16_Años | 17_Años | 18_Años | 19_Años | 20_Años | 21_Años | ingresos_expandidos | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Min. : 98.0 | Min. : 19.0 | Min. : 28 | Min. : 34.0 | Min. : 36 | Min. : 34.0 | Min. : 46 | Min. : 31 | Min. : 87.0 | Min. : 35 | Min. : 43.0 | Min. : 21 | Min. : 215 | Min. : 4.0 | Min. : 8 | Min. : 15.0 | Min. : 9.0 | Min. : 7 | Min. : 1.00 | Min. : 1.0 | Min. : 1.00 | Min. : 1.00 | Min. :7.054e+08 | |
1st Qu.: 622.5 | 1st Qu.: 124.5 | 1st Qu.: 149 | 1st Qu.: 196.5 | 1st Qu.: 223 | 1st Qu.: 184.5 | 1st Qu.: 337 | 1st Qu.: 188 | 1st Qu.: 573.5 | 1st Qu.: 196 | 1st Qu.: 280.5 | 1st Qu.: 176 | 1st Qu.: 1348 | 1st Qu.: 60.0 | 1st Qu.: 140 | 1st Qu.: 146.5 | 1st Qu.: 101.5 | 1st Qu.: 199 | 1st Qu.: 2.00 | 1st Qu.: 11.0 | 1st Qu.: 2.00 | 1st Qu.: 2.00 | 1st Qu.:2.954e+09 | |
Median : 1520.0 | Median : 323.0 | Median : 363 | Median : 464.0 | Median : 521 | Median : 445.0 | Median : 730 | Median : 433 | Median : 1287.0 | Median : 490 | Median : 765.0 | Median : 433 | Median : 3230 | Median : 163.0 | Median : 406 | Median : 408.0 | Median : 300.0 | Median : 564 | Median : 6.50 | Median : 35.0 | Median : 6.00 | Median : 7.50 | Median :5.697e+09 | |
Mean : 5129.2 | Mean : 899.7 | Mean : 1008 | Mean : 1219.7 | Mean : 1397 | Mean : 1183.1 | Mean : 2039 | Mean : 1252 | Mean : 3392.4 | Mean : 1676 | Mean : 2564.8 | Mean : 1656 | Mean : 11825 | Mean : 988.4 | Mean : 2043 | Mean : 2077.8 | Mean : 1753.5 | Mean : 3948 | Mean : 90.55 | Mean : 501.7 | Mean : 68.66 | Mean : 114.85 | Mean :1.784e+10 | |
3rd Qu.: 5709.5 | 3rd Qu.: 1005.0 | 3rd Qu.: 1156 | 3rd Qu.: 1426.5 | 3rd Qu.: 1672 | 3rd Qu.: 1392.0 | 3rd Qu.: 2426 | 3rd Qu.: 1374 | 3rd Qu.: 3852.5 | 3rd Qu.: 1804 | 3rd Qu.: 2759.0 | 3rd Qu.: 1697 | 3rd Qu.: 12052 | 3rd Qu.: 887.5 | 3rd Qu.: 1848 | 3rd Qu.: 1833.0 | 3rd Qu.: 1428.5 | 3rd Qu.: 2398 | 3rd Qu.: 34.50 | 3rd Qu.: 171.0 | 3rd Qu.: 30.00 | 3rd Qu.: 36.25 | 3rd Qu.:1.857e+10 | |
Max. :61684.0 | Max. :10602.0 | Max. :11436 | Max. :13227.0 | Max. :15123 | Max. :13391.0 | Max. :21184 | Max. :15509 | Max. :39994.0 | Max. :20934 | Max. :32208.0 | Max. :20129 | Max. :152494 | Max. :13781.0 | Max. :29299 | Max. :31640.0 | Max. :29262.0 | Max. :83263 | Max. :3778.00 | Max. :21060.0 | Max. :1995.00 | Max. :3381.00 | Max. :1.870e+11 |
Gráficas:
library(plotly)
# Line chart with one trace per data column of df_2017_fig, using the commune
# code on the x axis. df_2017_fig is df_2017_2 without its second-to-last
# column.
df_2017_fig <- df_2017_2[, -(ncol(df_2017_2) - 1)]
fig <- plot_ly(
  df_2017_fig,
  x = df_2017_fig$código,
  y = df_2017_fig[, 2],
  name = colnames(df_2017_fig)[2],
  type = "scatter",
  mode = "lines",
  width = 7000,
  height = 400
)

# Add column `g` of df_2017_fig to plot `p` as a line trace and return the new
# plot. The column is passed as a plain vector, so it is evaluated right away
# instead of when the plot is built, and nothing outside the function is
# modified. The figure size is set once in plot_ly(); width/height are not
# repeated here because they are not attributes of a scatter trace.
grafica_fn <- function(g, p = fig) {
  add_trace(
    p,
    y = df_2017_fig[[g]],
    name = colnames(df_2017_fig)[g],
    mode = "lines"
  )
}

# Columns 3 onwards; column 2 is already the first trace.
# NOTE(review): this range includes the last column of df_2017_fig as well,
# exactly as the original loop did - confirm that it should be plotted.
for (g in seq_len(ncol(df_2017_fig))[-(1:2)]) {
  fig <- grafica_fn(g, fig)
}
fig <- fig %>% layout(autosize = TRUE)
fig
2 Correlaciones
La distribución es asimétrica, poseyendo un sesgo positivo.
# Kendall correlation between every column except the last one and
# ingresos_expandidos, saved to disk and rendered as a coloured table.
# Rows without an income value are dropped with an explicit is.na() test
# (the previous comparison against the string 'is.na' only removed them
# because comparing NA yields NA, which filter() discards).
df_2017_2f <- filter(union_final_urb, !is.na(ingresos_expandidos))
III <- seq_len(ncol(df_2017_2f) - 1)
my_data <- df_2017_2f[, III]
tabla <- cor(
  x = my_data,
  y = df_2017_2f$ingresos_expandidos,
  method = "kendall",
  use = "pairwise.complete.obs"
)
tabla <- as.data.frame(tabla)
colnames(tabla) <- "Correlación"
saveRDS(tabla, "tablas_de_corr/C_ESCOLARIDAD_URB.rds")

# The highest correlation gets a red background, all others a green one.
tabla %>%
  rownames_to_column("ESCOLARIDAD") %>%
  mutate(
    Correlación = cell_spec(
      Correlación,
      background = ifelse(
        Correlación == max(Correlación),
        "#fc0303",
        "#5cb81f"
      )
    )
  ) %>%
  kbl(booktabs = TRUE, linesep = "", escape = FALSE) %>%
  kable_paper(full_width = FALSE) %>%
  column_spec(1, color = "black") %>%
  column_spec(2, color = "white")
ESCOLARIDAD | Correlación |
---|---|
0_Años | 0.854037689736936 |
1_Años | 0.838118814624733 |
2_Años | 0.832593628222123 |
3_Años | 0.825398206670011 |
4_Años | 0.829309583215577 |
5_Años | 0.829145577324303 |
6_Años | 0.824375852609494 |
7_Años | 0.839595694241319 |
8_Años | 0.823705072477945 |
9_Años | 0.846242300573528 |
10_Años | 0.847307214196012 |
11_Años | 0.850581265529565 |
12_Años | 0.852558280769337 |
13_Años | 0.839864990934019 |
14_Años | 0.841796418102207 |
15_Años | 0.841359955061385 |
16_Años | 0.842491401839675 |
17_Años | 0.816019254748089 |
18_Años | 0.753262109168159 |
19_Años | 0.755473704181375 |
20_Años | 0.696502017000573 |
21_Años | 0.716835909778733 |
2.0.1 Kendall
# Kendall correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "kendall",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "kendall",
  pch = 20
)
2.0.2 Pearson
# Pearson correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "pearson",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "pearson",
  pch = 20
)
2.0.3 Spearman
# Spearman correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "spearman",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "spearman",
  pch = 20
)
3 Nivel nacional RURAL (código 2)
# Cross-tabulate COMUNA against ESCOLARIDAD for rural records (AREA == 2) and
# reshape the counts into one row per commune, with one count column per
# schooling value (0 to 21) plus a zero-padded commune code column.
# The filtered table keeps the name tabla_con_clave_u used by the urban
# section, although here it holds the AREA == 2 rows.
tabla_con_clave <- readRDS("censos/censo_personas_con_clave_17")
tabla_con_clave_u <- filter(tabla_con_clave, AREA == 2)
b <- tabla_con_clave_u$COMUNA
c <- tabla_con_clave_u$ESCOLARIDAD
cross_tab <- xtabs(~ unlist(b) + unlist(c))
tabla <- as.data.frame(cross_tab)
# Keep only the commune / schooling combinations that actually occur.
d <- tabla[!(tabla$Freq == 0), ]
d$anio <- "2017"

# Start from the communes that have schooling value 0 and left-join the counts
# of every other value. Each merge appends three columns, in this order:
# schooling value, Freq, anio. The steps below rely on that column order.
d_t <- filter(d, unlist.c. == 0)
for (i in 1:21) {
  d_i <- filter(d, unlist.c. == i)
  d_t <- merge(x = d_t, y = d_i, by = "unlist.b.", all.x = TRUE)
}

# Prefix each commune code with "0" and keep the trailing five characters, so
# four-digit codes gain a leading zero and five-digit codes are unchanged.
# Done on the whole vector at once, which also works for a zero-row table.
codigos <- d_t$unlist.b.
cadena <- paste0("0", codigos)
cadena <- substr(cadena, nchar(cadena) - 4, 6)
cadena <- as.data.frame(cadena)

# Replace the original key column (first) with the padded code (last).
comuna_corr <- cbind(d_t, cadena)
comuna_corr <- comuna_corr[, -1, drop = FALSE]
names(comuna_corr)[ncol(comuna_corr)] <- "código"

# Drop every third column: the repeated anio columns.
quitar <- seq(3, ncol(comuna_corr) - 1, 3)
comuna_corr <- comuna_corr[, -quitar, drop = FALSE]

# The remaining columns alternate schooling value / Freq. Label each Freq
# column with its schooling value; only the 22 positions in III are renamed.
III <- seq(2, 44, 2)
names(comuna_corr)[III] <- paste0(seq_along(III) - 1, "_años")

# Drop the schooling-value columns, leaving the 22 counts and código.
quitar <- seq(1, ncol(comuna_corr) - 1, 2)
comuna_corr <- comuna_corr[, -quitar, drop = FALSE]
# Attach the rural expanded income to each commune code, export the result to
# Excel and render the summary statistics as a scrollable HTML table.
# The result keeps the name union_final_urb used by the urban section.
ingresos_expandidos_2017 <- readRDS("Ingresos_expandidos_rural_17.rds")
df_2017_2 <- merge(
  x = comuna_corr,
  y = ingresos_expandidos_2017,
  by = "código",
  all.x = TRUE
)

# Drop the first column and the second-to-last column of the merged table.
union_final_urb <- df_2017_2[, -c(1, ncol(df_2017_2) - 1)]
write_xlsx(union_final_urb, "ESCOLARIDAD_rural.xlsx")

data_sum <- summary(union_final_urb)
scroll_box(
  kable_paper(
    kable_styling(
      kbl(head(data_sum)),
      bootstrap_options = c("striped", "hover")
    )
  ),
  width = "100%",
  height = "500px"
)
0_años | 1_años | 2_años | 3_años | 4_años | 5_años | 6_años | 7_años | 8_años | 9_años | 10_años | 11_años | 12_años | 13_años | 14_años | 15_años | 16_años | 17_años | 18_años | 19_años | 20_años | 21_años | ingresos_expandidos | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Min. : 2.0 | Min. : 1.0 | Min. : 1.0 | Min. : 1.0 | Min. : 1.0 | Min. : 1.0 | Min. : 3.0 | Min. : 1.00 | Min. : 1.0 | Min. : 1.0 | Min. : 1.0 | Min. : 1.00 | Min. : 4 | Min. : 1.00 | Min. : 1.0 | Min. : 1.0 | Min. : 1.0 | Min. : 3 | Min. : 1.00 | Min. : 1.00 | Min. : 1.000 | Min. : 1.000 | Min. :2.792e+08 | |
1st Qu.: 284.0 | 1st Qu.: 66.0 | 1st Qu.: 83.5 | 1st Qu.: 110.2 | 1st Qu.: 127.2 | 1st Qu.: 105.0 | 1st Qu.: 177.0 | 1st Qu.: 87.75 | 1st Qu.: 314.8 | 1st Qu.: 91.0 | 1st Qu.: 132.0 | 1st Qu.: 75.75 | 1st Qu.: 532 | 1st Qu.: 20.00 | 1st Qu.: 51.0 | 1st Qu.: 50.0 | 1st Qu.: 39.0 | 1st Qu.: 75 | 1st Qu.: 1.00 | 1st Qu.: 5.75 | 1st Qu.: 1.000 | 1st Qu.: 1.000 | 1st Qu.:1.809e+09 | |
Median : 594.0 | Median : 137.0 | Median : 168.5 | Median : 236.5 | Median : 259.5 | Median : 211.0 | Median : 410.0 | Median : 184.50 | Median : 638.5 | Median : 171.0 | Median : 250.0 | Median : 144.50 | Median : 925 | Median : 39.00 | Median : 97.0 | Median : 105.0 | Median : 77.0 | Median : 154 | Median : 2.00 | Median : 12.00 | Median : 2.000 | Median : 3.000 | Median :3.546e+09 | |
Mean : 773.1 | Mean : 176.2 | Mean : 215.3 | Mean : 301.7 | Mean : 332.1 | Mean : 276.2 | Mean : 490.6 | Mean : 256.43 | Mean : 826.8 | Mean : 232.3 | Mean : 329.6 | Mean : 193.25 | Mean : 1309 | Mean : 61.99 | Mean : 150.5 | Mean : 147.4 | Mean : 126.7 | Mean : 275 | Mean : 4.73 | Mean : 32.93 | Mean : 4.717 | Mean : 8.509 | Mean :8.206e+09 | |
3rd Qu.:1028.0 | 3rd Qu.: 237.0 | 3rd Qu.: 289.0 | 3rd Qu.: 412.8 | 3rd Qu.: 456.0 | 3rd Qu.: 377.0 | 3rd Qu.: 694.0 | 3rd Qu.: 346.00 | 3rd Qu.:1116.8 | 3rd Qu.: 315.0 | 3rd Qu.: 431.0 | 3rd Qu.: 247.75 | 3rd Qu.: 1706 | 3rd Qu.: 71.50 | 3rd Qu.: 177.0 | 3rd Qu.: 177.0 | 3rd Qu.: 141.8 | 3rd Qu.: 281 | 3rd Qu.: 4.25 | 3rd Qu.: 26.00 | 3rd Qu.: 4.000 | 3rd Qu.: 7.000 | 3rd Qu.:7.252e+09 | |
Max. :6078.0 | Max. :1291.0 | Max. :1648.0 | Max. :2160.0 | Max. :2402.0 | Max. :1936.0 | Max. :3231.0 | Max. :2037.00 | Max. :5549.0 | Max. :1928.0 | Max. :2790.0 | Max. :1488.00 | Max. :10634 | Max. :531.00 | Max. :1149.0 | Max. :1126.0 | Max. :1267.0 | Max. :4647 | Max. :117.00 | Max. :1176.00 | Max. :84.000 | Max. :143.000 | Max. :7.585e+10 |
Gráficas:
library(plotly)
# Line chart with one trace per data column of df_2017_fig, using the commune
# code on the x axis. df_2017_fig is df_2017_2 without its second-to-last
# column.
df_2017_fig <- df_2017_2[, -(ncol(df_2017_2) - 1)]
fig <- plot_ly(
  df_2017_fig,
  x = df_2017_fig$código,
  y = df_2017_fig[, 2],
  name = colnames(df_2017_fig)[2],
  type = "scatter",
  mode = "lines",
  width = 7000,
  height = 400
)

# Add column `g` of df_2017_fig to plot `p` as a line trace and return the new
# plot. The column is passed as a plain vector, so it is evaluated right away
# instead of when the plot is built, and nothing outside the function is
# modified. The figure size is set once in plot_ly(); width/height are not
# repeated here because they are not attributes of a scatter trace.
grafica_fn <- function(g, p = fig) {
  add_trace(
    p,
    y = df_2017_fig[[g]],
    name = colnames(df_2017_fig)[g],
    mode = "lines"
  )
}

# Columns 3 onwards; column 2 is already the first trace.
# NOTE(review): this range includes the last column of df_2017_fig as well,
# exactly as the original loop did - confirm that it should be plotted.
for (g in seq_len(ncol(df_2017_fig))[-(1:2)]) {
  fig <- grafica_fn(g, fig)
}
fig <- fig %>% layout(autosize = TRUE)
fig
4 Correlaciones
La distribución es asimétrica, poseyendo un sesgo positivo.
# Kendall correlation between every column except the last one and
# ingresos_expandidos, saved to disk and rendered as a coloured table.
# Rows without an income value are dropped with an explicit is.na() test
# (the previous comparison against the string 'is.na' only removed them
# because comparing NA yields NA, which filter() discards).
df_2017_2f <- filter(union_final_urb, !is.na(ingresos_expandidos))
III <- seq_len(ncol(df_2017_2f) - 1)
my_data <- df_2017_2f[, III]
tabla <- cor(
  x = my_data,
  y = df_2017_2f$ingresos_expandidos,
  method = "kendall",
  use = "pairwise.complete.obs"
)
tabla <- as.data.frame(tabla)
colnames(tabla) <- "Correlación"
saveRDS(tabla, "tablas_de_corr/C_ESCOLARIDAD_RU.rds")

# The highest correlation gets a red background, all others a green one.
tabla %>%
  rownames_to_column("ESCOLARIDAD") %>%
  mutate(
    Correlación = cell_spec(
      Correlación,
      background = ifelse(
        Correlación == max(Correlación),
        "#fc0303",
        "#5cb81f"
      )
    )
  ) %>%
  kbl(booktabs = TRUE, linesep = "", escape = FALSE) %>%
  kable_paper(full_width = FALSE) %>%
  column_spec(1, color = "black") %>%
  column_spec(2, color = "white")
ESCOLARIDAD | Correlación |
---|---|
0_años | 0.332867668540522 |
1_años | 0.276759308361275 |
2_años | 0.247019250484416 |
3_años | 0.236264921676695 |
4_años | 0.234244240759791 |
5_años | 0.257099978143545 |
6_años | 0.204342061924082 |
7_años | 0.289504682245746 |
8_años | 0.260848945043302 |
9_años | 0.393469203061332 |
10_años | 0.41104428255418 |
11_años | 0.420101827044632 |
12_años | 0.425521826951836 |
13_años | 0.469299203825776 |
14_años | 0.492374000468392 |
15_años | 0.478463478455792 |
16_años | 0.463330940031934 |
17_años | 0.489422588448895 |
18_años | 0.375586427534859 |
19_años | 0.447931024901774 |
20_años | 0.385391652228919 |
21_años | 0.436293266842347 |
4.0.1 Kendall
# Kendall correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "kendall",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "kendall",
  pch = 20
)
4.0.2 Pearson
# Pearson correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "pearson",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "pearson",
  pch = 20
)
4.0.3 Spearman
# Spearman correlation charts, ten columns at a time: the two passes cover
# columns 1-10 and 11-20 of union_final_urb, and the last column of the table
# is appended to each window. The printed label is the window shifted to
# start at zero ("0-9", "10-19").
i <- 1
f <- 10
for (cc in seq_len(2)) {
  print(paste0(i - 1, "-", f - 1))
  III <- i:f
  df_2017_2_sub_subset <- union_final_urb[, c(III, ncol(union_final_urb))]
  chart.Correlation(
    df_2017_2_sub_subset,
    histogram = TRUE,
    method = "spearman",
    pch = 20
  )
  i <- i + 10
  f <- f + 10
}
## [1] "0-9"
## [1] "10-19"
## [1] "21 - 23"
# Chart the columns the windowed loop does not reach. The loop stops at
# column 20, so the range has to start at column 21; starting at 22 left
# column 21 out of every chart.
df_2017_2_sub_subset <- union_final_urb[, 21:ncol(union_final_urb)]
chart.Correlation(
  df_2017_2_sub_subset,
  histogram = TRUE,
  method = "spearman",
  pch = 20
)