TAREA5.knit


title: "Desarrollo tarea5"
author: "ingrid castaño"
date: "2024-07-26"
output: html_document

library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
# Load the survey data from the CSV file into a tibble.
Indice_pobreza <- readr::read_csv(file = "test_values.csv")

## Rows: 8400 Columns: 59

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): country, religion, relationship_to_hh_head, employment_category_la...
## dbl (17): row_id, age, education_level, share_hh_income_provided, num_times_...
## lgl (37): is_urban, female, married, literacy, can_add, can_divide, can_calc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first 10 rows; read_csv() parsed the file as comma-delimited.
head(Indice_pobreza, n = 10)

## # A tibble: 10 × 59
##    row_id country is_urban   age female married religion relationship_to_hh_head
##     <dbl> <chr>   <lgl>    <dbl> <lgl>  <lgl>   <chr>    <chr>                  
##  1      0 F       FALSE       57 TRUE   TRUE    X        Father/Mother          
##  2      1 C       FALSE       40 FALSE  TRUE    P        Head                   
##  3      2 I       FALSE       35 TRUE   TRUE    Q        Spouse                 
##  4      3 I       TRUE        19 TRUE   FALSE   Q        Son/Daughter           
##  5      4 A       FALSE       61 TRUE   TRUE    Q        Father/Mother          
##  6      5 A       TRUE        16 TRUE   FALSE   Q        Son/Daughter           
##  7      6 F       FALSE       40 FALSE  TRUE    Q        Head                   
##  8      7 C       FALSE       27 TRUE   TRUE    P        Spouse                 
##  9      8 A       TRUE        35 FALSE  TRUE    Q        Head                   
## 10      9 J       FALSE       72 TRUE   FALSE   X        Head                   
## # ℹ 51 more variables: education_level <dbl>, literacy <lgl>, can_add <lgl>,
## #   can_divide <lgl>, can_calc_percents <lgl>, can_calc_compounding <lgl>,
## #   employed_last_year <lgl>, employment_category_last_year <chr>,
## #   employment_type_last_year <chr>, share_hh_income_provided <dbl>,
## #   income_ag_livestock_last_year <lgl>, income_friends_family_last_year <lgl>,
## #   income_government_last_year <lgl>, income_own_business_last_year <lgl>,
## #   income_private_sector_last_year <lgl>, …

##  Cambio de nombre a las columnas y selección de las columnas que necesito

# Keep the 16 columns of interest, renaming them to Spanish labels.
select(
  Indice_pobreza,
  pais = country,
  vivienda_urbana = is_urban,
  edad = age,
  casado = married,
  religion,
  Relacion_cabeza_famlia = relationship_to_hh_head,
  Nivel_educacion = education_level,
  lee_libros = literacy,
  Sabe_sumar = can_add,
  Empleado_año_pasado = employed_last_year,
  Tipo_Empleo = employment_category_last_year,
  Aporte_ingresos_hogar = share_hh_income_provided,
  Ingreso_Gobierno = income_government_last_year,
  Tipo_celular = phone_technology,
  Uso_intenet_celular = can_use_internet,
  Num_Actividad_Financiera = num_financial_activities_last_year
)

## # A tibble: 8,400 × 16
##    pais  vivienda_urbana  edad casado religion Relacion_cabeza_famlia
##    <chr> <lgl>           <dbl> <lgl>  <chr>    <chr>                 
##  1 F     FALSE              57 TRUE   X        Father/Mother         
##  2 C     FALSE              40 TRUE   P        Head                  
##  3 I     FALSE              35 TRUE   Q        Spouse                
##  4 I     TRUE               19 FALSE  Q        Son/Daughter          
##  5 A     FALSE              61 TRUE   Q        Father/Mother         
##  6 A     TRUE               16 FALSE  Q        Son/Daughter          
##  7 F     FALSE              40 TRUE   Q        Head                  
##  8 C     FALSE              27 TRUE   P        Spouse                
##  9 A     TRUE               35 TRUE   Q        Head                  
## 10 J     FALSE              72 FALSE  X        Head                  
## # ℹ 8,390 more rows
## # ℹ 10 more variables: Nivel_educacion <dbl>, lee_libros <lgl>,
## #   Sabe_sumar <lgl>, Empleado_año_pasado <lgl>, Tipo_Empleo <chr>,
## #   Aporte_ingresos_hogar <dbl>, Ingreso_Gobierno <lgl>, Tipo_celular <dbl>,
## #   Uso_intenet_celular <lgl>, Num_Actividad_Financiera <dbl>

##  Filtro por edad mayores a 45 años

# People older than 45, oldest first, restricted to seven renamed columns.
# Filtering and sorting use the source column `age`; renaming happens last.
Indice_pobreza %>%
  filter(age > 45) %>%
  arrange(desc(age)) %>%
  select(
    pais = country,
    vivienda_urbana = is_urban,
    religion,
    Relacion_cabeza_famlia = relationship_to_hh_head,
    Nivel_educacion = education_level,
    lee_libros = literacy,
    edad = age
  )

## # A tibble: 2,051 × 7
##    pais  vivienda_urbana religion Relacion_cabeza_famlia Nivel_educacion
##    <chr> <lgl>           <chr>    <chr>                            <dbl>
##  1 F     FALSE           X        Head                                 0
##  2 F     TRUE            Q        Head                                 3
##  3 F     TRUE            X        Father/Mother                        0
##  4 F     FALSE           X        Head                                 1
##  5 F     FALSE           X        Spouse                               0
##  6 G     FALSE           X        Head                                 0
##  7 J     FALSE           X        Head                                 0
##  8 A     FALSE           Q        Head                                 2
##  9 D     FALSE           X        Head                                 1
## 10 D     FALSE           X        Head                                 1
## # ℹ 2,041 more rows
## # ℹ 2 more variables: lee_libros <lgl>, edad <dbl>

##  Agrupación por nivel de educación y cálculo de la edad promedio por nivel

# Mean age within each education level; rows with a missing level form
# their own NA group.
Indice_pobreza %>%
  group_by(Nivel_educacion = education_level) %>%
  summarise(edades_promedio = mean(age))

## # A tibble: 5 × 2
##   Nivel_educacion edades_promedio
##             <dbl>           <dbl>
## 1               0            44.9
## 2               1            37.7
## 3               2            31.3
## 4               3            32.1
## 5              NA            34.3

##  Filtro de vivienda urbana por país y nivel de educación
##  Población que vive en vivienda urbana y cuyo nivel de educación es profesional (nivel 3 o superior)

# Urban residents whose education level is 3 or higher.
# Rows where either condition evaluates to NA are dropped by filter().
Indice_pobreza %>%
  filter(is_urban, education_level >= 3) %>%
  select(
    pais = country,
    vivienda_urbana = is_urban,
    Nivel_educacion = education_level
  )

## # A tibble: 459 × 3
##    pais  vivienda_urbana Nivel_educacion
##    <chr> <lgl>                     <dbl>
##  1 I     TRUE                          3
##  2 A     TRUE                          3
##  3 F     TRUE                          3
##  4 G     TRUE                          3
##  5 I     TRUE                          3
##  6 A     TRUE                          3
##  7 I     TRUE                          3
##  8 F     TRUE                          3
##  9 F     TRUE                          3
## 10 I     TRUE                          3
## # ℹ 449 more rows

##  Filtro de vivienda urbana por país y nivel de educación
##  Población que vive en vivienda urbana y no terminó la educación básica (nivel 0)

# Urban residents whose education level is exactly 0.
# Rows where either condition evaluates to NA are dropped by filter().
Indice_pobreza %>%
  filter(is_urban, education_level == 0) %>%
  select(
    pais = country,
    vivienda_urbana = is_urban,
    Nivel_educacion = education_level
  )

## # A tibble: 373 × 3
##    pais  vivienda_urbana Nivel_educacion
##    <chr> <lgl>                     <dbl>
##  1 C     TRUE                          0
##  2 C     TRUE                          0
##  3 A     TRUE                          0
##  4 F     TRUE                          0
##  5 I     TRUE                          0
##  6 D     TRUE                          0
##  7 I     TRUE                          0
##  8 A     TRUE                          0
##  9 I     TRUE                          0
## 10 D     TRUE                          0
## # ℹ 363 more rows

#  Relación entre ser cabeza de hogar o cónyuge y el nivel de educación (niveles 0 y 1)

# Household heads and spouses whose education level is 0 or 1.
# Rows with a missing relationship or education level are excluded.
Indice_pobreza %>%
  filter(
    relationship_to_hh_head %in% c("Head", "Spouse"),
    education_level %in% c(0, 1)
  ) %>%
  select(
    pais = country,
    edad = age,
    casado = married,
    Relacion_cabeza_famlia = relationship_to_hh_head,
    Nivel_educacion = education_level
  )

## # A tibble: 3,902 × 5
##    pais   edad casado Relacion_cabeza_famlia Nivel_educacion
##    <chr> <dbl> <lgl>  <chr>                            <dbl>
##  1 C        40 TRUE   Head                                 0
##  2 I        35 TRUE   Spouse                               1
##  3 F        40 TRUE   Head                                 0
##  4 C        27 TRUE   Spouse                               0
##  5 J        72 FALSE  Head                                 1
##  6 C        62 TRUE   Head                                 0
##  7 A        69 TRUE   Head                                 1
##  8 G        52 FALSE  Head                                 0
##  9 J        29 TRUE   Spouse                               1
## 10 C        60 TRUE   Spouse                               0
## # ℹ 3,892 more rows

#  La religión más predominante

# Number of respondents per religion, most common first.
Indice_pobreza %>%
  count(religion, sort = TRUE)

## # A tibble: 5 × 2
##   religion     n
##   <chr>    <int>
## 1 Q         3693
## 2 X         3464
## 3 P         1151
## 4 O           62
## 5 N           30

# El país más predominante

# Number of respondents per country, most common first.
Indice_pobreza %>%
  count(pais = country, sort = TRUE)

## # A tibble: 7 × 2
##   pais      n
##   <chr> <int>
## 1 C      1231
## 2 F      1231
## 3 A      1227
## 4 G      1195
## 5 I      1188
## 6 D      1171
## 7 J      1157

#  Nivel de educación predominante

# Number of respondents per education level (NA counted as its own level),
# most common first.
Indice_pobreza %>%
  count(Nivel_educacion = education_level, sort = TRUE)

## # A tibble: 5 × 2
##   Nivel_educacion     n
##             <dbl> <int>
## 1               1  2947
## 2               2  2776
## 3               0  1732
## 4               3   796
## 5              NA   149

#  Edades predominantes

# Number of respondents per age value, most common first.
Indice_pobreza %>%
  count(edad = age, sort = TRUE)

## # A tibble: 82 × 2
##     edad     n
##    <dbl> <int>
##  1    30   437
##  2    25   377
##  3    35   362
##  4    40   324
##  5    20   294
##  6    18   258
##  7    45   255
##  8    28   243
##  9    32   234
## 10    27   233
## # ℹ 72 more rows

##  Porcentaje de la población que lee libros (variable `literacy`), según el nivel educativo

# Percentage of respondents with `literacy == TRUE` in each education level.
# The mean of a logical column is the proportion of TRUE values.
Indice_pobreza %>%
  group_by(Nivel_educacion = education_level) %>%
  summarise(lectura_libros = mean(literacy) * 100)

## # A tibble: 5 × 2
##   Nivel_educacion lectura_libros
##             <dbl>          <dbl>
## 1               0           10.8
## 2               1           57.7
## 3               2           87.8
## 4               3           97.5
## 5              NA           12.8

##  Porcentaje de la población que sabe sumar, según el nivel educativo

# Percentage of respondents with `can_add == TRUE` in each education level.
# The mean of a logical column is the proportion of TRUE values.
Indice_pobreza %>%
  group_by(Nivel_educacion = education_level) %>%
  summarise(Sabe_sumar = mean(can_add) * 100)

## # A tibble: 5 × 2
##   Nivel_educacion Sabe_sumar
##             <dbl>      <dbl>
## 1               0       81.9
## 2               1       89.3
## 3               2       96.6
## 4               3       98.4
## 5              NA       95.3

##  Aporte promedio a los ingresos del hogar por tipo de empleo

# Mean share of household income provided, per employment category.
# Missing income shares are excluded with an explicit filter on that column:
# na.omit() applied to the whole data frame ignores a column argument and
# drops every row that has an NA in any selected column, so it is not used.
Indice_pobreza %>%
  select(
    Tipo_Empleo = employment_category_last_year,
    Aporte_ingresos_hogar = share_hh_income_provided
  ) %>%
  filter(!is.na(Aporte_ingresos_hogar)) %>%
  group_by(Tipo_Empleo) %>%
  summarise(ingresos_hogar = mean(Aporte_ingresos_hogar))

## # A tibble: 5 × 2
##   Tipo_Empleo          ingresos_hogar
##   <chr>                         <dbl>
## 1 employed                       3.48
## 2 housewife_or_student           1.77
## 3 other                          2.90
## 4 retired_or_disabled            2.46
## 5 unemployed                     2.28

##   Promedio de edad según el número de actividades financieras

# Mean age for each number of financial activities in the last year,
# listed from the highest activity count to the lowest.
Indice_pobreza %>%
  group_by(Num_Actividad_Financiera = num_financial_activities_last_year) %>%
  summarise(total_edad = mean(age)) %>%
  arrange(desc(Num_Actividad_Financiera))

## # A tibble: 11 × 2
##    Num_Actividad_Financiera total_edad
##                       <dbl>      <dbl>
##  1                       10       36.5
##  2                        9       38.6
##  3                        8       37.0
##  4                        7       35.2
##  5                        6       34.5
##  6                        5       32.4
##  7                        4       33.4
##  8                        3       35.3
##  9                        2       36.7
## 10                        1       37.7
## 11                        0       37.2

##  Edad promedio por tipo de empleo

# Mean age per employment category, sorted by category name in
# descending alphabetical order.
Indice_pobreza %>%
  group_by(Tipo_Empleo = employment_category_last_year) %>%
  summarise(total_edad = mean(age)) %>%
  arrange(desc(Tipo_Empleo))

## # A tibble: 5 × 2
##   Tipo_Empleo          total_edad
##   <chr>                     <dbl>
## 1 unemployed                 28.4
## 2 retired_or_disabled        63.8
## 3 other                      37.4
## 4 housewife_or_student       31.1
## 5 employed                   38.1

##  Según el tipo de empleo, qué tanto frecuentan las actividades financieras (promedio de actividades)

# Mean number of financial activities per employment category, highest first.
# na.omit() removes any row with an NA in one of the three selected columns
# before the averages are computed.
Indice_pobreza %>%
  select(
    age,
    employment_category_last_year,
    num_financial_activities_last_year
  ) %>%
  na.omit() %>%
  group_by(Tipo_Empleo = employment_category_last_year) %>%
  summarise(
    Actividad_financiera = mean(num_financial_activities_last_year)
  ) %>%
  arrange(desc(Actividad_financiera))

## # A tibble: 5 × 2
##   Tipo_Empleo          Actividad_financiera
##   <chr>                               <dbl>
## 1 employed                            1.85 
## 2 unemployed                          1.65 
## 3 other                               1.57 
## 4 housewife_or_student                0.992
## 5 retired_or_disabled                 0.911