title: "Desarrollo tarea06"
author: "ingrid castaño"
date: "2024-07-26"
output: html_document
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(readr)


# Read the survey data from the CSV file into a tibble.
Indice_pobreza <- read_csv(file = "test_values.csv")
## Rows: 8400 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): country, religion, relationship_to_hh_head, employment_category_la...
## dbl (17): row_id, age, education_level, share_hh_income_provided, num_times_...
## lgl (37): is_urban, female, married, literacy, can_add, can_divide, can_calc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Unused placeholder kept from the original notes:
# Indice_pobreza <- Indice_pobreza %>% mutate_if()
# NOTE(review): an earlier comment said the file is ";"-delimited, but it is
# loaded with read_csv(), which parses comma-separated input -- confirm.
# Preview the first 10 rows of the data.
head(Indice_pobreza, n = 10)
## # A tibble: 10 × 59
##    row_id country is_urban   age female married religion relationship_to_hh_head
##     <dbl> <chr>   <lgl>    <dbl> <lgl>  <lgl>   <chr>    <chr>                  
##  1      0 F       FALSE       57 TRUE   TRUE    X        Father/Mother          
##  2      1 C       FALSE       40 FALSE  TRUE    P        Head                   
##  3      2 I       FALSE       35 TRUE   TRUE    Q        Spouse                 
##  4      3 I       TRUE        19 TRUE   FALSE   Q        Son/Daughter           
##  5      4 A       FALSE       61 TRUE   TRUE    Q        Father/Mother          
##  6      5 A       TRUE        16 TRUE   FALSE   Q        Son/Daughter           
##  7      6 F       FALSE       40 FALSE  TRUE    Q        Head                   
##  8      7 C       FALSE       27 TRUE   TRUE    P        Spouse                 
##  9      8 A       TRUE        35 FALSE  TRUE    Q        Head                   
## 10      9 J       FALSE       72 TRUE   FALSE   X        Head                   
## # ℹ 51 more variables: education_level <dbl>, literacy <lgl>, can_add <lgl>,
## #   can_divide <lgl>, can_calc_percents <lgl>, can_calc_compounding <lgl>,
## #   employed_last_year <lgl>, employment_category_last_year <chr>,
## #   employment_type_last_year <chr>, share_hh_income_provided <dbl>,
## #   income_ag_livestock_last_year <lgl>, income_friends_family_last_year <lgl>,
## #   income_government_last_year <lgl>, income_own_business_last_year <lgl>,
## #   income_private_sector_last_year <lgl>, …
# Show the column names, types and first values of the data set.
str(object = Indice_pobreza)
## spc_tbl_ [8,400 × 59] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ row_id                               : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
##  $ country                              : chr [1:8400] "F" "C" "I" "I" ...
##  $ is_urban                             : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
##  $ age                                  : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
##  $ female                               : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
##  $ married                              : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
##  $ religion                             : chr [1:8400] "X" "P" "Q" "Q" ...
##  $ relationship_to_hh_head              : chr [1:8400] "Father/Mother" "Head" "Spouse" "Son/Daughter" ...
##  $ education_level                      : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
##  $ literacy                             : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
##  $ can_add                              : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_divide                           : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_calc_percents                    : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ can_calc_compounding                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ employed_last_year                   : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
##  $ employment_category_last_year        : chr [1:8400] "employed" "employed" "housewife_or_student" "employed" ...
##  $ employment_type_last_year            : chr [1:8400] "salaried" "salaried" "not_working" "salaried" ...
##  $ share_hh_income_provided             : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
##  $ income_ag_livestock_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
##  $ income_friends_family_last_year      : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
##  $ income_government_last_year          : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ income_own_business_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ income_private_sector_last_year      : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ income_public_sector_last_year       : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
##  $ num_times_borrowed_last_year         : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
##  $ borrowing_recency                    : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
##  $ formal_savings                       : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ informal_savings                     : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ cash_property_savings                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ has_insurance                        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ has_investment                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ bank_interest_rate                   : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mm_interest_rate                     : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mfi_interest_rate                    : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ other_fsp_interest_rate              : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ num_shocks_last_year                 : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
##  $ avg_shock_strength_last_year         : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
##  $ borrowed_for_emergency_last_year     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
##  $ borrowed_for_home_or_biz_last_year   : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_technology                     : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
##  $ can_call                             : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
##  $ can_text                             : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
##  $ can_use_internet                     : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ can_make_transaction                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_ownership                      : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
##  $ advanced_phone_use                   : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_bank_acct                        : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ reg_mm_acct                          : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_formal_nbfi_account              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ financially_included                 : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ active_bank_user                     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_mm_user                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_formal_nbfi_user              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_informal_nbfi_user            : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
##  $ nonreg_active_mm_user                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ num_formal_institutions_last_year    : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
##  $ num_informal_institutions_last_year  : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
##  $ num_financial_activities_last_year   : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   row_id = col_double(),
##   ..   country = col_character(),
##   ..   is_urban = col_logical(),
##   ..   age = col_double(),
##   ..   female = col_logical(),
##   ..   married = col_logical(),
##   ..   religion = col_character(),
##   ..   relationship_to_hh_head = col_character(),
##   ..   education_level = col_double(),
##   ..   literacy = col_logical(),
##   ..   can_add = col_logical(),
##   ..   can_divide = col_logical(),
##   ..   can_calc_percents = col_logical(),
##   ..   can_calc_compounding = col_logical(),
##   ..   employed_last_year = col_logical(),
##   ..   employment_category_last_year = col_character(),
##   ..   employment_type_last_year = col_character(),
##   ..   share_hh_income_provided = col_double(),
##   ..   income_ag_livestock_last_year = col_logical(),
##   ..   income_friends_family_last_year = col_logical(),
##   ..   income_government_last_year = col_logical(),
##   ..   income_own_business_last_year = col_logical(),
##   ..   income_private_sector_last_year = col_logical(),
##   ..   income_public_sector_last_year = col_logical(),
##   ..   num_times_borrowed_last_year = col_double(),
##   ..   borrowing_recency = col_double(),
##   ..   formal_savings = col_logical(),
##   ..   informal_savings = col_logical(),
##   ..   cash_property_savings = col_logical(),
##   ..   has_insurance = col_logical(),
##   ..   has_investment = col_logical(),
##   ..   bank_interest_rate = col_double(),
##   ..   mm_interest_rate = col_double(),
##   ..   mfi_interest_rate = col_double(),
##   ..   other_fsp_interest_rate = col_double(),
##   ..   num_shocks_last_year = col_double(),
##   ..   avg_shock_strength_last_year = col_double(),
##   ..   borrowed_for_emergency_last_year = col_logical(),
##   ..   borrowed_for_daily_expenses_last_year = col_logical(),
##   ..   borrowed_for_home_or_biz_last_year = col_logical(),
##   ..   phone_technology = col_double(),
##   ..   can_call = col_logical(),
##   ..   can_text = col_logical(),
##   ..   can_use_internet = col_logical(),
##   ..   can_make_transaction = col_logical(),
##   ..   phone_ownership = col_double(),
##   ..   advanced_phone_use = col_logical(),
##   ..   reg_bank_acct = col_logical(),
##   ..   reg_mm_acct = col_logical(),
##   ..   reg_formal_nbfi_account = col_logical(),
##   ..   financially_included = col_logical(),
##   ..   active_bank_user = col_logical(),
##   ..   active_mm_user = col_logical(),
##   ..   active_formal_nbfi_user = col_logical(),
##   ..   active_informal_nbfi_user = col_logical(),
##   ..   nonreg_active_mm_user = col_logical(),
##   ..   num_formal_institutions_last_year = col_double(),
##   ..   num_informal_institutions_last_year = col_double(),
##   ..   num_financial_activities_last_year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
##  Agrupación de nivel educación y calculo de la media de edades por educación

# Mean age for each education level. Missing ages are ignored when averaging;
# rows with a missing education level end up in their own NA group.
tabla1 <- Indice_pobreza %>%
  group_by(Nivel_educacion = education_level) %>%
  summarise(edades_promedio = mean(age, na.rm = TRUE))

print(tabla1)
## # A tibble: 5 × 2
##   Nivel_educacion edades_promedio
##             <dbl>           <dbl>
## 1               0            44.9
## 2               1            37.7
## 3               2            31.3
## 4               3            32.1
## 5              NA            34.3
# Bar chart of the mean age per education level, with the rounded mean
# printed above each bar.
# "Set1" provides between 3 and 9 colours, so the requested palette size is
# clamped to that range (asking for 10 triggers a brewer.pal() warning) and
# the palette is then fitted to the actual number of bars.
n_barras_1 <- nrow(tabla1)
paleta_1 <- brewer.pal(min(9, max(3, n_barras_1)), "Set1")
barplot_1 <- barplot(
  tabla1$edades_promedio,
  names.arg = tabla1$Nivel_educacion,
  main = "Promedio edades por nivel educativo",
  ylab = "Edad Promedio",
  xlab = "Nivel Educativo",
  ylim = c(0, max(tabla1$edades_promedio) + 5),
  col = rep_len(paleta_1, n_barras_1)
)
# barplot() returns the bar midpoints; they are used as the label x positions.
text(
  x = barplot_1,
  y = tabla1$edades_promedio + 1.8,
  labels = round(tabla1$edades_promedio)
)

# Most common religion: number of respondents per religion, largest group
# first, plus each group's percentage of the total. Computed from the loaded
# survey data.
tabla2 <- Indice_pobreza %>%
  count(religion) %>%
  arrange(desc(n)) %>%
  mutate(porcentaje = n / sum(n) * 100)

# "Set1" has at most 9 colours, so cap the palette size at 9.
num_colores <- min(9, nrow(tabla2))
colores <- brewer.pal(num_colores, "Set1")

# Pie chart whose slices are labelled with the percentage of each religion.
etiquetas_pct <- paste0(round(tabla2$porcentaje, 2), "%")
pie(
  tabla2$n,
  labels = etiquetas_pct,
  col = colores,
  main = "Frecuencia Religiones",
  cex = 0.8
)

# Legend mapping each slice colour to its religion code.
legend(
  "topright",
  legend = tabla2$religion,
  fill = colores,
  cex = 0.8
)

# Most common religion: respondents per religion sorted from the largest
# group to the smallest, with each group's percentage of the total.
tabla2 <- Indice_pobreza %>%
  count(religion, sort = TRUE) %>%
  mutate(porcentaje = n / sum(n) * 100)

print(tabla2)
## # A tibble: 5 × 3
##   religion     n porcentaje
##   <chr>    <int>      <dbl>
## 1 Q         3693     44.0  
## 2 X         3464     41.2  
## 3 P         1151     13.7  
## 4 O           62      0.738
## 5 N           30      0.357
# Bar chart of the number of respondents per religion, with each religion's
# percentage of the total printed above its bar.
# "Set1" provides between 3 and 9 colours, so the requested palette size is
# clamped to that range (asking for 10 triggers a brewer.pal() warning) and
# the palette is then fitted to the actual number of bars.
n_barras_2 <- nrow(tabla2)
paleta_2 <- brewer.pal(min(9, max(3, n_barras_2)), "Set1")
barplot_2 <- barplot(
  tabla2$n,
  names.arg = tabla2$religion,
  main = "Religiones",
  ylim = c(0, max(tabla2$n) + 300),
  xlab = "Religion",
  col = rep_len(paleta_2, n_barras_2)
)
# barplot() returns the bar midpoints; they are used as the label x positions.
text(
  x = barplot_2,
  y = tabla2$n + 150,
  labels = paste0(round(tabla2$porcentaje, 1), "%")
)

##  porcentaje de la media de población que con base a el nivel eduactivo leen libros


# Percentage of respondents with literacy == TRUE within each education level.
# `literacy` is logical, so its mean is the share of TRUE values.
# na.rm = TRUE keeps a single missing answer from turning a whole group's
# percentage into NA (which would also break the max()-based axis limit of
# the bar chart drawn from this table); this matches the mean-age summary.
# Only the two columns actually used are selected.
tabla3 <- Indice_pobreza %>%
  select(
    Nivel_educacion = education_level,
    lee_libros = literacy
  ) %>%
  group_by(Nivel_educacion) %>%
  summarise(lectura_libros = mean(lee_libros, na.rm = TRUE) * 100)


# Bar chart of the percentage stored in tabla3 for each education level,
# using one "Set1" colour per row and printing the value above each bar.
porcentajes_3 <- tabla3$lectura_libros
colores_3 <- brewer.pal(n = nrow(tabla3), "Set1")

barplot_3 <- barplot(
  height = porcentajes_3,
  names.arg = tabla3$Nivel_educacion,
  col = colores_3,
  main = "Porcentaje de Lectura de Libros por Nivel Educativo",
  xlab = "Nivel Educativo",
  ylab = "Porcentaje",
  ylim = c(0, max(porcentajes_3) + 20)
)

# barplot() returns the bar midpoints; they are used as the label x positions.
etiquetas_3 <- paste0(round(porcentajes_3, 1), "%")
text(x = barplot_3, y = porcentajes_3 + 9, labels = etiquetas_3)