GGPLOT_TAREA.knit


title: "Desarrollo tarea06"
author: "ingrid castaño"
date: "2024-07-26"
output: html_document

library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(readr)
library(ggplot2)


# Read the CSV file (path relative to the working directory) into a tibble.
Indice_pobreza <- read_csv(file = "test_values.csv")

## Rows: 8400 Columns: 59

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): country, religion, relationship_to_hh_head, employment_category_la...
## dbl (17): row_id, age, education_level, share_hh_income_provided, num_times_...
## lgl (37): is_urban, female, married, literacy, can_add, can_divide, can_calc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print a compact overview of every column: its name, type and first values.
utils::str(Indice_pobreza)

## spc_tbl_ [8,400 × 59] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ row_id                               : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
##  $ country                              : chr [1:8400] "F" "C" "I" "I" ...
##  $ is_urban                             : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
##  $ age                                  : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
##  $ female                               : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
##  $ married                              : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
##  $ religion                             : chr [1:8400] "X" "P" "Q" "Q" ...
##  $ relationship_to_hh_head              : chr [1:8400] "Father/Mother" "Head" "Spouse" "Son/Daughter" ...
##  $ education_level                      : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
##  $ literacy                             : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
##  $ can_add                              : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_divide                           : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_calc_percents                    : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ can_calc_compounding                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ employed_last_year                   : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
##  $ employment_category_last_year        : chr [1:8400] "employed" "employed" "housewife_or_student" "employed" ...
##  $ employment_type_last_year            : chr [1:8400] "salaried" "salaried" "not_working" "salaried" ...
##  $ share_hh_income_provided             : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
##  $ income_ag_livestock_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
##  $ income_friends_family_last_year      : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
##  $ income_government_last_year          : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ income_own_business_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ income_private_sector_last_year      : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ income_public_sector_last_year       : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
##  $ num_times_borrowed_last_year         : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
##  $ borrowing_recency                    : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
##  $ formal_savings                       : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ informal_savings                     : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ cash_property_savings                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ has_insurance                        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ has_investment                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ bank_interest_rate                   : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mm_interest_rate                     : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mfi_interest_rate                    : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ other_fsp_interest_rate              : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ num_shocks_last_year                 : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
##  $ avg_shock_strength_last_year         : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
##  $ borrowed_for_emergency_last_year     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
##  $ borrowed_for_home_or_biz_last_year   : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_technology                     : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
##  $ can_call                             : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
##  $ can_text                             : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
##  $ can_use_internet                     : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ can_make_transaction                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_ownership                      : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
##  $ advanced_phone_use                   : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_bank_acct                        : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ reg_mm_acct                          : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_formal_nbfi_account              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ financially_included                 : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ active_bank_user                     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_mm_user                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_formal_nbfi_user              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_informal_nbfi_user            : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
##  $ nonreg_active_mm_user                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ num_formal_institutions_last_year    : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
##  $ num_informal_institutions_last_year  : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
##  $ num_financial_activities_last_year   : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   row_id = col_double(),
##   ..   country = col_character(),
##   ..   is_urban = col_logical(),
##   ..   age = col_double(),
##   ..   female = col_logical(),
##   ..   married = col_logical(),
##   ..   religion = col_character(),
##   ..   relationship_to_hh_head = col_character(),
##   ..   education_level = col_double(),
##   ..   literacy = col_logical(),
##   ..   can_add = col_logical(),
##   ..   can_divide = col_logical(),
##   ..   can_calc_percents = col_logical(),
##   ..   can_calc_compounding = col_logical(),
##   ..   employed_last_year = col_logical(),
##   ..   employment_category_last_year = col_character(),
##   ..   employment_type_last_year = col_character(),
##   ..   share_hh_income_provided = col_double(),
##   ..   income_ag_livestock_last_year = col_logical(),
##   ..   income_friends_family_last_year = col_logical(),
##   ..   income_government_last_year = col_logical(),
##   ..   income_own_business_last_year = col_logical(),
##   ..   income_private_sector_last_year = col_logical(),
##   ..   income_public_sector_last_year = col_logical(),
##   ..   num_times_borrowed_last_year = col_double(),
##   ..   borrowing_recency = col_double(),
##   ..   formal_savings = col_logical(),
##   ..   informal_savings = col_logical(),
##   ..   cash_property_savings = col_logical(),
##   ..   has_insurance = col_logical(),
##   ..   has_investment = col_logical(),
##   ..   bank_interest_rate = col_double(),
##   ..   mm_interest_rate = col_double(),
##   ..   mfi_interest_rate = col_double(),
##   ..   other_fsp_interest_rate = col_double(),
##   ..   num_shocks_last_year = col_double(),
##   ..   avg_shock_strength_last_year = col_double(),
##   ..   borrowed_for_emergency_last_year = col_logical(),
##   ..   borrowed_for_daily_expenses_last_year = col_logical(),
##   ..   borrowed_for_home_or_biz_last_year = col_logical(),
##   ..   phone_technology = col_double(),
##   ..   can_call = col_logical(),
##   ..   can_text = col_logical(),
##   ..   can_use_internet = col_logical(),
##   ..   can_make_transaction = col_logical(),
##   ..   phone_ownership = col_double(),
##   ..   advanced_phone_use = col_logical(),
##   ..   reg_bank_acct = col_logical(),
##   ..   reg_mm_acct = col_logical(),
##   ..   reg_formal_nbfi_account = col_logical(),
##   ..   financially_included = col_logical(),
##   ..   active_bank_user = col_logical(),
##   ..   active_mm_user = col_logical(),
##   ..   active_formal_nbfi_user = col_logical(),
##   ..   active_informal_nbfi_user = col_logical(),
##   ..   nonreg_active_mm_user = col_logical(),
##   ..   num_formal_institutions_last_year = col_double(),
##   ..   num_informal_institutions_last_year = col_double(),
##   ..   num_financial_activities_last_year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Scatter plot of age against education level with a linear trend line.
# Points are jittered by 0.3 units on each axis and drawn semi-transparent so
# that observations sharing the same age/level pair remain distinguishable.
ggplot(Indice_pobreza, mapping = aes(x = age, y = education_level)) +
  geom_jitter(width = 0.3, height = 0.3, alpha = 0.6) +
  # Dashed blue least-squares line, drawn without its confidence band.
  geom_smooth(
    method = "lm",
    se = FALSE,
    colour = "blue",
    linetype = "dashed"
  ) +
  # Age axis fixed to 0-120 with a tick every 10 years.
  scale_x_continuous(limits = c(0, 120), breaks = seq(0, 120, by = 10)) +
  # Show the numeric education codes 0-3 under readable names.
  scale_y_continuous(
    breaks = 0:3,
    labels = c("Sin educación", "Primaria", "Bachillerato", "Superior")
  ) +
  labs(title = "Edad y Nivel Educativo", x = "Edad", y = "Nivel Educativo") +
  # Larger base font for legibility; centred title and bold axis titles.
  theme_minimal(base_size = 15) +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(hjust = 0.5)
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 149 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 149 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Sin educación: predomina en edades más jóvenes y va disminuyendo a medida que aumenta la edad.
# Primaria: es común en edades jóvenes (15-30) y adultas (40-60), pero disminuye en personas mayores (60-120).
# Bachillerato: tiene un comportamiento similar a primaria, pero empieza a haber una reducción desde los 50 años.
# Superior: predomina en edades jóvenes (15-30) y a partir de los 30 empieza a disminuir a medida que avanza la edad.


# LÍNEA DE TENDENCIA
# El nivel educativo tiende a ser más bajo en personas mayores y más alto en personas más jóvenes, lo que refleja la mejora en el acceso a la educación y su crecimiento en los últimos años; también se deduce la necesidad de seguir trabajando en la equidad educativa para todas las edades.

# Age vs. education-level scatter plot with a linear trend line, split into
# one panel per value of `female`.
ggplot(data = Indice_pobreza, aes(x = age, y = education_level)) +
  # Jittered, semi-transparent points so overlapping observations stay visible.
  geom_jitter(alpha = 0.6, width = 0.3, height = 0.3) +
  # Dashed blue least-squares line, drawn without its confidence band.
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") +
  ggtitle("Edad y Nivel Educativo") +
  xlab("Edad") +
  ylab("Nivel Educativo") +
  # Limits cover the same 0-120 range as the breaks. Scale limits discard
  # out-of-range rows before the regression is fitted, so a narrower range
  # (e.g. 0-100) would silently drop the oldest respondents from the trend
  # line.
  scale_x_continuous(breaks = seq(0, 120, 10), limits = c(0, 120)) +
  # Show the numeric education codes 0-3 under readable names.
  scale_y_continuous(
    breaks = 0:3,
    labels = c("Sin educación", "Primaria", "Bachillerato", "Superior")
  ) +
  # Larger base font for legibility; centred title and bold axis titles.
  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold")
  ) +
  # Readable panel titles instead of the raw FALSE/TRUE values of `female`.
  facet_wrap(
    ~ female,
    labeller = as_labeller(c(`FALSE` = "Hombres", `TRUE` = "Mujeres"))
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 151 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 154 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Superior: los hombres adultos y mayores, con respecto a las mujeres, tienen mayor participación en la educación primaria, bachillerato o superior; esto puede estar ligado a las responsabilidades en el hogar, a la religión o a que, décadas atrás, por razones culturales las mujeres no estudiaban.

# Además, es importante resaltar que en las edades más jóvenes esta tendencia va cambiando para mujeres y hombres.

# Cabe resaltar que, en la categoría «Sin educación», es más probable encontrar hombres que mujeres.

library(ggplot2)

# Histogram of age in 5-year bins with side-by-side bars per value of `female`
# (converted to a factor so it can drive a discrete fill scale).
ggplot(data = Indice_pobreza, aes(x = age, fill = as.factor(female))) +
  geom_histogram(binwidth = 5, position = "dodge") +
  # Colours and labels are keyed by factor level rather than by position, so
  # they cannot be swapped if the level order ever changes; `name` replaces
  # the raw "as.factor(female)" legend title.
  scale_fill_manual(
    name = "Género",
    values = c("FALSE" = "blue", "TRUE" = "pink"),
    labels = c("FALSE" = "Hombres", "TRUE" = "Mujeres")
  ) +
  ggtitle("Distribución de la Edad por Género") +
  xlab("Edad (años)") +
  ylab("Frecuencia") +
  theme_minimal() +
  # Ticks every 10 years. The visible range is set with coord_cartesian()
  # instead of scale limits: scale limits remove histogram bars whose edges
  # fall outside the range, whereas coord_cartesian() only zooms the view.
  scale_x_continuous(breaks = seq(10, 120, 10)) +
  coord_cartesian(xlim = c(10, 120)) +
  theme(
    plot.title = element_text(hjust = 0.5),  # centre the title horizontally
    axis.title = element_text(face = "bold")  # bold axis titles
  )

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# Analisis

# La distribución de edades, tanto en mujeres como en hombres, se concentra en la población de estudio entre los 20 y los 40 años, y se reduce a medida que aumenta la edad.

# Las mujeres predominan en la población de estudio y tienen una mayor participación en edades de 20 a 40 años, muy por encima de la participación de los hombres.

# Una mayor participación de mujeres con respecto a hombres en edades mayores (70-100 años) puede indicar una mayor esperanza de vida en las mujeres o una mayor disposición a participar en el estudio.

# Stacked proportion bars (each bar sums to 1) of `phone_technology` within
# each `education_level`, with one panel per country.
ggplot(
  data = Indice_pobreza,
  aes(x = as.factor(education_level), fill = as.factor(phone_technology))
) +
  geom_bar(position = "fill") +
  # `name` replaces the raw "as.factor(phone_technology)" legend title.
  scale_fill_brewer(
    name = "Tecnología telefónica",
    palette = "Set3",
    labels = c(
      "Sin teléfono", "Teléfono básico", "Teléfono inteligente", "N/A"
    )
  ) +
  # Factor levels are character values, so the breaks are given as strings
  # instead of relying on implicit integer-to-character matching. Level 2 is
  # labelled "Bachillerato" to agree with the other education-level plots.
  scale_x_discrete(
    breaks = as.character(0:3),
    labels = c("Sin educación", "Primaria", "Bachillerato", "Superior")
  ) +
  ggtitle("Acceso a la Tecnología por Nivel Educativo") +
  xlab("Nivel Educativo") +
  ylab("Proporción") +
  theme_minimal() +
  # Centred title and bold axis titles.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold")
  ) +
  facet_wrap(~ country)

# Análisis
# A medida que aumenta el nivel educativo, también aumenta la proporción de personas que poseen teléfonos inteligentes.
# La proporción de personas sin teléfono disminuye significativamente a medida que se avanza en los estudios.
# Es notable la significativa brecha digital entre los niveles educativos, ya que las personas con menor nivel educativo tienen acceso limitado a la tecnología, en especial a los teléfonos inteligentes.

# Side-by-side bar counts of respondents per religion, coloured by country,
# with one panel per value of `is_urban`.
ggplot(data = Indice_pobreza, aes(x = religion, fill = country)) +
  geom_bar(position = "dodge") +
  ggtitle("Religion por Pais") +
  xlab("Religion") +
  ylab("No personas") +
  # theme_replace() modifies the session-wide default theme and is not meant
  # to be added to a plot; use the same complete theme as the other figures.
  theme_minimal() +
  # Centred title and bold axis titles.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold")
  ) +
  # Readable panel titles instead of the raw FALSE/TRUE values of `is_urban`.
  facet_wrap(
    ~ is_urban,
    labeller = as_labeller(c(`FALSE` = "Rural", `TRUE` = "Urbano"))
  )

# Analisis

# Bar chart of the mean of `share_hh_income_provided` (missing values
# ignored) for each `employment_type_last_year` category.
Indice_pobreza %>%
  group_by(employment_type_last_year) %>%
  summarise(
    ingreso_medio = mean(share_hh_income_provided, na.rm = TRUE)
  ) %>%
  ggplot(
    mapping = aes(
      x = employment_type_last_year,
      y = ingreso_medio,
      fill = employment_type_last_year
    )
  ) +
  # Bar heights are the pre-computed means, not row counts.
  geom_bar(stat = "identity") +
  ggtitle("Ingresos Medios por Tipo de Empleo") +
  xlab("Tipo de Empleo") +
  ylab("Ingreso Medio") +
  # Rotate the x-axis labels 45 degrees, right-aligned.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

# Bar chart of the mean of `num_financial_activities_last_year` (missing
# values ignored) for each `education_level`.
Indice_pobreza %>%
  group_by(education_level) %>%
  summarise(
    promedio_actividades = mean(
      num_financial_activities_last_year,
      na.rm = TRUE
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = as.factor(education_level),
      y = promedio_actividades,
      fill = as.factor(education_level)
    )
  ) +
  # Bar heights are the pre-computed means, not row counts.
  geom_bar(stat = "identity") +
  ggtitle("Promedio de Actividades Financieras por Nivel de Educación") +
  xlab("Nivel de Educación") +
  ylab("Promedio de Actividades Financieras") +
  # Rotate the x-axis labels 45 degrees, right-aligned.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))