library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(Metrics)
library(dslabs)
# install.packages("writexl")  # one-time setup: run manually if writexl is not installed
library(writexl)
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(plotly)
##
## Adjuntando el paquete: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(corrplot)
## corrplot 0.94 loaded
# Read the raw data set from the CSV file in the working directory.
Indice_pobreza <- read_csv(file = "test_values.csv")
## Rows: 8400 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): country, religion, relationship_to_hh_head, employment_category_la...
## dbl (17): row_id, age, education_level, share_hh_income_provided, num_times_...
## lgl (37): is_urban, female, married, literacy, can_add, can_divide, can_calc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the structure (column names, types, first values) of the loaded data.
str(Indice_pobreza)
## spc_tbl_ [8,400 × 59] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ row_id : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
## $ country : chr [1:8400] "F" "C" "I" "I" ...
## $ is_urban : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
## $ age : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
## $ female : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
## $ married : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ religion : chr [1:8400] "X" "P" "Q" "Q" ...
## $ relationship_to_hh_head : chr [1:8400] "Father/Mother" "Head" "Spouse" "Son/Daughter" ...
## $ education_level : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
## $ literacy : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
## $ can_add : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ can_divide : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ can_calc_percents : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ can_calc_compounding : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ employed_last_year : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
## $ employment_category_last_year : chr [1:8400] "employed" "employed" "housewife_or_student" "employed" ...
## $ employment_type_last_year : chr [1:8400] "salaried" "salaried" "not_working" "salaried" ...
## $ share_hh_income_provided : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
## $ income_ag_livestock_last_year : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
## $ income_friends_family_last_year : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
## $ income_government_last_year : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ income_own_business_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ income_private_sector_last_year : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
## $ income_public_sector_last_year : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
## $ num_times_borrowed_last_year : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
## $ borrowing_recency : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
## $ formal_savings : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ informal_savings : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ cash_property_savings : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ has_insurance : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ has_investment : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ bank_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ mm_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ mfi_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ other_fsp_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ num_shocks_last_year : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
## $ avg_shock_strength_last_year : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
## $ borrowed_for_emergency_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
## $ borrowed_for_home_or_biz_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ phone_technology : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
## $ can_call : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
## $ can_text : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
## $ can_use_internet : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ can_make_transaction : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ phone_ownership : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
## $ advanced_phone_use : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ reg_bank_acct : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ reg_mm_acct : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ reg_formal_nbfi_account : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ financially_included : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ active_bank_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_mm_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_formal_nbfi_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_informal_nbfi_user : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
## $ nonreg_active_mm_user : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ num_formal_institutions_last_year : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
## $ num_informal_institutions_last_year : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
## $ num_financial_activities_last_year : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
## - attr(*, "spec")=
## .. cols(
## .. row_id = col_double(),
## .. country = col_character(),
## .. is_urban = col_logical(),
## .. age = col_double(),
## .. female = col_logical(),
## .. married = col_logical(),
## .. religion = col_character(),
## .. relationship_to_hh_head = col_character(),
## .. education_level = col_double(),
## .. literacy = col_logical(),
## .. can_add = col_logical(),
## .. can_divide = col_logical(),
## .. can_calc_percents = col_logical(),
## .. can_calc_compounding = col_logical(),
## .. employed_last_year = col_logical(),
## .. employment_category_last_year = col_character(),
## .. employment_type_last_year = col_character(),
## .. share_hh_income_provided = col_double(),
## .. income_ag_livestock_last_year = col_logical(),
## .. income_friends_family_last_year = col_logical(),
## .. income_government_last_year = col_logical(),
## .. income_own_business_last_year = col_logical(),
## .. income_private_sector_last_year = col_logical(),
## .. income_public_sector_last_year = col_logical(),
## .. num_times_borrowed_last_year = col_double(),
## .. borrowing_recency = col_double(),
## .. formal_savings = col_logical(),
## .. informal_savings = col_logical(),
## .. cash_property_savings = col_logical(),
## .. has_insurance = col_logical(),
## .. has_investment = col_logical(),
## .. bank_interest_rate = col_double(),
## .. mm_interest_rate = col_double(),
## .. mfi_interest_rate = col_double(),
## .. other_fsp_interest_rate = col_double(),
## .. num_shocks_last_year = col_double(),
## .. avg_shock_strength_last_year = col_double(),
## .. borrowed_for_emergency_last_year = col_logical(),
## .. borrowed_for_daily_expenses_last_year = col_logical(),
## .. borrowed_for_home_or_biz_last_year = col_logical(),
## .. phone_technology = col_double(),
## .. can_call = col_logical(),
## .. can_text = col_logical(),
## .. can_use_internet = col_logical(),
## .. can_make_transaction = col_logical(),
## .. phone_ownership = col_double(),
## .. advanced_phone_use = col_logical(),
## .. reg_bank_acct = col_logical(),
## .. reg_mm_acct = col_logical(),
## .. reg_formal_nbfi_account = col_logical(),
## .. financially_included = col_logical(),
## .. active_bank_user = col_logical(),
## .. active_mm_user = col_logical(),
## .. active_formal_nbfi_user = col_logical(),
## .. active_informal_nbfi_user = col_logical(),
## .. nonreg_active_mm_user = col_logical(),
## .. num_formal_institutions_last_year = col_double(),
## .. num_informal_institutions_last_year = col_double(),
## .. num_financial_activities_last_year = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Convert every character column to a factor.
# `mutate(across(where(...)))` is the current dplyr idiom; it replaces the
# superseded `mutate_if()` and produces the same result.
Indice_pobreza <- Indice_pobreza %>%
  mutate(across(where(is.character), as.factor))
# Print the structure again to confirm the character columns are now factors.
str(Indice_pobreza)
## tibble [8,400 × 59] (S3: tbl_df/tbl/data.frame)
## $ row_id : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
## $ country : Factor w/ 7 levels "A","C","D","F",..: 4 2 6 6 1 1 4 2 1 7 ...
## $ is_urban : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
## $ age : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
## $ female : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
## $ married : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ religion : Factor w/ 5 levels "N","O","P","Q",..: 5 3 4 4 4 4 4 3 4 5 ...
## $ relationship_to_hh_head : Factor w/ 7 levels "Father/Mother",..: 1 2 6 5 1 5 2 6 2 2 ...
## $ education_level : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
## $ literacy : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
## $ can_add : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ can_divide : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ can_calc_percents : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ can_calc_compounding : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ employed_last_year : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
## $ employment_category_last_year : Factor w/ 5 levels "employed","housewife_or_student",..: 1 1 2 1 2 2 1 2 1 1 ...
## $ employment_type_last_year : Factor w/ 5 levels "irregular_seasonal",..: 4 4 2 4 2 2 5 2 4 5 ...
## $ share_hh_income_provided : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
## $ income_ag_livestock_last_year : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
## $ income_friends_family_last_year : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
## $ income_government_last_year : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ income_own_business_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ income_private_sector_last_year : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
## $ income_public_sector_last_year : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
## $ num_times_borrowed_last_year : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
## $ borrowing_recency : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
## $ formal_savings : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ informal_savings : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ cash_property_savings : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ has_insurance : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ has_investment : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ bank_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ mm_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ mfi_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ other_fsp_interest_rate : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
## $ num_shocks_last_year : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
## $ avg_shock_strength_last_year : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
## $ borrowed_for_emergency_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
## $ borrowed_for_home_or_biz_last_year : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ phone_technology : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
## $ can_call : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
## $ can_text : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
## $ can_use_internet : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ can_make_transaction : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
## $ phone_ownership : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
## $ advanced_phone_use : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ reg_bank_acct : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ reg_mm_acct : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ reg_formal_nbfi_account : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ financially_included : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ active_bank_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_mm_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_formal_nbfi_user : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ active_informal_nbfi_user : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
## $ nonreg_active_mm_user : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ num_formal_institutions_last_year : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
## $ num_informal_institutions_last_year : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
## $ num_financial_activities_last_year : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
# Build the two-column modelling data set: the response `literacy` recoded as
# numeric, plus the predictor `education_level`.
# `as.numeric()` on a logical vector already yields 1 for TRUE, 0 for FALSE and
# NA for NA, so no further `ifelse()` recoding is needed.
Indice_pobreza_final <- Indice_pobreza %>%
  mutate(literacy = as.numeric(literacy)) %>%
  select(literacy, education_level)
# Preview the first rows of the modelling data.
head(Indice_pobreza_final)
## # A tibble: 6 × 2
## literacy education_level
## <dbl> <dbl>
## 1 1 3
## 2 1 0
## 3 0 1
## 4 1 3
## 5 0 0
## 6 1 2
# Fit a logistic regression of literacy (0/1) on education level.
modelo_logistico <- glm(
  formula = literacy ~ education_level,
  family = "binomial",
  data = Indice_pobreza_final
)
# Predicted probability from the fitted logistic model for a vector of
# education levels; used to draw the fitted curve over the observed points.
curva_ajustada <- function(x) {
  predict(
    modelo_logistico,
    newdata = data.frame(education_level = x),
    type = "response"
  )
}

# Scatter of the observed 0/1 outcomes (hollow circles, coloured by outcome)
# with the model's fitted probability curve drawn on top.
ggplot(Indice_pobreza_final, aes(x = education_level, y = literacy)) +
  geom_point(aes(color = as.factor(literacy)), shape = 1) +
  stat_function(fun = curva_ajustada) +
  labs(
    title = "Regresión logística",
    x = "Nivel de educación",
    y = "Acostumbra a leer"
  ) +
  theme_bw() +
  # Must come after theme_bw(), which would otherwise reset the legend setting.
  theme(legend.position = "none")
## Warning: Removed 149 rows containing missing values or values outside the scale range
## (`geom_point()`).
