library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(Metrics)
library(dslabs)
#install.packages("writexl")
library(writexl)
library(lmtest)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(plotly)
## 
## Adjuntando el paquete: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(corrplot)
## corrplot 0.94 loaded
# Load the survey table from the CSV file in the working directory.
Indice_pobreza <- read_csv(file = "test_values.csv")
## Rows: 8400 Columns: 59
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): country, religion, relationship_to_hh_head, employment_category_la...
## dbl (17): row_id, age, education_level, share_hh_income_provided, num_times_...
## lgl (37): is_urban, female, married, literacy, can_add, can_divide, can_calc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the column names, types and first values of the imported table.
Indice_pobreza %>% str()
## spc_tbl_ [8,400 × 59] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ row_id                               : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
##  $ country                              : chr [1:8400] "F" "C" "I" "I" ...
##  $ is_urban                             : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
##  $ age                                  : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
##  $ female                               : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
##  $ married                              : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
##  $ religion                             : chr [1:8400] "X" "P" "Q" "Q" ...
##  $ relationship_to_hh_head              : chr [1:8400] "Father/Mother" "Head" "Spouse" "Son/Daughter" ...
##  $ education_level                      : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
##  $ literacy                             : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
##  $ can_add                              : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_divide                           : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_calc_percents                    : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ can_calc_compounding                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ employed_last_year                   : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
##  $ employment_category_last_year        : chr [1:8400] "employed" "employed" "housewife_or_student" "employed" ...
##  $ employment_type_last_year            : chr [1:8400] "salaried" "salaried" "not_working" "salaried" ...
##  $ share_hh_income_provided             : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
##  $ income_ag_livestock_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
##  $ income_friends_family_last_year      : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
##  $ income_government_last_year          : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ income_own_business_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ income_private_sector_last_year      : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ income_public_sector_last_year       : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
##  $ num_times_borrowed_last_year         : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
##  $ borrowing_recency                    : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
##  $ formal_savings                       : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ informal_savings                     : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ cash_property_savings                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ has_insurance                        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ has_investment                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ bank_interest_rate                   : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mm_interest_rate                     : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mfi_interest_rate                    : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ other_fsp_interest_rate              : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ num_shocks_last_year                 : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
##  $ avg_shock_strength_last_year         : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
##  $ borrowed_for_emergency_last_year     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
##  $ borrowed_for_home_or_biz_last_year   : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_technology                     : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
##  $ can_call                             : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
##  $ can_text                             : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
##  $ can_use_internet                     : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ can_make_transaction                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_ownership                      : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
##  $ advanced_phone_use                   : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_bank_acct                        : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ reg_mm_acct                          : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_formal_nbfi_account              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ financially_included                 : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ active_bank_user                     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_mm_user                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_formal_nbfi_user              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_informal_nbfi_user            : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
##  $ nonreg_active_mm_user                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ num_formal_institutions_last_year    : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
##  $ num_informal_institutions_last_year  : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
##  $ num_financial_activities_last_year   : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   row_id = col_double(),
##   ..   country = col_character(),
##   ..   is_urban = col_logical(),
##   ..   age = col_double(),
##   ..   female = col_logical(),
##   ..   married = col_logical(),
##   ..   religion = col_character(),
##   ..   relationship_to_hh_head = col_character(),
##   ..   education_level = col_double(),
##   ..   literacy = col_logical(),
##   ..   can_add = col_logical(),
##   ..   can_divide = col_logical(),
##   ..   can_calc_percents = col_logical(),
##   ..   can_calc_compounding = col_logical(),
##   ..   employed_last_year = col_logical(),
##   ..   employment_category_last_year = col_character(),
##   ..   employment_type_last_year = col_character(),
##   ..   share_hh_income_provided = col_double(),
##   ..   income_ag_livestock_last_year = col_logical(),
##   ..   income_friends_family_last_year = col_logical(),
##   ..   income_government_last_year = col_logical(),
##   ..   income_own_business_last_year = col_logical(),
##   ..   income_private_sector_last_year = col_logical(),
##   ..   income_public_sector_last_year = col_logical(),
##   ..   num_times_borrowed_last_year = col_double(),
##   ..   borrowing_recency = col_double(),
##   ..   formal_savings = col_logical(),
##   ..   informal_savings = col_logical(),
##   ..   cash_property_savings = col_logical(),
##   ..   has_insurance = col_logical(),
##   ..   has_investment = col_logical(),
##   ..   bank_interest_rate = col_double(),
##   ..   mm_interest_rate = col_double(),
##   ..   mfi_interest_rate = col_double(),
##   ..   other_fsp_interest_rate = col_double(),
##   ..   num_shocks_last_year = col_double(),
##   ..   avg_shock_strength_last_year = col_double(),
##   ..   borrowed_for_emergency_last_year = col_logical(),
##   ..   borrowed_for_daily_expenses_last_year = col_logical(),
##   ..   borrowed_for_home_or_biz_last_year = col_logical(),
##   ..   phone_technology = col_double(),
##   ..   can_call = col_logical(),
##   ..   can_text = col_logical(),
##   ..   can_use_internet = col_logical(),
##   ..   can_make_transaction = col_logical(),
##   ..   phone_ownership = col_double(),
##   ..   advanced_phone_use = col_logical(),
##   ..   reg_bank_acct = col_logical(),
##   ..   reg_mm_acct = col_logical(),
##   ..   reg_formal_nbfi_account = col_logical(),
##   ..   financially_included = col_logical(),
##   ..   active_bank_user = col_logical(),
##   ..   active_mm_user = col_logical(),
##   ..   active_formal_nbfi_user = col_logical(),
##   ..   active_informal_nbfi_user = col_logical(),
##   ..   nonreg_active_mm_user = col_logical(),
##   ..   num_formal_institutions_last_year = col_double(),
##   ..   num_informal_institutions_last_year = col_double(),
##   ..   num_financial_activities_last_year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Convert every character column to a factor so categorical variables are
# stored as such. `across(where(...))` is used in place of the superseded
# scoped verb `mutate_if()`; the resulting table is the same.
Indice_pobreza <- Indice_pobreza %>%
  mutate(across(where(is.character), as.factor))
# Inspect the structure again to confirm the character columns became factors.
str(Indice_pobreza)
## tibble [8,400 × 59] (S3: tbl_df/tbl/data.frame)
##  $ row_id                               : num [1:8400] 0 1 2 3 4 5 6 7 8 9 ...
##  $ country                              : Factor w/ 7 levels "A","C","D","F",..: 4 2 6 6 1 1 4 2 1 7 ...
##  $ is_urban                             : logi [1:8400] FALSE FALSE FALSE TRUE FALSE TRUE ...
##  $ age                                  : num [1:8400] 57 40 35 19 61 16 40 27 35 72 ...
##  $ female                               : logi [1:8400] TRUE FALSE TRUE TRUE TRUE TRUE ...
##  $ married                              : logi [1:8400] TRUE TRUE TRUE FALSE TRUE FALSE ...
##  $ religion                             : Factor w/ 5 levels "N","O","P","Q",..: 5 3 4 4 4 4 4 3 4 5 ...
##  $ relationship_to_hh_head              : Factor w/ 7 levels "Father/Mother",..: 1 2 6 5 1 5 2 6 2 2 ...
##  $ education_level                      : num [1:8400] 3 0 1 3 0 2 0 0 3 1 ...
##  $ literacy                             : logi [1:8400] TRUE TRUE FALSE TRUE FALSE TRUE ...
##  $ can_add                              : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_divide                           : logi [1:8400] TRUE TRUE TRUE TRUE FALSE TRUE ...
##  $ can_calc_percents                    : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ can_calc_compounding                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ employed_last_year                   : logi [1:8400] TRUE TRUE FALSE TRUE FALSE FALSE ...
##  $ employment_category_last_year        : Factor w/ 5 levels "employed","housewife_or_student",..: 1 1 2 1 2 2 1 2 1 1 ...
##  $ employment_type_last_year            : Factor w/ 5 levels "irregular_seasonal",..: 4 4 2 4 2 2 5 2 4 5 ...
##  $ share_hh_income_provided             : num [1:8400] NA 3 5 1 1 1 5 2 5 4 ...
##  $ income_ag_livestock_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE TRUE FALSE ...
##  $ income_friends_family_last_year      : logi [1:8400] TRUE FALSE TRUE TRUE TRUE FALSE ...
##  $ income_government_last_year          : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ income_own_business_last_year        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ income_private_sector_last_year      : logi [1:8400] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ income_public_sector_last_year       : logi [1:8400] TRUE TRUE FALSE FALSE FALSE FALSE ...
##  $ num_times_borrowed_last_year         : num [1:8400] 1 1 1 0 1 1 0 0 0 2 ...
##  $ borrowing_recency                    : num [1:8400] 2 2 2 0 2 2 0 0 0 2 ...
##  $ formal_savings                       : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ informal_savings                     : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ cash_property_savings                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ has_insurance                        : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ has_investment                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ bank_interest_rate                   : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mm_interest_rate                     : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ mfi_interest_rate                    : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ other_fsp_interest_rate              : num [1:8400] NA NA NA NA NA NA NA NA NA NA ...
##  $ num_shocks_last_year                 : num [1:8400] 2 0 0 1 0 0 0 0 1 3 ...
##  $ avg_shock_strength_last_year         : num [1:8400] 2 0 0 5 0 0 0 0 2 5 ...
##  $ borrowed_for_emergency_last_year     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ borrowed_for_daily_expenses_last_year: logi [1:8400] FALSE FALSE TRUE FALSE TRUE TRUE ...
##  $ borrowed_for_home_or_biz_last_year   : logi [1:8400] FALSE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_technology                     : num [1:8400] 1 1 1 0 0 0 0 0 2 1 ...
##  $ can_call                             : logi [1:8400] TRUE FALSE TRUE TRUE FALSE TRUE ...
##  $ can_text                             : logi [1:8400] TRUE FALSE TRUE FALSE FALSE TRUE ...
##  $ can_use_internet                     : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ can_make_transaction                 : logi [1:8400] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ phone_ownership                      : num [1:8400] 2 2 2 1 1 1 0 0 2 2 ...
##  $ advanced_phone_use                   : logi [1:8400] TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_bank_acct                        : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ reg_mm_acct                          : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ reg_formal_nbfi_account              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ financially_included                 : logi [1:8400] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ active_bank_user                     : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_mm_user                       : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_formal_nbfi_user              : logi [1:8400] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ active_informal_nbfi_user            : logi [1:8400] FALSE FALSE TRUE TRUE FALSE FALSE ...
##  $ nonreg_active_mm_user                : logi [1:8400] FALSE FALSE TRUE FALSE FALSE FALSE ...
##  $ num_formal_institutions_last_year    : num [1:8400] 0 0 1 0 0 0 0 1 2 2 ...
##  $ num_informal_institutions_last_year  : num [1:8400] 0 0 1 1 0 0 0 0 0 0 ...
##  $ num_financial_activities_last_year   : num [1:8400] 0 0 2 0 0 0 0 1 2 4 ...
# Build the modelling table: keep only the response (`literacy`) and the
# predictor (`education_level`), and recode the logical `literacy` flag as
# numeric (TRUE -> 1, FALSE -> 0, NA stays NA).
# `as.numeric()` on a logical already yields 0/1, so the former
# `ifelse(literacy == 1, 1, 0)` step was a no-op and has been dropped.
Indice_pobreza_final <- Indice_pobreza %>%
  select(literacy, education_level) %>%
  mutate(literacy = as.numeric(literacy))

# Preview the first rows of the modelling table.
head(Indice_pobreza_final)
## # A tibble: 6 × 2
##   literacy education_level
##      <dbl>           <dbl>
## 1        1               3
## 2        1               0
## 3        0               1
## 4        1               3
## 5        0               0
## 6        1               2
# Fit a binomial GLM (logistic regression) of `literacy` on `education_level`
# using the two-column modelling table.
modelo_logistico <- glm(
  formula = literacy ~ education_level,
  family = "binomial",
  data = Indice_pobreza_final
)



# Plot the observed 0/1 `literacy` values against `education_level` and
# overlay the fitted probability curve from `modelo_logistico`.
Indice_pobreza_final %>%
  ggplot(aes(x = education_level, y = literacy)) +
  # Observed points, drawn with point shape 1 and coloured by outcome.
  geom_point(aes(color = as.factor(literacy)), shape = 1) +
  # Fitted curve: model predictions on the response (probability) scale.
  stat_function(
    fun = function(nivel) {
      predict(
        modelo_logistico,
        newdata = data.frame(education_level = nivel),
        type = "response"
      )
    }
  ) +
  labs(
    title = "Regresión logística",
    x = "Nivel de educación",
    y = "Acostumbra a leer"
  ) +
  theme_bw() +
  # The colour legend is hidden.
  theme(legend.position = "none")
## Warning: Removed 149 rows containing missing values or values outside the scale range
## (`geom_point()`).