#INTEGRANTES DEL EQUIPO
#Benjamín Reyes Sánchez A00830513
#Emilio Amarante Portilla A01236198
#Santiago Macías Villa A00830653
#Santiago Guerra Leija A00571467
#Lesly Gómez Hernández A00830701

# Read the bank CSV (first row holds the column names) into the data frame
# `bank`, which every later statement in this script works on.
bank <- read.csv(
  file = "/Users/benjaminreyessanchez/Downloads/bank2.csv",
  header = TRUE
)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Compact column-by-column overview: row/column counts, types, first values.
dplyr::glimpse(bank)
## Rows: 4,521
## Columns: 17
## $ age       <int> 30, 33, 35, 30, 59, 35, 36, 39, 41, 43, 39, 43, 36, 20, 31, …
## $ job       <chr> "unemployed", "services", "management", "management", "blue-…
## $ marital   <chr> "married", "married", "single", "married", "married", "singl…
## $ education <chr> "primary", "secondary", "tertiary", "tertiary", "secondary",…
## $ default   <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
## $ balance   <int> 1787, 4789, 1350, 1476, 0, 747, 307, 147, 221, -88, 9374, 26…
## $ housing   <chr> "no", "yes", "yes", "yes", "yes", "no", "yes", "yes", "yes",…
## $ loan      <chr> "no", "yes", "no", "yes", "no", "no", "no", "no", "no", "yes…
## $ contact   <chr> "cellular", "cellular", "cellular", "unknown", "unknown", "c…
## $ day       <int> 19, 11, 16, 3, 5, 23, 14, 6, 14, 17, 20, 17, 13, 30, 29, 29,…
## $ month     <chr> "oct", "may", "apr", "jun", "may", "feb", "may", "may", "may…
## $ duration  <int> 79, 220, 185, 199, 226, 141, 341, 151, 57, 313, 273, 113, 32…
## $ campaign  <int> 1, 1, 1, 4, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 5, 1, 1, 1, …
## $ pdays     <int> -1, 339, 330, -1, -1, 176, 330, -1, -1, 147, -1, -1, -1, -1,…
## $ previous  <int> 0, 4, 1, 0, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, …
## $ poutcome  <chr> "unknown", "failure", "failure", "unknown", "unknown", "fail…
## $ y         <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
# Per-column summary: quartiles and mean for numeric columns, length/class
# for character columns.
summary(object = bank)
##       age            job              marital           education        
##  Min.   :19.00   Length:4521        Length:4521        Length:4521       
##  1st Qu.:33.00   Class :character   Class :character   Class :character  
##  Median :39.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :41.17                                                           
##  3rd Qu.:49.00                                                           
##  Max.   :87.00                                                           
##    default             balance        housing              loan          
##  Length:4521        Min.   :-3313   Length:4521        Length:4521       
##  Class :character   1st Qu.:   69   Class :character   Class :character  
##  Mode  :character   Median :  444   Mode  :character   Mode  :character  
##                     Mean   : 1423                                        
##                     3rd Qu.: 1480                                        
##                     Max.   :71188                                        
##    contact               day           month              duration   
##  Length:4521        Min.   : 1.00   Length:4521        Min.   :   4  
##  Class :character   1st Qu.: 9.00   Class :character   1st Qu.: 104  
##  Mode  :character   Median :16.00   Mode  :character   Median : 185  
##                     Mean   :15.92                      Mean   : 264  
##                     3rd Qu.:21.00                      3rd Qu.: 329  
##                     Max.   :31.00                      Max.   :3025  
##     campaign          pdays           previous         poutcome        
##  Min.   : 1.000   Min.   : -1.00   Min.   : 0.0000   Length:4521       
##  1st Qu.: 1.000   1st Qu.: -1.00   1st Qu.: 0.0000   Class :character  
##  Median : 2.000   Median : -1.00   Median : 0.0000   Mode  :character  
##  Mean   : 2.794   Mean   : 39.77   Mean   : 0.5426                     
##  3rd Qu.: 3.000   3rd Qu.: -1.00   3rd Qu.: 0.0000                     
##  Max.   :50.000   Max.   :871.00   Max.   :25.0000                     
##       y            
##  Length:4521       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Préstamo de vivienda según ocupación

# Number of people with / without a housing loan, broken down by occupation.
# Rows are the housing-loan answer ("no"/"yes"); columns are the job categories.
tab <- table(bank[["housing"]], bank[["job"]])
print(tab)
##      
##       admin. blue-collar entrepreneur housemaid management retired
##   no     176         251           74        73        466     180
##   yes    302         695           94        39        503      50
##      
##       self-employed services student technician unemployed unknown
##   no             95      132      64        344         70      37
##   yes            88      285      20        424         58       1
# Stacked bar chart: housing-loan counts ("no" at the bottom, "yes" on top)
# for each occupation.
# The fill colours are defined once and handed to BOTH barplot() and legend().
# Without an explicit `col`, barplot() draws matrix input with a gamma-corrected
# grey ramp, so a legend that hard-codes black/grey swatches would not match
# the bars actually drawn.
housing_cols <- c("black", "grey")
barplot(tab, col = housing_cols, main = "Prestamo de vivienda según ocupación")
legend(
  x = "topright",
  legend = c("No", "Yes"),  # same order as the rows of `tab`
  fill = housing_cols,
  title = "Housing"
)

# Share of clients with / without a housing loan within each occupation:
# margin = 2 normalises every column (job) so that it sums to 1.
print(prop.table(x = tab, margin = 2))
##      
##           admin. blue-collar entrepreneur  housemaid management    retired
##   no  0.36820084  0.26532770   0.44047619 0.65178571 0.48090815 0.78260870
##   yes 0.63179916  0.73467230   0.55952381 0.34821429 0.51909185 0.21739130
##      
##       self-employed   services    student technician unemployed    unknown
##   no     0.51912568 0.31654676 0.76190476 0.44791667 0.54687500 0.97368421
##   yes    0.48087432 0.68345324 0.23809524 0.55208333 0.45312500 0.02631579
#Proporciones

Resultados de la Campaña - ¿Lograron o no que el cliente hiciera un depósito en la institución?

# Scatter plot of client age against the duration of the last contact, with
# each point coloured by whether the client ended up making a long-term
# deposit at the bank (column `y`: "no" / "yes").
#
# The colours are looked up in an explicit vector instead of passing the
# factor straight to `col`: a factor is resolved through the session palette(),
# whose second entry is not pure red in R >= 4.0 (and can be changed by the
# user), so the points could differ from the swatches shown in the legend.
# The factor levels are also fixed explicitly so the no/yes -> colour mapping
# does not depend on alphabetical ordering.
outcome <- factor(bank$y, levels = c("no", "yes"))
outcome_cols <- c("black", "red")
plot(
  x = bank$age,
  y = bank$duration,
  col = outcome_cols[as.integer(outcome)],
  main = "Resultados de la Campaña",
  xlab = "Edad",
  ylab = "Duración del último contacto"
)
legend(
  x = "topleft",
  legend = c("No", "Sí"),  # same order as the levels of `outcome`
  fill = outcome_cols,
  title = "Objetivo Completado"
)