# Read bank2.csv into the `bank` data frame used by everything below.
# NOTE(review): absolute, user-specific path — this only resolves on the
# author's machine; consider a project-relative path. TODO confirm location.
bank <- read.csv("/Users/ingridzapatajuarez/Downloads/R/bank2.csv")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Compact overview of the data: one line per column with type and first values.
bank %>% glimpse()
## Rows: 4,521
## Columns: 17
## $ age       <int> 30, 33, 35, 30, 59, 35, 36, 39, 41, 43, 39, 43, 36, 20, 31, …
## $ job       <chr> "unemployed", "services", "management", "management", "blue-…
## $ marital   <chr> "married", "married", "single", "married", "married", "singl…
## $ education <chr> "primary", "secondary", "tertiary", "tertiary", "secondary",…
## $ default   <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
## $ balance   <int> 1787, 4789, 1350, 1476, 0, 747, 307, 147, 221, -88, 9374, 26…
## $ housing   <chr> "no", "yes", "yes", "yes", "yes", "no", "yes", "yes", "yes",…
## $ loan      <chr> "no", "yes", "no", "yes", "no", "no", "no", "no", "no", "yes…
## $ contact   <chr> "cellular", "cellular", "cellular", "unknown", "unknown", "c…
## $ day       <int> 19, 11, 16, 3, 5, 23, 14, 6, 14, 17, 20, 17, 13, 30, 29, 29,…
## $ month     <chr> "oct", "may", "apr", "jun", "may", "feb", "may", "may", "may…
## $ duration  <int> 79, 220, 185, 199, 226, 141, 341, 151, 57, 313, 273, 113, 32…
## $ campaign  <int> 1, 1, 1, 4, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 5, 1, 1, 1, …
## $ pdays     <int> -1, 339, 330, -1, -1, 176, 330, -1, -1, 147, -1, -1, -1, -1,…
## $ previous  <int> 0, 4, 1, 0, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, …
## $ poutcome  <chr> "unknown", "failure", "failure", "unknown", "unknown", "fail…
## $ y         <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
# Per-column summary: quartiles for numeric columns, length/class for text.
summary(object = bank)
##       age            job              marital           education        
##  Min.   :19.00   Length:4521        Length:4521        Length:4521       
##  1st Qu.:33.00   Class :character   Class :character   Class :character  
##  Median :39.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :41.17                                                           
##  3rd Qu.:49.00                                                           
##  Max.   :87.00                                                           
##    default             balance        housing              loan          
##  Length:4521        Min.   :-3313   Length:4521        Length:4521       
##  Class :character   1st Qu.:   69   Class :character   Class :character  
##  Mode  :character   Median :  444   Mode  :character   Mode  :character  
##                     Mean   : 1423                                        
##                     3rd Qu.: 1480                                        
##                     Max.   :71188                                        
##    contact               day           month              duration   
##  Length:4521        Min.   : 1.00   Length:4521        Min.   :   4  
##  Class :character   1st Qu.: 9.00   Class :character   1st Qu.: 104  
##  Mode  :character   Median :16.00   Mode  :character   Median : 185  
##                     Mean   :15.92                      Mean   : 264  
##                     3rd Qu.:21.00                      3rd Qu.: 329  
##                     Max.   :31.00                      Max.   :3025  
##     campaign          pdays           previous         poutcome        
##  Min.   : 1.000   Min.   : -1.00   Min.   : 0.0000   Length:4521       
##  1st Qu.: 1.000   1st Qu.: -1.00   1st Qu.: 0.0000   Class :character  
##  Median : 2.000   Median : -1.00   Median : 0.0000   Mode  :character  
##  Mean   : 2.794   Mean   : 39.77   Mean   : 0.5426                     
##  3rd Qu.: 3.000   3rd Qu.: -1.00   3rd Qu.: 0.0000                     
##  Max.   :50.000   Max.   :871.00   Max.   :25.0000                     
##       y            
##  Length:4521       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Histograma

# Default histogram of age; title and x label are derived from the expression.
hist(bank$age)

# Age histogram with an explicit title and axis labels.
hist(
  bank$age,
  xlab = "Edad",
  ylab = "Frecuencia",
  main = "Histograma de Edad"
)

Cambiar color

# Same labelled age histogram, with the bars filled in orange.
hist(
  bank$age,
  col = "orange",
  xlab = "Edad",
  ylab = "Frecuencia",
  main = "Histograma de Edad"
)

Crear clases

# Age histogram with coarser classes. A single number for `breaks` is only a
# suggestion: hist() picks nearby "pretty" breakpoints.
hist(
  bank$age,
  breaks = 5,
  col = "orange",
  xlab = "Edad",
  ylab = "Frecuencia",
  main = "Histograma de Edad"
)

Gráfica de barras

# Plotting a single factor draws a bar chart of the counts per level.
plot(x = factor(bank$education))

Agregar colores

# Bar chart of education counts with a title, axis labels and one colour
# per bar.
plot(
  factor(bank$education),
  col = c("royalblue", "deepskyblue1", "dodgerblue3", "lavenderblush4"),
  xlab = "Nivel Educativo",
  ylab = "Frecuencia",
  main = "Gráfica de Educación"
)

Diagrama de dispersión

# Scatter plot of balance against age (axis labels come from the expressions).
plot(bank$age, bank$balance)

Manipulando datos

# Cap balances at 15000 (presumably so a few very large balances do not
# compress the rest of the scatter plot). pmin() is the vectorised clamp and
# leaves NA balances as NA.
# NOTE: this overwrites bank$balance in place, so every later use of the
# column sees the capped values.
bank$balance <- pmin(bank$balance, 15000)
plot(x = bank$age, y = bank$balance)

Anexar color para distinguir el nivel educativo de los datos

# Scatter plot coloured by education level: the factor's integer codes pick
# colours from the current palette().
plot(
  x = bank$age,
  y = bank$balance,
  col = factor(bank$education)
)

Elegir los colores

# Scatter plot of balance against age with one hand-picked colour per
# education level. The colour vector is indexed by the factor, so each point
# takes the colour of its own level (levels sort alphabetically: primary,
# secondary, tertiary, unknown). Piping the column into c() instead appended
# the colour names to the data, so the chosen colours were never applied.
plot(
  x = bank$age,
  y = bank$balance,
  col = c("royalblue", "seagreen", "purple", "grey")[factor(bank$education)]
)

Etiquetas

# Scatter plot coloured by loan status. The factor codes select palette()[1]
# for "no" and palette()[2] for "yes".
plot(x = bank$age, y = bank$balance, col = factor(bank$loan))
# Take the legend swatches from palette() as well so they match the points:
# under the default palette of R >= 4.0, colour 2 is not pure "red".
legend(
  x = "topleft",
  legend = c("No", "Yes"),
  fill = palette()[1:2],
  title = "Loan"
)

DIAGRAMA DE CAJA / BOX PLOT

# A factor on x and a numeric on y gives one box plot of age per education
# level.
plot(factor(bank$education), bank$age)

Colores

# Box plots of age per education level, with labels and one colour per box.
plot(
  factor(bank$education),
  bank$age,
  col = c("olivedrab3", "rosybrown3", "royalblue4", "violetred3"),
  xlab = "Nivel educativo",
  ylab = "Edad",
  main = "Edad por nivel educativo"
)

Otra manera de generar Box Plot

# Single box plot of age drawn directly with boxplot().
boxplot(bank$age, col = "gold")

GRÁFICO DE MOSAICO

# Two factors: shows the split of education levels within each marital status.
plot(factor(bank$marital), factor(bank$education))

Color

# Education split within each marital status, with explicit fill colours.
# The fills map to the levels of y, and education has four levels (primary,
# secondary, tertiary, unknown). With only three colours the vector is
# recycled and "unknown" is drawn in the same colour as "primary", so a
# fourth colour is supplied.
plot(
  x = factor(bank$marital),
  y = factor(bank$education),
  col = c("violetred3", "black", "royalblue4", "orange")
)

# Split of loan status (y) within each job category (x).
plot(
  factor(bank$job),
  factor(bank$loan),
  col = c("violetred3", "orange"),
  main = "Relación Trabajo/Prestamo"
)

# Cross-tabulate loan status (rows) by job (columns); the outer parentheses
# make the assignment print its value.
(tab_bank2 <- table(bank$loan, bank$job))
##      
##       admin. blue-collar entrepreneur housemaid management retired
##   no     387         790          127        99        849     198
##   yes     91         156           41        13        120      32
##      
##       self-employed services student technician unemployed unknown
##   no            153      343      83        649        115      37
##   yes            30       74       1        119         13       1
# Stacked bar chart: one bar per job, segmented by loan status.
barplot(
  height = tab_bank2,
  main = "Relación Trabajo-Préstamo",
  xlab = "Trabajo",
  ylab = "Frecuencia",
  col = c("darkorchid4", "darkorchid1")
)
# Legend uses the same two colours, in the row order of the table (no, yes).
legend(
  "topleft",
  title = "Loan",
  legend = c("No", "Yes"),
  fill = c("darkorchid4", "darkorchid1")
)

Barplot - ejercicio - estado marital / loan

# Number of clients in each marital status.
table(bank$marital)
## 
## divorced  married   single 
##      528     2797     1196
# Cross-tabulate loan status (rows) by marital status (columns); the outer
# parentheses make the assignment print its value.
(tab_bank1 <- table(bank$loan, bank$marital))
##      
##       divorced married single
##   no       438    2344   1048
##   yes       90     453    148

Proporción por renglón

# Row proportions: each loan-status row sums to 1.
prop.table(x = tab_bank1, margin = 1)
##      
##        divorced   married    single
##   no  0.1143603 0.6120104 0.2736292
##   yes 0.1302460 0.6555716 0.2141823

Proporción por columna

# Column proportions: each marital-status column sums to 1.
prop.table(x = tab_bank1, margin = 2)
##      
##        divorced   married    single
##   no  0.8295455 0.8380408 0.8762542
##   yes 0.1704545 0.1619592 0.1237458
# Stacked bar chart: one bar per marital status, segmented by loan status.
barplot(
  height = tab_bank1,
  main = "Relación Préstamo - Estado Marital",
  xlab = "Estado Marital",
  ylab = "Frecuencia",
  col = c("darkorchid4", "darkorchid1")
)
# Legend uses the same two colours, in the row order of the table (no, yes).
legend(
  "topleft",
  title = "Loan",
  legend = c("No", "Yes"),
  fill = c("darkorchid4", "darkorchid1")
)

#FUNCIÓN BARPLOT

# Number of clients in each education level.
table(bank$education)
## 
##   primary secondary  tertiary   unknown 
##       678      2306      1350       187
# Cross-tabulate loan status (rows) by education level (columns); the outer
# parentheses make the assignment print its value.
(tab_bank <- table(bank$loan, bank$education))
##      
##       primary secondary tertiary unknown
##   no      584      1890     1176     180
##   yes      94       416      174       7
# Default stacked bar chart of the loan-by-education table.
barplot(height = tab_bank)

Proporción por renglón

# Row proportions: each loan-status row sums to 1.
prop.table(x = tab_bank, margin = 1)
##      
##          primary  secondary   tertiary    unknown
##   no  0.15248042 0.49347258 0.30704961 0.04699739
##   yes 0.13603473 0.60202605 0.25180897 0.01013025
# Column proportions: each education-level column sums to 1.
prop.table(x = tab_bank, margin = 2)
##      
##          primary  secondary   tertiary    unknown
##   no  0.86135693 0.81960104 0.87111111 0.96256684
##   yes 0.13864307 0.18039896 0.12888889 0.03743316

#Proporción sobre el total (la proporción por columna es la llamada anterior, con margin=2)

# No margin given: every cell is divided by the grand total of the table.
prop.table(x = tab_bank)
##      
##          primary  secondary   tertiary    unknown
##   no  0.12917496 0.41804910 0.26011944 0.03981420
##   yes 0.02079186 0.09201504 0.03848706 0.00154833