# Load the bank data set from a local CSV file into the `bank` data frame.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
bank <- read.csv(
  file = "/Users/ingridzapatajuarez/Downloads/R/bank2.csv"
)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Compact overview of `bank`: dimensions, then one line per column showing its
# type and first values.
dplyr::glimpse(bank)
## Rows: 4,521
## Columns: 17
## $ age <int> 30, 33, 35, 30, 59, 35, 36, 39, 41, 43, 39, 43, 36, 20, 31, …
## $ job <chr> "unemployed", "services", "management", "management", "blue-…
## $ marital <chr> "married", "married", "single", "married", "married", "singl…
## $ education <chr> "primary", "secondary", "tertiary", "tertiary", "secondary",…
## $ default <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
## $ balance <int> 1787, 4789, 1350, 1476, 0, 747, 307, 147, 221, -88, 9374, 26…
## $ housing <chr> "no", "yes", "yes", "yes", "yes", "no", "yes", "yes", "yes",…
## $ loan <chr> "no", "yes", "no", "yes", "no", "no", "no", "no", "no", "yes…
## $ contact <chr> "cellular", "cellular", "cellular", "unknown", "unknown", "c…
## $ day <int> 19, 11, 16, 3, 5, 23, 14, 6, 14, 17, 20, 17, 13, 30, 29, 29,…
## $ month <chr> "oct", "may", "apr", "jun", "may", "feb", "may", "may", "may…
## $ duration <int> 79, 220, 185, 199, 226, 141, 341, 151, 57, 313, 273, 113, 32…
## $ campaign <int> 1, 1, 1, 4, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 5, 1, 1, 1, …
## $ pdays <int> -1, 339, 330, -1, -1, 176, 330, -1, -1, 147, -1, -1, -1, -1,…
## $ previous <int> 0, 4, 1, 0, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, …
## $ poutcome <chr> "unknown", "failure", "failure", "unknown", "unknown", "fail…
## $ y <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
# Per-column summary of `bank`: quartiles and mean for numeric columns,
# length/class/mode for character columns.
summary(object = bank)
## age job marital education
## Min. :19.00 Length:4521 Length:4521 Length:4521
## 1st Qu.:33.00 Class :character Class :character Class :character
## Median :39.00 Mode :character Mode :character Mode :character
## Mean :41.17
## 3rd Qu.:49.00
## Max. :87.00
## default balance housing loan
## Length:4521 Min. :-3313 Length:4521 Length:4521
## Class :character 1st Qu.: 69 Class :character Class :character
## Mode :character Median : 444 Mode :character Mode :character
## Mean : 1423
## 3rd Qu.: 1480
## Max. :71188
## contact day month duration
## Length:4521 Min. : 1.00 Length:4521 Min. : 4
## Class :character 1st Qu.: 9.00 Class :character 1st Qu.: 104
## Mode :character Median :16.00 Mode :character Median : 185
## Mean :15.92 Mean : 264
## 3rd Qu.:21.00 3rd Qu.: 329
## Max. :31.00 Max. :3025
## campaign pdays previous poutcome
## Min. : 1.000 Min. : -1.00 Min. : 0.0000 Length:4521
## 1st Qu.: 1.000 1st Qu.: -1.00 1st Qu.: 0.0000 Class :character
## Median : 2.000 Median : -1.00 Median : 0.0000 Mode :character
## Mean : 2.794 Mean : 39.77 Mean : 0.5426
## 3rd Qu.: 3.000 3rd Qu.: -1.00 3rd Qu.: 0.0000
## Max. :50.000 Max. :871.00 Max. :25.0000
## y
## Length:4521
## Class :character
## Mode :character
##
##
##
# Age histograms, refined step by step ----

# Defaults only: title and x-axis label are derived from the expression.
hist(bank$age)

# Same histogram with an explicit title and axis labels.
hist(
  bank$age,
  main = "Histograma de Edad",
  xlab = "Edad",
  ylab = "Frecuencia"
)

# Add a fill colour for the bars.
hist(
  bank$age,
  col = "orange",
  main = "Histograma de Edad",
  xlab = "Edad",
  ylab = "Frecuencia"
)

# Request roughly 5 bins (a single number for `breaks` is only a suggestion).
hist(
  bank$age,
  breaks = 5,
  col = "orange",
  main = "Histograma de Edad",
  xlab = "Edad",
  ylab = "Frecuencia"
)
# Bar charts of education level ----

# Calling plot() on a factor draws one bar per level with its count.
plot(factor(bank$education))

# Same chart with a title, axis labels and one colour per education level.
plot(
  factor(bank$education),
  col = c("royalblue", "deepskyblue1", "dodgerblue3", "lavenderblush4"),
  main = "Gráfica de Educación",
  xlab = "Nivel Educativo",
  ylab = "Frecuencia"
)
# Scatter plots of balance against age ----

# Raw data. The maximum balance is far above the third quartile, so a handful
# of points stretch the y axis.
plot(x = bank$age, y = bank$balance)

# Cap balances at 15000 so the bulk of the points is readable. This overwrites
# bank$balance: every statement after this line sees the capped values.
bank$balance <- pmin(bank$balance, 15000)
plot(x = bank$age, y = bank$balance)

# Colour the points by education level using the current palette (a factor
# passed as `col` is interpreted through its integer level codes).
plot(x = bank$age, y = bank$balance, col = factor(bank$education))

# Colour the points by education level with one custom colour per level.
# Indexing the colour vector by the factor maps the levels, in order
# (primary, secondary, tertiary, unknown), to the four colours. Piping the
# column into c() would instead append the colour names to the data.
plot(
  x = bank$age,
  y = bank$balance,
  col = c("royalblue", "seagreen", "purple", "grey")[factor(bank$education)]
)

# Colour the points by loan status and add a matching legend. The points use
# palette entries 1 and 2 (levels "no" and "yes"), so the legend takes its
# fills from palette() instead of hard-coded colour names, which need not
# match the active palette.
plot(x = bank$age, y = bank$balance, col = factor(bank$loan))
legend(
  x = "topleft",
  legend = c("No", "Yes"),
  fill = palette()[1:2],
  title = "Loan"
)
# Age distribution by education level ----

# plot() with a factor x and a numeric y draws one box plot per factor level.
plot(x = factor(bank$education), y = bank$age)

# Same box plots with a title, axis labels and one colour per level.
plot(
  x = factor(bank$education),
  y = bank$age,
  col = c("olivedrab3", "rosybrown3", "royalblue4", "violetred3"),
  main = "Edad por nivel educativo",
  xlab = "Nivel educativo",
  ylab = "Edad"
)

# Single box plot of age over the whole data set.
boxplot(x = bank$age, col = "gold")
# Relationships between pairs of categorical variables ----

# plot() with two factors draws a spine plot: bar widths follow the x levels
# and each bar is split by the share of every y level.
plot(x = factor(bank$marital), y = factor(bank$education))

# Same plot with explicit fills. `col` is applied to the levels of y, and
# education has four levels (primary, secondary, tertiary, unknown), so four
# colours are supplied. With only three, the colours are recycled and
# "primary" and "unknown" end up with the same fill.
plot(
  x = factor(bank$marital),
  y = factor(bank$education),
  col = c("violetred3", "black", "royalblue4", "orange")
)

# Job (x) against loan status (y); one fill per loan level ("no", "yes").
plot(
  x = factor(bank$job),
  y = factor(bank$loan),
  main = "Relación Trabajo/Prestamo",
  col = c("violetred3", "orange")
)
# Cross-tabulate loan status (rows) against job (columns), then show the counts.
tab_bank2 <- table(bank$loan, bank$job)
tab_bank2
##
## admin. blue-collar entrepreneur housemaid management retired
## no 387 790 127 99 849 198
## yes 91 156 41 13 120 32
##
## self-employed services student technician unemployed unknown
## no 153 343 83 649 115 37
## yes 30 74 1 119 13 1
# Stacked bar chart of the loan-by-job counts: one bar per job, split by loan
# status. The legend repeats the bar colours in the same order as the rows of
# the table ("no", then "yes").
barplot(
  height = tab_bank2,
  main = "Relación Trabajo-Préstamo",
  xlab = "Trabajo",
  ylab = "Frecuencia",
  col = c("darkorchid4", "darkorchid1")
)
legend(
  x = "topleft",
  title = "Loan",
  legend = c("No", "Yes"),
  fill = c("darkorchid4", "darkorchid1")
)
# Count of rows per marital status.
table(bank$marital)
##
## divorced married single
## 528 2797 1196
# Cross-tabulate loan status (rows) against marital status (columns), then
# show the counts.
tab_bank1 <- table(bank$loan, bank$marital)
tab_bank1
##
## divorced married single
## no 438 2344 1048
## yes 90 453 148
# barplot() function ----

# Count of rows per education level.
table(bank$education)
##
## primary secondary tertiary unknown
## 678 2306 1350 187
# Cross-tabulate loan status (rows) against education level (columns), then
# show the counts.
tab_bank <- table(bank$loan, bank$education)
tab_bank
##
## primary secondary tertiary unknown
## no 584 1890 1176 180
## yes 94 416 174 7
# Stacked bar chart of the loan-by-education counts with default styling.
barplot(height = tab_bank)