En el presente capítulo de estadística descriptiva, exploraremos el uso de R Markdown para realizar nuestras presentaciones.

# Paso 1: Importación de datos

A continuación se importan los datos.
# Load the raw CSV export into a data frame and show a per-column summary.
# NOTE(review): the path is absolute and environment-specific; confirm it
# exists wherever this script is run.
BankChurners <- read.csv(file = "/cloud/project/BankChurners.csv")
summary(BankChurners)

# Keep columns 2 through 21 only (20 variables) and inspect their types.
# NOTE(review): presumably the dropped columns are an identifier and
# trailing helper columns -- confirm against the CSV header.
bd <- BankChurners[2:21]
str(bd)
## 'data.frame': 10127 obs. of 20 variables:
## $ Attrition_Flag : chr "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
## $ Customer_Age : int 45 49 51 40 40 44 51 32 37 48 ...
## $ Gender : chr "M" "F" "M" "F" ...
## $ Dependent_count : int 3 5 3 4 3 2 4 0 3 2 ...
## $ Education_Level : chr "High School" "Graduate" "Graduate" "High School" ...
## $ Marital_Status : chr "Married" "Single" "Married" "Unknown" ...
## $ Income_Category : chr "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
## $ Card_Category : chr "Blue" "Blue" "Blue" "Blue" ...
## $ Months_on_book : int 39 44 36 34 21 36 46 27 36 36 ...
## $ Total_Relationship_Count: int 5 6 4 3 5 3 6 2 5 6 ...
## $ Months_Inactive_12_mon : int 1 1 1 4 1 1 1 2 2 3 ...
## $ Contacts_Count_12_mon : int 3 2 0 1 0 2 3 2 0 3 ...
## $ Credit_Limit : num 12691 8256 3418 3313 4716 ...
## $ Total_Revolving_Bal : int 777 864 0 2517 0 1247 2264 1396 2517 1677 ...
## $ Avg_Open_To_Buy : num 11914 7392 3418 796 4716 ...
## $ Total_Amt_Chng_Q4_Q1 : num 1.33 1.54 2.59 1.41 2.17 ...
## $ Total_Trans_Amt : int 1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
## $ Total_Trans_Ct : int 42 33 20 20 28 24 31 36 24 32 ...
## $ Total_Ct_Chng_Q4_Q1 : num 1.62 3.71 2.33 2.33 2.5 ...
## $ Avg_Utilization_Ratio : num 0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...
# Per-column summary of the trimmed data frame `bd`.
summary(bd)
## Attrition_Flag Customer_Age Gender Dependent_count
## Length:10127 Min. :26.00 Length:10127 Min. :0.000
## Class :character 1st Qu.:41.00 Class :character 1st Qu.:1.000
## Mode :character Median :46.00 Mode :character Median :2.000
## Mean :46.33 Mean :2.346
## 3rd Qu.:52.00 3rd Qu.:3.000
## Max. :73.00 Max. :5.000
## Education_Level Marital_Status Income_Category Card_Category
## Length:10127 Length:10127 Length:10127 Length:10127
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Months_on_book Total_Relationship_Count Months_Inactive_12_mon
## Min. :13.00 Min. :1.000 Min. :0.000
## 1st Qu.:31.00 1st Qu.:3.000 1st Qu.:2.000
## Median :36.00 Median :4.000 Median :2.000
## Mean :35.93 Mean :3.813 Mean :2.341
## 3rd Qu.:40.00 3rd Qu.:5.000 3rd Qu.:3.000
## Max. :56.00 Max. :6.000 Max. :6.000
## Contacts_Count_12_mon Credit_Limit Total_Revolving_Bal Avg_Open_To_Buy
## Min. :0.000 Min. : 1438 Min. : 0 Min. : 3
## 1st Qu.:2.000 1st Qu.: 2555 1st Qu.: 359 1st Qu.: 1324
## Median :2.000 Median : 4549 Median :1276 Median : 3474
## Mean :2.455 Mean : 8632 Mean :1163 Mean : 7469
## 3rd Qu.:3.000 3rd Qu.:11068 3rd Qu.:1784 3rd Qu.: 9859
## Max. :6.000 Max. :34516 Max. :2517 Max. :34516
## Total_Amt_Chng_Q4_Q1 Total_Trans_Amt Total_Trans_Ct Total_Ct_Chng_Q4_Q1
## Min. :0.0000 Min. : 510 Min. : 10.00 Min. :0.0000
## 1st Qu.:0.6310 1st Qu.: 2156 1st Qu.: 45.00 1st Qu.:0.5820
## Median :0.7360 Median : 3899 Median : 67.00 Median :0.7020
## Mean :0.7599 Mean : 4404 Mean : 64.86 Mean :0.7122
## 3rd Qu.:0.8590 3rd Qu.: 4741 3rd Qu.: 81.00 3rd Qu.:0.8180
## Max. :3.3970 Max. :18484 Max. :139.00 Max. :3.7140
## Avg_Utilization_Ratio
## Min. :0.0000
## 1st Qu.:0.0230
## Median :0.1760
## Mean :0.2749
## 3rd Qu.:0.5030
## Max. :0.9990
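Utilizaremos las estructuras de control `for` e `if` junto con la función `is.numeric` para recorrer las columnas de `bd`: las columnas numéricas se grafican con un histograma y las demás con un gráfico circular.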
# Plot every column of the data frame `bd` and record which column
# positions are numeric and which are not.
#   - numeric columns   -> histogram; positions stored in `indicen`
#   - all other columns -> pie chart of the frequency table; positions
#                          stored in `indicec`
# Requires `bd` to already exist as a data frame.
columna <- ncol(bd)

# Classify all columns in one pass instead of growing the index vectors
# with c() inside the loop. unname() keeps the printed result a plain
# integer vector without column-name labels.
# Note: when a class has no columns the result is integer(0), not NULL.
es_numerica <- vapply(bd, is.numeric, logical(1))
indicen <- unname(which(es_numerica))
indicec <- unname(which(!es_numerica))

# 2 x 5 grid: up to ten plots per page.
par(mfrow = c(2, 5))

# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:columna would iterate over c(1, 0).
for (i in seq_len(columna)) {
  # The title is the same for both plot types, so build it once.
  texto <- paste("Análisis del atributo ", colnames(bd)[i])
  if (es_numerica[[i]]) {
    # `col = i` gives each histogram a different palette colour.
    hist(bd[[i]], col = i, main = texto, xlab = colnames(bd)[i])
  } else {
    pie(table(bd[[i]]), main = texto)
  }
}
# Column positions of `bd` that were classified as numeric.
indicen
## [1] 2 4 9 10 11 12 13 14 15 16 17 18 19 20
# Column positions of `bd` that were classified as non-numeric.
indicec
## [1] 1 3 5 6 7 8