Estadísticas descriptivas

En el presente capítulo de estadística descriptiva, exploraremos el uso de R Markdown para realizar nuestras presentaciones.

# Paso 1: Importación de datos

A continuación se importan los datos.

# Read the customer data set from the project's CSV file.
BankChurners <- read.csv(file = "/cloud/project/BankChurners.csv")

# Overview of every imported column.
summary(object = BankChurners)

# Work only with columns 2 through 21 of the imported table.
bd <- BankChurners[2:21]

# Inspect the type and the first values of each retained column.
str(object = bd)
## 'data.frame':    10127 obs. of  20 variables:
##  $ Attrition_Flag          : chr  "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
##  $ Customer_Age            : int  45 49 51 40 40 44 51 32 37 48 ...
##  $ Gender                  : chr  "M" "F" "M" "F" ...
##  $ Dependent_count         : int  3 5 3 4 3 2 4 0 3 2 ...
##  $ Education_Level         : chr  "High School" "Graduate" "Graduate" "High School" ...
##  $ Marital_Status          : chr  "Married" "Single" "Married" "Unknown" ...
##  $ Income_Category         : chr  "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
##  $ Card_Category           : chr  "Blue" "Blue" "Blue" "Blue" ...
##  $ Months_on_book          : int  39 44 36 34 21 36 46 27 36 36 ...
##  $ Total_Relationship_Count: int  5 6 4 3 5 3 6 2 5 6 ...
##  $ Months_Inactive_12_mon  : int  1 1 1 4 1 1 1 2 2 3 ...
##  $ Contacts_Count_12_mon   : int  3 2 0 1 0 2 3 2 0 3 ...
##  $ Credit_Limit            : num  12691 8256 3418 3313 4716 ...
##  $ Total_Revolving_Bal     : int  777 864 0 2517 0 1247 2264 1396 2517 1677 ...
##  $ Avg_Open_To_Buy         : num  11914 7392 3418 796 4716 ...
##  $ Total_Amt_Chng_Q4_Q1    : num  1.33 1.54 2.59 1.41 2.17 ...
##  $ Total_Trans_Amt         : int  1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
##  $ Total_Trans_Ct          : int  42 33 20 20 28 24 31 36 24 32 ...
##  $ Total_Ct_Chng_Q4_Q1     : num  1.62 3.71 2.33 2.33 2.5 ...
##  $ Avg_Utilization_Ratio   : num  0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...
# Print per-column summary statistics for the retained columns in `bd`.
summary(bd)
##  Attrition_Flag      Customer_Age      Gender          Dependent_count
##  Length:10127       Min.   :26.00   Length:10127       Min.   :0.000  
##  Class :character   1st Qu.:41.00   Class :character   1st Qu.:1.000  
##  Mode  :character   Median :46.00   Mode  :character   Median :2.000  
##                     Mean   :46.33                      Mean   :2.346  
##                     3rd Qu.:52.00                      3rd Qu.:3.000  
##                     Max.   :73.00                      Max.   :5.000  
##  Education_Level    Marital_Status     Income_Category    Card_Category     
##  Length:10127       Length:10127       Length:10127       Length:10127      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Months_on_book  Total_Relationship_Count Months_Inactive_12_mon
##  Min.   :13.00   Min.   :1.000            Min.   :0.000         
##  1st Qu.:31.00   1st Qu.:3.000            1st Qu.:2.000         
##  Median :36.00   Median :4.000            Median :2.000         
##  Mean   :35.93   Mean   :3.813            Mean   :2.341         
##  3rd Qu.:40.00   3rd Qu.:5.000            3rd Qu.:3.000         
##  Max.   :56.00   Max.   :6.000            Max.   :6.000         
##  Contacts_Count_12_mon  Credit_Limit   Total_Revolving_Bal Avg_Open_To_Buy
##  Min.   :0.000         Min.   : 1438   Min.   :   0        Min.   :    3  
##  1st Qu.:2.000         1st Qu.: 2555   1st Qu.: 359        1st Qu.: 1324  
##  Median :2.000         Median : 4549   Median :1276        Median : 3474  
##  Mean   :2.455         Mean   : 8632   Mean   :1163        Mean   : 7469  
##  3rd Qu.:3.000         3rd Qu.:11068   3rd Qu.:1784        3rd Qu.: 9859  
##  Max.   :6.000         Max.   :34516   Max.   :2517        Max.   :34516  
##  Total_Amt_Chng_Q4_Q1 Total_Trans_Amt Total_Trans_Ct   Total_Ct_Chng_Q4_Q1
##  Min.   :0.0000       Min.   :  510   Min.   : 10.00   Min.   :0.0000     
##  1st Qu.:0.6310       1st Qu.: 2156   1st Qu.: 45.00   1st Qu.:0.5820     
##  Median :0.7360       Median : 3899   Median : 67.00   Median :0.7020     
##  Mean   :0.7599       Mean   : 4404   Mean   : 64.86   Mean   :0.7122     
##  3rd Qu.:0.8590       3rd Qu.: 4741   3rd Qu.: 81.00   3rd Qu.:0.8180     
##  Max.   :3.3970       Max.   :18484   Max.   :139.00   Max.   :3.7140     
##  Avg_Utilization_Ratio
##  Min.   :0.0000       
##  1st Qu.:0.0230       
##  Median :0.1760       
##  Mean   :0.2749       
##  3rd Qu.:0.5030       
##  Max.   :0.9990

# Paso 2: Generar un código para la lectura de los datos

Utilizaremos las estructuras de control `for` e `if` y la función `is.numeric`.

# Plot every attribute of `bd` and record which columns are numeric and
# which are not.
#
# Reads:
#   bd       data frame with the attributes to analyse.
# Creates:
#   columna  number of columns in `bd`.
#   indicen  positions of the numeric columns (drawn as histograms).
#   indicec  positions of the remaining columns (drawn as pie charts).
#            Both index vectors are integer(0) when no column matches.
columna <- ncol(bd)

# Classify all columns in one pass instead of growing the index vectors
# inside the loop. unname() keeps the resulting indices free of column
# names, so they print as plain integers.
es_numerica <- unname(vapply(bd, is.numeric, logical(1)))
indicen <- which(es_numerica)
indicec <- which(!es_numerica)

# 2 x 5 grid: each graphics page holds up to ten plots.
par(mfrow = c(2, 5))

# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:columna would iterate over c(1, 0).
for (i in seq_len(columna)) {
  nombre <- colnames(bd)[i]
  texto <- paste("Análisis del atributo ", nombre)

  if (es_numerica[i]) {
    # Numeric attribute: histogram coloured by the column position.
    hist(bd[[i]], col = i, main = texto, xlab = nombre)
  } else {
    # Non-numeric attribute: pie chart of the frequency of each value.
    pie(table(bd[[i]]), main = texto)
  }
}

# Show the positions of the columns classified as numeric.
indicen
##  [1]  2  4  9 10 11 12 13 14 15 16 17 18 19 20
# Show the positions of the columns classified as non-numeric.
indicec
## [1] 1 3 5 6 7 8