Estadísticas Descriptivas

En el presente capítulo exploraremos el uso de R Markdown para realizar nuestras presentaciones.

# Paso 1: Importación de Datos

# Location of the source CSV. The original machine-specific path is kept as
# the default; set the BANKCHURNERS_CSV environment variable to point at the
# file on any other machine without editing this script.
ruta_csv <- Sys.getenv(
  "BANKCHURNERS_CSV",
  unset = "C:/Users/erick/OneDrive/Desktop/2-StatisticsProgramingForBusinessAnalytics/Clases/BankChurners.csv"
)

# Fail early with a readable message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(ruta_csv)) {
  stop("No se encuentra el archivo de datos: ", ruta_csv, call. = FALSE)
}

# Raw import, followed by a per-column summary.
BankChurners <- read.csv(ruta_csv)
summary(BankChurners)

# BankDB is the working name used by the rest of the script; BankChurners
# keeps referring to the data exactly as imported.
BankDB <- BankChurners
summary(BankDB)
##    CLIENTNUM         Attrition_Flag      Customer_Age      Gender         
##  Min.   :708082083   Length:10127       Min.   :26.00   Length:10127      
##  1st Qu.:713036770   Class :character   1st Qu.:41.00   Class :character  
##  Median :717926358   Mode  :character   Median :46.00   Mode  :character  
##  Mean   :739177606                      Mean   :46.33                     
##  3rd Qu.:773143533                      3rd Qu.:52.00                     
##  Max.   :828343083                      Max.   :73.00                     
##  Dependent_count Education_Level    Marital_Status     Income_Category   
##  Min.   :0.000   Length:10127       Length:10127       Length:10127      
##  1st Qu.:1.000   Class :character   Class :character   Class :character  
##  Median :2.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2.346                                                           
##  3rd Qu.:3.000                                                           
##  Max.   :5.000                                                           
##  Card_Category      Months_on_book  Total_Relationship_Count
##  Length:10127       Min.   :13.00   Min.   :1.000           
##  Class :character   1st Qu.:31.00   1st Qu.:3.000           
##  Mode  :character   Median :36.00   Median :4.000           
##                     Mean   :35.93   Mean   :3.813           
##                     3rd Qu.:40.00   3rd Qu.:5.000           
##                     Max.   :56.00   Max.   :6.000           
##  Months_Inactive_12_mon Contacts_Count_12_mon  Credit_Limit  
##  Min.   :0.000          Min.   :0.000         Min.   : 1438  
##  1st Qu.:2.000          1st Qu.:2.000         1st Qu.: 2555  
##  Median :2.000          Median :2.000         Median : 4549  
##  Mean   :2.341          Mean   :2.455         Mean   : 8632  
##  3rd Qu.:3.000          3rd Qu.:3.000         3rd Qu.:11068  
##  Max.   :6.000          Max.   :6.000         Max.   :34516  
##  Total_Revolving_Bal Avg_Open_To_Buy Total_Amt_Chng_Q4_Q1 Total_Trans_Amt
##  Min.   :   0        Min.   :    3   Min.   :0.0000       Min.   :  510  
##  1st Qu.: 359        1st Qu.: 1324   1st Qu.:0.6310       1st Qu.: 2156  
##  Median :1276        Median : 3474   Median :0.7360       Median : 3899  
##  Mean   :1163        Mean   : 7469   Mean   :0.7599       Mean   : 4404  
##  3rd Qu.:1784        3rd Qu.: 9859   3rd Qu.:0.8590       3rd Qu.: 4741  
##  Max.   :2517        Max.   :34516   Max.   :3.3970       Max.   :18484  
##  Total_Trans_Ct   Total_Ct_Chng_Q4_Q1 Avg_Utilization_Ratio
##  Min.   : 10.00   Min.   :0.0000      Min.   :0.0000       
##  1st Qu.: 45.00   1st Qu.:0.5820      1st Qu.:0.0230       
##  Median : 67.00   Median :0.7020      Median :0.1760       
##  Mean   : 64.86   Mean   :0.7122      Mean   :0.2749       
##  3rd Qu.: 81.00   3rd Qu.:0.8180      3rd Qu.:0.5030       
##  Max.   :139.00   Max.   :3.7140      Max.   :0.9990       
##  Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1
##  Min.   :0.0000077                                                                                                                 
##  1st Qu.:0.0000990                                                                                                                 
##  Median :0.0001815                                                                                                                 
##  Mean   :0.1599975                                                                                                                 
##  3rd Qu.:0.0003373                                                                                                                 
##  Max.   :0.9995800                                                                                                                 
##  Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2
##  Min.   :0.00042                                                                                                                   
##  1st Qu.:0.99966                                                                                                                   
##  Median :0.99982                                                                                                                   
##  Mean   :0.84000                                                                                                                   
##  3rd Qu.:0.99990                                                                                                                   
##  Max.   :0.99999
# Print the compact structure of BankDB: one line per column showing its
# type and first few values.
str(BankDB)
## 'data.frame':    10127 obs. of  23 variables:
##  $ CLIENTNUM                                                                                                                         : int  768805383 818770008 713982108 769911858 709106358 713061558 810347208 818906208 710930508 719661558 ...
##  $ Attrition_Flag                                                                                                                    : chr  "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
##  $ Customer_Age                                                                                                                      : int  45 49 51 40 40 44 51 32 37 48 ...
##  $ Gender                                                                                                                            : chr  "M" "F" "M" "F" ...
##  $ Dependent_count                                                                                                                   : int  3 5 3 4 3 2 4 0 3 2 ...
##  $ Education_Level                                                                                                                   : chr  "High School" "Graduate" "Graduate" "High School" ...
##  $ Marital_Status                                                                                                                    : chr  "Married" "Single" "Married" "Unknown" ...
##  $ Income_Category                                                                                                                   : chr  "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
##  $ Card_Category                                                                                                                     : chr  "Blue" "Blue" "Blue" "Blue" ...
##  $ Months_on_book                                                                                                                    : int  39 44 36 34 21 36 46 27 36 36 ...
##  $ Total_Relationship_Count                                                                                                          : int  5 6 4 3 5 3 6 2 5 6 ...
##  $ Months_Inactive_12_mon                                                                                                            : int  1 1 1 4 1 1 1 2 2 3 ...
##  $ Contacts_Count_12_mon                                                                                                             : int  3 2 0 1 0 2 3 2 0 3 ...
##  $ Credit_Limit                                                                                                                      : num  12691 8256 3418 3313 4716 ...
##  $ Total_Revolving_Bal                                                                                                               : int  777 864 0 2517 0 1247 2264 1396 2517 1677 ...
##  $ Avg_Open_To_Buy                                                                                                                   : num  11914 7392 3418 796 4716 ...
##  $ Total_Amt_Chng_Q4_Q1                                                                                                              : num  1.33 1.54 2.59 1.4 2.17 ...
##  $ Total_Trans_Amt                                                                                                                   : int  1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
##  $ Total_Trans_Ct                                                                                                                    : int  42 33 20 20 28 24 31 36 24 32 ...
##  $ Total_Ct_Chng_Q4_Q1                                                                                                               : num  1.62 3.71 2.33 2.33 2.5 ...
##  $ Avg_Utilization_Ratio                                                                                                             : num  0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...
##  $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1: num  9.34e-05 5.69e-05 2.11e-05 1.34e-04 2.17e-05 ...
##  $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2: num  1 1 1 1 1 ...
# Print the column names of BankDB together with their positions.
names(BankDB)
##  [1] "CLIENTNUM"                                                                                                                         
##  [2] "Attrition_Flag"                                                                                                                    
##  [3] "Customer_Age"                                                                                                                      
##  [4] "Gender"                                                                                                                            
##  [5] "Dependent_count"                                                                                                                   
##  [6] "Education_Level"                                                                                                                   
##  [7] "Marital_Status"                                                                                                                    
##  [8] "Income_Category"                                                                                                                   
##  [9] "Card_Category"                                                                                                                     
## [10] "Months_on_book"                                                                                                                    
## [11] "Total_Relationship_Count"                                                                                                          
## [12] "Months_Inactive_12_mon"                                                                                                            
## [13] "Contacts_Count_12_mon"                                                                                                             
## [14] "Credit_Limit"                                                                                                                      
## [15] "Total_Revolving_Bal"                                                                                                               
## [16] "Avg_Open_To_Buy"                                                                                                                   
## [17] "Total_Amt_Chng_Q4_Q1"                                                                                                              
## [18] "Total_Trans_Amt"                                                                                                                   
## [19] "Total_Trans_Ct"                                                                                                                    
## [20] "Total_Ct_Chng_Q4_Q1"                                                                                                               
## [21] "Avg_Utilization_Ratio"                                                                                                             
## [22] "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1"
## [23] "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2"
# Build the working data frame `bd` by dropping the Naive_Bayes_Classifier_*
# columns of BankDB. They are matched by name prefix rather than by position
# (1:21), so the selection stays correct if the CSV's columns are reordered
# or extended.
es_naive_bayes <- startsWith(names(BankDB), "Naive_Bayes_Classifier_")
# drop = FALSE guarantees a data frame even if only one column remains.
bd <- BankDB[, !es_naive_bayes, drop = FALSE]
summary(bd)
##    CLIENTNUM         Attrition_Flag      Customer_Age      Gender         
##  Min.   :708082083   Length:10127       Min.   :26.00   Length:10127      
##  1st Qu.:713036770   Class :character   1st Qu.:41.00   Class :character  
##  Median :717926358   Mode  :character   Median :46.00   Mode  :character  
##  Mean   :739177606                      Mean   :46.33                     
##  3rd Qu.:773143533                      3rd Qu.:52.00                     
##  Max.   :828343083                      Max.   :73.00                     
##  Dependent_count Education_Level    Marital_Status     Income_Category   
##  Min.   :0.000   Length:10127       Length:10127       Length:10127      
##  1st Qu.:1.000   Class :character   Class :character   Class :character  
##  Median :2.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2.346                                                           
##  3rd Qu.:3.000                                                           
##  Max.   :5.000                                                           
##  Card_Category      Months_on_book  Total_Relationship_Count
##  Length:10127       Min.   :13.00   Min.   :1.000           
##  Class :character   1st Qu.:31.00   1st Qu.:3.000           
##  Mode  :character   Median :36.00   Median :4.000           
##                     Mean   :35.93   Mean   :3.813           
##                     3rd Qu.:40.00   3rd Qu.:5.000           
##                     Max.   :56.00   Max.   :6.000           
##  Months_Inactive_12_mon Contacts_Count_12_mon  Credit_Limit  
##  Min.   :0.000          Min.   :0.000         Min.   : 1438  
##  1st Qu.:2.000          1st Qu.:2.000         1st Qu.: 2555  
##  Median :2.000          Median :2.000         Median : 4549  
##  Mean   :2.341          Mean   :2.455         Mean   : 8632  
##  3rd Qu.:3.000          3rd Qu.:3.000         3rd Qu.:11068  
##  Max.   :6.000          Max.   :6.000         Max.   :34516  
##  Total_Revolving_Bal Avg_Open_To_Buy Total_Amt_Chng_Q4_Q1 Total_Trans_Amt
##  Min.   :   0        Min.   :    3   Min.   :0.0000       Min.   :  510  
##  1st Qu.: 359        1st Qu.: 1324   1st Qu.:0.6310       1st Qu.: 2156  
##  Median :1276        Median : 3474   Median :0.7360       Median : 3899  
##  Mean   :1163        Mean   : 7469   Mean   :0.7599       Mean   : 4404  
##  3rd Qu.:1784        3rd Qu.: 9859   3rd Qu.:0.8590       3rd Qu.: 4741  
##  Max.   :2517        Max.   :34516   Max.   :3.3970       Max.   :18484  
##  Total_Trans_Ct   Total_Ct_Chng_Q4_Q1 Avg_Utilization_Ratio
##  Min.   : 10.00   Min.   :0.0000      Min.   :0.0000       
##  1st Qu.: 45.00   1st Qu.:0.5820      1st Qu.:0.0230       
##  Median : 67.00   Median :0.7020      Median :0.1760       
##  Mean   : 64.86   Mean   :0.7122      Mean   :0.2749       
##  3rd Qu.: 81.00   3rd Qu.:0.8180      3rd Qu.:0.5030       
##  Max.   :139.00   Max.   :3.7140      Max.   :0.9990
# Print the compact structure of bd: one line per column showing its type
# and first few values.
str(bd)
## 'data.frame':    10127 obs. of  21 variables:
##  $ CLIENTNUM               : int  768805383 818770008 713982108 769911858 709106358 713061558 810347208 818906208 710930508 719661558 ...
##  $ Attrition_Flag          : chr  "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
##  $ Customer_Age            : int  45 49 51 40 40 44 51 32 37 48 ...
##  $ Gender                  : chr  "M" "F" "M" "F" ...
##  $ Dependent_count         : int  3 5 3 4 3 2 4 0 3 2 ...
##  $ Education_Level         : chr  "High School" "Graduate" "Graduate" "High School" ...
##  $ Marital_Status          : chr  "Married" "Single" "Married" "Unknown" ...
##  $ Income_Category         : chr  "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
##  $ Card_Category           : chr  "Blue" "Blue" "Blue" "Blue" ...
##  $ Months_on_book          : int  39 44 36 34 21 36 46 27 36 36 ...
##  $ Total_Relationship_Count: int  5 6 4 3 5 3 6 2 5 6 ...
##  $ Months_Inactive_12_mon  : int  1 1 1 4 1 1 1 2 2 3 ...
##  $ Contacts_Count_12_mon   : int  3 2 0 1 0 2 3 2 0 3 ...
##  $ Credit_Limit            : num  12691 8256 3418 3313 4716 ...
##  $ Total_Revolving_Bal     : int  777 864 0 2517 0 1247 2264 1396 2517 1677 ...
##  $ Avg_Open_To_Buy         : num  11914 7392 3418 796 4716 ...
##  $ Total_Amt_Chng_Q4_Q1    : num  1.33 1.54 2.59 1.4 2.17 ...
##  $ Total_Trans_Amt         : int  1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
##  $ Total_Trans_Ct          : int  42 33 20 20 28 24 31 36 24 32 ...
##  $ Total_Ct_Chng_Q4_Q1     : num  1.62 3.71 2.33 2.33 2.5 ...
##  $ Avg_Utilization_Ratio   : num  0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...

# Paso 2: Generar un código para la lectura de los datos

# Print the column names of bd together with their positions.
names(bd)
##  [1] "CLIENTNUM"                "Attrition_Flag"          
##  [3] "Customer_Age"             "Gender"                  
##  [5] "Dependent_count"          "Education_Level"         
##  [7] "Marital_Status"           "Income_Category"         
##  [9] "Card_Category"            "Months_on_book"          
## [11] "Total_Relationship_Count" "Months_Inactive_12_mon"  
## [13] "Contacts_Count_12_mon"    "Credit_Limit"            
## [15] "Total_Revolving_Bal"      "Avg_Open_To_Buy"         
## [17] "Total_Amt_Chng_Q4_Q1"     "Total_Trans_Amt"         
## [19] "Total_Trans_Ct"           "Total_Ct_Chng_Q4_Q1"     
## [21] "Avg_Utilization_Ratio"
# Draw one plot per column of `bd` and record which columns are numeric.
#
# Objects left in the workspace:
#   columna - number of columns in `bd`
#   indicen - positions of the numeric columns (plotted as histograms)
#   indicec - positions of all other columns (plotted as pie charts)
# When a group is empty its index vector is integer(0).
columna <- ncol(bd)

# Classify every column once, up front, instead of growing the index vectors
# with c() inside the loop. unname() keeps them as plain position vectors.
es_numerica <- vapply(bd, is.numeric, logical(1))
indicen <- unname(which(es_numerica))
indicec <- unname(which(!es_numerica))

# 2 x 5 grid: up to ten plots per page. The layout is left active afterwards.
par(mfrow = c(2, 5))

# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:columna would iterate over c(1, 0).
for (i in seq_len(columna)) {
  if (es_numerica[i]) {
    texto1 <- paste("HIST", colnames(bd)[i])
    # col = i picks the i-th palette colour, so neighbouring plots differ.
    hist(bd[[i]], col = i, main = texto1, xlab = colnames(bd)[i])
  } else {
    texto2 <- paste("PIE", colnames(bd)[i])
    # table() counts each distinct value; pie() draws those frequencies.
    pie(table(bd[[i]]), main = texto2)
  }
}