En el presente capítulo exploraremos el uso de R Markdown para realizar nuestras presentaciones.
# Paso 1: Importación de datos
# Load the bank-churn CSV export and keep a working copy under a second name.
BankChurners <- read.csv(
  file = "C:/Users/erick/OneDrive/Desktop/2-StatisticsProgramingForBusinessAnalytics/Clases/BankChurners.csv"
)
BankDB <- BankChurners

# Print per-column summaries of both objects (BankDB is a plain copy, so the
# two summaries show the same data).
summary(BankChurners)
summary(BankDB)
## CLIENTNUM Attrition_Flag Customer_Age Gender
## Min. :708082083 Length:10127 Min. :26.00 Length:10127
## 1st Qu.:713036770 Class :character 1st Qu.:41.00 Class :character
## Median :717926358 Mode :character Median :46.00 Mode :character
## Mean :739177606 Mean :46.33
## 3rd Qu.:773143533 3rd Qu.:52.00
## Max. :828343083 Max. :73.00
## Dependent_count Education_Level Marital_Status Income_Category
## Min. :0.000 Length:10127 Length:10127 Length:10127
## 1st Qu.:1.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :2.346
## 3rd Qu.:3.000
## Max. :5.000
## Card_Category Months_on_book Total_Relationship_Count
## Length:10127 Min. :13.00 Min. :1.000
## Class :character 1st Qu.:31.00 1st Qu.:3.000
## Mode :character Median :36.00 Median :4.000
## Mean :35.93 Mean :3.813
## 3rd Qu.:40.00 3rd Qu.:5.000
## Max. :56.00 Max. :6.000
## Months_Inactive_12_mon Contacts_Count_12_mon Credit_Limit
## Min. :0.000 Min. :0.000 Min. : 1438
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 2555
## Median :2.000 Median :2.000 Median : 4549
## Mean :2.341 Mean :2.455 Mean : 8632
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:11068
## Max. :6.000 Max. :6.000 Max. :34516
## Total_Revolving_Bal Avg_Open_To_Buy Total_Amt_Chng_Q4_Q1 Total_Trans_Amt
## Min. : 0 Min. : 3 Min. :0.0000 Min. : 510
## 1st Qu.: 359 1st Qu.: 1324 1st Qu.:0.6310 1st Qu.: 2156
## Median :1276 Median : 3474 Median :0.7360 Median : 3899
## Mean :1163 Mean : 7469 Mean :0.7599 Mean : 4404
## 3rd Qu.:1784 3rd Qu.: 9859 3rd Qu.:0.8590 3rd Qu.: 4741
## Max. :2517 Max. :34516 Max. :3.3970 Max. :18484
## Total_Trans_Ct Total_Ct_Chng_Q4_Q1 Avg_Utilization_Ratio
## Min. : 10.00 Min. :0.0000 Min. :0.0000
## 1st Qu.: 45.00 1st Qu.:0.5820 1st Qu.:0.0230
## Median : 67.00 Median :0.7020 Median :0.1760
## Mean : 64.86 Mean :0.7122 Mean :0.2749
## 3rd Qu.: 81.00 3rd Qu.:0.8180 3rd Qu.:0.5030
## Max. :139.00 Max. :3.7140 Max. :0.9990
## Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1
## Min. :0.0000077
## 1st Qu.:0.0000990
## Median :0.0001815
## Mean :0.1599975
## 3rd Qu.:0.0003373
## Max. :0.9995800
## Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2
## Min. :0.00042
## 1st Qu.:0.99966
## Median :0.99982
## Mean :0.84000
## 3rd Qu.:0.99990
## Max. :0.99999
# Print the structure of BankDB: one line per column with its type and its
# first few values.
str(BankDB)
## 'data.frame': 10127 obs. of 23 variables:
## $ CLIENTNUM : int 768805383 818770008 713982108 769911858 709106358 713061558 810347208 818906208 710930508 719661558 ...
## $ Attrition_Flag : chr "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
## $ Customer_Age : int 45 49 51 40 40 44 51 32 37 48 ...
## $ Gender : chr "M" "F" "M" "F" ...
## $ Dependent_count : int 3 5 3 4 3 2 4 0 3 2 ...
## $ Education_Level : chr "High School" "Graduate" "Graduate" "High School" ...
## $ Marital_Status : chr "Married" "Single" "Married" "Unknown" ...
## $ Income_Category : chr "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
## $ Card_Category : chr "Blue" "Blue" "Blue" "Blue" ...
## $ Months_on_book : int 39 44 36 34 21 36 46 27 36 36 ...
## $ Total_Relationship_Count : int 5 6 4 3 5 3 6 2 5 6 ...
## $ Months_Inactive_12_mon : int 1 1 1 4 1 1 1 2 2 3 ...
## $ Contacts_Count_12_mon : int 3 2 0 1 0 2 3 2 0 3 ...
## $ Credit_Limit : num 12691 8256 3418 3313 4716 ...
## $ Total_Revolving_Bal : int 777 864 0 2517 0 1247 2264 1396 2517 1677 ...
## $ Avg_Open_To_Buy : num 11914 7392 3418 796 4716 ...
## $ Total_Amt_Chng_Q4_Q1 : num 1.33 1.54 2.59 1.4 2.17 ...
## $ Total_Trans_Amt : int 1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
## $ Total_Trans_Ct : int 42 33 20 20 28 24 31 36 24 32 ...
## $ Total_Ct_Chng_Q4_Q1 : num 1.62 3.71 2.33 2.33 2.5 ...
## $ Avg_Utilization_Ratio : num 0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...
## $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1: num 9.34e-05 5.69e-05 2.11e-05 1.34e-04 2.17e-05 ...
## $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2: num 1 1 1 1 1 ...
# List the column names of BankDB together with their positions.
names(BankDB)
## [1] "CLIENTNUM"
## [2] "Attrition_Flag"
## [3] "Customer_Age"
## [4] "Gender"
## [5] "Dependent_count"
## [6] "Education_Level"
## [7] "Marital_Status"
## [8] "Income_Category"
## [9] "Card_Category"
## [10] "Months_on_book"
## [11] "Total_Relationship_Count"
## [12] "Months_Inactive_12_mon"
## [13] "Contacts_Count_12_mon"
## [14] "Credit_Limit"
## [15] "Total_Revolving_Bal"
## [16] "Avg_Open_To_Buy"
## [17] "Total_Amt_Chng_Q4_Q1"
## [18] "Total_Trans_Amt"
## [19] "Total_Trans_Ct"
## [20] "Total_Ct_Chng_Q4_Q1"
## [21] "Avg_Utilization_Ratio"
## [22] "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1"
## [23] "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2"
# Build the working data frame `bd` by dropping the two
# "Naive_Bayes_Classifier_..." columns (positions 22 and 23 in names(BankDB)).
# Selecting by name prefix instead of the fixed positions 1:21 keeps the result
# correct if the column order or count of the CSV ever changes.
is_naive_bayes <- startsWith(names(BankDB), "Naive_Bayes_Classifier")
# drop = FALSE guarantees a data frame even if only one column remains.
bd <- BankDB[, !is_naive_bayes, drop = FALSE]
summary(bd)
## CLIENTNUM Attrition_Flag Customer_Age Gender
## Min. :708082083 Length:10127 Min. :26.00 Length:10127
## 1st Qu.:713036770 Class :character 1st Qu.:41.00 Class :character
## Median :717926358 Mode :character Median :46.00 Mode :character
## Mean :739177606 Mean :46.33
## 3rd Qu.:773143533 3rd Qu.:52.00
## Max. :828343083 Max. :73.00
## Dependent_count Education_Level Marital_Status Income_Category
## Min. :0.000 Length:10127 Length:10127 Length:10127
## 1st Qu.:1.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :2.346
## 3rd Qu.:3.000
## Max. :5.000
## Card_Category Months_on_book Total_Relationship_Count
## Length:10127 Min. :13.00 Min. :1.000
## Class :character 1st Qu.:31.00 1st Qu.:3.000
## Mode :character Median :36.00 Median :4.000
## Mean :35.93 Mean :3.813
## 3rd Qu.:40.00 3rd Qu.:5.000
## Max. :56.00 Max. :6.000
## Months_Inactive_12_mon Contacts_Count_12_mon Credit_Limit
## Min. :0.000 Min. :0.000 Min. : 1438
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 2555
## Median :2.000 Median :2.000 Median : 4549
## Mean :2.341 Mean :2.455 Mean : 8632
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:11068
## Max. :6.000 Max. :6.000 Max. :34516
## Total_Revolving_Bal Avg_Open_To_Buy Total_Amt_Chng_Q4_Q1 Total_Trans_Amt
## Min. : 0 Min. : 3 Min. :0.0000 Min. : 510
## 1st Qu.: 359 1st Qu.: 1324 1st Qu.:0.6310 1st Qu.: 2156
## Median :1276 Median : 3474 Median :0.7360 Median : 3899
## Mean :1163 Mean : 7469 Mean :0.7599 Mean : 4404
## 3rd Qu.:1784 3rd Qu.: 9859 3rd Qu.:0.8590 3rd Qu.: 4741
## Max. :2517 Max. :34516 Max. :3.3970 Max. :18484
## Total_Trans_Ct Total_Ct_Chng_Q4_Q1 Avg_Utilization_Ratio
## Min. : 10.00 Min. :0.0000 Min. :0.0000
## 1st Qu.: 45.00 1st Qu.:0.5820 1st Qu.:0.0230
## Median : 67.00 Median :0.7020 Median :0.1760
## Mean : 64.86 Mean :0.7122 Mean :0.2749
## 3rd Qu.: 81.00 3rd Qu.:0.8180 3rd Qu.:0.5030
## Max. :139.00 Max. :3.7140 Max. :0.9990
# Print the structure of bd: one line per column with its type and its first
# few values.
str(bd)
## 'data.frame': 10127 obs. of 21 variables:
## $ CLIENTNUM : int 768805383 818770008 713982108 769911858 709106358 713061558 810347208 818906208 710930508 719661558 ...
## $ Attrition_Flag : chr "Existing Customer" "Existing Customer" "Existing Customer" "Existing Customer" ...
## $ Customer_Age : int 45 49 51 40 40 44 51 32 37 48 ...
## $ Gender : chr "M" "F" "M" "F" ...
## $ Dependent_count : int 3 5 3 4 3 2 4 0 3 2 ...
## $ Education_Level : chr "High School" "Graduate" "Graduate" "High School" ...
## $ Marital_Status : chr "Married" "Single" "Married" "Unknown" ...
## $ Income_Category : chr "$60K - $80K" "Less than $40K" "$80K - $120K" "Less than $40K" ...
## $ Card_Category : chr "Blue" "Blue" "Blue" "Blue" ...
## $ Months_on_book : int 39 44 36 34 21 36 46 27 36 36 ...
## $ Total_Relationship_Count: int 5 6 4 3 5 3 6 2 5 6 ...
## $ Months_Inactive_12_mon : int 1 1 1 4 1 1 1 2 2 3 ...
## $ Contacts_Count_12_mon : int 3 2 0 1 0 2 3 2 0 3 ...
## $ Credit_Limit : num 12691 8256 3418 3313 4716 ...
## $ Total_Revolving_Bal : int 777 864 0 2517 0 1247 2264 1396 2517 1677 ...
## $ Avg_Open_To_Buy : num 11914 7392 3418 796 4716 ...
## $ Total_Amt_Chng_Q4_Q1 : num 1.33 1.54 2.59 1.4 2.17 ...
## $ Total_Trans_Amt : int 1144 1291 1887 1171 816 1088 1330 1538 1350 1441 ...
## $ Total_Trans_Ct : int 42 33 20 20 28 24 31 36 24 32 ...
## $ Total_Ct_Chng_Q4_Q1 : num 1.62 3.71 2.33 2.33 2.5 ...
## $ Avg_Utilization_Ratio : num 0.061 0.105 0 0.76 0 0.311 0.066 0.048 0.113 0.144 ...
# Paso 2: Generar un código para la lectura de los datos
# List the column names of bd together with their positions.
names(bd)
## [1] "CLIENTNUM" "Attrition_Flag"
## [3] "Customer_Age" "Gender"
## [5] "Dependent_count" "Education_Level"
## [7] "Marital_Status" "Income_Category"
## [9] "Card_Category" "Months_on_book"
## [11] "Total_Relationship_Count" "Months_Inactive_12_mon"
## [13] "Contacts_Count_12_mon" "Credit_Limit"
## [15] "Total_Revolving_Bal" "Avg_Open_To_Buy"
## [17] "Total_Amt_Chng_Q4_Q1" "Total_Trans_Amt"
## [19] "Total_Trans_Ct" "Total_Ct_Chng_Q4_Q1"
## [21] "Avg_Utilization_Ratio"
# Draw one chart per column of `bd`: a histogram for numeric columns and a pie
# chart of value counts for every other column. Also records the column
# positions of each kind in `indicen` (numeric) and `indicec` (non-numeric).
columna <- ncol(bd)

# Classify every column once, up front, instead of growing the index vectors
# inside the loop. Both vectors are empty integer vectors when no column of
# that kind exists.
is_num <- vapply(bd, is.numeric, logical(1), USE.NAMES = FALSE)
indicen <- which(is_num)
indicec <- which(!is_num)

# Lay the charts out on a 2 x 5 grid; par() returns the previous settings so
# they can be restored once all charts are drawn.
old_par <- par(mfrow = c(2, 5))

# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:columna would iterate over c(1, 0).
for (i in seq_len(columna)) {
  col_name <- colnames(bd)[i]
  if (is_num[i]) {
    # The column position doubles as the fill colour index.
    hist(bd[[i]], col = i, main = paste("HIST", col_name), xlab = col_name)
  } else {
    pie(table(bd[[i]]), main = paste("PIE", col_name))
  }
}

# Restore the plotting layout that was active before this block.
par(old_par)