Limpieza y corrección de tipos de dato
# Normalise the column names (e.g. `Usage Frequency` becomes
# `usage_frequency`) so they can be referenced without backticks.
churn <- churn |>
  clean_names()

# Inspect the column types as they were parsed, before any conversion.
str(churn)
## spc_tbl_ [440,833 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ customer_id : num [1:440833] 2 3 4 5 6 8 9 10 11 12 ...
## $ age : num [1:440833] 30 65 55 58 23 51 58 55 39 64 ...
## $ gender : chr [1:440833] "Female" "Female" "Female" "Male" ...
## $ tenure : num [1:440833] 39 49 14 38 32 33 49 37 12 3 ...
## $ usage_frequency : num [1:440833] 14 1 4 21 20 25 12 8 5 25 ...
## $ support_calls : num [1:440833] 5 10 6 7 5 9 3 4 7 2 ...
## $ payment_delay : num [1:440833] 18 8 18 7 8 26 16 15 4 11 ...
## $ subscription_type: chr [1:440833] "Standard" "Basic" "Basic" "Standard" ...
## $ contract_length : chr [1:440833] "Annual" "Monthly" "Quarterly" "Monthly" ...
## $ total_spend : num [1:440833] 932 557 185 396 617 129 821 445 969 415 ...
## $ last_interaction : num [1:440833] 17 6 3 29 20 8 24 30 13 29 ...
## $ churn : num [1:440833] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "spec")=
## .. cols(
## .. CustomerID = col_double(),
## .. Age = col_double(),
## .. Gender = col_character(),
## .. Tenure = col_double(),
## .. `Usage Frequency` = col_double(),
## .. `Support Calls` = col_double(),
## .. `Payment Delay` = col_double(),
## .. `Subscription Type` = col_character(),
## .. `Contract Length` = col_character(),
## .. `Total Spend` = col_double(),
## .. `Last Interaction` = col_double(),
## .. Churn = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Categorical variables arrive as character (or numeric 0/1 for `churn`);
# store them as factors so summaries report per-level counts.
churn[["gender"]] <- as.factor(churn[["gender"]])
churn[["subscription_type"]] <- as.factor(churn[["subscription_type"]])
churn[["contract_length"]] <- as.factor(churn[["contract_length"]])
churn[["churn"]] <- as.factor(churn[["churn"]])

# Per-column summary: quartiles for numeric columns, level counts for factors.
# NOTE(review): the output reports exactly one NA in every column, which
# looks like a single fully blank row -- confirm and decide whether to drop it.
summary(object = churn)
## customer_id age gender tenure
## Min. : 2 Min. :18.00 Female:190580 Min. : 1.00
## 1st Qu.:113622 1st Qu.:29.00 Male :250252 1st Qu.:16.00
## Median :226126 Median :39.00 NA's : 1 Median :32.00
## Mean :225399 Mean :39.37 Mean :31.26
## 3rd Qu.:337739 3rd Qu.:48.00 3rd Qu.:46.00
## Max. :449999 Max. :65.00 Max. :60.00
## NA's :1 NA's :1 NA's :1
## usage_frequency support_calls payment_delay subscription_type
## Min. : 1.00 Min. : 0.000 Min. : 0.00 Basic :143026
## 1st Qu.: 9.00 1st Qu.: 1.000 1st Qu.: 6.00 Premium :148678
## Median :16.00 Median : 3.000 Median :12.00 Standard:149128
## Mean :15.81 Mean : 3.604 Mean :12.97 NA's : 1
## 3rd Qu.:23.00 3rd Qu.: 6.000 3rd Qu.:19.00
## Max. :30.00 Max. :10.000 Max. :30.00
## NA's :1 NA's :1 NA's :1
## contract_length total_spend last_interaction churn
## Annual :177198 Min. : 100.0 Min. : 1.00 0 :190833
## Monthly : 87104 1st Qu.: 480.0 1st Qu.: 7.00 1 :249999
## Quarterly:176530 Median : 661.0 Median :14.00 NA's: 1
## NA's : 1 Mean : 631.6 Mean :14.48
## 3rd Qu.: 830.0 3rd Qu.:22.00
## Max. :1000.0 Max. :30.00
## NA's :1 NA's :1
# Preview the first six rows to eyeball the converted columns.
head(churn, n = 6L)
## # A tibble: 6 × 12
## customer_id age gender tenure usage_frequency support_calls payment_delay
## <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 2 30 Female 39 14 5 18
## 2 3 65 Female 49 1 10 8
## 3 4 55 Female 14 4 6 18
## 4 5 58 Male 38 21 7 7
## 5 6 23 Male 32 20 5 8
## 6 8 51 Male 33 25 9 26
## # ℹ 5 more variables: subscription_type <fct>, contract_length <fct>,
## # total_spend <dbl>, last_interaction <dbl>, churn <fct>
# Re-inspect the structure to confirm the categorical columns are now factors.
str(object = churn)
## spc_tbl_ [440,833 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ customer_id : num [1:440833] 2 3 4 5 6 8 9 10 11 12 ...
## $ age : num [1:440833] 30 65 55 58 23 51 58 55 39 64 ...
## $ gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 1 1 2 1 ...
## $ tenure : num [1:440833] 39 49 14 38 32 33 49 37 12 3 ...
## $ usage_frequency : num [1:440833] 14 1 4 21 20 25 12 8 5 25 ...
## $ support_calls : num [1:440833] 5 10 6 7 5 9 3 4 7 2 ...
## $ payment_delay : num [1:440833] 18 8 18 7 8 26 16 15 4 11 ...
## $ subscription_type: Factor w/ 3 levels "Basic","Premium",..: 3 1 1 3 1 2 3 2 3 3 ...
## $ contract_length : Factor w/ 3 levels "Annual","Monthly",..: 1 2 3 2 2 1 3 1 3 3 ...
## $ total_spend : num [1:440833] 932 557 185 396 617 129 821 445 969 415 ...
## $ last_interaction : num [1:440833] 17 6 3 29 20 8 24 30 13 29 ...
## $ churn : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. CustomerID = col_double(),
## .. Age = col_double(),
## .. Gender = col_character(),
## .. Tenure = col_double(),
## .. `Usage Frequency` = col_double(),
## .. `Support Calls` = col_double(),
## .. `Payment Delay` = col_double(),
## .. `Subscription Type` = col_character(),
## .. `Contract Length` = col_character(),
## .. `Total Spend` = col_double(),
## .. `Last Interaction` = col_double(),
## .. Churn = col_double()
## .. )
## - attr(*, "problems")=<externalptr>