🧭 Executive Summary

This automated churn predictor generates sample customer data (with company names), trains a machine-learning model, evaluates performance, and saves risk predictions for a connected Shiny dashboard.

📂 1. Load or Generate Data (with Company Names)

# Obtain `churn_data`: simulate and cache a synthetic dataset on the first
# run, otherwise reuse the CSV cached by an earlier run.
churn_csv <- "data/synthetic_churn_data.csv"

if (!file.exists(churn_csv)) {
  dir.create("data", showWarnings = FALSE)
  n <- 1000

  # Raw per-company signals. The order of these draws determines the order in
  # which random numbers are consumed, so it is kept as-is.
  churn_data <- tibble(
    company = paste("Company", seq_len(n)),
    usage_score = runif(n, 0, 100),
    nps = sample(0:10, n, replace = TRUE),
    tickets = rpois(n, 3),
    days_since_login = rpois(n, 30),
    contract_length_months = sample(c(6, 12, 24), n, replace = TRUE)
  )

  # Derived columns: a weighted health score, the logistic churn probability,
  # and a 0/1 churn outcome drawn from that probability.
  churn_data <- churn_data %>%
    mutate(health_score = usage_score * 0.35 + nps * 10 * 0.35 - tickets * 0.1) %>%
    mutate(
      churn_prob = plogis(
        -3 + 0.04 * days_since_login - 0.02 * usage_score - 0.1 * nps + 0.3 * tickets
      )
    ) %>%
    mutate(churn = rbinom(n, 1, churn_prob))

  write_csv(churn_data, churn_csv)
  cat("✅ Synthetic churn dataset created with company names.\n")
} else {
  churn_data <- read_csv(churn_csv, show_col_types = FALSE)
  cat("Loaded existing churn dataset.\n")
}
## ✅ Synthetic churn dataset created with company names.

🔢 2. Prepare Data

# Build the modelling table and an 80/20 train/test split.
# `churn_prob` is the probability the synthetic `churn` label was sampled
# from, so it is removed to avoid leaking the label into the features.
data_model <- churn_data %>% select(-churn_prob)

# `churn` is stored as numeric 0/1. caret bins numeric outcomes by quantiles
# instead of by class, which does not stratify a rare binary outcome, so the
# outcome is passed as a factor to get class-stratified sampling.
train_index <- createDataPartition(
  factor(data_model$churn),
  p = 0.8,
  list = FALSE
)
train_data <- data_model[train_index, ]
test_data  <- data_model[-train_index, ]

🤖 3. Train Model

# Fit a 200-tree classification forest on the training split. `churn` is
# wrapped in factor() so randomForest treats it as a class label, and
# importance = TRUE keeps the importance measures for later inspection.
rf_model <- randomForest(
  formula = factor(churn) ~ usage_score + nps + tickets + days_since_login +
    contract_length_months + health_score,
  data = train_data,
  ntree = 200,
  importance = TRUE
)
# Show the fitted model summary (OOB error and confusion matrix).
print(rf_model)
## 
## Call:
##  randomForest(formula = factor(churn) ~ usage_score + nps + tickets +      days_since_login + contract_length_months + health_score,      data = train_data, ntree = 200, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 200
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 11.62%
## Confusion matrix:
##     0 1 class.error
## 0 702 7 0.009873061
## 1  86 5 0.945054945

📊 4. Evaluate Model

# Score the held-out split with hard class predictions and compare them with
# the observed churn labels.
preds <- predict(rf_model, test_data, type = "response")

# Both factors are pinned to the same two levels so the table stays 2x2 even
# if the model predicts a single class. `positive = "1"` makes sensitivity,
# precision and F1 describe the churn class; by default caret uses the first
# level ("0"), which reports how well non-churners are recognised instead.
conf_matrix <- confusionMatrix(
  data = factor(preds, levels = c("0", "1")),
  reference = factor(test_data$churn, levels = c("0", "1")),
  positive = "1"
)
conf_matrix
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 182  16
##          1   2   0
##                                           
##                Accuracy : 0.91            
##                  95% CI : (0.8615, 0.9458)
##     No Information Rate : 0.92            
##     P-Value [Acc > NIR] : 0.749439        
##                                           
##                   Kappa : -0.0181         
##                                           
##  Mcnemar's Test P-Value : 0.002183        
##                                           
##             Sensitivity : 0.9891          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.9192          
##          Neg Pred Value : 0.0000          
##              Prevalence : 0.9200          
##          Detection Rate : 0.9100          
##    Detection Prevalence : 0.9900          
##       Balanced Accuracy : 0.4946          
##                                           
##        'Positive' Class : 0               
## 

📈 5. Feature Importance

# Turn the forest's importance matrix into a data frame with the feature
# names (stored as row names) in a regular `Feature` column.
importance_df <- rf_model %>%
  importance() %>%
  as.data.frame() %>%
  rownames_to_column(var = "Feature")

# Horizontal bar chart of features ordered by mean decrease in Gini impurity.
ggplot(
  data = importance_df,
  mapping = aes(
    x = reorder(Feature, MeanDecreaseGini),
    y = MeanDecreaseGini
  )
) +
  geom_col(fill = "#2b8cbe") +
  coord_flip() +
  labs(
    title = "Feature Importance for Churn Model",
    x = "Feature",
    y = "Importance"
  ) +
  theme_minimal()

📉 6. Generate Predictions

# Per-class probabilities for the held-out rows. The second column is kept as
# the churn probability (presumably class "1", given the 0/1 labels).
class_probs <- predict(rf_model, test_data, type = "prob")
preds_prob <- class_probs[, 2]

# Attach the probability to each test row and list the highest risks first.
test_results <- test_data %>%
  mutate(predicted_prob = preds_prob) %>%
  arrange(desc(predicted_prob))

# Persist the ranked predictions to CSV.
write_csv(test_results, "data/latest_churn_predictions.csv")
cat("Saved predictions to data/latest_churn_predictions.csv\n")
## Saved predictions to data/latest_churn_predictions.csv

🧠 7. Performance Summary

# Print the overall test-set accuracy from the confusion matrix, to 3 decimals.
model_accuracy <- conf_matrix$overall[["Accuracy"]]
cat("Model Accuracy:", round(model_accuracy, 3), "\n")
## Model Accuracy: 0.91
# Print the F1 score to 3 decimals. It is computed for whichever class
# `conf_matrix` treats as the positive class.
model_f1 <- conf_matrix$byClass[["F1"]]
cat("F1 Score:", round(model_f1, 3), "\n")
## F1 Score: 0.953

⚙️ 8. Cron Automation (optional)

0 6 * * * /usr/local/bin/Rscript -e "rmarkdown::render('~/Projects/churn_predictor_with_company_names.Rmd')"

📊 9. Shiny Dashboard Connection

The output file below feeds the live dashboard.

data/latest_churn_predictions.csv

The Shiny app reads this file automatically and displays company names in the Top 10 At‑Risk Customers table.