This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics for the `cars` data set.
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Setup ----
# Attach the packages used by the analysis. Each call sits on its own line:
# several statements on one line without a separator do not parse in R.
library(tidymodels)
library(tidyverse)
library(baguette)
library(vip)
library(pdp)
library(here)
library(kernlab)
library(ggplot2)

# Load the customer retention data, turn the outcome column `Status` into a
# factor, and drop every row that contains a missing value.
df <- read.csv("customer_retention.csv")
df <- mutate(df, Status = factor(Status))
df <- na.omit(df)

# NOTE(review): this absolute path only exists on the author's machine, and it
# runs after read.csv(), so it does not affect where the CSV above is read
# from. Consider removing it or resolving paths with here::here() (the `here`
# package is already attached) -- confirm nothing later relies on it.
# NOTE(review): the apostrophe after "FALL 24" was restored to ASCII together
# with the other typographic quotes -- confirm it matches the folder name.
setwd("C:/Users/patil/OneDrive - University of Cincinnati/JUNIOR YEAR - FALL 24'/BANA 4080 - Data Mining")
# Exploratory plots ----
# All string literals use ASCII double quotes; typographic quotes are not
# valid string delimiters in R.

# Bar chart of InternetService counts, one panel per Contract value, with the
# axes flipped so the bars run horizontally.
ggplot(df, aes(InternetService)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  coord_flip() +
  ggtitle("Customer Internet Service and Length of Contract") +
  labs(y = "Count of Contract Type", x = "Customer Internet Service")

# Bar chart of PaymentMethod counts, one panel per Contract value. The x-axis
# labels are rotated and shrunk via theme().
ggplot(df, aes(PaymentMethod)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  ggtitle("Payment Method vs. Type of Contract") +
  theme(axis.text.x = element_text(angle = 50, size = 7, vjust = 0.5)) +
  labs(y = "Count of Customers", x = "Payment Method")

# Bar chart of Partner counts, one panel per Contract value.
ggplot(df, aes(Partner)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  ggtitle("Partner vs. Length of Contract") +
  labs(y = "Count of Contract Type", x = "Partner")
# Logistic regression ----
# 70/30 train/test split of `df`, stratified on the outcome `Status`.
# The seed is set immediately before each random step so it is reproducible.
set.seed(123)
logistic_split <- initial_split(df, prop = 0.7, strata = Status)
logistic_train <- training(logistic_split)
logistic_test <- testing(logistic_split)

# 5-fold cross-validation folds on the training set, stratified on `Status`.
set.seed(123)
logistic_kfolds <- vfold_cv(logistic_train, v = 5, strata = Status)

# Fit a default logistic regression on every fold using all other columns as
# predictors, then print the resampled performance metrics.
logistic_reg() %>%
  fit_resamples(Status ~ ., logistic_kfolds) %>%
  collect_metrics()
# MARS (multivariate adaptive regression splines) ----
# 70/30 train/test split of `df`, stratified on the outcome `Status`.
set.seed(123)
mars_split <- initial_split(df, prop = 0.7, strata = Status)
mars_train <- training(mars_split)
mars_test <- testing(mars_split)

# Recipe with `Status` as the outcome and every other column as a predictor;
# no preprocessing steps are added.
mars_recipe <- recipe(Status ~ ., data = mars_train)

# 5-fold cross-validation folds on the MARS training set. `strata` is given as
# a bare column name, matching the other strata arguments in this file.
set.seed(123)
mars_kfolds <- vfold_cv(mars_train, v = 5, strata = Status)

# Classification MARS model whose number of retained terms and interaction
# degree are both left to be tuned.
mars_mod <- mars(num_terms = tune(), prod_degree = tune()) %>%
  set_mode("classification")

# Regular tuning grid: num_terms restricted to 1-30, prod_degree over its
# default range, requesting up to 50 levels per parameter.
mars_grid <- grid_regular(
  num_terms(range = c(1, 30)),
  prod_degree(),
  levels = 50
)

# Bundle the recipe and the model into a single workflow.
mars_wf <- workflow() %>%
  add_recipe(mars_recipe) %>%
  add_model(mars_mod)

# Evaluate every grid candidate on the cross-validation folds.
mars_results <- mars_wf %>%
  tune_grid(resamples = mars_kfolds, grid = mars_grid)

# Show the ROC AUC of each candidate, best mean first.
mars_results %>%
  collect_metrics() %>%
  filter(.metric == "roc_auc") %>%
  arrange(desc(mean))
# Decision tree ----
# Recipe on the logistic-regression training set: normalize the numeric
# predictors, then dummy-encode the nominal predictors.
retention_recipe <- recipe(Status ~ ., data = logistic_train) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

# Classification decision tree using the rpart engine.
dt_mod <- decision_tree(mode = "classification") %>%
  set_engine("rpart")

# Fit the recipe + tree workflow once on the full training set.
dt_fit <- workflow() %>%
  add_recipe(retention_recipe) %>%
  add_model(dt_mod) %>%
  fit(data = logistic_train)

# Resample the same model and recipe over the cross-validation folds created
# for the logistic regression, then print the resampled metrics.
dt_results <- fit_resamples(dt_mod, retention_recipe, logistic_kfolds)

collect_metrics(dt_results)
knitr