Final Project

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for the built-in `cars` data set.
summary(object = cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Packages ----
# One call per line: several `library()` calls on a single line without a
# separator do not parse. The attach order is kept as-is because it decides
# which package wins when two export the same function name.
library(tidymodels)
library(tidyverse)
library(baguette)
library(vip)
library(pdp)
library(here)
library(kernlab)
library(ggplot2)

# Load data ----
# Read the customer retention data, turn the outcome `Status` into a factor
# (needed for classification models), and drop every row containing an NA.
df <- read.csv("customer_retention.csv") %>%
  mutate(Status = factor(Status)) %>%
  na.omit()

# NOTE(review): this directory change runs *after* the CSV has been read, so
# it does not influence where the file is looked up. The path is specific to
# one machine; it is guarded so the script does not stop on other machines.
# The apostrophe-like character in "FALL 24’" is kept exactly as found --
# confirm it matches the real folder name.
project_dir <- "C:/Users/patil/OneDrive - University of Cincinnati/JUNIOR YEAR - FALL 24’/BANA 4080 - Data Mining"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  warning(
    "Project directory not found; working directory left unchanged.",
    call. = FALSE
  )
}

# Count of customers per internet-service type, one panel per contract type.
# `coord_flip()` turns the bars horizontal; `labs()` still refers to the
# original x/y aesthetics.
ggplot(df, aes(InternetService)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  coord_flip() +
  ggtitle("Customer Internet Service and Length of Contract") +
  labs(y = "Count of Contract Type", x = "Customer Internet Service")

# Count of customers per payment method, one panel per contract type.
# The x-axis tick labels are rotated and shrunk so they fit in each panel.
ggplot(df, aes(PaymentMethod)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  ggtitle("Payment Method vs. Type of Contract") +
  theme(axis.text.x = element_text(angle = 50, size = 7, vjust = 0.5)) +
  labs(y = "Count of Customers", x = "Payment Method")

# Count of customers by `Partner` value, one panel per contract type.
ggplot(df, aes(Partner)) +
  geom_bar(fill = "orange", color = "red") +
  facet_wrap(~Contract) +
  ggtitle("Partner vs. Length of Contract") +
  labs(y = "Count of Contract Type", x = "Partner")

# Logistic regression ----
# 70/30 train/test split, stratified on the outcome. The seed is set right
# before each random step so the split and the folds are reproducible.
set.seed(123)
logistic_split <- initial_split(df, prop = 0.7, strata = Status)
logistic_train <- training(logistic_split)
logistic_test <- testing(logistic_split)

# 5-fold cross-validation on the training set, stratified on `Status`.
set.seed(123)
logistic_kfolds <- vfold_cv(logistic_train, v = 5, strata = Status)

# Fit a logistic regression with all predictors on each fold and report the
# resampled performance metrics.
logistic_reg() %>%
  fit_resamples(
    preprocessor = Status ~ .,
    resamples = logistic_kfolds
  ) %>%
  collect_metrics()

# MARS (multivariate adaptive regression splines) ----
# Same seed, proportion and stratification as the logistic-regression split.
set.seed(123)
mars_split <- initial_split(df, prop = 0.7, strata = Status)
mars_train <- training(mars_split)
mars_test <- testing(mars_split)

# All remaining columns are used as predictors of `Status`.
mars_recipe <- recipe(Status ~ ., data = mars_train)

# 5-fold cross-validation on the training set, stratified on `Status`
# (bare column name, matching the other resampling calls in this file).
set.seed(123)
mars_kfolds <- vfold_cv(mars_train, v = 5, strata = Status)

# Model specification with both hyperparameters marked for tuning.
mars_mod <- mars(num_terms = tune(), prod_degree = tune()) %>%
  set_mode("classification")

# Regular grid over the two tuned hyperparameters.
# NOTE(review): `num_terms` is limited to 1-30 and `prod_degree()` uses its
# default range, so `levels = 50` presumably cannot produce 50 distinct values
# per parameter -- confirm the intended grid size.
mars_grid <- grid_regular(
  num_terms(range = c(1, 30)),
  prod_degree(),
  levels = 50
)

# Bundle preprocessing and model, then evaluate every grid point on each fold.
mars_wf <- workflow() %>%
  add_recipe(mars_recipe) %>%
  add_model(mars_mod)

mars_results <- mars_wf %>%
  tune_grid(resamples = mars_kfolds, grid = mars_grid)

# Rank the candidate models by mean ROC AUC, best first.
mars_results %>%
  collect_metrics() %>%
  filter(.metric == "roc_auc") %>%
  arrange(desc(mean))

# Decision tree ----
# Reuses `logistic_train` and `logistic_kfolds` created in the logistic
# regression section, so that section must have been run first.

# Preprocessing: normalise numeric predictors, then dummy-encode nominal ones.
retention_recipe <- recipe(Status ~ ., data = logistic_train) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

# Classification tree fitted with the rpart engine.
dt_mod <- decision_tree(mode = "classification") %>%
  set_engine("rpart")

# Single fit on the full training set.
dt_fit <- workflow() %>%
  add_recipe(retention_recipe) %>%
  add_model(dt_mod) %>%
  fit(data = logistic_train)

# Cross-validated performance using the same model and recipe.
dt_results <- fit_resamples(
  dt_mod,
  preprocessor = retention_recipe,
  resamples = logistic_kfolds
)

collect_metrics(dt_results)

knitr

Final Project

Atherva Patil

2024-12-08

R Markdown

Including Plots