# Set global chunk options for the document
knitr::opts_chunk$set(
echo = TRUE, # Show code in the output
warning = FALSE, # Suppress warnings
message = FALSE # Suppress messages
)Morgan State University
Department of Information Science & Systems
Fall 2024
INSS 615: Data Wrangling for Visualization
Title: Customer Churn Analysis For a European Bank
Name: AMOLE ADEYEMI & OLUWAFEMI SAMMUEL ESAN
# Load necessary library
library(rvest)
library(tidyverse)
library(readxl)
library(dplyr)
library(openxlsx)## CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## 1 15634602 HARGRAVE -500 1 Female 42 2 £ -5000
## 2 15647311 hill 1500 3 Female 41 1 £ 9999999
## 3 15619304 ONIO 2000 1 Female 42 8 £ -300
## 4 15701354 boni -100 1 Female 39 1 £ 888888
## 5 15737888 MITCHELL 9999 3 Female 43 2 £ 1e+09
## 6 15574012 chu 645 3 Male 44 8 £ 113755.78
## NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
## 1 1 1 1 £ 101348.88 1
## 2 1 0 1 £ 112542.58 0
## 3 3 1 0 £ 113931.57 1
## 4 2 0 0 £ 93826.63 0
## 5 1 1 1 £ 79084.1 0
## 6 2 1 0 £ 149756.71 1
# Remove all "-" characters from the dataset
bank <- bank %>%
mutate(across(everything(), ~ str_replace_all(., "-", "")))
# View the cleaned dataset
print(head(bank))## CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## 1 15634602 HARGRAVE 500 1 Female 42 2 £ 5000
## 2 15647311 hill 1500 3 Female 41 1 £ 9999999
## 3 15619304 ONIO 2000 1 Female 42 8 £ 300
## 4 15701354 boni 100 1 Female 39 1 £ 888888
## 5 15737888 MITCHELL 9999 3 Female 43 2 £ 1e+09
## 6 15574012 chu 645 3 Male 44 8 £ 113755.78
## NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
## 1 1 1 1 £ 101348.88 1
## 2 1 0 1 £ 112542.58 0
## 3 3 1 0 £ 113931.57 1
## 4 2 0 0 £ 93826.63 0
## 5 1 1 1 £ 79084.1 0
## 6 2 1 0 £ 149756.71 1
# Remove £ from all columns
bank <- bank %>%
mutate(across(everything(), ~ str_remove(., "£")))
# View the cleaned DataFrame
print(head(bank))## CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## 1 15634602 HARGRAVE 500 1 Female 42 2 5000
## 2 15647311 hill 1500 3 Female 41 1 9999999
## 3 15619304 ONIO 2000 1 Female 42 8 300
## 4 15701354 boni 100 1 Female 39 1 888888
## 5 15737888 MITCHELL 9999 3 Female 43 2 1e+09
## 6 15574012 chu 645 3 Male 44 8 113755.78
## NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
## 1 1 1 1 101348.88 1
## 2 1 0 1 112542.58 0
## 3 3 1 0 113931.57 1
## 4 2 0 0 93826.63 0
## 5 1 1 1 79084.1 0
## 6 2 1 0 149756.71 1
## Rows: 10,005
## Columns: 13
## $ CustomerId <chr> "15634602", "15647311", "15619304", "15701354", "15737…
## $ Surname <chr> "HARGRAVE", "hill", "ONIO", "boni", "MITCHELL", "chu",…
## $ CreditScore <chr> "500", "1500", "2000", "100", "9999", "645", "822", "3…
## $ Geography <chr> "1", "3", "1", "1", "3", "3", "1", "2", "1", "1", "1",…
## $ Gender <chr> "Female", "Female", "Female", "Female", "Female", "Mal…
## $ Age <chr> "42", "41", "42", "39", "43", "44", "50", "29", "44", …
## $ Tenure <chr> "2", "1", "8", "1", "2", "8", "7", "4", "4", NA, "6", …
## $ Balance <chr> " 5000", " 9999999", " 300", " 888888", " 1e+09", " 11…
## $ NumOfProducts <chr> "1", "1", "3", "2", "1", "2", "2", "4", "2", "1", "2",…
## $ HasCrCard <chr> "1", "0", "1", "0", "1", "1", "1", "1", "0", "1", "0",…
## $ IsActiveMember <chr> "1", "1", "0", "0", "1", "0", "1", "0", "1", "1", "0",…
## $ EstimatedSalary <chr> " 101348.88", " 112542.58", " 113931.57", " 93826.63",…
## $ Exited <chr> "1", "0", "1", "0", "0", "1", "0", "1", "0", "0", "0",…
## [1] "CustomerId" "Surname" "CreditScore" "Geography"
## [5] "Gender" "Age" "Tenure" "Balance"
## [9] "NumOfProducts" "HasCrCard" "IsActiveMember" "EstimatedSalary"
## [13] "Exited"
# Enforce correct data types
bank <- bank %>%
mutate(
CustomerId = as.character(CustomerId), # CustomerId as character
Surname = as.character(Surname), # Surname as character
CreditScore = as.integer(CreditScore), # CreditScore as integer
Geography = as.factor(Geography), # Geography as factor
Gender = as.factor(Gender), # Gender as factor
Age = as.numeric(Age), # Age as integer
Tenure = as.integer(Tenure), # Tenure as integer
Balance = as.numeric(Balance), # Balance as numeric
NumOfProducts = as.integer(NumOfProducts), # NumOfProducts as integer
HasCrCard = as.factor(HasCrCard), # HasCrCard as logical (1/0 -> TRUE/FALSE)
IsActiveMember = as.factor(IsActiveMember), # IsActiveMember as logical (1/0 -> TRUE/FALSE)
EstimatedSalary = round(as.numeric(EstimatedSalary),2), # EstimatedSalary as numeric
Exited = as.factor(Exited) # Exited as logical (1/0 -> TRUE/FALSE)
)
# View structure of the updated dataset
str(bank)## 'data.frame': 10005 obs. of 13 variables:
## $ CustomerId : chr "15634602" "15647311" "15619304" "15701354" ...
## $ Surname : chr "HARGRAVE" "hill" "ONIO" "boni" ...
## $ CreditScore : int 500 1500 2000 100 9999 645 822 376 501 684 ...
## $ Geography : Factor w/ 3 levels "1","2","3": 1 3 1 1 3 3 1 2 1 1 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 2 2 1 2 2 ...
## $ Age : num 42 41 42 39 43 44 50 29 44 27 ...
## $ Tenure : int 2 1 8 1 2 8 7 4 4 NA ...
## $ Balance : num 5.00e+03 1.00e+07 3.00e+02 8.89e+05 1.00e+09 ...
## $ NumOfProducts : int 1 1 3 2 1 2 2 4 2 1 ...
## $ HasCrCard : Factor w/ 2 levels "0","1": 2 1 2 1 2 2 2 2 1 2 ...
## $ IsActiveMember : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 2 2 ...
## $ EstimatedSalary: num 101349 112543 113932 93827 79084 ...
## $ Exited : Factor w/ 2 levels "0","1": 2 1 2 1 1 2 1 2 1 1 ...
## CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## 1 15634602 HARGRAVE 500 1 Female 42 2 5000.0
## 2 15647311 hill 1500 3 Female 41 1 9999999.0
## 3 15619304 ONIO 2000 1 Female 42 8 300.0
## 4 15701354 boni 100 1 Female 39 1 888888.0
## 5 15737888 MITCHELL 9999 3 Female 43 2 1000000000.0
## 6 15574012 chu 645 3 Male 44 8 113755.8
## NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
## 1 1 1 1 101348.88 1
## 2 1 0 1 112542.58 0
## 3 3 1 0 113931.57 1
## 4 2 0 0 93826.63 0
## 5 1 1 1 79084.10 0
## 6 2 1 0 149756.71 1
#checking missing values
#--is.na(bank)
# Calculate percentage of missing values for each column
# sapply(bank, function(col) {
# mean(is.na(col)) * 100
# })## CustomerId Surname CreditScore Geography Gender
## Length:10005 Length:10005 Min. : 100.0 1:5017 Female:4548
## Class :character Class :character 1st Qu.: 584.0 2:2509 Male :5457
## Mode :character Mode :character Median : 652.0 3:2479
## Mean : 652.7
## 3rd Qu.: 718.0
## Max. :9999.0
##
## Age Tenure Balance NumOfProducts HasCrCard
## Min. :18.00 Min. : 0.000 Min. :0.000e+00 Min. :1.00 0:2947
## 1st Qu.:32.00 1st Qu.: 3.000 1st Qu.:0.000e+00 1st Qu.:1.00 1:7058
## Median :37.00 Median : 5.000 Median :9.724e+04 Median :1.00
## Mean :38.92 Mean : 5.012 Mean :2.785e+05 Mean :1.53
## 3rd Qu.:44.00 3rd Qu.: 7.000 3rd Qu.:1.277e+05 3rd Qu.:2.00
## Max. :92.00 Max. :10.000 Max. :1.000e+09 Max. :4.00
## NA's :3
## IsActiveMember EstimatedSalary Exited
## 0:4851 Min. : 11.58 0:7966
## 1:5154 1st Qu.: 51012.47 1:2039
## Median :100193.91
## Mean :100080.74
## 3rd Qu.:149378.72
## Max. :199992.48
## NA's :3
# Load necessary libraries
library(dplyr)
library(summarytools)
# 1. Summary Statistics for Numeric Columns
numeric_summary <- bank %>%
select(CreditScore, Age, Tenure, Balance, NumOfProducts, EstimatedSalary) %>%
summary()
# Print Results
cat("\n1. Summary Statistics for Numeric Columns:\n")##
## 1. Summary Statistics for Numeric Columns:
## CreditScore Age Tenure Balance
## Min. : 100.0 Min. :18.00 Min. : 0.000 Min. :0.000e+00
## 1st Qu.: 584.0 1st Qu.:32.00 1st Qu.: 3.000 1st Qu.:0.000e+00
## Median : 652.0 Median :37.00 Median : 5.000 Median :9.724e+04
## Mean : 652.7 Mean :38.92 Mean : 5.012 Mean :2.785e+05
## 3rd Qu.: 718.0 3rd Qu.:44.00 3rd Qu.: 7.000 3rd Qu.:1.277e+05
## Max. :9999.0 Max. :92.00 Max. :10.000 Max. :1.000e+09
## NA's :3
## NumOfProducts EstimatedSalary
## Min. :1.00 Min. : 11.58
## 1st Qu.:1.00 1st Qu.: 51012.47
## Median :1.00 Median :100193.91
## Mean :1.53 Mean :100080.74
## 3rd Qu.:2.00 3rd Qu.:149378.72
## Max. :4.00 Max. :199992.48
## NA's :3
# 2. Frequency Counts for Categorical Variables
categorical_summary <- bank %>%
select(Geography, Gender, HasCrCard, IsActiveMember, Exited) %>%
summarytools::freq()
cat("\n2. Frequency Counts for Categorical Variables:\n")##
## 2. Frequency Counts for Categorical Variables:
## Frequencies
## bank$Geography
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 1 5017 50.14 50.14 50.14 50.14
## 2 2509 25.08 75.22 25.08 75.22
## 3 2479 24.78 100.00 24.78 100.00
## <NA> 0 0.00 100.00
## Total 10005 100.00 100.00 100.00 100.00
##
## bank$Gender
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ------------ ------- --------- -------------- --------- --------------
## Female 4548 45.46 45.46 45.46 45.46
## Male 5457 54.54 100.00 54.54 100.00
## <NA> 0 0.00 100.00
## Total 10005 100.00 100.00 100.00 100.00
##
## bank$HasCrCard
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 2947 29.46 29.46 29.46 29.46
## 1 7058 70.54 100.00 70.54 100.00
## <NA> 0 0.00 100.00
## Total 10005 100.00 100.00 100.00 100.00
##
## bank$IsActiveMember
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 4851 48.49 48.49 48.49 48.49
## 1 5154 51.51 100.00 51.51 100.00
## <NA> 0 0.00 100.00
## Total 10005 100.00 100.00 100.00 100.00
##
## bank$Exited
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 7966 79.62 79.62 79.62 79.62
## 1 2039 20.38 100.00 20.38 100.00
## <NA> 0 0.00 100.00
## Total 10005 100.00 100.00 100.00 100.00
# 4. Grouped Analysis: Average Credit Score and Balance by Geography and Gender
grouped_analysis <- bank %>%
group_by(Geography, Gender) %>%
summarise(
AvgCreditScore = mean(CreditScore, na.rm = TRUE),
AvgBalance = mean(Balance, na.rm = TRUE),
AvgEstimatedSalary = mean(EstimatedSalary, na.rm = TRUE)
)
cat("\n4. Grouped Analysis (Average Metrics by Geography and Gender):\n")##
## 4. Grouped Analysis (Average Metrics by Geography and Gender):
## # A tibble: 6 × 5
## # Groups: Geography [3]
## Geography Gender AvgCreditScore AvgBalance AvgEstimatedSalary
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1 Female 650. 60962. 99530.
## 2 1 Male 650. 63546. 100174.
## 3 2 Female 653. 119146. 102446.
## 4 2 Male 650. 120260. 99905.
## 5 3 Female 670. 1911073. 100719.
## 6 3 Male 651. 63353. 98426.
# 5. Correlation Analysis for Numeric Variables
numeric_correlation <- bank %>%
select(CreditScore, Age, Tenure, Balance, NumOfProducts, EstimatedSalary) %>%
cor(use = "complete.obs")
cat("\n3. Correlation Matrix for Numeric Variables:\n")##
## 3. Correlation Matrix for Numeric Variables:
## CreditScore Age Tenure Balance
## CreditScore 1.0000000000 0.002683752 -0.009997899 0.799652586
## Age 0.0026837519 1.000000000 -0.010145893 0.005643235
## Tenure -0.0099978993 -0.010145893 1.000000000 -0.014992810
## Balance 0.7996525855 0.005643235 -0.014992810 1.000000000
## NumOfProducts 0.0001141537 -0.030498650 0.013755324 -0.014349903
## EstimatedSalary -0.0044682344 -0.007391629 0.007607030 -0.005081936
## NumOfProducts EstimatedSalary
## CreditScore 0.0001141537 -0.004468234
## Age -0.0304986501 -0.007391629
## Tenure 0.0137553244 0.007607030
## Balance -0.0143499035 -0.005081936
## NumOfProducts 1.0000000000 0.014479221
## EstimatedSalary 0.0144792212 1.000000000
## CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## 1 15634602 HARGRAVE 500 1 Female 42 2 5000.0
## 2 15647311 hill 1500 3 Female 41 1 9999999.0
## 3 15619304 ONIO 2000 1 Female 42 8 300.0
## 4 15701354 boni 100 1 Female 39 1 888888.0
## 5 15737888 MITCHELL 9999 3 Female 43 2 1000000000.0
## 6 15574012 chu 645 3 Male 44 8 113755.8
## NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
## 1 1 1 1 101348.88 1
## 2 1 0 1 112542.58 0
## 3 3 1 0 113931.57 1
## 4 2 0 0 93826.63 0
## 5 1 1 1 79084.10 0
## 6 2 1 0 149756.71 1
Comparative Analysis by Nationality
Objective: Compare customer behavior (e.g., balance, credit score, number of products) between customers. Boxplots: Use boxplots to compare distributions of numerical variables (e.g., balance, credit score, number of products) across nationalities.
# Create the boxplot with y-axis scaled to 300,000
ggplot(bank, aes(x = Geography, y = Balance, fill = Geography)) +
geom_boxplot(alpha = 0.8) +
scale_fill_manual(
values = c("1" = "#1f77b4", "2" = "#ff7f0e", "3" = "#2ca02c")
) +
scale_y_continuous(limits = c(0, 300000), expand = c(0, 0)) + # Maintain scale but include outliers
labs(
title = "Balance by Nationality",
x = "Nationality",
y = "Balance",
fill = "Geography"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
axis.text.x = element_text(size = 10, face = "bold"),
legend.position = "none" # Remove legend
)ggplot(bank, aes(x = as.factor(Exited), y = Balance, fill = as.factor(Exited))) +
geom_boxplot(alpha = 0.8) +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Balance by Churn Status",
x = "Churn Status",
y = "Balance",
fill = "Churned"
) +
scale_y_continuous(limits = c(0, 300000), expand = c(0, 0)) + # Maintain scale but include outliers
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
legend.position = "none", # Optional: Remove legend if unnecessary
axis.text.x = element_text(size = 12, face = "bold")
)#account balance by credit card ownership
ggplot(bank, aes(x = as.factor(HasCrCard), y = Balance, fill = as.factor(HasCrCard))) +
geom_boxplot(alpha = 0.8) +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Balance by Credit Card Ownership",
x = "Credit Card Ownership",
y = "Balance",
fill = "HasCrCard"
) +
scale_y_continuous(limits = c(0, 200000), expand = c(0, 0)) + # Maintain scale but include outliers
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
legend.position = "none", # Optional: Remove legend if unnecessary
axis.text.x = element_text(size = 12, face = "bold")
)Violin Plot or Strip Plot: To visualize the spread of credit scores across nationalities.
ggplot(bank, aes(x = Geography, y = CreditScore, fill = Geography)) +
geom_violin(alpha = 0.7) +
labs(title = "Credit Score by Nationality", x = "Nationality", y = "Credit Score") +
theme_minimal()Heatmaps or Faceted Density Plots:
Show how multiple variables interact across nationalities.
ggplot(bank, aes(x = CreditScore, y = Balance, color = Geography)) +
geom_point(alpha = 0.5) +
facet_wrap(~Geography) +
labs(title = "Credit Score vs Balance by Nationality", x = "Credit Score", y = "Balance") +
theme_minimal()
3. Customer Segmentation Objective: Identify customer segments and their
characteristics. Graphs to Plot: Cluster Plot:
Use K-means or hierarchical clustering to group customers based on variables like balance, credit score, and age. Visualize clusters using scatterplots.
ggplot(bank, aes(x = CreditScore, y = Balance, color = "")) +
geom_point(alpha = 0.7) +
labs(title = "Customer Segmentation Based on Credit Score and Balance", x = "Credit Score", y = "Balance") +
theme_minimal()# Load necessary libraries
library(ggplot2)
library(dplyr)
# Calculate percentages for each Exited category
pie_data <- bank %>%
group_by(Exited) %>%
summarise(Count = n()) %>%
mutate(Percentage = round((Count / sum(Count)) * 100, 1)) # Calculate percentages
# Create the pie chart
ggplot(pie_data, aes(x = "", y = Percentage, fill = as.factor(Exited))) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Churn Status Distribution",
fill = "Churn Status"
) +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
) +
geom_text(aes(label = paste0(Percentage, "%")),
position = position_stack(vjust = 0.5), size = 5)#Pie Chart of Churn Rate by Gender
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Calculate churn rate by gender
churn_rate_by_gender <- bank %>%
group_by(Gender) %>%
summarise(
Churned = sum(Exited == 1), # Count churned customers
Total = n(), # Total customers
ChurnRate = round((Churned / Total) * 100, 1) # Calculate churn rate as percentage
)
# Create a pie chart
ggplot(churn_rate_by_gender, aes(x = "", y = ChurnRate, fill = Gender)) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar(theta = "y") +
scale_fill_manual(values = c("Male" = "blue", "Female" = "pink")) + # Custom colors
labs(
title = "Churn Rate by Gender",
fill = "Gender"
) +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
) +
geom_text(aes(label = paste0(ChurnRate, "%")),
position = position_stack(vjust = 0.5), size = 5)Exited by Geography
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Prepare data
clustered_data <- bank %>%
group_by(Geography, Exited) %>%
summarise(Count = n(), .groups = "drop") # Count the number of customers in each group
# Create the clustered column chart
ggplot(clustered_data, aes(x = Geography, y = Count, fill = as.factor(Exited))) +
geom_bar(stat = "identity", position = "dodge", alpha = 0.8) +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Churn Status by Geography",
x = "Geography",
y = "Customer Count",
fill = "Churn Status"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10),
axis.text.x = element_text(size = 10, face = "bold")
)Exited by Credit Card Status
# Load necessary library
library(ggplot2)
# Create the clustered column chart
ggplot(bank, aes(x = as.factor(HasCrCard), fill = as.factor(Exited))) +
geom_bar(position = "dodge", alpha = 0.8) +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Churn Status by Credit Card Ownership",
x = "Has Credit Card",
y = "Count",
fill = "Churn Status"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
axis.text.x = element_text(size = 10, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
)
#Exited by Activity Status
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Create the clustered column chart
ggplot(bank, aes(x = as.factor(IsActiveMember), fill = as.factor(Exited))) +
geom_bar(position = "dodge", alpha = 0.8) +
scale_fill_manual(
values = c("0" = "lightblue", "1" = "red"),
labels = c("No Churn", "Churn")
) +
labs(
title = "Churn Status by Activity Status",
x = "Activity Status (Active Member)",
y = "Count",
fill = "Churn Status"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
axis.text.x = element_text(size = 12, face = "bold")
)#
# #save as CSV
# # Specify the file path and name
# output_bank <- "cleaned_bank.csv"
#
# # Save the final_data dataset to a CSV file
# write.csv(cleaned_bank, file = output_file, row.names = FALSE)
#
# # Print a confirmation message
# cat("The processed final_data has been saved to:", output_bank, "\n")
# # #save as excel workbook
# # Load the openxlsx library
# library(openxlsx)
#
# # Specify the file path and name for Excel
# output_bank <- "cleaned_bank.xlsx"
#
# # Save the final_data dataset to an Excel file
# write.xlsx(cleaned_bank, file = output_bank)
#
# # Print a confirmation message
# cat("The processed final_data has been saved to:", output_bank, "\n")