Load necessary libraries

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read the dataset

# Load the CSV export into a data frame; the first line of the file
# supplies the column names
df <- read.csv(
  file = "C:/Users/LENOVO/Downloads/workshop 2.csv",
  header = TRUE
)

# Preview the leading rows (head() returns six by default)
head(df, n = 6L)
##   age         job marital education default housing loan  contact month day
## 1  30  unemployed married   primary      no      no   no cellular   oct  19
## 2  33    services married secondary      no     yes  yes cellular   may  11
## 3  35  management  single  tertiary      no     yes   no cellular   apr  16
## 4  30  management married  tertiary      no     yes  yes  unknown   jun   3
## 5  59 blue-collar married secondary      no     yes   no  unknown   may   5
## 6  35  management  single  tertiary      no      no   no cellular   feb  23
##   duration campaign pdays previous poutcome
## 1       79        1    -1        0  unknown
## 2      220        1   339        4  failure
## 3      185        1   330        1  failure
## 4      199        4    -1        0  unknown
## 5      226        1    -1        0  unknown
## 6      141        2   176        3  failure
# Treat education and loan as categorical variables (factors) so they are
# tabulated by level instead of being handled as free text
df <- transform(
  df,
  education = as.factor(education),
  loan = as.factor(loan)
)

# Per-column overview of the data frame
summary(df)
##       age            job              marital              education   
##  Min.   :19.00   Length:4521        Length:4521        primary  : 678  
##  1st Qu.:33.00   Class :character   Class :character   secondary:2306  
##  Median :39.00   Mode  :character   Mode  :character   tertiary :1350  
##  Mean   :41.17                                         unknown  : 187  
##  3rd Qu.:49.00                                                         
##  Max.   :87.00                                                         
##    default            housing           loan        contact         
##  Length:4521        Length:4521        no :3830   Length:4521       
##  Class :character   Class :character   yes: 691   Class :character  
##  Mode  :character   Mode  :character              Mode  :character  
##                                                                     
##                                                                     
##                                                                     
##     month                day           duration       campaign     
##  Length:4521        Min.   : 1.00   Min.   :   4   Min.   : 1.000  
##  Class :character   1st Qu.: 9.00   1st Qu.: 104   1st Qu.: 1.000  
##  Mode  :character   Median :16.00   Median : 185   Median : 2.000  
##                     Mean   :15.92   Mean   : 264   Mean   : 2.794  
##                     3rd Qu.:21.00   3rd Qu.: 329   3rd Qu.: 3.000  
##                     Max.   :31.00   Max.   :3025   Max.   :50.000  
##      pdays           previous         poutcome        
##  Min.   : -1.00   Min.   : 0.0000   Length:4521       
##  1st Qu.: -1.00   1st Qu.: 0.0000   Class :character  
##  Median : -1.00   Median : 0.0000   Mode  :character  
##  Mean   : 39.77   Mean   : 0.5426                     
##  3rd Qu.: -1.00   3rd Qu.: 0.0000                     
##  Max.   :871.00   Max.   :25.0000

Create the plot

# Build a 100%-stacked bar chart: one bar per education level, each bar
# split by loan status.
p <- ggplot(data = df, aes(x = education, fill = loan)) +
  # position = "fill" rescales every bar to height 1, so education levels
  # are compared by share rather than by raw count
  geom_bar(position = "fill") +
  # label_percent() is the current name of the superseded percent_format()
  scale_y_continuous(labels = scales::label_percent()) +
  # Colours are named by loan level so each colour stays tied to its level
  # even if the factor levels are reordered or one level is absent
  scale_fill_manual(values = c(no = "#FF9999", yes = "#3399FF")) +
  labs(
    title = "Loan Status by Education Level",
    x = "Education Level",
    y = "Proportion of Respondents",
    fill = "Has Loan?"
  ) +
  theme_minimal()

Explicitly print the plot to ensure it appears

# Render the stored plot object on the active graphics device
print(x = p)