# Load necessary libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the dataset
# Import the workshop CSV into a data frame.
# NOTE(review): this is an absolute path on one specific Windows machine; the
# script will fail to find the file anywhere else.
df <- utils::read.csv(file = "C:/Users/LENOVO/Downloads/workshop 2.csv")
# Preview the first six rows to check the columns were parsed as expected.
head(df, n = 6L)
## age job marital education default housing loan contact month day
## 1 30 unemployed married primary no no no cellular oct 19
## 2 33 services married secondary no yes yes cellular may 11
## 3 35 management single tertiary no yes no cellular apr 16
## 4 30 management married tertiary no yes yes unknown jun 3
## 5 59 blue-collar married secondary no yes no unknown may 5
## 6 35 management single tertiary no no no cellular feb 23
## duration campaign pdays previous poutcome
## 1 79 1 -1 0 unknown
## 2 220 1 339 4 failure
## 3 185 1 330 1 failure
## 4 199 4 -1 0 unknown
## 5 226 1 -1 0 unknown
## 6 141 2 176 3 failure
# Convert relevant columns to factors
# Recode the two categorical columns used by the plot (education, loan) as
# factors in a single pass; all other columns are left untouched.
df[c("education", "loan")] <- lapply(df[c("education", "loan")], as.factor)
# Summarise every column; factor columns now report per-level counts.
summary(df)
## age job marital education
## Min. :19.00 Length:4521 Length:4521 primary : 678
## 1st Qu.:33.00 Class :character Class :character secondary:2306
## Median :39.00 Mode :character Mode :character tertiary :1350
## Mean :41.17 unknown : 187
## 3rd Qu.:49.00
## Max. :87.00
## default housing loan contact
## Length:4521 Length:4521 no :3830 Length:4521
## Class :character Class :character yes: 691 Class :character
## Mode :character Mode :character Mode :character
##
##
##
## month day duration campaign
## Length:4521 Min. : 1.00 Min. : 4 Min. : 1.000
## Class :character 1st Qu.: 9.00 1st Qu.: 104 1st Qu.: 1.000
## Mode :character Median :16.00 Median : 185 Median : 2.000
## Mean :15.92 Mean : 264 Mean : 2.794
## 3rd Qu.:21.00 3rd Qu.: 329 3rd Qu.: 3.000
## Max. :31.00 Max. :3025 Max. :50.000
## pdays previous poutcome
## Min. : -1.00 Min. : 0.0000 Length:4521
## 1st Qu.: -1.00 1st Qu.: 0.0000 Class :character
## Median : -1.00 Median : 0.0000 Mode :character
## Mean : 39.77 Mean : 0.5426
## 3rd Qu.: -1.00 3rd Qu.: 0.0000
## Max. :871.00 Max. :25.0000
# Create the plot
# Build a 100%-stacked bar chart: one bar per education level, each bar split
# by loan status.
p <- ggplot(data = df, mapping = aes(x = education, fill = loan)) +
  # position = "fill" rescales every bar to height 1, so segments show the
  # share of each loan status within an education level rather than raw counts.
  geom_bar(position = "fill") +
  # Show the 0-1 proportion axis as percentages.
  scale_y_continuous(labels = scales::label_percent()) +
  # Colours are named by factor level so the mapping does not silently depend
  # on the level order of `loan`.
  scale_fill_manual(values = c(no = "#FF9999", yes = "#3399FF")) +
  labs(
    title = "Loan Status by Education Level",
    x = "Education Level",
    y = "Proportion of Respondents",
    fill = "Has Loan?"
  ) +
  theme_minimal()

# Explicitly print the plot to ensure it appears
print(p)
