Cycle 1:
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Cycle 1 plot: one box per home-ownership category showing the distribution
# of loan interest rates. The fill colour duplicates the x axis, so the legend
# is suppressed.
ggplot(
  data = loans_full_schema,
  mapping = aes(x = homeownership, y = interest_rate, fill = homeownership)
) +
  geom_boxplot(alpha = 0.7) +
  ggtitle("Interest Rate vs Home Ownership") +
  xlab("Home Ownership Status") +
  ylab("Interest Rate (%)") +
  theme(legend.position = "none")
Cycle 2:
# The eight most frequent loan purposes. `count(sort = TRUE)` tallies rows per
# purpose in descending order; `slice_max()` names the ranking column
# explicitly (the superseded `top_n(8)` guessed it and printed
# "Selecting by n") and, like `top_n()`, keeps ties at the cutoff.
common_purposes <- loans_full_schema %>%
  count(loan_purpose, sort = TRUE) %>%
  slice_max(n, n = 8) %>%
  pull(loan_purpose)

# Restrict the data to loans whose purpose is one of the common ones.
loans_filtered <- loans_full_schema %>%
  filter(loan_purpose %in% common_purposes)

# Violin plot (with a narrow box plot overlaid) of interest rate per purpose.
# Purposes are ordered by their median interest rate; axes are flipped so the
# purpose labels read horizontally. Box-plot outliers are hidden because the
# violin already shows the tails.
ggplot(
  loans_filtered,
  aes(
    x = reorder(loan_purpose, interest_rate, median),
    y = interest_rate
  )
) +
  geom_violin(fill = "steelblue", alpha = 0.5) +
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  coord_flip() +
  labs(
    title = "Interest Rates by Loan Purpose",
    x = "Loan Purpose",
    y = "Interest Rate (%)"
  )
Cycle 3:
# Cycle 3 plot: interest rate against debt-to-income ratio, with a linear
# trend line over heavily transparent points (alpha = 0.1 to cope with
# overplotting).
#
# 24 rows have no usable `debt_to_income` value and cannot be drawn.
# `na.rm = TRUE` drops them deliberately instead of emitting a warning from
# each layer, and the smoothing formula is stated explicitly rather than left
# to the default (which prints a message).
ggplot(loans_full_schema, aes(x = debt_to_income, y = interest_rate)) +
  geom_point(alpha = 0.1, color = "darkgreen", na.rm = TRUE) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "coral",
    na.rm = TRUE
  ) +
  labs(
    title = "Interest Rate vs Debt-to-Income Ratio",
    x = "Debt to Income",
    y = "Interest Rate (%)"
  )
Cycle 4:
# Cycle 4 plot: one linear trend line of interest rate on debt-to-income ratio
# per income-verification status (mapped to colour). Confidence bands are
# turned off (`se = FALSE`) so the lines can be compared directly.
#
# 24 rows have no usable `debt_to_income` value; `na.rm = TRUE` drops them
# deliberately instead of warning, and the smoothing formula is stated
# explicitly rather than left to the default (which prints a message).
ggplot(
  loans_full_schema,
  aes(x = debt_to_income, y = interest_rate, color = verified_income)
) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    na.rm = TRUE
  ) +
  labs(
    title = "Impact of Income Verification on Interest Rates",
    x = "Debt to Income Ratio",
    y = "Interest Rate (%)",
    color = "Income Verified"
  )