Rows: 10,000
Columns: 8
$ loan_amount <int> 28000, 5000, 2000, 21600, 23000, 5000, 24000, 20000, 20…
$ interest_rate <dbl> 14.07, 12.61, 17.09, 6.72, 14.07, 6.72, 13.59, 11.99, 1…
$ term <dbl> 60, 36, 36, 36, 36, 36, 60, 60, 36, 36, 60, 60, 36, 60,…
$ grade <fct> C, C, D, A, C, A, C, B, C, A, C, B, C, B, D, D, D, F, E…
$ state <fct> NJ, HI, WI, PA, CA, KY, MI, AZ, NV, IL, IL, FL, SC, CO,…
$ annual_income <dbl> 90000, 40000, 40000, 30000, 35000, 34000, 35000, 110000…
$ homeownership <fct> MORTGAGE, RENT, RENT, RENT, RENT, OWN, MORTGAGE, MORTGA…
$ debt_to_income <dbl> 18.01, 5.04, 21.15, 10.16, 57.96, 6.46, 23.66, 16.19, 3…
Visualizing Numerical Data
Load Packages
Data
The dataset we will explore is called `loans`.
# Histogram of loan amounts with no binwidth supplied, so geom_histogram()
# falls back to its default binning.
# NOTE(review): assumes ggplot2 is attached and `loans` is already loaded;
# neither is shown in this file.
ggplot(loans, aes(x = loan_amount)) +
  geom_histogram()

# Store the base plot (data + x aesthetic only) so that layers can be added
# to it without repeating the ggplot() call.
p_loans <- ggplot(loans, aes(x = loan_amount))
# Histograms of loan_amount drawn on the base plot `p_loans`, identical
# except for the bin width, to compare how binwidth changes the picture.

# binwidth = 1000
p_loans +
  geom_histogram(
    binwidth = 1000,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  )

# binwidth = 5000
p_loans +
  geom_histogram(
    binwidth = 5000,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  )

# binwidth = 20000
p_loans +
  geom_histogram(
    binwidth = 20000,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  )

# Histogram (binwidth = 5000) with axis titles, a plot title, dollar-formatted
# x-axis labels, and comma-separated y-axis labels.
# NOTE(review): label_dollar() / label_number() come from the scales package,
# which is assumed to be attached; the library() calls are not shown here.
ggplot(loans, aes(x = loan_amount)) +
  geom_histogram(
    binwidth = 5000,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    x = "Loan Amount in $",
    y = "Frequency",
    title = "Amounts of Lending Club Loans"
  ) +
  scale_x_continuous(
    labels = label_dollar()
  ) +
  scale_y_continuous(
    labels = label_number(big.mark = ",")
  )

# Density plot of loan_amount with a filled area.
p_loans +
  geom_density(
    fill = "skyblue",
    alpha = 0.8
  )

# Density plot with the bandwidth multiplied by 1.5 (adjust = 1.5).
p_loans +
  geom_density(adjust = 1.5)

# Box plot of loan_amount.
p_loans +
  geom_boxplot()

# Box plot of interest_rate. Only x is mapped, so the y-axis title, ticks,
# and tick labels are removed.
ggplot(loans, aes(x = interest_rate)) +
  geom_boxplot() +
  labs(
    x = "Interest Rate",
    y = NULL,
    title = "Interest Rates on Lending Club Loans"
  ) +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

# Mean loan amount across all rows of `loans`.
loans |>
  summarize(mean_loan_amt = mean(loan_amount))
#> # A tibble: 1 × 1
#>   mean_loan_amt
#>           <dbl>
#> 1        16362.
# First and third quartiles (25th and 75th percentiles) of loan_amount.
loans |>
  summarize(
    q25 = quantile(loan_amount, 0.25),
    q75 = quantile(loan_amount, 0.75)
  )
#> # A tibble: 1 × 2
#>     q25   q75
#>   <dbl> <dbl>
#> 1  8000 24000