Visualizing Numerical Data

Load Packages

Data

The dataset we will explore is called `loans`.

Rows: 10,000
Columns: 8
$ loan_amount    <int> 28000, 5000, 2000, 21600, 23000, 5000, 24000, 20000, 20…
$ interest_rate  <dbl> 14.07, 12.61, 17.09, 6.72, 14.07, 6.72, 13.59, 11.99, 1…
$ term           <dbl> 60, 36, 36, 36, 36, 36, 60, 60, 36, 36, 60, 60, 36, 60,…
$ grade          <fct> C, C, D, A, C, A, C, B, C, A, C, B, C, B, D, D, D, F, E…
$ state          <fct> NJ, HI, WI, PA, CA, KY, MI, AZ, NV, IL, IL, FL, SC, CO,…
$ annual_income  <dbl> 90000, 40000, 40000, 30000, 35000, 34000, 35000, 110000…
$ homeownership  <fct> MORTGAGE, RENT, RENT, RENT, RENT, OWN, MORTGAGE, MORTGA…
$ debt_to_income <dbl> 18.01, 5.04, 21.15, 10.16, 57.96, 6.46, 23.66, 16.19, 3…
# Histogram of loan amounts; no binwidth is given, so ggplot2 picks the bins.
ggplot(data = loans, mapping = aes(x = loan_amount)) +
  geom_histogram()

# Base plot (data + x aesthetic only), stored so later chunks can add layers.
p_loans <- ggplot(data = loans, mapping = aes(x = loan_amount))

# Histogram with bins 1000 wide on the loan_amount scale.
p_loans +
  geom_histogram(
    binwidth = 1000,
    color = "black", fill = "skyblue", alpha = 0.7
  )

# Same histogram with bins 5000 wide on the loan_amount scale.
p_loans +
  geom_histogram(
    binwidth = 5000,
    color = "black", fill = "skyblue", alpha = 0.7
  )

# Same histogram with bins 20000 wide on the loan_amount scale.
p_loans +
  geom_histogram(
    binwidth = 20000,
    color = "black", fill = "skyblue", alpha = 0.7
  )

# Histogram with 5000-wide bins, a title, axis labels, and formatted tick
# labels on both axes.
ggplot(data = loans, mapping = aes(x = loan_amount)) +
  geom_histogram(
    binwidth = 5000,
    color = "black", fill = "skyblue", alpha = 0.7
  ) +
  labs(
    title = "Amounts of Lending Club Loans",
    x = "Loan Amount in $",
    y = "Frequency"
  ) +
  # Dollar-formatted x labels; comma as the thousands separator on the y axis.
  scale_x_continuous(labels = label_dollar()) +
  scale_y_continuous(labels = label_number(big.mark = ","))

# Density plot of loan amounts with a semi-transparent fill.
p_loans +
  geom_density(alpha = 0.8, fill = "skyblue")

# Density plot with the bandwidth adjustment set to 1.5 instead of the default.
p_loans +
  geom_density(
    adjust = 1.5
  )

# Boxplot of loan amounts, built on the stored base plot.
p_loans + geom_boxplot()

# Boxplot of interest rates with a title and x-axis label. Only x is mapped,
# so the y-axis label, ticks, and tick text are all removed.
ggplot(data = loans, mapping = aes(x = interest_rate)) +
  geom_boxplot() +
  labs(
    title = "Interest Rates on Lending Club Loans",
    x = "Interest Rate",
    y = NULL
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Mean of loan_amount over all rows of `loans`, returned as a 1 x 1 tibble.
summarize(loans, mean_loan_amt = mean(loan_amount))
# A tibble: 1 × 1
  mean_loan_amt
          <dbl>
1        16362.
# 25th and 75th percentiles of loan_amount, one column each.
summarize(
  loans,
  q25 = quantile(loan_amount, probs = 0.25),
  q75 = quantile(loan_amount, probs = 0.75)
)
# A tibble: 1 × 2
    q25   q75
  <dbl> <dbl>
1  8000 24000