# Working subset: eight columns taken from loans_full_schema.
loans <- select(
  loans_full_schema,
  loan_amount, term, interest_rate, grade,
  annual_income, homeownership, state, debt_to_income
)

# Frequency of each homeownership level.
table(loans[["homeownership"]])
## 
##               ANY MORTGAGE      OWN     RENT 
##        0        0     4789     1353     3858
#There are 3 distinct values that actually occur (the "" and ANY levels have zero counts); MORTGAGE is the most common one
# Number of distinct interest rates, followed by the frequency of each rate.
length(unique(loans[["interest_rate"]]))
## [1] 58
table(loans[["interest_rate"]])
## 
##  5.31  5.32     6  6.07  6.08  6.71  6.72  7.34  7.35  7.96  7.97  9.43  9.44 
##   188   234     3   202   277   192   312   243   325   211   274   280   367 
##  9.92  9.93 10.41 10.42  10.9 10.91 11.98 11.99 12.61 12.62 13.58 13.59 14.07 
##   248   390   194   346   275   306   255   376   264   333   225   347   183 
## 14.08 15.04 15.05 16.01 16.02 17.09 17.47 18.06 18.45 19.03 19.42    20 20.39 
##   318   199   304   196   284   195   124   176   146   197   114   137    93 
## 21.45 21.85  22.9 22.91 23.87 23.88 24.84 24.85 25.81 25.82  26.3 26.77 28.72 
##   172    90    13    28    20    37    31    42    26    47    53    38    31 
## 29.69 30.17 30.65 30.75 30.79 30.94 
##     9     9     5     4    11     1
#There are 58 distinct values and 9.93 is the most common one (390 loans)
# Frequency table of annual_income.
table(loans$annual_income)
# The result is not really helpful: there are too many distinct values.
# Histogram of loan_amount: 20 bins, with a bin edge placed at 0.
ggplot(loans, aes(x = loan_amount)) +
  geom_histogram(bins = 20, boundary = 0)

# Histogram of annual_income using geom_histogram()'s default binning.
ggplot(loans, aes(x = annual_income)) +
  geom_histogram()

#The x-axis spans such a wide range of values that the differences between most observations cannot be seen
# Density-scaled histogram of debt_to_income with a kernel density overlay,
# restricted to the 0-100 range by xlim() (ggplot2 drops observations outside
# the limits and warns about the removed rows).
#
# boundary = 0 places a bin edge on the lower axis limit. With binwidth = 2 and
# no boundary/center, ggplot2's default alignment makes the first bin span
# [-1, 1]; that bar reaches outside xlim(0, 100) and is removed from the plot.
ggplot(data = loans, aes(x = debt_to_income)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 2,
    boundary = 0
  ) +
  xlim(0, 100) +
  geom_density(linewidth = 1.2)

#The distribution is right-skewed: most people have a low debt-to-income ratio
 # Scatter plot of debt_to_income against interest_rate, coloured by grade.
 ggplot(loans, aes(x = interest_rate, y = debt_to_income, color = grade)) +
   geom_point()

# Scatter plot of interest_rate against loan_amount, coloured by term.
ggplot(loans, aes(x = loan_amount, y = interest_rate, color = term)) +
  geom_point()