#Install ‘data.table’ and ‘ggplot2’ packages
#If not installed, run following command: install.packages(c('data.table', 'ggplot2'), repos='http://cran.us.r-project.org')
library(data.table)
library(ggplot2)
# Directory that holds the Lending Club CSV export. Adapt for your system.
# NOTE(review): the variable is called linux_filepath, but the value looks like
# a macOS home directory (/Users/...) -- confirm before running elsewhere.
linux_filepath <- '/Users/jaclynbazsika/documents/documents/github/lending_club_EDA/'
# Read the accepted-loans CSV; empty fields are parsed as NA (na.strings = '').
# paste0() joins directory and file name without a separator.
accepted_def <- read.csv(
  paste0(linux_filepath, 'accepted_2007_to_2016.csv'),
  na.strings = ''
)
# Keep a data.table version so later steps can use DT[i, j, by] syntax.
acc_dt <- as.data.table(accepted_def)
#library data.table
library(data.table)
This histogram depicts an approximately normal distribution. The X axis refers to the loan amount awarded to the applicant; the Y axis refers to the number of times that loan amount was awarded. Given the data, the most common loan amount was between $15,000.00 and $20,000.00, awarded nearly 100,000 times.
This chart depicts the status of the loan in relation to credit grade. As shown in the data, the higher the credit rating, the more likely the account is in current or fully paid status. Credit grade A has the smallest proportion of accounts in charged-off or late status; however, the group with the largest proportion in fully paid status is credit grade B. There is little difference between B and C, but as the credit rating gets lower, the likelihood of an account being current decreases.
When I originally made this graph, I hypothesized that a greater number of inquiries would correspond to a higher interest rate; however, given the data, a higher number of inquiries did not lead to a higher interest rate, and in fact the relationship seemed statistically insignificant. Interest rates were evenly distributed at lower numbers of inquiries.
The interest rate increases as the credit grade gets lower. The variance in interest rate is also higher for lower credit grades.
# Mean annual income per borrower state (one row per addr_state).
# NOTE: the object is named median_income, but each value is a per-state
# mean; the name is kept so existing references keep working.
# mean() is called without na.rm, so any state that has a missing
# annual_inc value gets NA here.
median_income <- acc_dt[, .(income = mean(annual_inc)), by = addr_state]
# Print the per-state table.
median_income
# Drop states whose mean income is NA (the original note says this affected
# NY and distorted the distribution).
# is.na() is the correct test: comparing with the string "NA" never yields
# TRUE for a missing value and only worked because data.table treats NA in
# a logical `i` as FALSE.
median_income <- median_income[!is.na(income)]
# Median of the remaining per-state mean incomes.
median(median_income[, income])
[1] 71825.57
LS0tCnRpdGxlOiAiTGVuZGluZyBDbHViIEVEQSBpbiBSIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAotLS0KCmBgYHtyIEluc3RhbGwgcGFja2FnZXN9CiNJbnN0YWxsIOKAmGRhdGEudGFibGXigJkgYW5kIOKAmGdncGxvdDLigJkgcGFja2FnZXMgCiNJZiBub3QgaW5zdGFsbGVkLCBydW4gZm9sbG93aW5nIGNvbW1hbmQ6IGluc3RhbGwucGFja2FnZXMoYygnZGF0YS50YWJsZScsICdnZ3Bsb3QyJyksIHJlcG9zPSdodHRwOi8vY3Jhbi51cy5yLXByb2plY3Qub3JnJykKCgpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkoZ2dwbG90MikKYGBgCgoKYGBge3J9CiNHZXQgd29ya2luZyBkaXJlY3RvcnksIG9yIHNldCB3b3JraW5nIGRpcmVjdG9yeSBpZiBub3QgaW4gdGhlIGRlc2lyZWQgb25lLgpnZXR3ZCgpCmBgYAoKCmBgYHtyfQojVGhpcyBzaG91bGQgYmUgdGhlIHBhdGggdG8gdGhlIGRhdGEuIEFkYXB0IGZvciB5b3VyIHN5c3RlbS4gSW4gdGhpcyBjYXNlLCBJIHNldCBpdCB0byBMaW51eC4KCmxpbnV4X2ZpbGVwYXRoIDwtICcvVXNlcnMvamFjbHluYmF6c2lrYS9kb2N1bWVudHMvZG9jdW1lbnRzL2dpdGh1Yi9sZW5kaW5nX2NsdWJfRURBLycKYWNjZXB0ZWRfZGVmIDwtIHJlYWQuY3N2KHBhc3RlKGxpbnV4X2ZpbGVwYXRoLCdhY2NlcHRlZF8yMDA3X3RvXzIwMTYuY3N2Jywgc2VwPScnKSwgbmEuc3RyaW5ncz0nJykKYWNjX2R0IDwtIGFzLmRhdGEudGFibGUoYWNjZXB0ZWRfZGVmKQpgYGAKCgpgYGB7cn0KI1JldHJpZXZlIHRoZSBkaW1lbnNpb25zIG9mIHRoZSBkYXRhIHNldC4KZGltKGFjY19kdCkKYGBgCgoKYGBge3J9CiNOZXh0LCBnZXQgdGhlIG5hbWVzIG9mIHRoZSBmaWxlcyB1bmRlciB0aGUgb2JqZWN0Cm5hbWVzKGFjY19kdCkKYGBgCgoKYGBge3J9CiNOZXh0LCBkaXNwbGF5IHRoZSBzdHJ1Y3R1cmUgb2YgdGhlIGRhdGEgdGFibGUgaW4gdGhlIHNldC4Kc3RyKGFjY19kdCkKYGBgCgpgYGB7cn0KI2xpYnJhcnkgZGF0YS50YWJsZQpsaWJyYXJ5KGRhdGEudGFibGUpCmBgYAoKYGBge3J9CiNjcmVhdGUgZGF0YSB0YWJsZSBvZiBhY2NfZHQKZGF0YS50YWJsZShhY2NfZHQpCmBgYAoKYGBge3J9CiNtZWFuIGludGVyZXN0IHJhdGUKbWVhbihhY2NfZHQkaW50X3JhdGUpCmBgYApgYGB7cn0KbWluKGFjY19kdCRpbnRfcmF0ZSkKYGBgCmBgYHtyfQptYXgoYWNjX2R0JGludF9yYXRlKQpgYGAKCmBgYHtyfQojc3RhbmRhcmQgZGV2aWF0ZS0gaW50ZXJlc3QgcmF0ZQpzZChhY2NfZHQkaW50X3JhdGUpCmBgYAoKYGBge3J9CiNDcmVhdGUgaGlzdG9ncmFtIGZvciBkdGkuIAojVGhlIG91dGxpZXJzIG1lc3Mgd2l0aCB0aGUgZGlzdHJpYnV0aW9uLgpoaXN0KGFjY19kdFssIGR0aV0pCgpgYGAKCgpgYGB7cn0KI1JlbW92ZSBvdXRsaWVycyBzbyB0aGF0IHlvdSBoYXZlIGEgbW9yZSBhY2N1cmF0ZSBkaXN0cmlidXRpb24uCnJlbW92ZV9vdXRsaWVycyA8LSBmdW5jdGlvbih4LCBuYS5ybSA9IFRSVUUsIC4uLikgewog
IHFudCA8LSBxdWFudGlsZSh4LCBwcm9icz1jKC4yNSwgLjc1KSwgbmEucm0gPSBuYS5ybSwgLi4uKQogIEggPC0gMS41ICogSVFSKHgsIG5hLnJtID0gbmEucm0pCiAgeSA8LSB4CiAgeVt4IDwgKHFudFsxXSAtIEgpXSA8LSBOQQogIHlbeCA+IChxbnRbMl0gKyBIKV0gPC0gTkEKICB5Cn0KYGBgCgpgYGB7cn0KbG9hbl9hbW50cyA8LSByZW1vdmVfb3V0bGllcnMoYWNjX2R0WywgZHRpXSkKYGBgCgpgYGB7cn0KI3JlbmFtZSBsb2FuIGFtb3VudApMb2FuX0Ftb3VudCA8LSBsb2FuX2FtbnRzCmBgYAoKYGBge3J9CiMgaXQgd29ya3MhCmhpc3QoTG9hbl9BbW91bnQpCmBgYAoKVGhpcyBIaXN0b2dyYW0gZGVwaWN0cyBhIHZlcnkgbm9ybWFsIGRpc3RyaWJ1dGlvbi4gVGhlIFggYXhpcyByZWZlcnMgdG8gdGhlIGxvYW4gYW1vdW50IGF3YXJkZWQgdG8gdGhlIGFwcGxpY2FudDtUaGUgWSBheGlzIHJlZmVycyB0byB0aGUgbnVtYmVyIG9mIHRpbWVzIHRoYXQgbG9hbiBhbW91bnQgd2FzIGF3YXJkZWQuIEdpdmVuIHRoZSBkYXRhLCB0aGUgbW9zdCBjb21tbW9uIGxvYW4gYW1vdW50IHdhcyBiZXR3ZWVuICQxNSwwMDAuMDAtMjAsMDAwLjAwOyBhd2FyZGVkIG5lYXJseSAxMDAsMDAwIHRpbWVzLgoKCmBgYHtyfQojIENyZWF0ZSB2YWx1ZSBmb3IgbWVkaWFuIGxvYW4gYW1vdW50CkxvYW5fQW1vdW50cyA8LSBhY2NfZHQkbG9hbl9hbW50CgojRmluZCBtZWRpYW4gZm9yIGxvYW4gYW1vdW50cwptZWRpYW4oTG9hbl9BbW91bnRzKQoKYGBgCgpgYGB7cn0KI0ZpbmQgdGhlIG1vZGUKbW9kZShMb2FuX0Ftb3VudHMpCmBgYAoKYGBge3J9CiMuLi4gcmVwZWF0IGZvciBtZWFuCm1lYW4oTG9hbl9BbW91bnRzKQpgYGAKCmBgYHtyfQojQ2FsY3VsYXRlIHN0YW5kYXJkIGRldmlhdGlvbiBmb3IgbG9hbiBhbW91bnRzCnNkKExvYW5fQW1vdW50cykKYGBgCgpgYGB7cn0KI0NhbGN1bGF0ZSBtYXggbG9hbiBhbW91bnQKbWF4KExvYW5fQW1vdW50cykKYGBgCgpgYGB7cn0KI0NhbGN1bGF0ZSBtaW4gbG9hbiBhbW91bnQKbWluKExvYW5fQW1vdW50cykKYGBgCgpgYGB7cn0KI0JhciBncmFwaCBjb21wYXJpbmcgbG9hbiBzdGF0dXMgdG8gY3JlZGl0IGdyYWRlCgpwbG90KGFjY19kdFssIGdyYWRlLGxvYW5fc3RhdHVzXSkKCmBgYApUaGlzIGNoYXJ0IGRlcGljdHMgdGhlIHN0YXR1cyBvZiB0aGUgbG9hbiByZWxhdGluZyB0byBjcmVkaXQgZ3JhZGUuIEFzIHNob3duIGluIHRoZSBkYXRhLCB0aGUgaGlnaGVyIHRoZSBjcmVkaXQgcmF0aW5nLCB0aGUgbW9yZSBsaWtlbHkgdGhlIGFjY291bnQgaXMgaW4gY3VycmVudCBvciBmdWxseSBwYWlkIHN0YXR1cy5DcmVkaXQgZ3JhZGUgQSBoYXMgdGhlIGxvd2VzdCBkaXN0cmlidXRpb24gb2YgdGhvc2UgaW4gY2hhcmdlIG9mZiBvciBsYXRlIHN0YXR1czsgSG93ZXZlciwgdGhlIGdyb3VwIHdpdGggdGhlIGhpZ2hlc3QgZGlzdHJpYnV0aW9uIGluIHRoZSBmdWxseSBwYWlkIHN0YXR1cyBpcyBjcmVkaXQgZ3JhZGUgQi4g
VGhlcmUgaXMgbGl0dGxlIGRpZmZlcmVuY2UgYmV0d2VlbiBCL0MsIGJ1dCBvbmNlIHdlIHN0YXJ0IGdvaW5nIGxvd2VyIGluIGNyZWRpdCByYXRpbmcsIHRoZSBsaWtlbGlob29kIG9mIGFjY291bnRzIGJlaW5nIGN1cnJlbnQgZGVjcmVhc2VkLiAKCgpgYGB7cn0KI3NjYXR0ZXIgcGxvdAoJZ2dwbG90KGFjY19kdCwgYWVzKHg9aW5xX2xhc3RfNm10aHMsIHk9aW50X3JhdGUpKSArIGdlb21fcG9pbnQoKQoKYGBgCldoZW4gSSBvcmlnaW5sbHkgZGlkIHRoaXMgZ3JhcGgsIEkgaHlwb3RoZXNpemVkIHRoYXQgdGhlIGdyZWF0ZXIgbnVtYmVyIG9mIGlucXVpcmllcyB3b3VsZCBlcXVhbCBhIGhpZ2hlciBpbnRlcmVzdCByYXRlOyBIb3dldmVyLCBnaXZlbiB0aGUgZGF0YSwgdGhlIGhpZ2hlciBudW1iZXIgb2YgaW5xdWlyZXMgZGlkIG5vdCBsZWFkIHRvIGEgaGlnaGVyIGludGVyZXN0IHJhdGUsIGFuZCBpbiBmYWN0IHNlZW1lZCBzdGF0aXN0aWNhbGx5IGluc2lnbmlmY2FudC4gSXQgd2FzIGV2ZW5seSBkaXN0cmlidXRlZCBhdCBsb3dlciBpbnF1aXJlcy4KCmBgYHtyfQojIGJveHBsb3QgY3JlZGl0IGdyYWRlIHZzLiBpbnRlcmVzdCByYXRlCmJveHBsb3QoaW50X3JhdGUgfiBncmFkZSwgZGF0YT1hY2NfZHQsIHhsYWI9J2dyYWRlJywKIGNvbD0oYygicmVkIiwiYmx1ZSAiKSksIHlsYWI9J0ludGVyZXN0IFJhdGUnKQpgYGAKIFRoZSBpbnRlcmVzdCByYXRlIGluY3JlYXNlcyB3aXRoIGxvd2VyIGNyZWRpdCBncmFkZXMuIFRoZXJlIGlzIGEgaGlnaGVyIHZhcmlhbmNlIGFzIGNyZWRpdCBzY29yZXMgZGVjcmVhc2VkLgoKYGBge3J9CiNjcmVhdGUgYSB2YXJpYWJsZSB3aXRoIGFubnVhbCBpbmNvbWUgYW5kIHN0YXRlCm1lZGlhbl9pbmNvbWUgPC0gYWNjX2R0WywgLihpbmNvbWU9bWVhbihhbm51YWxfaW5jKSksIGJ5PWFkZHJfc3RhdGVdCmBgYAoKYGBge3J9CgojcHJpbnQgY29udGVudHMgb2YgaW5jb21lCm1lZGlhbl9pbmNvbWUKCmBgYAoKYGBge3J9CiMgY2xlYW4gdXAgbWVkaWFuX2luY29tZSB0byByZW1vdmUgTkEodGhpcyB3YXMgaW4gdGhlIE5ZIGNvbHVtbiBhbmQgbWVzc2VkIHdpdGggZGlzdHJpYnV0aW9uKTsgY3JlYXRlZCB2YXJpYWJsZS0tLSBtZWRpYW5faW5jb21lCm1lZGlhbl9pbmNvbWU8LSBtZWRpYW5faW5jb21lWyEobWVkaWFuX2luY29tZSRpbmNvbWU9PSJOQSIpLF0KYGBgCgpgYGB7cn0KI21lZGlhbiBpbmNvbWUgaW4gNTEgc3RhdGVzCgptZWRpYW4obWVkaWFuX2luY29tZVssIGluY29tZV0pCmBgYAoKCgo=