library(ggplot2) # Data visualization
## Warning: package 'ggplot2' was built under R version 3.3.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(readr)
## Warning: package 'readr' was built under R version 3.3.3
Each week the Consumer Financial Protection Bureau (CFPB) sends thousands of consumers’ complaints about financial products and services to companies for response. Those complaints are published here after the company responds or after 15 days, whichever comes first. By adding their voice, consumers help improve the financial marketplace. First I looked at all the variables in the table. The data file was about 90 MB, so I couldn’t upload it directly and had to use the Import Dataset option instead.
# Read the consumer complaints CSV into a tibble; readr infers column types.
consumer_complaints <- read_csv(
  file = "~/2 MSSA/463/datasets/consumer_complaints.csv"
)
## Parsed with column specification:
## cols(
## date_received = col_character(),
## product = col_character(),
## sub_product = col_character(),
## issue = col_character(),
## sub_issue = col_character(),
## consumer_complaint_narrative = col_character(),
## company_public_response = col_character(),
## company = col_character(),
## state = col_character(),
## zipcode = col_character(),
## tags = col_character(),
## consumer_consent_provided = col_character(),
## submitted_via = col_character(),
## date_sent_to_company = col_character(),
## company_response_to_consumer = col_character(),
## timely_response = col_character(),
## `consumer_disputed?` = col_character(),
## complaint_id = col_integer()
## )
# List the column names available in the complaints table.
colnames(consumer_complaints)
## [1] "date_received" "product"
## [3] "sub_product" "issue"
## [5] "sub_issue" "consumer_complaint_narrative"
## [7] "company_public_response" "company"
## [9] "state" "zipcode"
## [11] "tags" "consumer_consent_provided"
## [13] "submitted_via" "date_sent_to_company"
## [15] "company_response_to_consumer" "timely_response"
## [17] "consumer_disputed?" "complaint_id"
Next I looked at all the categories in the product column.
# Count the number of complaints for each product category.
table(consumer_complaints[["product"]])
##
## Bank account or service Consumer Loan Credit card
## 62563 20990 66468
## Credit reporting Debt collection Money transfers
## 91854 101052 3812
## Mortgage Other financial service Payday loan
## 186475 557 3877
## Prepaid card Student loan
## 2470 15839
I was interested in student loans, so I decided to focus on that product and visualize the complaints about it.
# Keep only the complaints whose product is "Student loan".
student_loans <- consumer_complaints %>%
  filter(product == "Student loan")
I wanted to see the total number of complaints regarding student loans. There are 15,839 student loan complaints out of a total of 555,957 complaints in the dataset.
# Count the student loan complaints by sub-product.
table(student_loans[["sub_product"]])
##
## Non-federal student loan
## 15839
Below I created a new table of the complaints regarding student loans. The variables included are the date received, product, company, issue, and the company’s response to the consumer.
# Keep only the columns of interest and store them under a name so the
# reduced table can be reused; previously the result was printed and discarded.
# select() returns a new tibble and leaves student_loans itself unchanged.
student_loan_complaints <- student_loans %>%
  select(date_received, product, company, issue, company_response_to_consumer)

# Print the reduced table (same output as the unassigned pipeline produced).
student_loan_complaints
## # A tibble: 15,839 × 5
## date_received product company
## <chr> <chr> <chr>
## 1 08/30/2013 Student loan Navient Solutions, Inc.
## 2 09/17/2013 Student loan Schachter Portnoy, L.L.C.
## 3 08/30/2013 Student loan Navient Solutions, Inc.
## 4 09/10/2013 Student loan Navient Solutions, Inc.
## 5 09/10/2013 Student loan Navient Solutions, Inc.
## 6 09/10/2013 Student loan Wells Fargo & Company
## 7 09/24/2013 Student loan Navient Solutions, Inc.
## 8 09/10/2013 Student loan Navient Solutions, Inc.
## 9 09/17/2013 Student loan Navient Solutions, Inc.
## 10 09/10/2013 Student loan Navient Solutions, Inc.
## # ... with 15,829 more rows, and 2 more variables: issue <chr>,
## # company_response_to_consumer <chr>
# Bar chart of the number of student loan complaints per issue, with each
# bar filled by its issue.
ggplot(data = student_loans, mapping = aes(x = issue, fill = issue)) +
  geom_bar() +
  labs(title = "Student Loans - Complaints")