# This is my WPA 5
# Load the credit data set from a remote comma-separated text file.
# The first line of the file holds the column names, and character columns
# are kept as plain strings rather than being converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
credit <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/credit.txt",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Question 1 ----
# How many rows and columns are in the dataframe?
# Row count, read as the first element of the dimension vector.
dim(credit)[1L]
## [1] 1000
# Column count, read as the second element of the dimension vector.
dim(credit)[2L]
## [1] 17
# What are the names of the columns in the dataframe?
colnames(credit)
## [1] "checking_balance" "months_loan_duration" "credit_history"
## [4] "purpose" "amount" "savings_balance"
## [7] "employment_duration" "percent_of_income" "years_at_residence"
## [10] "age" "other_credit" "housing"
## [13] "existing_loans_count" "job" "dependents"
## [16] "phone" "default"
# Question 2 ----
# What are the different values of the credit_history variable and how often
# did each occur?
# Frequency count of each distinct credit_history value.
table(credit[["credit_history"]])
##
## critical good perfect poor very good
## 293 530 40 88 49
# What is the mean loan amount (column is called amount) for each level of
# credit_history?
# The formula is passed positionally: since R 4.2.0 the first argument of the
# formula method of aggregate() is named `x`, so `formula = ...` no longer
# works, while an unnamed formula works on every R version.
# na.rm = TRUE is forwarded to mean().
aggregate(amount ~ credit_history,
          data = credit,
          FUN = mean,
          na.rm = TRUE)
## credit_history amount
## 1 critical 3088.038
## 2 good 3040.958
## 3 perfect 5305.675
## 4 poor 4302.602
## 5 very good 3344.878
# What is the median age for each level of credit_history?
# The response variable is `age`, as the question asks; aggregating `amount`
# here would report median loan amounts instead.
# The formula is passed positionally: since R 4.2.0 the first argument of the
# formula method of aggregate() is named `x`, so `formula = ...` no longer
# works, while an unnamed formula works on every R version.
# na.rm = TRUE is forwarded to median().
aggregate(age ~ credit_history,
          data = credit,
          FUN = median,
          na.rm = TRUE)
## (output not shown: the table recorded here earlier listed median loan
## amounts rather than ages; re-run this call to regenerate it)
# Question 3 ----
# What was the purpose of the highest loan amount? (Hint: Start by answering
# the question: What was the maximum loan amount for each loan purpose?)
# The formula is passed positionally: since R 4.2.0 the first argument of the
# formula method of aggregate() is named `x`, so `formula = ...` no longer
# works, while an unnamed formula works on every R version.
# na.rm = TRUE is forwarded to max().
aggregate(amount ~ purpose,
          data = credit,
          FUN = max,
          na.rm = TRUE)
## purpose amount
## 1 business 15945
## 2 car 18424
## 3 education 12612
## 4 furniture/appliances 15653
## 5 renovations 11998
# What was the purpose of the smallest loan amount?
# Reports the minimum loan amount within each purpose; the purpose with the
# lowest value in the result answers the question.
# The formula is passed positionally: since R 4.2.0 the first argument of the
# formula method of aggregate() is named `x`, so `formula = ...` no longer
# works, while an unnamed formula works on every R version.
# na.rm = TRUE is forwarded to min().
aggregate(amount ~ purpose,
          data = credit,
          FUN = min,
          na.rm = TRUE)
## purpose amount
## 1 business 609
## 2 car 250
## 3 education 339
## 4 furniture/appliances 338
## 5 renovations 454
# Question 4 ----
# Does it look like there is a relationship between a person's housing status
# and their age?
# Compares the mean age across the housing categories.
# The formula is passed positionally: since R 4.2.0 the first argument of the
# formula method of aggregate() is named `x`, so `formula = ...` no longer
# works, while an unnamed formula works on every R version.
# na.rm = TRUE is forwarded to mean().
aggregate(age ~ housing,
          data = credit,
          FUN = mean,
          na.rm = TRUE)
## housing age
## 1 other 43.81481
## 2 own 35.59327
## 3 rent 30.36872
# Question 5 ----
# Create a new column called amount_gt1000, a binary variable that is TRUE
# when the loan amount is greater than 1000 and FALSE when it is less than or
# equal to 1000.
credit[["amount_gt1000"]] <- credit[["amount"]] > 1000
#Were people who were unemployed more likely to have a loan amount greater than 1000 than people with an employment duration greater than 7 years? (Hint: Calculate the percentage for all employment_duration values).