This is my WPA 5

# Load the credit data set from the course website. The file is
# comma-separated with a header row; text columns are kept as character
# vectors rather than being converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
credit <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/credit.txt",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)

Question 1

# How many rows and columns are in the dataframe?
# dim() returns c(rows, columns); each element is printed separately.
dim(credit)[1L]
## [1] 1000
dim(credit)[2L]
## [1] 17
# What are the names of the columns in the dataframe?
colnames(credit)
##  [1] "checking_balance"     "months_loan_duration" "credit_history"      
##  [4] "purpose"              "amount"               "savings_balance"     
##  [7] "employment_duration"  "percent_of_income"    "years_at_residence"  
## [10] "age"                  "other_credit"         "housing"             
## [13] "existing_loans_count" "job"                  "dependents"          
## [16] "phone"                "default"

Question 2

# What are the different values of the credit_history variable and how often
# did each occur?
# table() tallies the number of rows for every distinct value of the column.
table(credit[["credit_history"]])
## 
##  critical      good   perfect      poor very good 
##       293       530        40        88        49
# What is the mean loan amount (column is called amount) for each level of
# credit_history?
# The formula is passed positionally: the first argument of aggregate()'s
# formula method was renamed from `formula` to `x` in R 4.2.0, so naming it
# `formula =` fails on current R versions. na.rm = TRUE is forwarded to mean().
aggregate(amount ~ credit_history,
          data = credit,
          FUN = mean,
          na.rm = TRUE)
##   credit_history   amount
## 1       critical 3088.038
## 2           good 3040.958
## 3        perfect 5305.675
## 4           poor 4302.602
## 5      very good 3344.878
# What is the median age for each level of credit_history?
# The response variable is `age`, as the question asks (the earlier version
# aggregated `amount` by mistake). The formula is passed positionally because
# the first argument of aggregate()'s formula method was renamed from
# `formula` to `x` in R 4.2.0. na.rm = TRUE is forwarded to median().
aggregate(age ~ credit_history,
          data = credit,
          FUN = median,
          na.rm = TRUE)
## NOTE(review): the output previously recorded here was the median `amount`
## per credit_history level, not the median age. Re-run this call to
## regenerate the table for `age`.

Question 3

# What was the purpose of the highest loan amount? (Hint: Start by answering
# the question: What was the maximum loan amount for each loan purpose?)
# The formula is passed positionally because the first argument of
# aggregate()'s formula method was renamed from `formula` to `x` in R 4.2.0.
# na.rm = TRUE is forwarded to max().
aggregate(amount ~ purpose,
          data = credit,
          FUN = max,
          na.rm = TRUE)
##                purpose amount
## 1             business  15945
## 2                  car  18424
## 3            education  12612
## 4 furniture/appliances  15653
## 5          renovations  11998
# The largest per-purpose maximum in the table above is 18424, for "car".
# What was the purpose of the smallest loan amount?
# The formula is passed positionally because the first argument of
# aggregate()'s formula method was renamed from `formula` to `x` in R 4.2.0.
# na.rm = TRUE is forwarded to min().
aggregate(amount ~ purpose,
          data = credit,
          FUN = min,
          na.rm = TRUE)
##                purpose amount
## 1             business    609
## 2                  car    250
## 3            education    339
## 4 furniture/appliances    338
## 5          renovations    454
# The smallest per-purpose minimum in the table above is 250, for "car".

Question 4

# Does it look like there is a relationship between a person’s housing status
# and their age?
# Compare the mean age within each housing category. The formula is passed
# positionally because the first argument of aggregate()'s formula method was
# renamed from `formula` to `x` in R 4.2.0. na.rm = TRUE is forwarded to mean().
aggregate(age ~ housing,
          data = credit,
          FUN = mean,
          na.rm = TRUE)
##   housing      age
## 1   other 43.81481
## 2     own 35.59327
## 3    rent 30.36872
# In the table above the mean age is highest for "other", lower for "own",
# and lowest for "rent".

Question 5

# Create a new column called amount_gt1000: a logical flag that is TRUE when
# the loan amount is greater than 1000 and FALSE when it is less than or
# equal to 1000.
credit[["amount_gt1000"]] <- credit[["amount"]] > 1000

#Were people who were unemployed more likely to have a loan amount greater than 1000 than people with an employment duration greater than 7 years? (Hint: Calculate the percentage for all employment_duration values).