# title: "WPA#3: Chapters 5-6"
# author: "Rebekka Herz"
# date: "7. Mai 2015"
# output: html_document
# Load the credit data set from the URL below. The file is comma-separated and
# has a header row; text columns are kept as character vectors, not factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned.
credit <- read.table(
  file = "http://nathanieldphillips.com/wp-content/uploads/2015/05/credit.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Preview the first rows.
# Author's note: I imported it via the "Import Dataset" function.
head(credit)
## checking_balance months_loan_duration credit_history
## 1 < 0 DM 6 critical
## 2 1 - 200 DM 48 good
## 3 unknown 12 critical
## 4 < 0 DM 42 good
## 5 < 0 DM 24 poor
## 6 unknown 36 good
## purpose amount savings_balance employment_duration
## 1 furniture/appliances 1169 unknown > 7 years
## 2 furniture/appliances 5951 < 100 DM 1 - 4 years
## 3 education 2096 < 100 DM 4 - 7 years
## 4 furniture/appliances 7882 < 100 DM 4 - 7 years
## 5 car 4870 < 100 DM 1 - 4 years
## 6 education 9055 unknown 1 - 4 years
## percent_of_income years_at_residence age other_credit housing
## 1 4 4 67 none own
## 2 2 2 22 none own
## 3 2 3 49 none own
## 4 2 4 45 none other
## 5 3 4 53 none other
## 6 2 4 35 none other
## existing_loans_count job dependents phone default
## 1 2 skilled 1 yes no
## 2 1 skilled 1 no yes
## 3 1 unskilled 2 no no
## 4 1 skilled 2 no no
## 5 2 skilled 2 no yes
## 6 1 unskilled 2 yes no
# Size of the data set: row count, column count, then both together.
NROW(credit)
## [1] 1000
NCOL(credit)
## [1] 17
dim(credit)
## [1] 1000 17
# Column names of the data set.
colnames(credit)
## [1] "checking_balance" "months_loan_duration" "credit_history"
## [4] "purpose" "amount" "savings_balance"
## [7] "employment_duration" "percent_of_income" "years_at_residence"
## [10] "age" "other_credit" "housing"
## [13] "existing_loans_count" "job" "dependents"
## [16] "phone" "default"
# Add a column with each loan amount expressed in euros (amount times 2).
# NOTE(review): presumably `amount` is in DM, as the "DM" labels in the balance
# columns suggest -- confirm that multiplying by 2 is the intended conversion.
credit[["amount.eur"]] <- 2 * credit[["amount"]]
# Preview the first six rows to check that the new column was appended.
head(credit, n = 6L)
## checking_balance months_loan_duration credit_history
## 1 < 0 DM 6 critical
## 2 1 - 200 DM 48 good
## 3 unknown 12 critical
## 4 < 0 DM 42 good
## 5 < 0 DM 24 poor
## 6 unknown 36 good
## purpose amount savings_balance employment_duration
## 1 furniture/appliances 1169 unknown > 7 years
## 2 furniture/appliances 5951 < 100 DM 1 - 4 years
## 3 education 2096 < 100 DM 4 - 7 years
## 4 furniture/appliances 7882 < 100 DM 4 - 7 years
## 5 car 4870 < 100 DM 1 - 4 years
## 6 education 9055 unknown 1 - 4 years
## percent_of_income years_at_residence age other_credit housing
## 1 4 4 67 none own
## 2 2 2 22 none own
## 3 2 3 49 none own
## 4 2 4 45 none other
## 5 3 4 53 none other
## 6 2 4 35 none other
## existing_loans_count job dependents phone default amount.eur
## 1 2 skilled 1 yes no 2338
## 2 1 skilled 1 no yes 11902
## 3 1 unskilled 2 no no 4192
## 4 1 skilled 2 no no 15764
## 5 2 skilled 2 no yes 9740
## 6 1 unskilled 2 yes no 18110
# Median, mean and standard deviation of the loan amount in euros.
with(credit, median(amount.eur))
## [1] 4639
with(credit, mean(amount.eur))
## [1] 6542.516
with(credit, sd(amount.eur))
## [1] 5645.474
# Number of loans per purpose, listed alphabetically by purpose.
table(credit[["purpose"]])
##
## business car education
## 97 349 59
## furniture/appliances renovations
## 473 22
# The same counts ordered from the rarest to the most frequent purpose.
sort(table(credit[["purpose"]]), decreasing = FALSE)
##
## renovations education business
## 22 59 97
## car furniture/appliances
## 349 473
# Result: furniture/appliances is the most common purpose for getting a loan
# (473 of the 1000 loans).
# Share of loans taken out for education or for a car. `%in%` tests membership
# in the set of purposes directly, so neither chained `==` comparisons nor a
# c() wrapper around the logical vector is needed. The mean of a logical vector
# is the proportion of TRUE values.
mean(credit$purpose %in% c("education", "car"))
## [1] 0.408
# Number of loans per credit-history category.
table(credit$credit_history)
##
## critical good perfect poor very good
## 293 530 40 88 49
# TRUE for every loan whose credit history is "good".
history.log <- credit$credit_history == "good"
#history.log
# Proportion of loans with a "good" history. The mean of a logical vector is
# already the share of TRUE values, so it is not compared against the string
# "TRUE" (which only worked through implicit logical-to-character coercion).
mean(history.log)
## [1] 0.53
# Mean loan amount (EUR) of borrowers with a "critical" credit history.
critical.log <- credit[["credit_history"]] == "critical"
critical.data <- credit[critical.log, ]
#critical.data
mean(critical.data[["amount.eur"]])
## [1] 6176.075
# Mean loan amount (EUR) of borrowers with a "good" credit history.
good.log <- credit[["credit_history"]] == "good"
good.data <- credit[good.log, ]
mean(good.data[["amount.eur"]])
## [1] 6081.917
# Column names again, now including the added amount.eur column.
colnames(credit)
## [1] "checking_balance" "months_loan_duration" "credit_history"
## [4] "purpose" "amount" "savings_balance"
## [7] "employment_duration" "percent_of_income" "years_at_residence"
## [10] "age" "other_credit" "housing"
## [13] "existing_loans_count" "job" "dependents"
## [16] "phone" "default" "amount.eur"
# 1. Correlation between borrower age and loan amount in euros
#    (the coefficient is close to zero).
with(credit, cor(age, amount.eur))
## [1] 0.03271642
# 2. Compare the average loan amount of people whose age is above the median
#    age with that of people whose age is below the median age.
median(credit$age)
## [1] 33
# Borrowers strictly older than the median age.
over33.log <- credit$age > median(credit$age)
over33.data <- credit[over33.log, ]
#over33.data
mean(over33.data$amount.eur)
## [1] 6708.62
# Borrowers strictly younger than the median age. This has to be `<`: the
# earlier `==` selected only the people who are exactly the median age.
under33.log <- credit$age < median(credit$age)
under33.data <- credit[under33.log, ]
#under33.data
mean(under33.data$amount.eur)
## NOTE(review): re-run to refresh this output. The previously recorded value
## (5318.667) was the mean for age == median, not for age < median.
# Are the two group means equal? all.equal() compares doubles with a numeric
# tolerance; setequal() is meant for comparing sets, not two numbers.
isTRUE(all.equal(mean(over33.data$amount.eur), mean(under33.data$amount.eur)))
## [1] FALSE
## NOTE(review): FALSE was recorded before the `<` fix; re-run to confirm.
# Rows whose savings balance is recorded as "< 100 DM".
lessthan100.log <- credit[["savings_balance"]] == "< 100 DM"
lessthan100.data <- credit[lessthan100.log, ]