WPA 4

Exercise 1

Download the dataframe credit.csv from http://nathanieldphillips.com/wp-content/uploads/2015/05/credit.txt. The data are stored in a comma-separated text file with headers. Load the dataframe into an object called credit.

# Read the comma-separated credit data (first row holds the column names)
# directly from the course website. Text columns stay character vectors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change the meaning of this call.
credit <- read.table(
  file = "http://nathanieldphillips.com/wp-content/uploads/2015/05/credit.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

Exercise 2 Use the column amount to create the following histogram.

Step 1: Create the histogram.
Step 2: Add the vertical line at the median using abline() or segments().
Step 3: Create the text to be added using paste().
Step 4: Add the text using text()

# Step 1: histogram of the loan amounts.
hist(
  x = credit$amount,
  main = "Personal loans by German borrowers",
  ylim = c(0, 500),
  xlab = "Loan size (in DM)"
)

# Step 2: dashed vertical line at the median loan amount. The median is
# computed once and reused for the line, the label and the label position.
median_amount <- median(credit$amount)
abline(v = median_amount, lty = 2)

# Step 3: build the label text. It is stored as median_label rather than t so
# the name of the base transpose function t() is not reused for a string.
median_label <- paste("Median =", median_amount)
median_label

## [1] "Median = 2319.5"

# Step 4: write the label to the right of (pos = 4) the median line at y = 400.
text(median_amount, 400, labels = median_label, pos = 4)

Exercise 3: Using the columns age and amount, create the following scatterplot.

# Scatterplot of loan amount against borrower age, drawn as solid grey points
# on fixed axis ranges (age 20-70, amount 0-15000).
plot(
  x = credit$age,
  y = credit$amount,
  type = "p",
  pch = 16,
  cex = 1,
  col = "grey",
  xlim = c(20, 70),
  ylim = c(0, 15000),
  xlab = "Borrower Age",
  ylab = "Loan amount (in DM)",
  main = "Borrower age and loan amount"
)

Exercise 4: Using the columns years_at_residence and amount, create the following beanplots using the beanplot package.

# Attach the beanplot package. library() stops with an error when the package
# is not installed, whereas require() would only return FALSE and leave the
# later beanplot() call to fail with a less helpful message.
library(beanplot)

# One bean per value of years_at_residence showing the distribution of loan
# amounts; the y axis is logarithmic (log = "y").
beanplot(
  amount ~ years_at_residence,
  data = credit,
  log = "y",
  what = c(1, 1, 1, 1),
  col = "white",
  lwd = 1,
  main = "Number of years at residence and loan amounts",
  xlab = "Years at Residence",
  ylab = "Loan amount in DM (log-transformed)"
)

Exercise 5: Using the columns job, months_loan_duration, and amount, create this plot:

Step 1: Create a blank plot
Step 2: Add gridlines with abline()
Step 3: Add red points for Skilled workers with points()
Step 4: Add blue points for Unskilled workers with points()
Step 5: Add legend with legend()

# Step 1: empty plotting region (type = "n" draws no points) with the final
# axis ranges, labels and title. The y-axis label previously read "Laon".
plot(
  x = 1,
  y = 1,
  type = "n",
  xlim = c(0, 80),
  ylim = c(0, 15000),
  xlab = "Loan Duration (in months)",
  ylab = "Loan Amount (in DM)",
  main = "Loan duration and amount of skilled and unskilled borrowers"
)

# Step 2: gridlines. Light grey minor lines every 1000 DM / 10 months, darker
# major lines every 5000 DM / 20 months. The drawing order is kept as
# horizontal minor, horizontal major, vertical minor, vertical major.
abline(h = seq(0, 15000, 1000), lwd = 1, col = gray(.8))
abline(h = seq(0, 15000, 5000), lwd = 1, col = gray(.4))
abline(v = seq(0, 80, 10), lwd = 1, col = gray(.8))
abline(v = seq(0, 80, 20), lwd = 1, col = gray(.4))

# Step 3: red points for borrowers whose job is "skilled".
# which() keeps only rows where the comparison is TRUE (rows with a missing
# job are left out).
skilled <- credit[which(credit$job == "skilled"), , drop = FALSE]
points(
  x = skilled$months_loan_duration,
  y = skilled$amount,
  col = "red",
  pch = 16
)

# Step 4: blue points for borrowers whose job is "unskilled".
unskilled <- credit[which(credit$job == "unskilled"), , drop = FALSE]
points(
  x = unskilled$months_loan_duration,
  y = unskilled$amount,
  col = "blue",
  pch = 16
)

# Step 5: legend in the top-right corner; the white background keeps the
# gridlines from showing through the legend box.
legend(
  "topright",
  legend = c("skilled", "unskilled"),
  pch = rep(16, 2),
  col = c("red", "blue"),
  bg = "white"
)

Exercise 6

Create the following plot from the default and credit_history columns, where the size of the large points indicates how many people there were in each category, and the middle of each point indicates the average default rate of each group.

Step 1: Calculate the mean default rate for each credit history value and store it as a vector called means.vec; calculate the number of people in each credit history category and store it as lengths.vec.
Step 2: Create the blank plot.
Step 3: Add the margin texts to the top and bottom using mtext().
Step 4: Add the large points, where the size of the points is a function of the number of borrowers in that category (hint: make the size argument a function of the length of the vector for each category, possibly divided by some value to keep the points from becoming too big…).
Step 5: Add the center points.
Step 6: Add the text showing the mean default rate for each category.
Step 7: Add the segments with the segments() function.

# Proportion of borrowers with default == "yes" among the rows of credit whose
# credit_history equals `history` (rows with a missing credit_history are
# left out by which()).
default_rate <- function(history) {
  in_group <- which(credit$credit_history == history)
  mean(credit$default[in_group] == "yes")
}

critical <- default_rate("critical")
critical

## [1] 0.1706485

poor <- default_rate("poor")
poor

## [1] 0.3181818

good <- default_rate("good")
good

## [1] 0.3188679

verygood <- default_rate("very good")
verygood

## [1] 0.5714286

perfect <- default_rate("perfect")
perfect

## [1] 0.625

# Mean default rates in the order critical, poor, good, very good, perfect.
means.vec <- c(critical, poor, good, verygood, perfect)
means.vec

## [1] 0.1706485 0.3181818 0.3188679 0.5714286 0.6250000

# Number of borrowers (rows of credit) whose credit_history equals `history`.
# This counts every borrower in the category, not only those who defaulted:
# the earlier length(... == "yes") form returned the same number, but its
# comparison had no effect on the result and suggested defaults were counted.
group_size <- function(history) {
  sum(credit$credit_history == history, na.rm = TRUE)
}

l.critical <- group_size("critical")
l.critical

## [1] 293

l.poor <- group_size("poor")
l.poor

## [1] 88

l.good <- group_size("good")
l.good

## [1] 530

l.verygood <- group_size("very good")
l.verygood

## [1] 49

l.perfect <- group_size("perfect")
l.perfect

## [1] 40

# Group sizes in the order critical, poor, good, very good, perfect.
lengths.vec <- c(l.critical, l.poor, l.good, l.verygood, l.perfect)
lengths.vec

## [1] 293  88 530  49  40

WPA 4

Sabrina Englert

Mai 2015