library(ggplot2)
library(dplyr)
library(tidyr)
library(vcd)
library(GGally)
library(googleVis)
library(plotly)
library(ggridges)
# Load the loan-default data set; "Default.csv" is resolved relative to the
# current working directory.
default <- read.csv("Default.csv")

# Recode the integer-coded demographic columns as labelled factors. Any code
# not listed in `levels` becomes NA.
default$SEX <- factor(
  default$SEX,
  levels = c(1, 2),
  labels = c("Male", "Female")
)

# Education codes 0, 5 and 6 are folded into code 4 ("Other") before labelling.
default$EDUCATION[default$EDUCATION %in% c(0, 5, 6)] <- 4
default$EDUCATION <- factor(
  default$EDUCATION,
  levels = c(1, 2, 3, 4),
  labels = c("Graduate", "University", "High school", "Other"),
  ordered = TRUE
)

# Marriage code 0 is folded into code 3 ("Other").
default$MARRIAGE[default$MARRIAGE %in% 0] <- 3
default$MARRIAGE <- factor(
  default$MARRIAGE,
  levels = c(1, 2, 3),
  labels = c("Married", "Single", "Other")
)

# Response variable: 0 -> "No", 1 -> "Yes".
default$default <- factor(
  default$default,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

# Repayment-status columns, selected by position.
# NOTE(review): assumes columns 7-12 are PAY_0 and the other monthly status
# columns -- confirm against the CSV header.
pay_cols <- 7:12
pay_labels <- c("Pay Duly", paste0("Delay ", 1:9, "M"))

# Codes -2 and -1 are merged into 0 ("Pay Duly"); 1-9 become "Delay <n>M".
# An earlier version first recoded PAY_0 == 0 to 4, which labelled those
# customers "Delay 4M" in PAY_0 only; that recode has been removed so code 0
# is treated the same way in every status column.
default[pay_cols] <- lapply(default[pay_cols], function(status) {
  status[status %in% c(-2, -1)] <- 0
  factor(status, levels = 0:9, labels = pay_labels)
})
# Task: Design a multivariate data visualisation, using any method and
# combination of variables, that provides the most insight into the likelihood
# of a customer defaulting on a loan. Submit a knitted RMarkdown report to
# Canvas showing your code and visualisation. Submit your visualisation and
# code to the Slack site to share and receive feedback.
# Ridgeline density plot of customer age for each education level. Fill colour
# separates the levels of `default`; panels are laid out with marital status
# in rows and sex in columns.
ggplot(default, aes(AGE, EDUCATION, fill = default)) +
  geom_density_ridges2(
    scale = 1.5,
    alpha = 0.7,
    colour = "white",
    size = 0.2
  ) +
  facet_grid(rows = vars(MARRIAGE), cols = vars(SEX)) +
  # Colours are applied in the order of the levels of `default`.
  scale_fill_manual(values = c("#0076B8", "#D7611B")) +
  # Keep the padding around the data small on both axes.
  scale_x_continuous(expand = c(0.01, 0)) +
  scale_y_discrete(expand = c(0.01, 0)) +
  # Zoom the age axis to 18-68 without dropping observations.
  coord_cartesian(xlim = c(18, 68)) +
  labs(
    title = "Faceted Density Plots on Loan Defaults, s3644119",
    x = "Age",
    y = "Education"
  ) +
  # Anchor the education labels to the baseline of each ridge.
  theme(axis.text.y = element_text(vjust = 0))