# MAS 646
# January 14, 2019
# Linear regression fun
# Download kc_house_data.csv from the course bucket into a data frame.
kc <- read.csv(
  file = "https://s3.amazonaws.com/douglas2/MAS646/kc_house_data.csv"
)
## CODE BELOW IS DIRECTLY FROM SLIDES
# Simulate credit scores and 0/1 default indicators (seeded for repeatability)
set.seed(121)
n <- 1000
# Scores are normal draws centred at zero with a standard deviation of 40.
CreditScore <- rnorm(n, mean = 0, sd = 40)
# One Bernoulli draw per score; the default probability is
# 1 - logistic(2 + score / 15), so it falls as the score rises.
Default <- rbinom(n, size = 1, prob = 1 - plogis(2 + CreditScore / 15))
# Preview the first four rows with the score rounded to a whole number.
head(
  data.frame(Default = Default, CreditScore = round(CreditScore, digits = 0)),
  n = 4
)
##   Default CreditScore
## 1       0         -10
## 2       0           4
## 3       0           5
## 4       0          -3
# Scatter plot of the 0/1 outcome against the score
plot(CreditScore, Default)
# Least-squares straight-line fit of the 0/1 outcome on the score
m <- lm(Default ~ CreditScore)
plot(CreditScore, Default)
# Solid red line: the fitted regression line.
abline(reg = m, col = "red")
# Dashed horizontal line at the overall mean of Default (the "no model" line).
abline(h = mean(Default), lty = 2)
legend(
  "topright",
  legend = c("Linear Model", "No Model"),
  lty = c(1, 2),
  col = c("red", "black")
)

# Coefficient estimates and their p-values from the linear fit
coef(summary(m))[, c("Estimate", "Pr(>|t|)")]
##                Estimate      Pr(>|t|)
## (Intercept)  0.25752193 5.673027e-101
## CreditScore -0.00708646 9.316857e-115
# Redraw on wider axes (x in [-200, 200], y in [-1, 2]) so the straight line
# can be followed past the 0/1 outcomes.
plot(CreditScore, Default, xlim = c(-200, 200), ylim = c(-1, 2))
abline(reg = m, col = "red")

# Residuals of the linear fit against its fitted values
plot(m$fitted.values, m$residuals)

# Logistic regression (binomial GLM) of Default on CreditScore
m2 <- glm(Default ~ CreditScore, family = binomial)
plot(CreditScore, Default)
# Trace the GLM fitted values as a curve, visiting points by increasing score.
lines(sort(CreditScore), m2$fitted.values[order(CreditScore)], col = "red")
# Dashed blue line: the earlier straight-line fit, for comparison.
abline(reg = m, col = "blue", lty = 2)
legend(
  "topright",
  legend = c("LM", "GLM"),
  lty = c(2, 1),
  col = c("blue", "red")
)

# First ten fitted values from each model, rounded to two decimals
head(
  data.frame(
    Linear.Fits = round(m$fitted.values, digits = 2),
    Logistic.Fits = round(m2$fitted.values, digits = 2)
  ),
  n = 10
)
##    Linear.Fits Logistic.Fits
## 1         0.33          0.20
## 2         0.23          0.08
## 3         0.22          0.08
## 4         0.28          0.13
## 5         0.46          0.48
## 6        -0.20          0.00
## 7         0.17          0.05
## 8         0.27          0.12
## 9         0.57          0.73
## 10        0.00          0.01