# MAS 646
# January 14, 2019


# Linear regression fun
# Load the house data CSV straight from its S3 URL into a data frame.
kc <- read.csv(
  file = "https://s3.amazonaws.com/douglas2/MAS646/kc_house_data.csv"
)









## CODE BELOW IS DIRECTLY FROM SLIDES

# Simulated data for credit scores and defaults
# The seed is fixed so every run produces the same draws.
set.seed(121)
n <- 1000

# Credit scores: normal draws centred at 0 with standard deviation 40.
CreditScore <- rnorm(n, mean = 0, sd = 40)

# Probability of default shrinks as the credit score grows
# (one minus the logistic CDF of a linear function of the score).
p_default <- 1 - plogis(2 + CreditScore / 15)

# One Bernoulli draw per simulated borrower: 1 = default, 0 = no default.
Default <- rbinom(n, size = 1, prob = p_default)

# Preview the first four rows, showing scores rounded to whole numbers.
head(data.frame(Default = Default, CreditScore = round(CreditScore)), 4)
##   Default CreditScore
## 1       0         -10
## 2       0           4
## 3       0           5
## 4       0          -3
# Look at data
plot(Default ~ CreditScore)

# Fit linear model
# Ordinary least squares on the 0/1 default indicator.
m <- lm(Default ~ CreditScore)

# Scatter plot with the fitted line (red) and the overall default rate
# (dashed horizontal line) as a "no model" baseline.
plot(Default ~ CreditScore)
abline(m, col = "red")
abline(h = mean(Default), lty = "dashed")
legend(
  "topright",
  legend = c("Linear Model", "No Model"),
  col = c("red", "black"),
  lty = c("solid", "dashed")
)

# Coefficient estimates alongside their p-values.
coef(summary(m))[, c("Estimate", "Pr(>|t|)")]
##                Estimate      Pr(>|t|)
## (Intercept)  0.25752193 5.673027e-101
## CreditScore -0.00708646 9.316857e-115
# Widen both axes so the fitted line can be followed beyond the observed data
# and outside the 0-1 range of the response.
plot(Default ~ CreditScore, xlim = c(-200, 200), ylim = c(-1, 2))
abline(m, col = "red")

# Residuals against fitted values.
plot(m$residuals ~ m$fitted.values)

# Fit GLM
# Logistic regression of the default indicator on credit score.
m2 <- glm(Default ~ CreditScore, family = binomial)

plot(Default ~ CreditScore)

# Draw the fitted probabilities as a curve; points must be visited in
# increasing score order, otherwise the line zig-zags across the plot.
score_order <- order(CreditScore)
lines(CreditScore[score_order], fitted(m2)[score_order], col = "red")

# Overlay the linear fit (dashed blue) for comparison.
abline(m, col = "blue", lty = "dashed")
legend(
  "topright",
  legend = c("LM", "GLM"),
  col = c("blue", "red"),
  lty = c("dashed", "solid")
)

# Side-by-side fitted values from both models for the first ten observations.
head(
  data.frame(
    Linear.Fits = round(fitted(m), 2),
    Logistic.Fits = round(fitted(m2), 2)
  ),
  10
)
##    Linear.Fits Logistic.Fits
## 1         0.33          0.20
## 2         0.23          0.08
## 3         0.22          0.08
## 4         0.28          0.13
## 5         0.46          0.48
## 6        -0.20          0.00
## 7         0.17          0.05
## 8         0.27          0.12
## 9         0.57          0.73
## 10        0.00          0.01