# Load some packages: 
library(tidyverse)
library(broom)

# Load data: 
default <- as_tibble(ISLR::Default)

# Logistic Curve: 
theme_set(theme_bw())

default %>% 
  mutate(prob = case_when(default == "No" ~ 0, TRUE ~ 1)) %>% 
  ggplot(aes(balance, prob)) + 
  geom_point(aes(color = factor(prob)), show.legend = FALSE, alpha = 0.1) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial")) + 
  labs(title = "An Example of Logistic Function", 
       x = "Balance", 
       y = "Probability of Default")

default %>% 
  mutate(prob = case_when(default == "No" ~ 0, TRUE ~ 1)) %>% 
  ggplot(aes(balance, prob, color = student)) + 
  geom_point(alpha = 0.1) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial"), alpha = 0.15) + 
  labs(title = "An Example of Logistic Function", 
       subtitle = "The S-curve shows that students have a lower\nprobability of default than non-students given the balance", 
       x = "Balance", 
       y = "Probability of Default")

# Splot data: 

train <- default %>% 
  group_by(default) %>% 
  sample_frac(0.5, replace = FALSE)

test <- setdiff(default, train)

# Simple Logistic Regression and results: 
model1 <- glm(default ~ balance, family = "binomial", data = train)
model1 %>% tidy()
## # A tibble: 2 x 5
##   term         estimate std.error statistic  p.value
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept) -10.3      0.493        -21.0 1.64e-97
## 2 balance       0.00532  0.000304      17.5 1.40e-68
# ROC curve: 

library(pROC)
my_auc <- roc(test$default, predict(model1, test, type = "response"))
sen_spec_df <- data.frame(TPR = my_auc$sensitivities, FPR = 1 - my_auc$specificities)

sen_spec_df %>% 
  ggplot(aes(x = FPR, ymin = 0, ymax = TPR))+
  geom_polygon(aes(y = TPR), fill = "red", alpha = 0.3)+
  geom_path(aes(y = TPR), col = "firebrick", size = 1.2) +
  geom_abline(intercept = 0, slope = 1, color = "gray37", size = 1, linetype = "dashed") + 
  coord_equal() +
  labs(x = "FPR (1 - Specificity)", 
       y = "TPR (Sensitivity)", 
       title = "Model Performance for Logistic Classifier based on Test Data", 
       subtitle = paste0("AUC Value: ", my_auc$auc %>% round(2)))

LS0tDQp0aXRsZTogIlBlcmZvcm0gTG9naXN0aWMgUmVncmVzc2lvbiB3aXRoIFIiIA0Kc3VidGl0bGU6ICJSIGZvciBNeSBDbGFzcyINCmF1dGhvcjogIk5ndXllbiBDaGkgRHVuZyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDogDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGhpZ2hsaWdodDogcHlnbWVudHMNCiAgICAjIG51bWJlcl9zZWN0aW9uczogeWVzDQogICAgdGhlbWU6ICJmbGF0bHkiDQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQotLS0NCg0KYGBge3Igc2V0dXAsaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpDQpgYGANCg0KYGBge3J9DQoNCiMgTG9hZCBzb21lIHBhY2thZ2VzOiANCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShicm9vbSkNCg0KIyBMb2FkIGRhdGE6IA0KZGVmYXVsdCA8LSBhc190aWJibGUoSVNMUjo6RGVmYXVsdCkNCg0KIyBMb2dpc3RpYyBDdXJ2ZTogDQp0aGVtZV9zZXQodGhlbWVfYncoKSkNCg0KZGVmYXVsdCAlPiUgDQogIG11dGF0ZShwcm9iID0gY2FzZV93aGVuKGRlZmF1bHQgPT0gIk5vIiB+IDAsIFRSVUUgfiAxKSkgJT4lIA0KICBnZ3Bsb3QoYWVzKGJhbGFuY2UsIHByb2IpKSArIA0KICBnZW9tX3BvaW50KGFlcyhjb2xvciA9IGZhY3Rvcihwcm9iKSksIHNob3cubGVnZW5kID0gRkFMU0UsIGFscGhhID0gMC4xKSArIA0KICBnZW9tX3Ntb290aChtZXRob2QgPSAiZ2xtIiwgbWV0aG9kLmFyZ3MgPSBsaXN0KGZhbWlseSA9ICJiaW5vbWlhbCIpKSArIA0KICBsYWJzKHRpdGxlID0gIkFuIEV4YW1wbGUgb2YgTG9naXN0aWMgRnVuY3Rpb24iLCANCiAgICAgICB4ID0gIkJhbGFuY2UiLCANCiAgICAgICB5ID0gIlByb2JhYmlsaXR5IG9mIERlZmF1bHQiKQ0KICANCg0KZGVmYXVsdCAlPiUgDQogIG11dGF0ZShwcm9iID0gY2FzZV93aGVuKGRlZmF1bHQgPT0gIk5vIiB+IDAsIFRSVUUgfiAxKSkgJT4lIA0KICBnZ3Bsb3QoYWVzKGJhbGFuY2UsIHByb2IsIGNvbG9yID0gc3R1ZGVudCkpICsgDQogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjEpICsgDQogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJnbG0iLCBtZXRob2QuYXJncyA9IGxpc3QoZmFtaWx5ID0gImJpbm9taWFsIiksIGFscGhhID0gMC4xNSkgKyANCiAgbGFicyh0aXRsZSA9ICJBbiBFeGFtcGxlIG9mIExvZ2lzdGljIEZ1bmN0aW9uIiwgDQogICAgICAgc3VidGl0bGUgPSAiVGhlIFMtY3VydmUgc2hvd3MgdGhhdCBzdHVkZW50cyBoYXZlIGEgbG93ZXJcbnByb2JhYmlsaXR5IG9mIGRlZmF1bHQgdGhhbiBub24tc3R1ZGVudHMgZ2l2ZW4gdGhlIGJhbGFuY2UiLCANCiAgICAgICB4ID0gIkJhbGFuY2UiLCANCiAgICAgICB5ID0gIlByb2JhYmlsaXR5IG9mIERlZmF1bHQiKQ0KDQoNCiMgU3Bsb3QgZGF0YTogDQoNCnRyYWluIDwtIGRlZmF1bHQgJT4lIA0KICBncm91cF9ieShkZWZhdWx0KSAlPiUgDQogIHNhbXBsZV9mcmFjKDAuNSwgcmVwbGFjZSA9IEZBTFNFKQ0KDQp0ZXN0IDwtIHNldGRpZmYoZGVmYXVsdCwgdHJhaW4pDQoNCiMgU2ltcGxlIExvZ2lzdGljIFJlZ3Jlc3Npb24gYW5kIHJlc3VsdHM6IA0KbW9kZWwxIDwtIGdsbShkZWZhdWx0IH4gYmFsYW5jZSwgZmFtaWx5ID0gImJpbm9taWFsIiwgZGF0YSA9IHRyYWluKQ0KbW9kZWwxICU+JSB0aWR5KCkNCg0KIyBST0MgY3VydmU6IA0KDQpsaWJyYXJ5KHBST0MpDQpteV9hdWMgPC0gcm9jKHRlc3QkZGVmYXVsdCwgcHJlZGljdChtb2RlbDEsIHRlc3QsIHR5cGUgPSAicmVzcG9uc2UiKSkNCnNlbl9zcGVjX2RmIDwtIGRhdGEuZnJhbWUoVFBSID0gbXlfYXVjJHNlbnNpdGl2aXRpZXMsIEZQUiA9IDEgLSBteV9hdWMkc3BlY2lmaWNpdGllcykNCg0Kc2VuX3NwZWNfZGYgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBGUFIsIHltaW4gPSAwLCB5bWF4ID0gVFBSKSkrDQogIGdlb21fcG9seWdvbihhZXMoeSA9IFRQUiksIGZpbGwgPSAicmVkIiwgYWxwaGEgPSAwLjMpKw0KICBnZW9tX3BhdGgoYWVzKHkgPSBUUFIpLCBjb2wgPSAiZmlyZWJyaWNrIiwgc2l6ZSA9IDEuMikgKw0KICBnZW9tX2FibGluZShpbnRlcmNlcHQgPSAwLCBzbG9wZSA9IDEsIGNvbG9yID0gImdyYXkzNyIsIHNpemUgPSAxLCBsaW5ldHlwZSA9ICJkYXNoZWQiKSArIA0KICBjb29yZF9lcXVhbCgpICsNCiAgbGFicyh4ID0gIkZQUiAoMSAtIFNwZWNpZmljaXR5KSIsIA0KICAgICAgIHkgPSAiVFBSIChTZW5zaXRpdml0eSkiLCANCiAgICAgICB0aXRsZSA9ICJNb2RlbCBQZXJmb3JtYW5jZSBmb3IgTG9naXN0aWMgQ2xhc3NpZmllciBiYXNlZCBvbiBUZXN0IERhdGEiLCANCiAgICAgICBzdWJ0aXRsZSA9IHBhc3RlMCgiQVVDIFZhbHVlOiAiLCBteV9hdWMkYXVjICU+JSByb3VuZCgyKSkpDQpgYGANCg0K