#' ---
#' title: "Logistic regression"
#' author: "leela"
#' date: "May 24, 2016"
#' output: html_document
#' ---
# Load binary.csv into a data frame and print per-column summary statistics.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running this script on another machine.
binary <- read.csv("C:/Users/leelavathi.a/Downloads/binary.csv")
summary(binary)
# Randomly pick 70% of the row indices for the training set; the remaining
# rows form the validation set. floor() makes the truncation of a fractional
# sample size explicit.
# NOTE(review): no RNG seed is set in this step, so the split changes from
# run to run unless a seed is set elsewhere.
dt <- sort(sample(nrow(binary), floor(nrow(binary) * 0.7)))
train <- binary[dt, ]
val <- binary[-dt, ]
# Fit a logistic regression of `admit` on every other column of `train`
# and print its coefficient table.
mylogistic <- glm(admit ~ ., data = train, family = "binomial")
summary(mylogistic)$coefficients

# Stepwise model selection starting from the full model. With no `scope`
# supplied, step() searches backward only.
mylogit <- step(mylogistic)
# Coefficient table of the selected model (estimate, std. error, z, p-value).
summary.coeff0 <- summary(mylogit)$coefficients

# Odds ratios are the exponentiated coefficients.
OddRatio <- exp(coef(mylogit))

# Combine variable names, odds ratios and the coefficient table.
# A data frame is used instead of cbind() on a character vector plus a numeric
# matrix: cbind() would coerce every number to a string. check.names = FALSE
# keeps the original column labels such as "Std. Error" and "Pr(>|z|)".
summary.coeff <- data.frame(
  Variable = row.names(summary.coeff0),
  OddRatio = OddRatio,
  summary.coeff0,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
row.names(summary.coeff) <- NULL
#' Standardized coefficients of a fitted regression model
#'
#' Rescales each non-intercept coefficient estimate by the ratio of the
#' standard deviation of its predictor to the standard deviation of the
#' response, both taken from the model frame stored in the fitted object.
#'
#' @param regmodel A fitted model whose `summary()` has a `coefficients`
#'   matrix (estimates in column 1, first row dropped as the intercept) and
#'   which stores its model frame in `regmodel$model` (response in column 1).
#' @return A numeric vector with one standardized coefficient per
#'   non-intercept term; empty when the model has no such terms.
#' @note Assumes every model-frame column is numeric and maps to exactly one
#'   coefficient row; factor predictors would break that alignment -- TODO
#'   confirm against the data used.
stdz.coff <- function(regmodel) {
  # Column 1 holds the estimates; row 1 (the intercept) is excluded.
  b <- summary(regmodel)$coefficients[-1, 1]
  # vapply() guarantees a numeric vector even when there are no predictors.
  sx <- vapply(regmodel$model[-1], sd, numeric(1))
  sy <- vapply(regmodel$model[1], sd, numeric(1))
  b * sx / sy
}
# Standardized coefficients of the selected model as a two-column table
# (Variable, Standardized.Coeff); the variable names come from the row names.
std.Coeff <- data.frame(Standardized.Coeff = stdz.coff(mylogit))
std.Coeff <- cbind(Variable = row.names(std.Coeff), std.Coeff)
row.names(std.Coeff) <- NULL

# Left join on Variable: every row of the coefficient summary is kept, and
# rows with no standardized coefficient (e.g. the intercept) get NA.
final <- merge(summary.coeff, std.Coeff, by = "Variable", all.x = TRUE)
# Predicted probabilities on the validation set, appended to the validation
# data as column `pred`.
pred <- predict(mylogit, newdata = val, type = "response")
finaldata <- cbind(val, pred)

# ROCR prediction object pairing the predicted probabilities with the
# observed `admit` labels.
library(ROCR)
pred_val <- prediction(pred, finaldata$admit)

# Accuracy at every candidate cutoff; keep the cutoff with the highest
# accuracy (which.max() returns the first one on ties).
acc.perf <- performance(pred_val, "acc")
acc_values <- slot(acc.perf, "y.values")[[1]]
cutoff_values <- slot(acc.perf, "x.values")[[1]]
ind <- which.max(acc_values)
acc <- acc_values[ind]
cutoff <- cutoff_values[ind]

print(c(accuracy = acc, cutoff = cutoff))
# Area under the ROC curve on the validation set; printing the performance
# object shows the value stored in its y.values slot.
perf_val <- performance(pred_val, "auc")
perf_val

# Lift chart: lift against the rate of positive predictions.
plot(
  performance(pred_val, measure = "lift", x.measure = "rpp"),
  colorize = TRUE
)

# ROC curve: true positive rate against false positive rate.
perf_val2 <- performance(pred_val, "tpr", "fpr")
plot(perf_val2, col = "green", lwd = 1.5)

# Kolmogorov-Smirnov statistic: the largest gap between the true positive
# rate (y.values) and the false positive rate (x.values) over all cutoffs.
ks1.tree <- max(
  slot(perf_val2, "y.values")[[1]] - slot(perf_val2, "x.values")[[1]]
)
ks1.tree