# Point the session at the assignment folder so the relative CSV path below
# resolves.
# NOTE(review): this absolute path is machine-specific; the script will not
# run elsewhere without editing it.
setwd("C:/Users/Nick/Desktop/School/Year 4/Winter/Data Science/Assignment 5")
# read.csv() already returns a data.frame, so no extra coercion is needed.
RidingMowers_input <- read.csv(file = "RidingMowers.csv")
# Preview the first six rows.
head(RidingMowers_input, n = 6L)
## Income Lot_Size Ownership Ownership_Binary
## 1 60.0 18.4 Owner 1
## 2 85.5 16.8 Owner 1
## 3 64.8 21.6 Owner 1
## 4 61.5 20.8 Owner 1
## 5 87.0 23.6 Owner 1
## 6 110.1 19.2 Owner 1
# Total of the Income column across all rows.
sum(RidingMowers_input[["Income"]])
## [1] 1642.5
# Logistic regression of mower ownership on lot size.
# `family = binomial` is required here: glm() defaults to gaussian(), which
# fits an ordinary least-squares line to the response instead of a logit
# model. Assumes Ownership_Binary is coded 0/1 -- confirm against the CSV.
# NOTE: the pasted summary that follows this call was produced by the earlier
# gaussian-family fit; re-run the script to refresh it.
RidingMowers_logistic1 <- glm(
  Ownership_Binary ~ Lot_Size,
  family = binomial(link = "logit"),
  data = RidingMowers_input
)
summary(RidingMowers_logistic1)
##
## Call:
## glm(formula = Ownership_Binary ~ Lot_Size, data = RidingMowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.71553 -0.34272 0.03008 0.30777 0.75048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.70771 0.71307 -2.395 0.02558 *
## Lot_Size 0.11650 0.03734 3.120 0.00498 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1890577)
##
## Null deviance: 6.0000 on 23 degrees of freedom
## Residual deviance: 4.1593 on 22 degrees of freedom
## AIC: 32.044
##
## Number of Fisher Scoring iterations: 2
# Logistic regression of mower ownership on income.
# `family = binomial` is required here: glm() defaults to gaussian(), which
# fits an ordinary least-squares line to the response instead of a logit
# model. Assumes Ownership_Binary is coded 0/1 -- confirm against the CSV.
# NOTE: the pasted summary that follows this call was produced by the earlier
# gaussian-family fit; re-run the script to refresh it.
RidingMowers_logistic2 <- glm(
  Ownership_Binary ~ Income,
  family = binomial(link = "logit"),
  data = RidingMowers_input
)
summary(RidingMowers_logistic2)
##
## Call:
## glm(formula = Ownership_Binary ~ Income, data = RidingMowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.72876 -0.29439 -0.09697 0.29550 0.75632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.505979 0.321619 -1.573 0.12994
## Income 0.014699 0.004522 3.251 0.00367 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1842312)
##
## Null deviance: 6.0000 on 23 degrees of freedom
## Residual deviance: 4.0531 on 22 degrees of freedom
## AIC: 31.423
##
## Number of Fisher Scoring iterations: 2
# Logistic regression of mower ownership on lot size and income together.
# `family = binomial` is required here: glm() defaults to gaussian(), which
# fits an ordinary least-squares plane to the response instead of a logit
# model. Assumes Ownership_Binary is coded 0/1 -- confirm against the CSV.
# NOTE: the pasted summary that follows this call was produced by the earlier
# gaussian-family fit; re-run the script to refresh it.
RidingMowers_logistic3 <- glm(
  Ownership_Binary ~ Lot_Size + Income,
  family = binomial(link = "logit"),
  data = RidingMowers_input
)
summary(RidingMowers_logistic3)
##
## Call:
## glm(formula = Ownership_Binary ~ Lot_Size + Income, data = RidingMowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6470 -0.2155 -0.0217 0.2494 0.6608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.235501 0.617215 -3.622 0.00160 **
## Lot_Size 0.098804 0.031650 3.122 0.00516 **
## Income 0.012612 0.003883 3.248 0.00385 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1318273)
##
## Null deviance: 6.0000 on 23 degrees of freedom
## Residual deviance: 2.7684 on 21 degrees of freedom
## AIC: 24.274
##
## Number of Fisher Scoring iterations: 2
# Re-display the summary of the Income-only model (same call as made right
# after that model was fitted earlier in this script).
summary(RidingMowers_logistic2)
##
## Call:
## glm(formula = Ownership_Binary ~ Income, data = RidingMowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.72876 -0.29439 -0.09697 0.29550 0.75632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.505979 0.321619 -1.573 0.12994
## Income 0.014699 0.004522 3.251 0.00367 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1842312)
##
## Null deviance: 6.0000 on 23 degrees of freedom
## Residual deviance: 4.0531 on 22 degrees of freedom
## AIC: 31.423
##
## Number of Fisher Scoring iterations: 2
# Upper-tail chi-square probability P(X > 0.9) with 1 degree of freedom.
# Arguments are spelled out in full: the original `lower=` only worked through
# partial matching of `lower.tail`.
# NOTE(review): 0.9 is a hard-coded statistic; its origin is not shown in this
# script -- confirm which model comparison it comes from.
pchisq(0.9, df = 1, lower.tail = FALSE)
## [1] 0.3427817
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# ROC analysis for the two-predictor model (Lot_Size + Income).
# Fitted values on the response scale serve as the classification scores.
pred <- predict(RidingMowers_logistic3, type = "response")
predObj <- prediction(pred, RidingMowers_input$Ownership_Binary)
rocObj <- performance(predObj, measure = "tpr", x.measure = "fpr")
aucObj <- performance(predObj, measure = "auc")
plot(
  rocObj,
  main = paste("Area under the curve:", round(aucObj@y.values[[1]], 4))
)

# Flatten the ROC slots into plain vectors, one entry per ROC point:
# cutoff (alpha), false positive rate and true positive rate.
alpha <- round(as.numeric(unlist(rocObj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocObj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocObj@y.values)), 4)

# Overlay TPR (left axis) and FPR (right axis) against the cutoff.
# The widened right margin makes room for the second axis label; the previous
# margins are restored afterwards so later plots are not affected.
old_par <- par(mar = c(5, 5, 2, 5))
plot(
  alpha, tpr,
  xlab = "Threshold", xlim = c(0, 1),
  ylab = "True positive rate", type = "l"
)
par(new = TRUE) # draw the next plot on top of the current one
plot(
  alpha, fpr,
  xlab = "", ylab = "", axes = FALSE,
  xlim = c(0, 1), type = "l"
)
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
text(0.18, 0.18, "FPR")
text(0.58, 0.58, "TPR")
par(old_par)

# Report the ROC point whose cutoff is closest to 0.6. A nearest-value lookup
# always yields a point, whereas an exact match on the rounded cutoff returns
# nothing when no cutoff happens to round to the target.
i <- which.min(abs(alpha - 0.6))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= 0.5953 TPR= 0.75 FPR= 0.0833"
# Report the ROC point whose cutoff is closest to 0.19. The earlier exact match
# on round(alpha, 2) found no cutoff and printed an empty report (see the
# pasted output that follows); a nearest-value lookup always yields a point.
i <- which.min(abs(alpha - 0.19))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= TPR= FPR= "
# ROC analysis for the Income-only model.
# Fitted values on the response scale serve as the classification scores.
pred <- predict(RidingMowers_logistic2, type = "response")
predObj <- prediction(pred, RidingMowers_input$Ownership_Binary)
rocObj <- performance(predObj, measure = "tpr", x.measure = "fpr")
aucObj <- performance(predObj, measure = "auc")
plot(
  rocObj,
  main = paste("Area under the curve:", round(aucObj@y.values[[1]], 4))
)

# Flatten the ROC slots into plain vectors, one entry per ROC point:
# cutoff (alpha), false positive rate and true positive rate.
alpha <- round(as.numeric(unlist(rocObj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocObj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocObj@y.values)), 4)

# Overlay TPR (left axis) and FPR (right axis) against the cutoff.
# The widened right margin makes room for the second axis label; the previous
# margins are restored afterwards so later plots are not affected.
old_par <- par(mar = c(5, 5, 2, 5))
plot(
  alpha, tpr,
  xlab = "Threshold", xlim = c(0, 1),
  ylab = "True positive rate", type = "l"
)
par(new = TRUE) # draw the next plot on top of the current one
plot(
  alpha, fpr,
  xlab = "", ylab = "", axes = FALSE,
  xlim = c(0, 1), type = "l"
)
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
text(0.18, 0.18, "FPR")
text(0.58, 0.58, "TPR")
par(old_par)

# Report the ROC point whose cutoff is closest to 0.6. A nearest-value lookup
# always yields a point, whereas an exact match on the rounded cutoff returns
# nothing when no cutoff happens to round to the target.
i <- which.min(abs(alpha - 0.6))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= 0.5965 TPR= 0.5833 FPR= 0.1667"
# Report the ROC point whose cutoff is closest to 0.19. A nearest-value lookup
# always yields a point, whereas an exact match on round(alpha, 2) returns
# nothing when no cutoff happens to round to the target.
i <- which.min(abs(alpha - 0.19))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= 0.1908 TPR= 1 FPR= 0.8333"
# ROC analysis for the Lot_Size-only model.
# Fitted values on the response scale serve as the classification scores.
pred <- predict(RidingMowers_logistic1, type = "response")
predObj <- prediction(pred, RidingMowers_input$Ownership_Binary)
rocObj <- performance(predObj, measure = "tpr", x.measure = "fpr")
aucObj <- performance(predObj, measure = "auc")
plot(
  rocObj,
  main = paste("Area under the curve:", round(aucObj@y.values[[1]], 4))
)

# Flatten the ROC slots into plain vectors, one entry per ROC point:
# cutoff (alpha), false positive rate and true positive rate.
alpha <- round(as.numeric(unlist(rocObj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocObj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocObj@y.values)), 4)

# Overlay TPR (left axis) and FPR (right axis) against the cutoff.
# The widened right margin makes room for the second axis label; the previous
# margins are restored afterwards so later plots are not affected.
old_par <- par(mar = c(5, 5, 2, 5))
plot(
  alpha, tpr,
  xlab = "Threshold", xlim = c(0, 1),
  ylab = "True positive rate", type = "l"
)
par(new = TRUE) # draw the next plot on top of the current one
plot(
  alpha, fpr,
  xlab = "", ylab = "", axes = FALSE,
  xlim = c(0, 1), type = "l"
)
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
text(0.18, 0.18, "FPR")
text(0.58, 0.58, "TPR")
par(old_par)

# Report the ROC point whose cutoff is closest to 0.6. The earlier exact match
# on round(alpha, 2) found no cutoff and printed an empty report (see the
# pasted output that follows); a nearest-value lookup always yields a point.
i <- which.min(abs(alpha - 0.6))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= TPR= FPR= "
# Report the ROC point whose cutoff is closest to 0.19. The earlier exact match
# on round(alpha, 2) found no cutoff and printed an empty report (see the
# pasted output that follows); a nearest-value lookup always yields a point.
i <- which.min(abs(alpha - 0.19))
paste("Threshold=", alpha[i], " TPR=", tpr[i], "FPR=", fpr[i])
## [1] "Threshold= TPR= FPR= "