# Load the riding-mower data set used by the rest of this script.
#
# The assignment folder below is specific to one machine. Switch to it only
# when it exists, so the script still runs unchanged there but falls back to
# the current working directory everywhere else instead of aborting.
assignment_dir <-
  "C:/Users/Nick/Desktop/School/Year 4/Winter/Data Science/Assignment 5"
if (dir.exists(assignment_dir)) {
  setwd(assignment_dir)
}

# read.csv() already returns a data frame, so no as.data.frame() wrapper is
# needed. The file is resolved relative to the working directory.
RidingMowers_input <- read.csv("RidingMowers.csv")

# Quick look at the first rows (recorded console output follows).
head(RidingMowers_input)
##   Income Lot_Size Ownership Ownership_Binary
## 1   60.0     18.4     Owner                1
## 2   85.5     16.8     Owner                1
## 3   64.8     21.6     Owner                1
## 4   61.5     20.8     Owner                1
## 5   87.0     23.6     Owner                1
## 6  110.1     19.2     Owner                1

# Total of the Income column (recorded console output follows).
sum(RidingMowers_input$Income)
## [1] 1642.5
# Logistic regressions of Ownership_Binary on lot size, on income, and on both.
#
# family = binomial is required here: glm() defaults to family = gaussian,
# which fits an ordinary linear model to the 0/1 response rather than a
# logistic regression.
#
# The console output previously pasted below each summary() call came from
# the gaussian fits and no longer describes these models, so it is omitted;
# re-run the script to regenerate it. Any recorded output elsewhere in this
# script that depends on these models must be regenerated as well.

# Model 1: lot size only.
RidingMowers_logistic1 <- glm(
  Ownership_Binary ~ Lot_Size,
  family = binomial,
  data = RidingMowers_input
)
summary(RidingMowers_logistic1)

# Model 2: income only.
RidingMowers_logistic2 <- glm(
  Ownership_Binary ~ Income,
  family = binomial,
  data = RidingMowers_input
)
summary(RidingMowers_logistic2)

# Model 3: lot size and income together.
RidingMowers_logistic3 <- glm(
  Ownership_Binary ~ Lot_Size + Income,
  family = binomial,
  data = RidingMowers_input
)
summary(RidingMowers_logistic3)

# The income-only model is printed a second time, as in the original script.
summary(RidingMowers_logistic2)
# Upper-tail p-value of a chi-squared statistic of 0.9 on 1 degree of freedom.
# Arguments are spelled out in full: `lower = FALSE` only worked through
# partial matching of `lower.tail`.
# NOTE(review): the statistic 0.9 is hard-coded and its origin is not shown in
# this script; presumably a deviance difference between two of the fitted
# models -- confirm, and compute it from the models if so.
pchisq(0.9, df = 1, lower.tail = FALSE)
## [1] 0.3427817
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
#' Plot and summarise the ROC behaviour of a fitted model.
#'
#' Draws two figures and prints one line per requested threshold:
#'   1. the ROC curve (TPR against FPR), titled with the area under the curve;
#'   2. TPR and FPR against the classification threshold, with the FPR curve
#'      overlaid on the TPR plot and given its own right-hand axis;
#'   3. the TPR and FPR at the cutoff closest to each requested threshold.
#'
#' The closest finite cutoff is always reported. An exact match on the
#' rounded cutoffs silently produced an empty line whenever no cutoff happened
#' to round to the requested value. The printed "Threshold=" value is the
#' cutoff actually used, which may differ from the one requested.
#'
#' Requires the ROCR package to be attached (prediction(), performance()).
#'
#' @param model A fitted model for which `predict(model, type = "response")`
#'   returns one score per observation.
#' @param labels Observed class labels, one per score, passed to
#'   `ROCR::prediction()`.
#' @param thresholds Numeric vector of thresholds to report on.
#' @return Invisibly, a data frame with one row per requested threshold and
#'   columns `requested`, `threshold`, `tpr` and `fpr`.
report_roc <- function(model, labels, thresholds = c(0.6, 0.19)) {
  scores <- predict(model, type = "response")
  pred_obj <- prediction(scores, labels)

  roc_obj <- performance(pred_obj, measure = "tpr", x.measure = "fpr")
  auc_obj <- performance(pred_obj, measure = "auc")

  # Figure 1: ROC curve with the AUC in the title.
  plot(
    roc_obj,
    main = paste("Area under the curve:", round(auc_obj@y.values[[1]], 4))
  )

  # Cutoffs and the rates observed at each of them, rounded for display.
  alpha <- round(as.numeric(unlist(roc_obj@alpha.values)), 4)
  fpr <- round(as.numeric(unlist(roc_obj@x.values)), 4)
  tpr <- round(as.numeric(unlist(roc_obj@y.values)), 4)

  # Figure 2: widen the right margin for the second axis, and put the
  # previous graphics settings back on exit so later plots are unaffected.
  old_par <- par(mar = c(5, 5, 2, 5))
  on.exit(par(old_par), add = TRUE)

  plot(
    alpha, tpr,
    xlab = "Threshold", xlim = c(0, 1),
    ylab = "True positive rate", type = "l"
  )
  # Draw the next plot on top of the current one instead of starting a new one.
  par(new = TRUE)
  plot(
    alpha, fpr,
    xlab = "", ylab = "", axes = FALSE,
    xlim = c(0, 1), type = "l"
  )
  axis(side = 4)
  mtext(side = 4, line = 3, "False positive rate")

  # Curve labels at fixed positions, carried over unchanged.
  text(0.18, 0.18, "FPR")
  text(0.58, 0.58, "TPR")

  # Only finite cutoffs are candidates, so a non-finite cutoff can never be
  # selected as the "nearest" one.
  usable <- which(is.finite(alpha))
  if (length(usable) == 0L) {
    stop("no finite cutoffs available to report on", call. = FALSE)
  }
  nearest <- vapply(
    thresholds,
    function(target) usable[which.min(abs(alpha[usable] - target))],
    integer(1)
  )

  # print() is needed because values are not auto-printed inside a function.
  for (k in nearest) {
    print(paste("Threshold=", alpha[k], " TPR=", tpr[k], "FPR=", fpr[k]))
  }

  invisible(data.frame(
    requested = thresholds,
    threshold = alpha[nearest],
    tpr = tpr[nearest],
    fpr = fpr[nearest]
  ))
}

# ROC analysis for each model, in the original order: both predictors,
# income only, lot size only. Recorded console output is omitted because the
# threshold lookup now always reports a cutoff; re-run to regenerate it.
roc_logistic3 <- report_roc(
  RidingMowers_logistic3,
  RidingMowers_input$Ownership_Binary
)
roc_logistic2 <- report_roc(
  RidingMowers_logistic2,
  RidingMowers_input$Ownership_Binary
)
roc_logistic1 <- report_roc(
  RidingMowers_logistic1,
  RidingMowers_input$Ownership_Binary
)