library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
Load in data
# Read the riding-mower data set; read.csv() already returns a data.frame.
mowers_input <- read.csv(file = "RidingMowers.csv")
Predicting ownership as a function of Lot Size
# Logistic regression (binomial family, logit link) of Ownership on Lot_Size,
# followed by the coefficient table and deviance statistics for the fit.
mowers_log1 <- glm(
  formula = Ownership ~ Lot_Size,
  family = binomial(link = "logit"),
  data = mowers_input
)
summary(mowers_log1)
##
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"),
## data = mowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.69488 -0.82973 0.01745 0.78175 1.80154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.1872 5.2407 -2.326 0.0200 *
## Lot_Size 0.6419 0.2742 2.341 0.0192 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 33.271 on 23 degrees of freedom
## Residual deviance: 24.718 on 22 degrees of freedom
## AIC: 28.718
##
## Number of Fisher Scoring iterations: 4
# Evaluate the Lot_Size model on the training data with ROCR.
# type = "response" yields predictions on the response (probability) scale.
# NOTE: pred/predobj/rocobj/aucobj/alpha/fpr/tpr are reused (overwritten) by
# the later model sections, so inspect them before running the next model.
pred <- predict(mowers_log1, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")
aucobj <- performance(predobj, measure = "auc")
# Report the AUC; it was previously computed but never displayed.
cat("AUC (Lot_Size model):", round(unlist(aucobj@y.values), 4), "\n")
# extract the alpha (threshold), FPR, TPR values from rocobj
alpha <- round(as.numeric(unlist(rocobj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocobj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocobj@y.values)), 4)
# adjust margins (extra room on the right for the second axis label) and
# plot TPR and FPR against the classification threshold
par(mar = c(5, 5, 2, 4))
plot(alpha, tpr,
  type = "l", xlim = c(0, 1), ylim = c(0, 1),
  xlab = "Threshold", ylab = "True positive rate",
  main = "Ownership ~ Lot_Size"
)
lines(alpha, fpr, lty = 2)
# Both rates share the [0, 1] scale, so the right-hand axis mirrors the left.
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
legend("topright", legend = c("TPR", "FPR"), lty = c(1, 2))
Predicting ownership as a function of Income
# Logistic regression (binomial family, logit link) of Ownership on Income.
mowers_log2 <- glm(
  formula = Ownership ~ Income,
  family = binomial(link = "logit"),
  data = mowers_input
)
# Summarise the model just fitted. The original summarised mowers_log1 here
# (copy-paste slip), so the Income model's coefficients were never shown.
# NOTE: the output recorded below this call was produced by
# summary(mowers_log1) and must be regenerated.
summary(mowers_log2)
##
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"),
## data = mowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.69488 -0.82973 0.01745 0.78175 1.80154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.1872 5.2407 -2.326 0.0200 *
## Lot_Size 0.6419 0.2742 2.341 0.0192 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 33.271 on 23 degrees of freedom
## Residual deviance: 24.718 on 22 degrees of freedom
## AIC: 28.718
##
## Number of Fisher Scoring iterations: 4
# Evaluate the Income model on the training data with ROCR.
# type = "response" yields predictions on the response (probability) scale.
# NOTE: pred/predobj/rocobj/aucobj/alpha/fpr/tpr are shared with the other
# model sections and are overwritten here.
pred <- predict(mowers_log2, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")
aucobj <- performance(predobj, measure = "auc")
# Report the AUC; it was previously computed but never displayed.
cat("AUC (Income model):", round(unlist(aucobj@y.values), 4), "\n")
# extract the alpha (threshold), FPR, TPR values from rocobj
alpha <- round(as.numeric(unlist(rocobj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocobj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocobj@y.values)), 4)
# adjust margins (extra room on the right for the second axis label) and
# plot TPR and FPR against the classification threshold
par(mar = c(5, 5, 2, 4))
plot(alpha, tpr,
  type = "l", xlim = c(0, 1), ylim = c(0, 1),
  xlab = "Threshold", ylab = "True positive rate",
  main = "Ownership ~ Income"
)
lines(alpha, fpr, lty = 2)
# Both rates share the [0, 1] scale, so the right-hand axis mirrors the left.
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
legend("topright", legend = c("TPR", "FPR"), lty = c(1, 2))
Predicting ownership as a function of both Income and Lot Size
# Logistic regression (binomial family, logit link) of Ownership on both
# Lot_Size and Income.
mowers_log3 <- glm(
  formula = Ownership ~ Lot_Size + Income,
  family = binomial(link = "logit"),
  data = mowers_input
)
# Summarise the model just fitted. The original summarised mowers_log1 here
# (copy-paste slip), so the two-predictor model's coefficients were never shown.
# NOTE: the output recorded below this call was produced by
# summary(mowers_log1) and must be regenerated.
summary(mowers_log3)
##
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"),
## data = mowers_input)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.69488 -0.82973 0.01745 0.78175 1.80154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.1872 5.2407 -2.326 0.0200 *
## Lot_Size 0.6419 0.2742 2.341 0.0192 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 33.271 on 23 degrees of freedom
## Residual deviance: 24.718 on 22 degrees of freedom
## AIC: 28.718
##
## Number of Fisher Scoring iterations: 4
# Evaluate the Lot_Size + Income model on the training data with ROCR.
# type = "response" yields predictions on the response (probability) scale.
# NOTE: pred/predobj/rocobj/aucobj/alpha/fpr/tpr are shared with the other
# model sections and are overwritten here.
pred <- predict(mowers_log3, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")
aucobj <- performance(predobj, measure = "auc")
# Report the AUC; it was previously computed but never displayed.
cat("AUC (Lot_Size + Income model):", round(unlist(aucobj@y.values), 4), "\n")
# extract the alpha (threshold), FPR, TPR values from rocobj
alpha <- round(as.numeric(unlist(rocobj@alpha.values)), 4)
fpr <- round(as.numeric(unlist(rocobj@x.values)), 4)
tpr <- round(as.numeric(unlist(rocobj@y.values)), 4)
# adjust margins (extra room on the right for the second axis label) and
# plot TPR and FPR against the classification threshold
par(mar = c(5, 5, 2, 4))
plot(alpha, tpr,
  type = "l", xlim = c(0, 1), ylim = c(0, 1),
  xlab = "Threshold", ylab = "True positive rate",
  main = "Ownership ~ Lot_Size + Income"
)
lines(alpha, fpr, lty = 2)
# Both rates share the [0, 1] scale, so the right-hand axis mirrors the left.
axis(side = 4)
mtext("False positive rate", side = 4, line = 3)
legend("topright", legend = c("TPR", "FPR"), lty = c(1, 2))
The last model provides the best prediction. It has the highest true positive rate while maintaining the lowest false positive rate. It also has the highest area under the ROC curve (AUC). This means that, of the three models, it is the best at distinguishing owners from non-owners.