Logistic Regression

library(ROCR)

## Loading required package: gplots

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

Load in data

# Read RidingMowers.csv from the working directory; read.csv() already
# returns a data frame, so no further conversion is needed.
mowers_input <- read.csv("RidingMowers.csv")

The first logistic regression model

Predicting ownership as a function of Lot Size

# First model: logistic regression (binomial family, logit link) of
# Ownership on Lot_Size, followed by the coefficient/deviance summary.
mowers_log1 <- glm(
  Ownership ~ Lot_Size,
  data = mowers_input,
  family = binomial(link = "logit")
)
summary(mowers_log1)

## 
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"), 
##     data = mowers_input)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.69488  -0.82973   0.01745   0.78175   1.80154  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -12.1872     5.2407  -2.326   0.0200 *
## Lot_Size      0.6419     0.2742   2.341   0.0192 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 33.271  on 23  degrees of freedom
## Residual deviance: 24.718  on 22  degrees of freedom
## AIC: 28.718
## 
## Number of Fisher Scoring iterations: 4

# Fitted probabilities from the first model, paired with the observed
# Ownership labels in a ROCR prediction object.
pred <- predict(mowers_log1, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)

# Area under the curve, and the ROC curve itself (TPR on y, FPR on x).
aucobj <- performance(predobj, measure = "auc")
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")

# Extract the alpha (threshold), FPR and TPR values from rocobj,
# flattened to plain numeric vectors and rounded to 4 decimals.
alpha <- round(as.numeric(unlist(slot(rocobj, "alpha.values"))), digits = 4)
fpr <- round(as.numeric(unlist(slot(rocobj, "x.values"))), digits = 4)
tpr <- round(as.numeric(unlist(slot(rocobj, "y.values"))), digits = 4)

# Set the plot margins (bottom, left, top, right) for the TPR/FPR plot.
par(mar = c(5, 5, 2, 4))

The second logistic regression model

Predicting ownership as a function of Income

# Second model: logistic regression (binomial family, logit link) of
# Ownership on Income.
mowers_log2 <- glm(
  Ownership ~ Income,
  data = mowers_input,
  family = binomial(link = "logit")
)
# Summarise the model fitted above. This call previously summarised
# mowers_log1 (copy-paste slip), so any saved output from it describes the
# Lot_Size model and must be re-generated.
summary(mowers_log2)

## 
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"), 
##     data = mowers_input)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.69488  -0.82973   0.01745   0.78175   1.80154  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -12.1872     5.2407  -2.326   0.0200 *
## Lot_Size      0.6419     0.2742   2.341   0.0192 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 33.271  on 23  degrees of freedom
## Residual deviance: 24.718  on 22  degrees of freedom
## AIC: 28.718
## 
## Number of Fisher Scoring iterations: 4

# Fitted probabilities from the second model, paired with the observed
# Ownership labels in a ROCR prediction object.
pred <- predict(mowers_log2, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)

# Area under the curve, and the ROC curve itself (TPR on y, FPR on x).
aucobj <- performance(predobj, measure = "auc")
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")

# Extract the alpha (threshold), FPR and TPR values from rocobj,
# flattened to plain numeric vectors and rounded to 4 decimals.
alpha <- round(as.numeric(unlist(slot(rocobj, "alpha.values"))), digits = 4)
fpr <- round(as.numeric(unlist(slot(rocobj, "x.values"))), digits = 4)
tpr <- round(as.numeric(unlist(slot(rocobj, "y.values"))), digits = 4)

# Set the plot margins (bottom, left, top, right) for the TPR/FPR plot.
par(mar = c(5, 5, 2, 4))

The third logistic regression model

Predicting ownership as a function of both Income and Lot Size

# Third model: logistic regression (binomial family, logit link) of
# Ownership on both Lot_Size and Income.
mowers_log3 <- glm(
  Ownership ~ Lot_Size + Income,
  data = mowers_input,
  family = binomial(link = "logit")
)
# Summarise the model fitted above. This call previously summarised
# mowers_log1 (copy-paste slip), so any saved output from it describes the
# Lot_Size-only model and must be re-generated.
summary(mowers_log3)

## 
## Call:
## glm(formula = Ownership ~ Lot_Size, family = binomial(link = "logit"), 
##     data = mowers_input)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.69488  -0.82973   0.01745   0.78175   1.80154  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -12.1872     5.2407  -2.326   0.0200 *
## Lot_Size      0.6419     0.2742   2.341   0.0192 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 33.271  on 23  degrees of freedom
## Residual deviance: 24.718  on 22  degrees of freedom
## AIC: 28.718
## 
## Number of Fisher Scoring iterations: 4

# Fitted probabilities from the third model, paired with the observed
# Ownership labels in a ROCR prediction object.
pred <- predict(mowers_log3, type = "response")
predobj <- prediction(pred, mowers_input$Ownership)

# Area under the curve, and the ROC curve itself (TPR on y, FPR on x).
aucobj <- performance(predobj, measure = "auc")
rocobj <- performance(predobj, measure = "tpr", x.measure = "fpr")

# Extract the alpha (threshold), FPR and TPR values from rocobj,
# flattened to plain numeric vectors and rounded to 4 decimals.
alpha <- round(as.numeric(unlist(slot(rocobj, "alpha.values"))), digits = 4)
fpr <- round(as.numeric(unlist(slot(rocobj, "x.values"))), digits = 4)
tpr <- round(as.numeric(unlist(slot(rocobj, "y.values"))), digits = 4)

# Set the plot margins (bottom, left, top, right) for the TPR/FPR plot.
par(mar = c(5, 5, 2, 4))

Results

The last model provides the best prediction. It has the highest true positive rate while maintaining the lowest false positive rate. It also has the highest area under the ROC curve (AUC). This means that, of the three models, it is the best at predicting true cases of ownership.

Assignment5

Andrew Williams

08/03/2020

Logistic Regression

The first logistic regression model

The second logistic regression model

The third logistic regression model

Results