simmons.knit

Project Objective:

Use the estimated logistic regression (LR) equation to make business recommendations for the Simmons store catalog promotion. How can Simmons use this information to better target customers for the new promotion? Suppose Simmons wants to send the promotional catalog only to customers who have a 0.40 or higher probability of using the coupon.

Step 1: Load the required libraries (install them first if they are not already available)

library(readxl)

## Warning: package 'readxl' was built under R version 4.3.3

library(Hmisc)

## Warning: package 'Hmisc' was built under R version 4.3.3

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(pscl)

## Warning: package 'pscl' was built under R version 4.3.3

## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.

library(pROC)

## Warning: package 'pROC' was built under R version 4.3.3

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

Steps 2 & 3: Import and explore the dataset

# Pick the Excel workbook interactively, read it into a tibble, and show the
# first six rows.
simmons <- read_excel(path = file.choose())
head(simmons, n = 6L)

## # A tibble: 6 × 4
##   Customer Spending  Card Coupon
##      <dbl>    <dbl> <dbl>  <dbl>
## 1        1     2.29     1      0
## 2        2     3.22     1      0
## 3        3     2.13     1      0
## 4        4     3.92     0      0
## 5        5     2.53     1      0
## 6        6     2.47     0      1

# Standard deviation of every column, computed with sd().
# vapply() always yields a named numeric vector (one value per column),
# whereas the return type of sapply() depends on its input.
vapply(simmons, sd, numeric(1))

##   Customer   Spending       Card     Coupon 
## 29.0114920  1.7412979  0.5025189  0.4923660

# Standard deviations (rounded): Customer 29.01, Spending 1.74, Card 0.50, Coupon 0.49

# Count customers by Coupon (rows) and Card (columns).
xtabs(formula = ~ Coupon + Card, data = simmons)

##       Card
## Coupon  0  1
##      0 36 24
##      1 14 26

Step 4: Build the logistic regression model

# Fit a binomial (logistic) regression of Coupon on Card and Spending,
# then print the coefficient table and deviance statistics.
sim_logit <- glm(
  formula = Coupon ~ Card + Spending,
  family = binomial,
  data = simmons
)
summary(sim_logit)

## 
## Call:
## glm(formula = Coupon ~ Card + Spending, family = binomial, data = simmons)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1464     0.5772  -3.718 0.000201 ***
## Card          1.0987     0.4447   2.471 0.013483 *  
## Spending      0.3416     0.1287   2.655 0.007928 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 134.60  on 99  degrees of freedom
## Residual deviance: 120.97  on 97  degrees of freedom
## AIC: 126.97
## 
## Number of Fisher Scoring iterations: 4

# Step 5: Interpreting the logistic regression equation using odds ratios only

# Exponentiate the fitted coefficients to put them on the odds scale.
sim_logit |>
  coef() |>
  exp()

## (Intercept)        Card    Spending 
##   0.1169074   3.0003587   1.4072585

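# Exponentiated coefficients (rounded): Intercept 0.117, Card 3.00, Spending 1.41
# Holding Spending fixed, Card = 1 multiplies the odds of Coupon = 1 by about 3.00;
# holding Card fixed, each one-unit increase in Spending multiplies those odds by about 1.41.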