library(foreign)
library(ggplot2)
library(MASS)
library(dplyr)
library(openxlsx)
library(readxl)

Dataset

# Load the car-purchase spreadsheet into a data frame and preview its
# first rows.
Compra <- openxlsx::read.xlsx("car_purchase_data.xlsx")
head(Compra, n = 6L)
##   User.ID Gender Age AnnualSalary Offspring Purchased
## 1     385   Male  35        20000         1         0
## 2     681   Male  40        43500         0         0
## 3     353   Male  49        74000         1         0
## 4     895   Male  40       107500         0         1
## 5     661   Male  25        79000         1         0
## 6     846 Female  47        33500         1         1

Variables:

# Encode Gender numerically (Male -> 0, Female -> 1); it is used as a
# numeric predictor in the discriminant model fitted on this data.
Compra <- mutate(
  Compra,
  Gender = recode(Gender, "Male" = 0, "Female" = 1)
)

# Turn the 0/1 purchase indicator into a labelled two-level factor, which
# serves as the grouping variable.
Compra$Purchased <- factor(
  Compra$Purchased,
  levels = c(0, 1),
  labels = c("Didn't Purchased", "Purchased")
)

# Preview the recoded data.
head(Compra, n = 6L)
##   User.ID Gender Age AnnualSalary Offspring        Purchased
## 1     385      0  35        20000         1 Didn't Purchased
## 2     681      0  40        43500         0 Didn't Purchased
## 3     353      0  49        74000         1 Didn't Purchased
## 4     895      0  40       107500         0        Purchased
## 5     661      0  25        79000         1 Didn't Purchased
## 6     846      1  47        33500         1        Purchased
# Fit a linear discriminant analysis of the purchase outcome on gender, age,
# log annual salary and offspring, using equal prior probabilities (0.5 each)
# for the two groups instead of the observed class proportions.
dis <- lda(
  Purchased ~ Gender + Age + log(AnnualSalary) + Offspring,
  data = Compra,
  prior = c(0.5, 0.5)
)

# Show priors, group means and discriminant coefficients.
print(dis)
## Call:
## lda(Purchased ~ Gender + Age + log(AnnualSalary) + Offspring, 
##     data = Compra, prior = c(0.5, 0.5))
## 
## Prior probabilities of groups:
## Didn't Purchased        Purchased 
##              0.5              0.5 
## 
## Group means:
##                     Gender      Age log(AnnualSalary) Offspring
## Didn't Purchased 0.4966555 34.70067          10.94977 0.5083612
## Purchased        0.5447761 48.14677          11.23676 0.5273632
## 
## Coefficients of linear discriminants:
##                           LD1
## Gender            -0.04752817
## Age                0.11226338
## log(AnnualSalary)  0.66566148
## Offspring         -0.04652487

# Nueva observación

Supongamos que entra un nuevo cliente con los siguientes datos: Gender = 0, Age = 30, AnnualSalary = 77000 y Offspring = 0.

Creamos el perfil del cliente:

# Profile of the new client as a one-row data frame. Columns are named
# explicitly to match the predictors in the model formula, rather than
# being copied from positional columns of the training data, so the profile
# stays correct even if the training data's column order changes.
nuevo.cliente <- data.frame(
  Gender = 0,
  Age = 30,
  AnnualSalary = 77000, # raw salary; the model formula applies log() itself
  Offspring = 0
)

# Classify the new client: yields the predicted class, the posterior
# probabilities of each group, and the discriminant score (LD1).
predict(dis, newdata = nuevo.cliente)
## $class
## [1] Didn't Purchased
## Levels: Didn't Purchased Purchased
## 
## $posterior
##   Didn't Purchased Purchased
## 1        0.8715874 0.1284126
## 
## $x
##         LD1
## 1 -1.128253

De acuerdo con la predicción, y con base en sus datos, lo más probable es que nuestro nuevo cliente no compre un auto nuevo (probabilidad posterior de no compra ≈ 0.87).