library(readr)
library(ggplot2)
library(MASS)
library(DiscriMiner)
train <- read_csv("D:/PG Business Analytics/PA/Group Assignment/PaulBooks1.csv")
Parsed with column specification:
cols(
  ID = col_integer(),
  Months = col_integer(),
  NoBought = col_integer(),
  Purchase = col_integer()
)
test <- read_csv("D:/PG Business Analytics/PA/Group Assignment/PaulBooks2.csv")
Parsed with column specification:
cols(
  ID = col_integer(),
  Months = col_integer(),
  NoBought = col_integer(),
  Purchase = col_integer()
)
head(train)
head(test)
ggplot(data=train, aes(x=factor(Months))) + stat_count() + xlab('Months')
ggplot(data=train, aes(x=factor(NoBought))) + stat_count() + xlab('NoBought')
ggplot(data=train, aes(x = "", y = Months)) + geom_boxplot() + xlab("")
ggplot(data=train, aes(x = "", y = NoBought)) + geom_boxplot() + xlab("")
ggplot(stack(train[, 2:4]), aes(x = ind, y = values, color = ind)) +
  geom_boxplot() + coord_flip() + xlab("Variables") + ylab("Values") + theme(legend.position = "none")
ggplot(data=train,aes(x=Purchase,y=Months,group=Purchase))+geom_boxplot()
ggplot(data=train,aes(x=Purchase,y=NoBought,group=Purchase))+geom_boxplot()
ggplot(data=train, aes(x=Months, y=Purchase)) + geom_point(shape=1, col='grey') +
  geom_smooth(method=lm) +
  xlab(paste('Months', ' Adj. R-Squared:',
             summary(lm(train$Purchase ~ train$Months))$adj.r.squared)) + theme_light()
ggplot(data=train, aes(x=NoBought, y=Purchase)) + geom_point(shape=1, col='grey') +
  geom_smooth(method=lm) +
  xlab(paste('NoBought', ' Adj. R-Squared:',
             summary(lm(train$Purchase ~ train$NoBought))$adj.r.squared)) + theme_light()
model = lda(Purchase~Months+NoBought,data=train,na.action="na.omit", CV=TRUE)
summary(model)
          Length Class  Mode
class       1000 factor numeric
posterior   2000 -none- numeric
terms          3 terms  call
call           5 -none- call
xlevels        0 -none- list
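Because the model was fitted with CV = TRUE, lda() also returns the leave-one-out class assignments directly in model$class, so the confusion table built below from the posteriors should agree with a one-line cross-check (a minimal sketch, not part of the original run):
# leave-one-out class assignments straight from the CV fit
table(actual = train$Purchase, Predicted = model$class)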
train$pred = as.numeric(model$posterior[,1]<model$posterior[,2])
table(actual=train$Purchase,Predicted = train$pred)
      Predicted
actual   0   1
     0 907  10
     1  72  11
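The raw counts are easier to judge as rates; the following summary (added here for illustration, not in the original script) derives accuracy, sensitivity, and specificity from the cross-validated table:
cm <- table(actual = train$Purchase, Predicted = train$pred)
accuracy    <- sum(diag(cm)) / sum(cm)        # (907 + 11) / 1000 = 0.918
sensitivity <- cm["1", "1"] / sum(cm["1", ])  # buyers correctly flagged: 11 / 83
specificity <- cm["0", "0"] / sum(cm["0", ])  # non-buyers correctly flagged: 907 / 917
c(accuracy = accuracy, sensitivity = sensitivity, specificity = specificity)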
# log purchase rate per month (exploratory; superseded by the threshold dummy below)
train$dummy <- log(train$NoBought/train$Months)
# dummy counts how many of the 36/24/12-month thresholds the customer falls under (0 to 3)
train$dummy <- as.numeric(train$Months < 36)
train$dummy <- train$dummy + as.numeric(train$Months < 24)
train$dummy <- train$dummy + as.numeric(train$Months < 12)
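A quick cross-tabulation (a sanity check added here, not in the original run) confirms the intended coding: dummy is 3 for customers under 12 months, 2 for 12 to 23, 1 for 24 to 35, and 0 for 36 or more.
# verify the 0-3 coding of dummy against month bands
table(dummy = train$dummy,
      band  = cut(train$Months, breaks = c(-Inf, 11, 23, 35, Inf),
                  labels = c("<12", "12-23", "24-35", "36+")))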
model = lda(Purchase~Months+NoBought+dummy,data=train,na.action="na.omit", CV=TRUE)
pred = as.numeric(model$posterior[,1]<model$posterior[,2])
table(actual=train$Purchase,Predicted = pred)
      Predicted
actual   0   1
     0 903  14
     1  70  13
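Adding the tenure dummy catches 2 more buyers (13 vs 11) at the cost of 4 more false positives (14 vs 10), so the overall cross-validated error edges up from 0.082 to 0.084; a short check makes that explicit:
# cross-validated error rate of the model with the dummy feature
cm_dummy <- table(actual = train$Purchase, Predicted = pred)
1 - sum(diag(cm_dummy)) / sum(cm_dummy)   # (14 + 70) / 1000 = 0.084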
# train is a tibble, so train[, 4] returns a one-column tibble; use $ (or [[ ]]) to get a vector
x <- as.data.frame(train[, 2:3])
y <- as.numeric(train$Purchase)
mahalanobis<-linDA(x,y)
mahalanobis
Linear Discriminant Analysis
-------------------------------------------
$functions        discrimination functions
$confusion        confusion matrix
$scores           discriminant scores
$classification   assigned class
$error_rate       error rate
-------------------------------------------
$functions
              0      1
constant -1.473 -4.295
Months    0.200  0.143
NoBought  0.699  2.267
$confusion
        predicted
original   0   1
       0 907  10
       1  72  11
$error_rate
[1] 0.082
$scores
         0          1
1 3.315145 -0.8650023
2 1.718994 -2.0082531
3 1.519476 -2.1511595
4 2.916107 -1.1508150
5 1.519476 -2.1511595
6 1.120868  1.0966496
...
$classification
[1] 0 0 0 0 0 0
Levels: 0 1
...
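The discriminant scores are just the linear functions above applied to each observation; assuming $functions stores the constant in its first row, as printed, the scores can be reproduced by hand:
# score matrix = [1, Months, NoBought] %*% functions; the larger column gives the assigned class
manual_scores <- cbind(1, as.matrix(x)) %*% mahalanobis$functions
head(manual_scores)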
mscore<-as.data.frame(mahalanobis$scores)
dwpred<-as.numeric(mscore$`0`<mscore$`1`)
table(train$Purchase, dwpred)
   dwpred
      0   1
  0 907  10
  1  72  11
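The hand-built dwpred should agree with the $classification vector that linDA already returns; a one-line consistency check (added here for illustration):
# TRUE if every score comparison matches linDA's own class assignment
all(dwpred == as.numeric(as.character(mahalanobis$classification)))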
holdout <- classify(mahalanobis, test[, 2:3])   # avoid masking base::c
test$pred <- holdout$pred_class
table(actual=test$Purchase,predict=test$pred)
      predict
actual   0   1
     0 908  11
     1  67  14
The accuracy on the holdout sample is (908 + 14) / 1000 = 92.2%.
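The same figure can be computed directly from the holdout confusion matrix rather than by hand:
# holdout accuracy: correct predictions over all test cases
cm_test <- table(actual = test$Purchase, predict = test$pred)
sum(diag(cm_test)) / sum(cm_test)   # (908 + 14) / 1000 = 0.922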