############################################################
# #
# My Theme ####
# #
############################################################
my_theme <- function(base_size = 12, base_family = "sans"){
theme_minimal(base_size = base_size, base_family = base_family) +
theme(
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
axis.title = element_text(size = 14),
panel.grid.major = element_line(color = "grey"),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
strip.background = element_rect(fill = "navy", color = "navy", size = 1),
strip.text = element_text(face = "bold", size = 12, color = "white"),
legend.position = "right",
legend.justification = "top",
legend.background = element_blank(),
panel.border = element_rect(color = "grey", fill = NA, size = 0.5)
)
}
theme_set(my_theme())
############################################################
# #
# Plot Neural Network #
# #
############################################################
plot.nnet <- function(mod.in,nid=T,all.out=T,all.in=T,bias=T,wts.only=F,rel.rsc=5,
circle.cex=5,node.labs=T,var.labs=T,x.lab=NULL,y.lab=NULL,
line.stag=NULL,struct=NULL,cex.val=1,alpha.val=1,
circle.col='lightblue',pos.col='black',neg.col='grey',
bord.col='black', ...){
require(scales)
#sanity checks
if('mlp' %in% class(mod.in)) warning('Bias layer not applicable for rsnns object')
if('numeric' %in% class(mod.in)){
if(is.null(struct)) stop('Three-element vector required for struct')
if(length(mod.in) != ((struct[1]*struct[2]+struct[2]*struct[3])+(struct[3]+struct[2])))
stop('Incorrect length of weight matrix for given network structure')
}
if('train' %in% class(mod.in)){
if('nnet' %in% class(mod.in$finalModel)){
mod.in<-mod.in$finalModel
warning('Using best nnet model from train output')
}
else stop('Only nnet method can be used with train object')
}
#gets weights for neural network, output is list
#if rescaled argument is true, weights are returned but rescaled based on abs value
nnet.vals<-function(mod.in,nid,rel.rsc,struct.out=struct){
require(scales)
require(reshape)
if('numeric' %in% class(mod.in)){
struct.out<-struct
wts<-mod.in
}
#neuralnet package
if('nn' %in% class(mod.in)){
struct.out<-unlist(lapply(mod.in$weights[[1]],ncol))
struct.out<-struct.out[-length(struct.out)]
struct.out<-c(
length(mod.in$model.list$variables),
struct.out,
length(mod.in$model.list$response)
)
wts<-unlist(mod.in$weights[[1]])
}
#nnet package
if('nnet' %in% class(mod.in)){
struct.out<-mod.in$n
wts<-mod.in$wts
}
#RSNNS package
if('mlp' %in% class(mod.in)){
struct.out<-c(mod.in$nInputs,mod.in$archParams$size,mod.in$nOutputs)
hid.num<-length(struct.out)-2
wts<-mod.in$snnsObject$getCompleteWeightMatrix()
#get all input-hidden and hidden-hidden wts
inps<-wts[grep('Input',row.names(wts)),grep('Hidden_2',colnames(wts)),drop=F]
inps<-melt(rbind(rep(NA,ncol(inps)),inps))$value
uni.hids<-paste0('Hidden_',1+seq(1,hid.num))
for(i in 1:length(uni.hids)){
if(is.na(uni.hids[i+1])) break
tmp<-wts[grep(uni.hids[i],rownames(wts)),grep(uni.hids[i+1],colnames(wts)),drop=F]
inps<-c(inps,melt(rbind(rep(NA,ncol(tmp)),tmp))$value)
}
#get connections from last hidden to output layers
outs<-wts[grep(paste0('Hidden_',hid.num+1),row.names(wts)),grep('Output',colnames(wts)),drop=F]
outs<-rbind(rep(NA,ncol(outs)),outs)
#weight vector for all
wts<-c(inps,melt(outs)$value)
assign('bias',F,envir=environment(nnet.vals))
}
if(nid) wts<-rescale(abs(wts),c(1,rel.rsc))
#convert wts to list with appropriate names
hid.struct<-struct.out[-c(length(struct.out))]
row.nms<-NULL
for(i in 1:length(hid.struct)){
if(is.na(hid.struct[i+1])) break
row.nms<-c(row.nms,rep(paste('hidden',i,seq(1:hid.struct[i+1])),each=1+hid.struct[i]))
}
row.nms<-c(
row.nms,
rep(paste('out',seq(1:struct.out[length(struct.out)])),each=1+struct.out[length(struct.out)-1])
)
out.ls<-data.frame(wts,row.nms)
out.ls$row.nms<-factor(row.nms,levels=unique(row.nms),labels=unique(row.nms))
out.ls<-split(out.ls$wts,f=out.ls$row.nms)
assign('struct',struct.out,envir=environment(nnet.vals))
out.ls
}
wts<-nnet.vals(mod.in,nid=F)
if(wts.only) return(wts)
#circle colors for input, if desired, must be two-vector list, first vector is for input layer
if(is.list(circle.col)){
circle.col.inp<-circle.col[[1]]
circle.col<-circle.col[[2]]
}
else circle.col.inp<-circle.col
#initiate plotting
x.range<-c(0,100)
y.range<-c(0,100)
#these are all proportions from 0-1
if(is.null(line.stag)) line.stag<-0.011*circle.cex/2
layer.x<-seq(0.17,0.9,length=length(struct))
bias.x<-layer.x[-length(layer.x)]+diff(layer.x)/2
bias.y<-0.95
circle.cex<-circle.cex
#get variable names from mod.in object
#change to user input if supplied
if('numeric' %in% class(mod.in)){
x.names<-paste0(rep('X',struct[1]),seq(1:struct[1]))
y.names<-paste0(rep('Y',struct[3]),seq(1:struct[3]))
}
if('mlp' %in% class(mod.in)){
all.names<-mod.in$snnsObject$getUnitDefinitions()
x.names<-all.names[grep('Input',all.names$unitName),'unitName']
y.names<-all.names[grep('Output',all.names$unitName),'unitName']
}
if('nn' %in% class(mod.in)){
x.names<-mod.in$model.list$variables
y.names<-mod.in$model.list$respons
}
if('xNames' %in% names(mod.in)){
x.names<-mod.in$xNames
y.names<-attr(terms(mod.in),'factor')
y.names<-row.names(y.names)[!row.names(y.names) %in% x.names]
}
if(!'xNames' %in% names(mod.in) & 'nnet' %in% class(mod.in)){
if(is.null(mod.in$call$formula)){
x.names<-colnames(eval(mod.in$call$x))
y.names<-colnames(eval(mod.in$call$y))
}
else{
forms<-eval(mod.in$call$formula)
x.names<-mod.in$coefnames
facts<-attr(terms(mod.in),'factors')
y.check<-mod.in$fitted
if(ncol(y.check)>1) y.names<-colnames(y.check)
else y.names<-as.character(forms)[2]
}
}
#change variables names to user sub
if(!is.null(x.lab)){
if(length(x.names) != length(x.lab)) stop('x.lab length not equal to number of input variables')
else x.names<-x.lab
}
if(!is.null(y.lab)){
if(length(y.names) != length(y.lab)) stop('y.lab length not equal to number of output variables')
else y.names<-y.lab
}
#initiate plot
plot(x.range,y.range,type='n',axes=F,ylab='',xlab='',...)
#function for getting y locations for input, hidden, output layers
#input is integer value from 'struct'
get.ys<-function(lyr){
spacing<-diff(c(0*diff(y.range),0.9*diff(y.range)))/max(struct)
seq(0.5*(diff(y.range)+spacing*(lyr-1)),0.5*(diff(y.range)-spacing*(lyr-1)),
length=lyr)
}
#function for plotting nodes
#'layer' specifies which layer, integer from 'struct'
#'x.loc' indicates x location for layer, integer from 'layer.x'
#'layer.name' is string indicating text to put in node
layer.points<-function(layer,x.loc,layer.name,cex=cex.val){
x<-rep(x.loc*diff(x.range),layer)
y<-get.ys(layer)
points(x,y,pch=21,cex=circle.cex,col=bord.col,bg=in.col)
if(node.labs) text(x,y,paste(layer.name,1:layer,sep=''),cex=cex.val)
if(layer.name=='I' & var.labs) text(x-line.stag*diff(x.range),y,x.names,pos=2,cex=cex.val)
if(layer.name=='O' & var.labs) text(x+line.stag*diff(x.range),y,y.names,pos=4,cex=cex.val)
}
#function for plotting bias points
#'bias.x' is vector of values for x locations
#'bias.y' is vector for y location
#'layer.name' is string indicating text to put in node
bias.points<-function(bias.x,bias.y,layer.name,cex,...){
for(val in 1:length(bias.x)){
points(
diff(x.range)*bias.x[val],
bias.y*diff(y.range),
pch=21,col=bord.col,bg=in.col,cex=circle.cex
)
if(node.labs)
text(
diff(x.range)*bias.x[val],
bias.y*diff(y.range),
paste(layer.name,val,sep=''),
cex=cex.val
)
}
}
#function creates lines colored by direction and width as proportion of magnitude
#use 'all.in' argument if you want to plot connection lines for only a single input node
layer.lines<-function(mod.in,h.layer,layer1=1,layer2=2,out.layer=F,nid,rel.rsc,all.in,pos.col,
neg.col,...){
x0<-rep(layer.x[layer1]*diff(x.range)+line.stag*diff(x.range),struct[layer1])
x1<-rep(layer.x[layer2]*diff(x.range)-line.stag*diff(x.range),struct[layer1])
if(out.layer==T){
y0<-get.ys(struct[layer1])
y1<-rep(get.ys(struct[layer2])[h.layer],struct[layer1])
src.str<-paste('out',h.layer)
wts<-nnet.vals(mod.in,nid=F,rel.rsc)
wts<-wts[grep(src.str,names(wts))][[1]][-1]
wts.rs<-nnet.vals(mod.in,nid=T,rel.rsc)
wts.rs<-wts.rs[grep(src.str,names(wts.rs))][[1]][-1]
cols<-rep(pos.col,struct[layer1])
cols[wts<0]<-neg.col
if(nid) segments(x0,y0,x1,y1,col=cols,lwd=wts.rs)
else segments(x0,y0,x1,y1)
}
else{
if(is.logical(all.in)) all.in<-h.layer
else all.in<-which(x.names==all.in)
y0<-rep(get.ys(struct[layer1])[all.in],struct[2])
y1<-get.ys(struct[layer2])
src.str<-paste('hidden',layer1)
wts<-nnet.vals(mod.in,nid=F,rel.rsc)
wts<-unlist(lapply(wts[grep(src.str,names(wts))],function(x) x[all.in+1]))
wts.rs<-nnet.vals(mod.in,nid=T,rel.rsc)
wts.rs<-unlist(lapply(wts.rs[grep(src.str,names(wts.rs))],function(x) x[all.in+1]))
cols<-rep(pos.col,struct[layer2])
cols[wts<0]<-neg.col
if(nid) segments(x0,y0,x1,y1,col=cols,lwd=wts.rs)
else segments(x0,y0,x1,y1)
}
}
bias.lines<-function(bias.x,mod.in,nid,rel.rsc,all.out,pos.col,neg.col,...){
if(is.logical(all.out)) all.out<-1:struct[length(struct)]
else all.out<-which(y.names==all.out)
for(val in 1:length(bias.x)){
wts<-nnet.vals(mod.in,nid=F,rel.rsc)
wts.rs<-nnet.vals(mod.in,nid=T,rel.rsc)
if(val != length(bias.x)){
wts<-wts[grep('out',names(wts),invert=T)]
wts.rs<-wts.rs[grep('out',names(wts.rs),invert=T)]
sel.val<-grep(val,substr(names(wts.rs),8,8))
wts<-wts[sel.val]
wts.rs<-wts.rs[sel.val]
}
else{
wts<-wts[grep('out',names(wts))]
wts.rs<-wts.rs[grep('out',names(wts.rs))]
}
cols<-rep(pos.col,length(wts))
cols[unlist(lapply(wts,function(x) x[1]))<0]<-neg.col
wts.rs<-unlist(lapply(wts.rs,function(x) x[1]))
if(nid==F){
wts.rs<-rep(1,struct[val+1])
cols<-rep('black',struct[val+1])
}
if(val != length(bias.x)){
segments(
rep(diff(x.range)*bias.x[val]+diff(x.range)*line.stag,struct[val+1]),
rep(bias.y*diff(y.range),struct[val+1]),
rep(diff(x.range)*layer.x[val+1]-diff(x.range)*line.stag,struct[val+1]),
get.ys(struct[val+1]),
lwd=wts.rs,
col=cols
)
}
else{
segments(
rep(diff(x.range)*bias.x[val]+diff(x.range)*line.stag,struct[val+1]),
rep(bias.y*diff(y.range),struct[val+1]),
rep(diff(x.range)*layer.x[val+1]-diff(x.range)*line.stag,struct[val+1]),
get.ys(struct[val+1])[all.out],
lwd=wts.rs[all.out],
col=cols[all.out]
)
}
}
}
#use functions to plot connections between layers
#bias lines
if(bias) bias.lines(bias.x,mod.in,nid=nid,rel.rsc=rel.rsc,all.out=all.out,pos.col=alpha(pos.col,alpha.val),
neg.col=alpha(neg.col,alpha.val))
#layer lines, makes use of arguments to plot all or for individual layers
#starts with input-hidden
#uses 'all.in' argument to plot connection lines for all input nodes or a single node
if(is.logical(all.in)){
mapply(
function(x) layer.lines(mod.in,x,layer1=1,layer2=2,nid=nid,rel.rsc=rel.rsc,
all.in=all.in,pos.col=alpha(pos.col,alpha.val),neg.col=alpha(neg.col,alpha.val)),
1:struct[1]
)
}
else{
node.in<-which(x.names==all.in)
layer.lines(mod.in,node.in,layer1=1,layer2=2,nid=nid,rel.rsc=rel.rsc,all.in=all.in,
pos.col=alpha(pos.col,alpha.val),neg.col=alpha(neg.col,alpha.val))
}
#connections between hidden layers
lays<-split(c(1,rep(2:(length(struct)-1),each=2),length(struct)),
f=rep(1:(length(struct)-1),each=2))
lays<-lays[-c(1,(length(struct)-1))]
for(lay in lays){
for(node in 1:struct[lay[1]]){
layer.lines(mod.in,node,layer1=lay[1],layer2=lay[2],nid=nid,rel.rsc=rel.rsc,all.in=T,
pos.col=alpha(pos.col,alpha.val),neg.col=alpha(neg.col,alpha.val))
}
}
#lines for hidden-output
#uses 'all.out' argument to plot connection lines for all output nodes or a single node
if(is.logical(all.out))
mapply(
function(x) layer.lines(mod.in,x,layer1=length(struct)-1,layer2=length(struct),out.layer=T,nid=nid,rel.rsc=rel.rsc,
all.in=all.in,pos.col=alpha(pos.col,alpha.val),neg.col=alpha(neg.col,alpha.val)),
1:struct[length(struct)]
)
else{
node.in<-which(y.names==all.out)
layer.lines(mod.in,node.in,layer1=length(struct)-1,layer2=length(struct),out.layer=T,nid=nid,rel.rsc=rel.rsc,
pos.col=pos.col,neg.col=neg.col,all.out=all.out)
}
#use functions to plot nodes
for(i in 1:length(struct)){
in.col<-circle.col
layer.name<-'H'
if(i==1) { layer.name<-'I'; in.col<-circle.col.inp}
if(i==length(struct)) layer.name<-'O'
layer.points(struct[i],layer.x[i],layer.name)
}
if(bias) bias.points(bias.x,bias.y,'B')
}wait <- read_csv("C:/Users/gexbxl1/Documents/OtherData/No-show-Issue-Comma-300k.csv")
colnames(wait)[4] <- "AppointmentData"
colnames(wait)[8] <- "Alcoholism"
colnames(wait)[9] <- "Hypertension"
wait$Status %<>% as.factor()
wait <- wait[,c(6,3,4,1,2,5,7:15)]wait %>%
select(-c(AppointmentRegistration, AppointmentData)) %>%
gather(x, y, Age:AwaitingTime) %>%
ggplot(aes(x = y, fill = Status, color = Status)) +
geom_density(alpha = 0.3, size = 1) +
geom_rug() +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
facet_wrap( ~ as.factor(x), scales = "free_y", ncol = 3) +
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
axis.text.x = element_text(vjust = 0.5),
plot.title = element_text(size = 18,
face = "bold.italic")) +
labs(title = "Doctor Appointment Attendance Density")+
labs(x = "Various Variables") +
labs(y = "Density") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_blank())wait$Status %<>% as.factor()
Status_DENSITY <-
(ggplot(wait, aes(x=Status)) +
geom_density(aes(y = ..count../sum(..count..), group = Status, fill = Status), alpha = 0.3) +
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
axis.text.x = element_text(angle = 90))+
labs(title = "Doctor Appointment Attendance Density",
x = "Attendance", y = "Density", colour = "Attendance") +
my_theme() +
theme(axis.text.x = element_text(angle = 50),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
theme(legend.background = element_rect(size = 0.1),
legend.position = "bottom", legend.direction = "horizontal") +
theme(plot.title = element_text(size = 14, hjust = 0.5))+
labs(fill = "Attendance"))
DAYOFTHEWEEK_DENSITY <-
(ggplot(wait, aes(x=DayOfTheWeek)) +
geom_density(aes(y = ..count../sum(..count..),group =DayOfTheWeek, fill = DayOfTheWeek), alpha = 0.3) +
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
axis.text.x = element_text(angle = 90))+
labs(title = "Day Of The Week Density",
x = "Days of The Week", y = "Density", colour = "Days of The Week") +
my_theme() +
theme(axis.text.x = element_text(angle = 50),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
theme(legend.background = element_rect(size = 0.1),
legend.position = "bottom", legend.direction = "horizontal") +
theme(plot.title = element_text(size = 14, hjust = 0.5)) +
labs(fill = "Days of The Week"))
GENDER_DENSITY <-
(ggplot(wait, aes(x=Gender)) +
geom_density(aes(y = ..count../sum(..count..),group = Gender, fill = Gender), alpha = 0.3) +
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
axis.text.x = element_text(angle = 90))+
labs(title = "Gender Densities",
x = "Gender", y = "Density", colour = "Gender") +
my_theme() +
theme(axis.text.x = element_text(angle = 50),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
theme(legend.background = element_rect(size = 0.1),
legend.position = "bottom", legend.direction = "horizontal") +
theme(plot.title = element_text(size = 14, hjust = 0.5)) +
labs(fill = "Gender"))
main=textGrob("Densities of Variables",gp=gpar(fontsize=30,font=3))
grid.arrange(Status_DENSITY, DAYOFTHEWEEK_DENSITY, GENDER_DENSITY, nrow = 3, top = main)wait_bin <- wait
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Sunday"] = 1
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Monday"] = 2
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Tuesday"] = 3
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Wednesday"] = 4
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Thursday"] = 5
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Friday"] = 6
wait_bin[,"DayOfTheWeek"][wait_bin[,"DayOfTheWeek"] == "Saturday"] = 7
wait_bin$DayOfTheWeek %<>% as.integer()
wait_bin[,"Gender"][wait_bin[,"Gender"] == "M"] = 0
wait_bin[,"Gender"][wait_bin[,"Gender"] == "F"] = 1
wait_bin$Gender %<>% as.integer()
wait_bin$Status_Binary = 0
wait_bin[,"Status_Binary"][wait_bin[,"Status"] == "Show-Up"] = 0
wait_bin[,"Status_Binary"][wait_bin[,"Status"] == "No-Show"] = 1
wait.pca <- wait_bin %>% select(-c(AppointmentRegistration, AppointmentData, Status)) %>% PCA(graph = FALSE)fviz_pca_var(wait.pca , col.var="contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)fviz_pca_var %>% dput()function (X, axes = c(1, 2), geom = c("arrow", "text"), repel = FALSE,
col.var = "black", alpha.var = 1, col.quanti.sup = "blue",
col.circle = "grey70", select.var = list(name = NULL, cos2 = NULL,
contrib = NULL), ...)
{
fviz(X, element = "var", axes = axes, geom = geom, color = col.var,
alpha = alpha.var, select = select.var, repel = repel,
col.col.sup = col.quanti.sup, col.circle = col.circle,
...)
}
# Contributions of variables to PC1
fviz_contrib(wait.pca, choice = "var", axes = 1, top = 10)# Contributions of variables to PC2
fviz_contrib(wait.pca, choice = "var", axes = 2, top = 10)fviz_pca_ind(wait.pca,
label = "none", # hide individual labels
habillage = wait$Status, # color by groups
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
addEllipses = TRUE # Concentration ellipses
)wait <- wait[,c(1,5,6,2,3,4,7:15)]wait_bin %>%
select(-c(AppointmentData, AppointmentRegistration, Status)) %>%
sample_frac(0.005) %>%
scale() %>%
kmeans(4,nstart=25) %>%
fviz_cluster(data = wait_bin %>% select(-c(AppointmentData, AppointmentRegistration, Status)),
main = "Partitioning Clustering Plot"
) +
scale_color_brewer(palette = "Set2") +
my_theme() Okay, so clustering didn’t really come out all that well…
HCPC(wait_bin %>% select(-c(AppointmentRegistration, AppointmentData, Status)) %>% sample_frac(0.0004) %>% PCA(graph = FALSE), graph=FALSE) %>% plot(choice = "3D.map")wait$Gender %<>% as.character()
wait$DayOfTheWeek %<>% as.character()
temp_wait <- wait %>% dplyr::select(-c(AppointmentRegistration, AppointmentData))
temp_wait$Status %<>% as.character()
temp_wait[,"Status"][temp_wait[,"Status"] == "Show-Up"] = "Show"
temp_wait[,"Status"][temp_wait[,"Status"] == "No-Show"] = "No"
temp_wait$Status %<>% as.factor()
data <- sample(2, nrow(temp_wait), replace = T, prob = c(0.7, 0.3))
train <- temp_wait[data == 1,]
test <- temp_wait[data == 2,]rp <- rpart(Status~ Age + AwaitingTime + Alcoholism + Hypertension + Scholarship + Smokes + Sms_Reminder,
train,
method = "class",
maxdepth = 4,
minsplit = 2,
minbucket = 1,
cp=-1)
printcp(rp)
Classification tree:
rpart(formula = Status ~ Age + AwaitingTime + Alcoholism + Hypertension +
Scholarship + Smokes + Sms_Reminder, data = train, method = "class",
maxdepth = 4, minsplit = 2, minbucket = 1, cp = -1)
Variables actually used in tree construction:
[1] Age Alcoholism AwaitingTime Scholarship Smokes
[6] Sms_Reminder
Root node error: 63307/209703 = 0.30189
n= 209703
CP nsplit rel error xerror xstd
1 0 0 1 1 0.0033208
2 -1 15 1 1 0.0033208
#rpart.plot::rpart.plot(rp, type = 2, fallen.leaves = FALSE, extra = 4)
rattle::fancyRpartPlot(rp, sub="", main = "Recursive Partioning of Attendance")testPred <- predict(rp, newdata = test)
predictability <- sum(testPred == test$Status)/ length(test$Status)*100
predictability[1] 0
Rpart_Pred <- signif(predictability,4)
caret::varImp(rp) Overall
Age 1497.3918
Alcoholism 255.1612
AwaitingTime 1494.7662
Hypertension 323.2188
Scholarship 348.2283
Smokes 382.7392
Sms_Reminder 181.5362
With R Part, we recieve an accuracy of 0%. We can tell from the Decision Tree that it isn’t all that great.
model_nnet <- nnet(formula = Status ~ ., data=temp_wait, size=4, decay=0.0001, maxit = 700, trace= FALSE)
x = temp_wait[,2:ncol(temp_wait)]
y = temp_wait[,1]
pred <- predict(model_nnet,x,type="class")
pred %<>% as.factor()
cfm_nnet <- confusionMatrix(pred, y$Status)
cfm_nnetConfusion Matrix and Statistics
Reference
Prediction No Show
No 0 0
Show 90731 209269
Accuracy : 0.6976
95% CI : (0.6959, 0.6992)
No Information Rate : 0.6976
P-Value [Acc > NIR] : 0.5009
Kappa : 0
Mcnemar's Test P-Value : <0.0000000000000002
Sensitivity : 0.0000
Specificity : 1.0000
Pos Pred Value : NaN
Neg Pred Value : 0.6976
Prevalence : 0.3024
Detection Rate : 0.0000
Detection Prevalence : 0.0000
Balanced Accuracy : 0.5000
'Positive' Class : No
nnet_pred <- signif(cfm_nnet$overall[[1]]*100,4)
nnet_plot <- plot.nnet(model_nnet, alpha.val = 0.5, cex= 0.7, circle.col = list('lightblue', 'white'), bord.col = 'black')For a Neural Network, we receive an accuracy of 69.76%.
wait_ml <- temp_wait %>% sample_frac(.05)
x = wait_ml[,2:ncol(wait_ml)]
y = wait_ml [,1]
model_bag <- bagging(Status ~ . , data=wait_ml)
pred <- predict(model_bag,x)
l <- union(pred,y)
cfm_bag <- confusionMatrix(pred, y$Status)
cfm_bagConfusion Matrix and Statistics
Reference
Prediction No Show
No 3775 269
Show 693 10263
Accuracy : 0.9359
95% CI : (0.9318, 0.9397)
No Information Rate : 0.7021
P-Value [Acc > NIR] : < 0.00000000000000022
Kappa : 0.8424
Mcnemar's Test P-Value : < 0.00000000000000022
Sensitivity : 0.8449
Specificity : 0.9745
Pos Pred Value : 0.9335
Neg Pred Value : 0.9367
Prevalence : 0.2979
Detection Rate : 0.2517
Detection Prevalence : 0.2696
Balanced Accuracy : 0.9097
'Positive' Class : No
Cart_pred <- signif(cfm_bag$overall[[1]]*100,4)For a C5 Bagging CART we receive an accuracy of 93.59%.
C5 Bagging CART was the best accuracy we received with 93.59%.
Neural Network Accuracy = 69.76%. Rpart Accuracy = 0%.
I wanted to try out PCA and other Clustering Methods with the “factoMineR” library, and although it gave some pretty cool outputs, it wasn’t really useful with this Dataset.