Decision Tree
Load the packages and dataset
library(ISLR)
df <- College
head(df)
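Before plotting, it can help to glance at the structure of the data; a minimal sketch using base R (str and summary are not part of the original walkthrough):
# Inspect the variables and the class balance of the target column
str(df)
summary(df$Private)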
Plotting
library(ggplot2)
ggplot(df,aes(Room.Board,Grad.Rate))+geom_point(aes(color=Private),alpha=0.4)
ggplot(df,aes(F.Undergrad))+geom_histogram(aes(fill=Private),color='orange',bins = 50)
ggplot(df,aes(Grad.Rate))+geom_histogram(aes(fill=Private),color='yellow',bins = 50)
Check for graduation rates above 100% and cap the offending value (Cazenovia College) at 100
subset(df, Grad.Rate > 100)
df['Cazenovia College', "Grad.Rate"] <- 100
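To confirm the fix took effect, a quick sanity check (added here, not in the original):
# Should now return zero rows, and the maximum Grad.Rate should be 100
subset(df, Grad.Rate > 100)
max(df$Grad.Rate)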
Train and Test
library(caTools)
set.seed(101)
sample <- sample.split(df$Private,SplitRatio=0.70)
train <- subset(df, sample == TRUE)
test <- subset(df, sample == FALSE)
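sample.split stratifies on the Private column, so both sets should keep roughly the same Yes/No ratio; a quick check (an added sketch, not in the original):
# Compare the class proportions in the train and test sets
prop.table(table(train$Private))
prop.table(table(test$Private))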
Decision Tree
library(rpart)
tree <- rpart(Private ~ . , data = train, method = 'class')
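To see how the tree was grown, rpart's printcp shows the complexity-parameter table (an optional inspection step, not in the original):
# Display the variables used for splitting and the cross-validated error at each complexity level
printcp(tree)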
Predict using the trained model
tree.pred <- predict(tree,test)
head(tree.pred)
                                                No       Yes
Adrian College                          0.00462963 0.9953704
Alfred University                       0.00462963 0.9953704
Allegheny College                       0.00462963 0.9953704
Allentown Coll. of St. Francis de Sales 0.00462963 0.9953704
Alma College                            0.00462963 0.9953704
Amherst College                         0.00462963 0.9953704
Writing a function to separate the Yes/No probability columns into a single Yes or No label stored in a Private column
tree.pred <- as.data.frame(tree.pred)
joiner <- function(x){
  if(x >= 0.5){
    return("Yes")
  } else {
    return("No")
  }
}
Applying the function
tree.pred$Private <- sapply(tree.pred$Yes,joiner)
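As an aside, the same labelling can be done without a helper function by using the vectorised ifelse (equivalent to sapply with joiner above):
# Vectorised alternative to sapply + joiner
tree.pred$Private <- ifelse(tree.pred$Yes >= 0.5, "Yes", "No")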
Calling head() on the prediction data frame
head(tree.pred)
                                                No       Yes Private
Adrian College                          0.00462963 0.9953704     Yes
Alfred University                       0.00462963 0.9953704     Yes
Allegheny College                       0.00462963 0.9953704     Yes
Allentown Coll. of St. Francis de Sales 0.00462963 0.9953704     Yes
Alma College                            0.00462963 0.9953704     Yes
Amherst College                         0.00462963 0.9953704     Yes
Creating a confusion table comparing the predicted labels with the Private column of the test set
table(tree.pred$Private,test$Private)
      No Yes
  No  61   4
  Yes  3 165
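From this table the overall accuracy can be computed; a short sketch (the name tab is just illustrative):
# Proportion of correctly classified colleges in the test set
tab <- table(tree.pred$Private, test$Private)
sum(diag(tab)) / sum(tab)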
Plotting the Decision Tree
library(rpart.plot)
prp(tree)
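The rpart.plot package also provides rpart.plot(), which draws the same fitted tree with class probabilities and node percentages (shown only as an alternative to prp):
# Alternative visualisation of the same tree
rpart.plot(tree)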
Random Forest Method
library(randomForest)
Building a Model
model <- randomForest(Private ~ . , data = train, importance = TRUE)
Confusion matrix of the model (out-of-bag estimates)
model$confusion
     No Yes class.error
No  182  30   0.1415094
Yes  17 548   0.0300885
Passing importance = TRUE to the model gives detailed information about each variable's mean decrease in accuracy and mean decrease in Gini
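A sketch of how to pull those numbers out with the randomForest package (importance and varImpPlot are standard functions of that package, not part of the original code):
# Mean decrease in accuracy and mean decrease in Gini for each predictor
importance(model)
# Dot chart of the same variable-importance measures
varImpPlot(model)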
Prediction of the model using the test data
p <- predict(model,test)
Confusion matrix of the predicted results
table(p,test$Private)
p     No Yes
  No  64   0
  Yes  0 169
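As with the tree, the test-set accuracy of the random forest can be read off this table (tab.rf is just an illustrative name):
# Overall test-set accuracy of the random forest
tab.rf <- table(p, test$Private)
sum(diag(tab.rf)) / sum(tab.rf)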