Decision Tree

Load the packages and dataset

library(ISLR)
df <- College
head(df)

Plotting

library(ggplot2)
ggplot(df,aes(Room.Board,Grad.Rate))+geom_point(aes(color=Private),alpha=0.4)

ggplot(df,aes(F.Undergrad))+geom_histogram(aes(fill=Private),color='orange',bins = 50)

ggplot(df,aes(Grad.Rate))+geom_histogram(aes(fill=Private),color='yellow',bins = 50)

Checking for bad data: one college reports a graduation rate above 100%, so cap it at 100

subset(df, Grad.Rate > 100)
df['Cazenovia College', "Grad.Rate"] <- 100
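A quick optional sanity check (a small sketch on the same data frame) that the fix took effect:

subset(df, Grad.Rate > 100)   # should now return zero rows
max(df$Grad.Rate)             # should be at most 100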

Train and Test

library(caTools)
set.seed(101)
sample <- sample.split(df$Private, SplitRatio = 0.70)
train <- subset(df, sample == TRUE)
test <- subset(df, sample == FALSE)
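An optional check (a sketch using base R on the objects just created) that the stratified split kept the Private class proportions similar in both sets:

dim(train)
dim(test)
prop.table(table(train$Private))
prop.table(table(test$Private))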

Decision Tree

library(rpart)
tree <- rpart(Private ~ . , data = train, method = 'class')   # fit on the training set only
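Optionally, the fitted tree can be inspected before predicting; printcp() from rpart prints the complexity parameter table with cross-validated error (a small sketch on the tree object above):

printcp(tree)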

Predict using the trained model

tree.pred <- predict(tree,test)
head(tree.pred)
                                                No       Yes
Adrian College                          0.00462963 0.9953704
Alfred University                       0.00462963 0.9953704
Allegheny College                       0.00462963 0.9953704
Allentown Coll. of St. Francis de Sales 0.00462963 0.9953704
Alma College                            0.00462963 0.9953704
Amherst College                         0.00462963 0.9953704

Writing a function to convert the Yes probability column into a Yes or No label for a new Private column

tree.pred <- as.data.frame(tree.pred)
joiner <- function(x)
{
  if(x>=0.5){
    return("Yes")
  }else{
    return("No")
  }
}

Applying the function

tree.pred$Private <- sapply(tree.pred$Yes,joiner)
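As an aside (an equivalent sketch under the same 0.5 threshold), the helper function is not strictly needed: ifelse() is vectorized, and rpart's predict() can also return class labels directly with type = 'class'.

tree.pred$Private <- ifelse(tree.pred$Yes >= 0.5, "Yes", "No")
# or: predict(tree, test, type = 'class')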

Calling the head of the predicted data frame

head(tree.pred)

Creating a confusion matrix of the predicted Private column against the actual test values

table(tree.pred$Private,test$Private)
     
       No Yes
  No   61   4
  Yes   3 165
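From this table the overall accuracy can be computed (a minimal sketch reusing the same table call):

tab <- table(tree.pred$Private, test$Private)
sum(diag(tab)) / sum(tab)   # proportion of correct predictions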

Plotting the Decision Tree

library(rpart.plot)
prp(tree)
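rpart.plot() from the same package is an optional alternative with more detailed node labels (a sketch on the same tree object):

rpart.plot(tree)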

Random Forest Method

library(randomForest)

Building a Model

model <- randomForest(Private ~ . , data = train, importance = TRUE)   # fit on the training set only

Confusion matrix of the model (out-of-bag estimate)

model$confusion
     No Yes class.error
No  182  30   0.1415094
Yes  17 548   0.0300885

Checking importance on the model (it gives detailed information about the mean decrease in accuracy and the mean decrease in Gini)

model$importance
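The importance measures can also be plotted; varImpPlot() from randomForest draws the mean decrease in accuracy and Gini per predictor (an optional sketch on the model above):

varImpPlot(model)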

Prediction of the model using the test data

p <- predict(model,test)

Confusion matrix of the predicted results

table(p,test$Private)
     
p      No Yes
  No   64   0
  Yes   0 169
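As with the decision tree, accuracy and the misclassification rate can be computed from this table (a minimal sketch reusing the same objects):

rf.tab <- table(p, test$Private)
sum(diag(rf.tab)) / sum(rf.tab)       # accuracy
1 - sum(diag(rf.tab)) / sum(rf.tab)   # misclassification rate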