library(readr)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
# Load the admissions data set (columns: gre, sop, cgpa, admitted).
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
Admission_Predict <- read_csv(
  file = "C:/Users/USER/Desktop/Admission_Predict.csv"
)
## Rows: 400 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): gre, sop, cgpa, admitted
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Recode the 0/1 outcome as a factor so it is handled as a categorical
# response, then work on a copy named `data`.
# attach() was removed: the code shown here never uses bare column names, and
# the attached copy was taken before the factor conversion, so it would have
# exposed a stale numeric `admitted`.
Admission_Predict$admitted <- as.factor(Admission_Predict$admitted)
data <- Admission_Predict
str(data)
## spc_tbl_ [400 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ gre : num [1:400] 337 324 316 322 314 330 321 308 302 323 ...
## $ sop : num [1:400] 4.5 4 3 3.5 2 4.5 3 3 2 3.5 ...
## $ cgpa : num [1:400] 9.65 8.87 8 8.67 8.21 9.34 8.2 7.9 8 8.6 ...
## $ admitted: Factor w/ 2 levels "0","1": 2 2 2 2 1 2 2 1 1 1 ...
## - attr(*, "spec")=
## .. cols(
## .. gre = col_double(),
## .. sop = col_double(),
## .. cgpa = col_double(),
## .. admitted = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Explore the data ----
# Per-column summary: quantiles for the numeric predictors, counts for the
# factor outcome.
summary(object = data)
## gre sop cgpa admitted
## Min. :290.0 Min. :1.0 Min. :6.800 0:181
## 1st Qu.:308.0 1st Qu.:2.5 1st Qu.:8.170 1:219
## Median :317.0 Median :3.5 Median :8.610
## Mean :316.8 Mean :3.4 Mean :8.599
## 3rd Qu.:325.0 3rd Qu.:4.0 3rd Qu.:9.062
## Max. :340.0 Max. :5.0 Max. :9.920
# Class balance of the outcome.
table(data[["admitted"]])
##
## 0 1
## 181 219
# Data partitioning ----
# Reproducible random split. Each row is independently labelled 1 (training)
# with probability 0.8 or 2 (test) with probability 0.2, so the resulting
# partition sizes are only approximately 80/20.
set.seed(123)
pd <- sample(
  2,
  nrow(data),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
train <- data[pd == 1, ]
test <- data[pd == 2, ]
# Model ----
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Fit a conditional inference tree on the training rows, using every other
# column as a candidate predictor, then print its structure.
tree <- ctree(
  formula = admitted ~ .,
  data = train
)
tree
##
## Conditional inference tree with 4 terminal nodes
##
## Response: admitted
## Inputs: gre, sop, cgpa
## Number of observations: 325
##
## 1) gre <= 319; criterion = 1, statistic = 109.922
## 2) gre <= 310; criterion = 0.993, statistic = 9.279
## 3)* weights = 95
## 2) gre > 310
## 4)* weights = 87
## 1) gre > 319
## 5) cgpa <= 9.14; criterion = 0.996, statistic = 10.173
## 6)* weights = 79
## 5) cgpa > 9.14
## 7)* weights = 64
# Draw the fitted tree.
plot(x = tree)
# Prune the model ----
# Refit with explicit controls: a split is only made when its criterion
# exceeds 0.99, and only nodes holding at least 130 observations are
# considered for splitting.
# NOTE(review): the tree printed for this fit is the same four-leaf tree as
# the default fit, so these settings do not actually simplify it; confirm
# the intended values.
tree <- ctree(
  formula = admitted ~ .,
  data = train,
  controls = ctree_control(mincriterion = 0.99, minsplit = 130)
)
tree
##
## Conditional inference tree with 4 terminal nodes
##
## Response: admitted
## Inputs: gre, sop, cgpa
## Number of observations: 325
##
## 1) gre <= 319; criterion = 1, statistic = 109.922
## 2) gre <= 310; criterion = 0.993, statistic = 9.279
## 3)* weights = 95
## 2) gre > 310
## 4)* weights = 87
## 1) gre > 319
## 5) cgpa <= 9.14; criterion = 0.996, statistic = 10.173
## 6)* weights = 79
## 5) cgpa > 9.14
## 7)* weights = 64
# Draw the refitted tree.
plot(x = tree)
# Prediction on the test set ----
# Class probabilities for the first six test rows (one vector per row, in
# the order of the factor levels "0", "1").
head(predict(tree, newdata = test, type = "prob"))
## [[1]]
## [1] 0.2025316 0.7974684
##
## [[2]]
## [1] 0.5862069 0.4137931
##
## [[3]]
## [1] 0.8210526 0.1789474
##
## [[4]]
## [1] 0.2025316 0.7974684
##
## [[5]]
## [1] 0.5862069 0.4137931
##
## [[6]]
## [1] 0.8210526 0.1789474
# Predicted class labels for the first six test rows.
head(predict(tree, newdata = test))
## [1] 1 0 0 1 0 0
## Levels: 0 1
# Prediction on the training set ----
# Predicted classes for the training rows.
p1 <- predict(tree, train)
head(p1)
## [1] 1 1 0 1 1 0
## Levels: 0 1
# Compare against the true labels of the same rows. `p1` was predicted on
# `train`, so the reference must be train$admitted; data$admitted lists the
# first rows of the full data set, which do not line up with `p1`.
head(train$admitted)
## NOTE: not re-run after this fix; the values recorded below came from the
## earlier head(data$admitted) call.
## [1] 1 1 1 1 0 1
## Levels: 0 1
# Confusion matrix (training set) ----
# Confusion matrix of the training-set predictions against the true labels.
# caret treats the first factor level ("0") as the positive class by default,
# which reports sensitivity/specificity for rejections; positive = "1" makes
# "admitted" the class of interest.
# The cross-table and overall statistics do not depend on the positive class.
# The class-specific rows below were recomputed from the recorded table for
# positive = "1" rather than re-run.
confusionMatrix(data = p1, reference = train$admitted, positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 129 53
## 1 17 126
##
## Accuracy : 0.7846
## 95% CI : (0.7359, 0.8281)
## No Information Rate : 0.5508
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5744
##
## Mcnemar's Test P-Value : 2.873e-05
##
## Sensitivity : 0.7039
## Specificity : 0.8836
## Pos Pred Value : 0.8811
## Neg Pred Value : 0.7088
## Prevalence : 0.5508
## Detection Rate : 0.3877
## Detection Prevalence : 0.4400
## Balanced Accuracy : 0.7937
##
## 'Positive' Class : 1
##