DECISION

library(readr)
library(randomForest)

## randomForest 4.7-1.1

## Type rfNews() to see new features/changes/bug fixes.

library(caret)

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:randomForest':
## 
##     margin

## Loading required package: lattice

# Load the admissions data set (columns: gre, sop, cgpa, admitted).
# NOTE(review): absolute, machine-specific path; the report only re-runs where
# this file exists at exactly this location.
Admission_Predict <- read_csv(
  file = "C:/Users/USER/Desktop/Admission_Predict.csv"
)

## Rows: 400 Columns: 4

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): gre, sop, cgpa, admitted
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Recode the 0/1 outcome as a factor so the tree and the confusion matrix treat
# it as a class label rather than a number.
# attach() was removed: it copied the columns onto the search path *before* the
# factor conversion (leaving a stale numeric `admitted` there), and every later
# step accesses columns through `data$...` or a formula with `data =`.
Admission_Predict$admitted <- as.factor(Admission_Predict$admitted)

# Working copy used by the rest of the analysis.
data <- Admission_Predict
str(data)

## spc_tbl_ [400 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ gre     : num [1:400] 337 324 316 322 314 330 321 308 302 323 ...
##  $ sop     : num [1:400] 4.5 4 3 3.5 2 4.5 3 3 2 3.5 ...
##  $ cgpa    : num [1:400] 9.65 8.87 8 8.67 8.21 9.34 8.2 7.9 8 8.6 ...
##  $ admitted: Factor w/ 2 levels "0","1": 2 2 2 2 1 2 2 1 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   gre = col_double(),
##   ..   sop = col_double(),
##   ..   cgpa = col_double(),
##   ..   admitted = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

explore the data

# Per-column overview: five-number summary and mean for the numeric predictors,
# class counts for the factor outcome `admitted`.
summary(data)

##       gre             sop           cgpa       admitted
##  Min.   :290.0   Min.   :1.0   Min.   :6.800   0:181   
##  1st Qu.:308.0   1st Qu.:2.5   1st Qu.:8.170   1:219   
##  Median :317.0   Median :3.5   Median :8.610           
##  Mean   :316.8   Mean   :3.4   Mean   :8.599           
##  3rd Qu.:325.0   3rd Qu.:4.0   3rd Qu.:9.062           
##  Max.   :340.0   Max.   :5.0   Max.   :9.920

# Class balance of the outcome (count of 0s and 1s).
table(data[["admitted"]])

## 
##   0   1 
## 181 219

data partitioning

# Fix the RNG so the split is reproducible.
set.seed(123)

# Label every row 1 (train) or 2 (test) independently with probabilities
# 0.8 / 0.2, so the resulting split sizes are only approximately 80/20.
pd <- sample(x = 2, size = nrow(data), replace = TRUE, prob = c(0.8, 0.2))

train <- data[pd == 1, ]
test <- data[pd == 2, ]

model

library(party)

## Loading required package: grid

## Loading required package: mvtnorm

## Loading required package: modeltools

## Loading required package: stats4

## Loading required package: strucchange

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: sandwich

# Fit a conditional inference tree for `admitted` using all remaining columns
# of the training partition as predictors, then print its structure.
tree <- ctree(formula = admitted ~ ., data = train)
tree

## 
##   Conditional inference tree with 4 terminal nodes
## 
## Response:  admitted 
## Inputs:  gre, sop, cgpa 
## Number of observations:  325 
## 
## 1) gre <= 319; criterion = 1, statistic = 109.922
##   2) gre <= 310; criterion = 0.993, statistic = 9.279
##     3)*  weights = 95 
##   2) gre > 310
##     4)*  weights = 87 
## 1) gre > 319
##   5) cgpa <= 9.14; criterion = 0.996, statistic = 10.173
##     6)*  weights = 79 
##   5) cgpa > 9.14
##     7)*  weights = 64

# Draw the fitted conditional inference tree.
plot(tree)

prune the model

# Refit with explicit controls (this overwrites the earlier `tree`).
# NOTE(review): with mincriterion = 0.99 and minsplit = 130 the printed tree is
# the same 4-terminal-node tree as the default fit, so no pruning actually
# takes effect here; tighten the controls if a smaller tree is intended.
tree <- ctree(
  formula = admitted ~ .,
  data = train,
  controls = ctree_control(mincriterion = 0.99, minsplit = 130)
)
tree

## 
##   Conditional inference tree with 4 terminal nodes
## 
## Response:  admitted 
## Inputs:  gre, sop, cgpa 
## Number of observations:  325 
## 
## 1) gre <= 319; criterion = 1, statistic = 109.922
##   2) gre <= 310; criterion = 0.993, statistic = 9.279
##     3)*  weights = 95 
##   2) gre > 310
##     4)*  weights = 87 
## 1) gre > 319
##   5) cgpa <= 9.14; criterion = 0.996, statistic = 10.173
##     6)*  weights = 79 
##   5) cgpa > 9.14
##     7)*  weights = 64

# Draw the tree that was refitted with the explicit ctree_control() settings.
plot(tree)

prediction on the test set

# Class probabilities for the first test rows; each list element is a pair of
# probabilities, one per level of `admitted` (0, then 1).
head(predict(tree, newdata = test, type = "prob"))

## [[1]]
## [1] 0.2025316 0.7974684
## 
## [[2]]
## [1] 0.5862069 0.4137931
## 
## [[3]]
## [1] 0.8210526 0.1789474
## 
## [[4]]
## [1] 0.2025316 0.7974684
## 
## [[5]]
## [1] 0.5862069 0.4137931
## 
## [[6]]
## [1] 0.8210526 0.1789474

# Predicted class labels for the first test rows.
head(predict(tree, newdata = test))

## [1] 1 0 0 1 0 0
## Levels: 0 1

prediction on the training set

# Predicted class labels for the training partition (used for the confusion
# matrix further down).
p1 <- predict(tree, newdata = train)
head(p1)

## [1] 1 1 0 1 1 0
## Levels: 0 1

# NOTE(review): `p1` holds predictions for `train`, but this prints labels from
# the full `data`. The two heads only line up row-for-row if the first six rows
# of `data` all landed in `train`; confirm, or compare against
# head(train$admitted) instead.
head(data$admitted)

## [1] 1 1 1 1 0 1
## Levels: 0 1

confusion matrix (training data)

# Cross-tabulate the training-set predictions against the training labels.
# NOTE(review): this is in-sample performance; the held-out `test` partition is
# never scored, so the reported accuracy is likely optimistic.
# NOTE(review): the 'positive' class defaults to the first level ("0"), so
# sensitivity/specificity describe rejections; pass positive = "1" if admission
# is the event of interest.
confusionMatrix(data = p1, reference = train$admitted)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 129  53
##          1  17 126
##                                           
##                Accuracy : 0.7846          
##                  95% CI : (0.7359, 0.8281)
##     No Information Rate : 0.5508          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5744          
##                                           
##  Mcnemar's Test P-Value : 2.873e-05       
##                                           
##             Sensitivity : 0.8836          
##             Specificity : 0.7039          
##          Pos Pred Value : 0.7088          
##          Neg Pred Value : 0.8811          
##              Prevalence : 0.4492          
##          Detection Rate : 0.3969          
##    Detection Prevalence : 0.5600          
##       Balanced Accuracy : 0.7937          
##                                           
##        'Positive' Class : 0               
##

DECISION_TREE

mugo_muiruri_james

2023-09-16