Decision Tree
dataset<-read.csv("V:\\5th sem -3rd year\\dsp\\dermatology-2.csv")
dataset$age[is.na(dataset$age)]<-mean(dataset$age,na.rm=TRUE)
dataset$class<-factor(dataset$class,levels=c(1,2,3,4,5,6),labels=c("d1","d2","d3","d4","d5","d6"))
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
datasetnew=dataset%>%select(1:2,15:17,32:35)
str(datasetnew)
## 'data.frame': 366 obs. of 9 variables:
## $ erythema : int 2 3 2 2 2 2 2 2 2 2 ...
## $ scaling : int 2 3 1 2 3 3 1 2 2 2 ...
## $ fibrosis_of_the_papillary_dermis : int 0 0 0 0 0 0 3 0 0 0 ...
## $ exocytosis : int 3 1 1 0 1 2 1 2 2 3 ...
## $ acanthosis : int 2 2 2 2 2 2 3 3 1 2 ...
## $ inflammatory_monoluclear_inflitrate: int 1 1 2 3 2 1 2 3 2 2 ...
## $ band.like_infiltrate : int 0 0 3 0 3 0 0 3 0 0 ...
## $ age : num 55 8 26 40 45 41 18 57 22 30 ...
## $ class : Factor w/ 6 levels "d1","d2","d3",..: 2 1 3 1 3 2 5 3 4 4 ...
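Selecting columns by position works here, but it silently breaks if the CSV column order ever changes. A minimal alternative sketch (not part of the original code) selects the same nine columns by the names shown in str() above:
# Equivalent selection by column name instead of position (assumes the column
# names match the str() output above); produces the same data frame.
datasetnew <- dataset %>%
  select(erythema, scaling, fibrosis_of_the_papillary_dermis, exocytosis,
         acanthosis, inflammatory_monoluclear_inflitrate, band.like_infiltrate,
         age, class)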
library(caTools)
## Warning: package 'caTools' was built under R version 4.1.3
set.seed(123)
split=sample.split(Y=datasetnew$class,SplitRatio=0.7)
train_set=subset(x=datasetnew,split==TRUE)
test_set=subset(x=datasetnew,split==FALSE)
dim(train_set)
## [1] 255 9
dim(test_set)
## [1] 111 9
library(rpart)
## Warning: package 'rpart' was built under R version 4.1.3
fit=rpart(formula=class~.,data=train_set,method="class")
summary(fit)
## Call:
## rpart(formula = class ~ ., data = train_set, method = "class")
## n= 255
##
## CP nsplit rel error xerror xstd
## 1 0.27683616 0 1.0000000 1.0000000 0.04157101
## 2 0.20338983 1 0.7231638 0.7231638 0.04510903
## 3 0.14124294 2 0.5197740 0.5197740 0.04332558
## 4 0.06214689 3 0.3785311 0.3785311 0.03970754
## 5 0.01000000 5 0.2542373 0.2542373 0.03439320
##
## Variable importance
## band.like_infiltrate fibrosis_of_the_papillary_dermis
## 29 24
## exocytosis scaling
## 21 10
## age acanthosis
## 9 4
## erythema
## 3
##
## Node number 1: 255 observations, complexity param=0.2768362
## predicted class=d1 expected loss=0.6941176 P(node) =1
## class counts: 78 43 50 34 36 14
## probabilities: 0.306 0.169 0.196 0.133 0.141 0.055
## left son=2 (203 obs) right son=3 (52 obs)
## Primary splits:
## band.like_infiltrate < 1.5 to the left, improve=47.75823, (0 missing)
## fibrosis_of_the_papillary_dermis < 0.5 to the left, improve=36.31802, (0 missing)
## exocytosis < 0.5 to the left, improve=35.45872, (0 missing)
## scaling < 1.5 to the right, improve=13.53976, (0 missing)
## age < 14 to the right, improve=10.61054, (0 missing)
##
## Node number 2: 203 observations, complexity param=0.2033898
## predicted class=d1 expected loss=0.6206897 P(node) =0.7960784
## class counts: 77 42 0 34 36 14
## probabilities: 0.379 0.207 0.000 0.167 0.177 0.069
## left son=4 (167 obs) right son=5 (36 obs)
## Primary splits:
## fibrosis_of_the_papillary_dermis < 0.5 to the left, improve=39.22079, (0 missing)
## exocytosis < 0.5 to the left, improve=28.74752, (0 missing)
## scaling < 1.5 to the right, improve=16.25222, (0 missing)
## erythema < 1.5 to the right, improve=11.85390, (0 missing)
## age < 14 to the right, improve=11.77322, (0 missing)
## Surrogate splits:
## erythema < 1.5 to the right, agree=0.842, adj=0.111, (0 split)
## scaling < 0.5 to the right, agree=0.837, adj=0.083, (0 split)
##
## Node number 3: 52 observations
## predicted class=d3 expected loss=0.03846154 P(node) =0.2039216
## class counts: 1 1 50 0 0 0
## probabilities: 0.019 0.019 0.962 0.000 0.000 0.000
##
## Node number 4: 167 observations, complexity param=0.1412429
## predicted class=d1 expected loss=0.5389222 P(node) =0.654902
## class counts: 77 42 0 34 0 14
## probabilities: 0.461 0.251 0.000 0.204 0.000 0.084
## left son=8 (64 obs) right son=9 (103 obs)
## Primary splits:
## exocytosis < 0.5 to the left, improve=35.045240, (0 missing)
## age < 14 to the right, improve=13.190970, (0 missing)
## scaling < 2.5 to the right, improve= 8.074897, (0 missing)
## acanthosis < 1.5 to the right, improve= 4.209210, (0 missing)
## erythema < 1.5 to the right, improve= 3.083663, (0 missing)
## Surrogate splits:
## scaling < 2.5 to the right, agree=0.689, adj=0.188, (0 split)
## acanthosis < 2.5 to the right, agree=0.683, adj=0.172, (0 split)
## age < 35.5 to the right, agree=0.641, adj=0.063, (0 split)
##
## Node number 5: 36 observations
## predicted class=d5 expected loss=0 P(node) =0.1411765
## class counts: 0 0 0 0 36 0
## probabilities: 0.000 0.000 0.000 0.000 1.000 0.000
##
## Node number 8: 64 observations
## predicted class=d1 expected loss=0.046875 P(node) =0.2509804
## class counts: 61 1 0 1 0 1
## probabilities: 0.953 0.016 0.000 0.016 0.000 0.016
##
## Node number 9: 103 observations, complexity param=0.06214689
## predicted class=d2 expected loss=0.6019417 P(node) =0.4039216
## class counts: 16 41 0 33 0 13
## probabilities: 0.155 0.398 0.000 0.320 0.000 0.126
## left son=18 (88 obs) right son=19 (15 obs)
## Primary splits:
## age < 14 to the right, improve=11.166950, (0 missing)
## scaling < 1.5 to the right, improve= 5.831848, (0 missing)
## exocytosis < 2.5 to the right, improve= 2.958431, (0 missing)
## erythema < 1.5 to the right, improve= 2.869471, (0 missing)
## acanthosis < 1.5 to the right, improve= 1.329670, (0 missing)
##
## Node number 18: 88 observations, complexity param=0.06214689
## predicted class=d2 expected loss=0.5454545 P(node) =0.345098
## class counts: 15 40 0 32 0 1
## probabilities: 0.170 0.455 0.000 0.364 0.000 0.011
## left son=36 (68 obs) right son=37 (20 obs)
## Primary splits:
## scaling < 1.5 to the right, improve=5.919519, (0 missing)
## erythema < 1.5 to the right, improve=3.061005, (0 missing)
## exocytosis < 2.5 to the left, improve=2.132700, (0 missing)
## acanthosis < 1.5 to the right, improve=1.979154, (0 missing)
## age < 56 to the right, improve=1.341841, (0 missing)
## Surrogate splits:
## age < 18.5 to the right, agree=0.807, adj=0.15, (0 split)
## erythema < 1.5 to the right, agree=0.795, adj=0.10, (0 split)
##
## Node number 19: 15 observations
## predicted class=d6 expected loss=0.2 P(node) =0.05882353
## class counts: 1 1 0 1 0 12
## probabilities: 0.067 0.067 0.000 0.067 0.000 0.800
##
## Node number 36: 68 observations
## predicted class=d2 expected loss=0.4705882 P(node) =0.2666667
## class counts: 14 36 0 17 0 1
## probabilities: 0.206 0.529 0.000 0.250 0.000 0.015
##
## Node number 37: 20 observations
## predicted class=d4 expected loss=0.25 P(node) =0.07843137
## class counts: 1 4 0 15 0 0
## probabilities: 0.050 0.200 0.000 0.750 0.000 0.000
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.1.3
rpart.plot(fit)
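The CP table printed by summary(fit) can also be used to prune the tree. An optional sketch (not in the original analysis) that keeps the subtree with the lowest cross-validated error:
# Optional: prune at the CP value with the smallest cross-validated error
# (xerror) in fit$cptable, then plot the simpler tree.
best_cp <- fit$cptable[which.min(fit$cptable[, "xerror"]), "CP"]
fit_pruned <- prune(fit, cp = best_cp)
rpart.plot(fit_pruned)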
predict_unseen=predict(object=fit,newdata=test_set,type="class")
predict_unseen
## 3 5 6 13 14 16 17 21 23 24 25 26 31 32 33 35 37 40 41 46
## d3 d3 d2 d2 d1 d4 d2 d1 d5 d3 d5 d1 d1 d6 d1 d4 d3 d1 d4 d3
## 47 52 53 65 79 80 82 83 86 88 89 92 100 101 102 103 106 108 112 113
## d4 d4 d1 d2 d3 d5 d2 d4 d2 d3 d1 d4 d6 d2 d5 d1 d1 d1 d3 d5
## 115 116 125 126 129 136 137 138 146 147 157 166 168 169 172 173 181 184 185 190
## d3 d5 d1 d2 d5 d1 d2 d2 d3 d2 d2 d1 d3 d2 d1 d1 d1 d1 d6 d1
## 193 200 203 206 213 216 217 219 222 224 225 226 232 235 239 243 244 245 247 248
## d3 d2 d5 d1 d3 d3 d3 d2 d2 d5 d5 d5 d2 d2 d2 d1 d1 d1 d1 d1
## 249 250 251 252 257 260 263 268 270 273 280 284 286 287 288 289 304 305 306 307
## d1 d3 d3 d3 d4 d2 d5 d6 d2 d1 d2 d1 d2 d2 d3 d3 d4 d1 d1 d4
## 315 317 329 342 343 345 350 352 354 355 366
## d3 d3 d4 d5 d5 d2 d6 d1 d5 d5 d1
## Levels: d1 d2 d3 d4 d5 d6
cm=table(test_set$class,predict_unseen)
cm
## predict_unseen
## d1 d2 d3 d4 d5 d6
## d1 31 3 0 0 0 0
## d2 0 14 0 3 0 1
## d3 0 0 22 0 0 0
## d4 0 7 0 8 0 0
## d5 0 0 0 0 16 0
## d6 1 1 0 0 0 4
sum(diag(cm))/sum(cm)
## [1] 0.8558559
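Beyond overall accuracy, per-class performance can be inspected. A sketch assuming the caret package (loaded later in this report) is installed:
# Optional: per-class sensitivity/specificity and overall accuracy for the tree.
library(caret)
confusionMatrix(data = predict_unseen, reference = test_set$class)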
K-Nearest Neighbors (KNN)
dataset<-read.csv("V:\\5th sem -3rd year\\dsp\\dermatology-2.csv")
View(dataset)
## Replacing the null values with the mean
dataset$age[is.na(dataset$age)]<-mean(dataset$age,na.rm=TRUE)
View(dataset)
# Loading packages
library(e1071)
## Warning: package 'e1071' was built under R version 4.1.3
library(caTools)
library(class)
## Warning: package 'class' was built under R version 4.1.3
dataset$class<-factor(dataset$class,levels=c(1,2,3,4,5,6),labels=c("d1","d2","d3","d4","d5","d6"))
# Selecting the feature subset
library(dplyr)
datasetnew=dataset%>%select(1:2,11:17,32:35)
str(datasetnew)
## 'data.frame': 366 obs. of 13 variables:
## $ erythema : int 2 3 2 2 2 2 2 2 2 2 ...
## $ scaling : int 2 3 1 2 3 3 1 2 2 2 ...
## $ family_history : int 0 1 0 0 0 0 0 0 0 0 ...
## $ melanin_incontinence : int 0 0 1 0 1 0 0 2 0 0 ...
## $ eosinophils_in_the_infiltrate : int 0 0 0 0 0 2 0 0 0 0 ...
## $ pnl_infiltrate : int 0 1 0 3 0 1 0 0 0 0 ...
## $ fibrosis_of_the_papillary_dermis : int 0 0 0 0 0 0 3 0 0 0 ...
## $ exocytosis : int 3 1 1 0 1 2 1 2 2 3 ...
## $ acanthosis : int 2 2 2 2 2 2 3 3 1 2 ...
## $ inflammatory_monoluclear_inflitrate: int 1 1 2 3 2 1 2 3 2 2 ...
## $ band.like_infiltrate : int 0 0 3 0 3 0 0 3 0 0 ...
## $ age : num 55 8 26 40 45 41 18 57 22 30 ...
## $ class : Factor w/ 6 levels "d1","d2","d3",..: 2 1 3 1 3 2 5 3 4 4 ...
# Splitting data into train
# and test data
split <- sample.split(datasetnew, SplitRatio = 0.7)
train_cl <- subset(datasetnew, split == "TRUE")
test_cl <- subset(datasetnew, split == "FALSE")
dim(train_cl)
## [1] 253 13
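Note that sample.split() above is given the whole data frame rather than the class column, and no seed is set, so this split is neither stratified by class nor reproducible. A sketch of the stratified, seeded split used in the decision-tree section (running it would change the exact rows, and therefore the numbers, reported below):
# Reproducible, class-stratified alternative (would alter the results below).
set.seed(123)
split <- sample.split(datasetnew$class, SplitRatio = 0.7)
train_cl <- subset(datasetnew, split == TRUE)
test_cl <- subset(datasetnew, split == FALSE)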
# Feature Scaling
train_scale <- scale(train_cl[-13])
test_scale <- scale(test_cl[-13])
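Here the test set is standardised with its own means and standard deviations. A common alternative, sketched below using the attributes that scale() attaches to train_scale, is to reuse the training-set parameters so both sets share one scale:
# Optional: scale the test set with the training-set centre and spread.
test_scale <- scale(test_cl[-13],
                    center = attr(train_scale, "scaled:center"),
                    scale = attr(train_scale, "scaled:scale"))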
# Fitting KNN Model
# to training dataset
classifier_knn <- knn(train = train_scale,test = test_scale,cl = train_cl$class,k = 1)
classifier_knn
## [1] d6 d5 d4 d2 d3 d5 d1 d5 d2 d1 d2 d2 d2 d3 d4 d3 d1 d4 d1 d1 d1 d4 d4 d2 d5
## [26] d5 d3 d4 d5 d5 d2 d1 d1 d1 d3 d5 d4 d4 d4 d5 d1 d2 d1 d1 d3 d2 d1 d5 d3 d3
## [51] d1 d3 d3 d1 d3 d1 d1 d1 d3 d1 d3 d4 d4 d1 d1 d3 d3 d4 d5 d5 d1 d1 d1 d2 d1
## [76] d1 d1 d3 d4 d4 d4 d6 d4 d2 d1 d4 d2 d4 d3 d1 d5 d5 d3 d1 d4 d1 d3 d1 d4 d1
## [101] d3 d1 d5 d1 d3 d4 d2 d4 d1 d1 d2 d4 d1
## Levels: d1 d2 d3 d4 d5 d6
# Confusion Matrix
cm <- table(test_cl$class, classifier_knn)
cm
## classifier_knn
## d1 d2 d3 d4 d5 d6
## d1 33 1 0 1 0 1
## d2 3 8 0 2 0 0
## d3 0 0 21 0 0 0
## d4 1 2 0 14 0 0
## d5 1 1 0 2 15 0
## d6 0 2 0 4 0 1
sum(diag(cm))/sum(cm)
## [1] 0.8141593
# Model Evaluation - Choosing K
# Calculate out-of-sample error
# K = 3
classifier_knn <- knn(train = train_scale,test = test_scale,cl = train_cl$class,k = 3)
classifier_knn
## [1] d1 d5 d4 d2 d3 d5 d1 d5 d1 d1 d2 d4 d2 d3 d4 d3 d1 d4 d1 d1 d1 d4 d4 d2 d5
## [26] d5 d3 d4 d5 d5 d2 d1 d1 d1 d3 d5 d4 d4 d4 d5 d1 d4 d1 d1 d3 d2 d2 d5 d3 d3
## [51] d1 d3 d3 d1 d3 d1 d1 d1 d3 d1 d3 d4 d1 d1 d1 d3 d3 d4 d5 d5 d2 d2 d1 d2 d1
## [76] d1 d1 d3 d4 d4 d4 d6 d4 d4 d1 d4 d2 d4 d3 d1 d5 d5 d3 d1 d4 d1 d3 d1 d4 d1
## [101] d3 d4 d5 d1 d3 d4 d4 d4 d1 d1 d2 d4 d1
## Levels: d1 d2 d3 d4 d5 d6
# Confusion Matrix
cm <- table(test_cl$class, classifier_knn)
cm
## classifier_knn
## d1 d2 d3 d4 d5 d6
## d1 33 2 0 1 0 0
## d2 1 9 0 3 0 0
## d3 0 0 21 0 0 0
## d4 0 0 0 17 0 0
## d5 3 0 0 1 15 0
## d6 0 1 0 5 0 1
sum(diag(cm))/sum(cm)
## [1] 0.8495575
# Model Evaluation - Choosing K
# Calculate out-of-sample error
# K = 5
classifier_knn <- knn(train = train_scale,test = test_scale,cl = train_cl$class,k = 5)
classifier_knn
## [1] d1 d5 d4 d2 d3 d5 d1 d5 d1 d1 d2 d4 d2 d3 d2 d3 d1 d4 d1 d1 d1 d4 d4 d1 d5
## [26] d5 d3 d4 d5 d5 d2 d1 d1 d1 d3 d5 d4 d4 d4 d5 d1 d4 d1 d1 d3 d2 d1 d5 d3 d3
## [51] d1 d3 d3 d1 d3 d1 d1 d1 d3 d1 d3 d4 d1 d1 d1 d3 d3 d4 d5 d5 d2 d2 d1 d2 d1
## [76] d1 d1 d3 d4 d4 d4 d6 d4 d4 d1 d4 d1 d4 d3 d1 d5 d5 d3 d1 d4 d1 d3 d1 d4 d1
## [101] d3 d4 d5 d1 d3 d4 d4 d4 d1 d1 d2 d4 d1
## Levels: d1 d2 d3 d4 d5 d6
# Confusion Matrix
cm <- table(test_cl$class, classifier_knn)
cm
## classifier_knn
## d1 d2 d3 d4 d5 d6
## d1 35 0 0 1 0 0
## d2 2 9 0 2 0 0
## d3 0 0 21 0 0 0
## d4 0 0 0 17 0 0
## d5 3 0 0 1 15 0
## d6 0 1 0 5 0 1
sum(diag(cm))/sum(cm)
## [1] 0.8672566
classifier_knn <- knn(train = train_scale,test = test_scale,cl = train_cl$class,k = 7)
classifier_knn
## [1] d1 d5 d4 d2 d3 d5 d1 d5 d2 d1 d1 d4 d2 d3 d4 d3 d1 d4 d1 d1 d1 d4 d4 d2 d5
## [26] d5 d3 d4 d5 d5 d2 d1 d1 d1 d3 d5 d4 d4 d2 d5 d1 d4 d1 d1 d3 d2 d2 d5 d3 d3
## [51] d1 d3 d3 d1 d3 d1 d1 d1 d3 d1 d3 d4 d1 d1 d1 d3 d3 d4 d5 d5 d2 d2 d1 d4 d1
## [76] d1 d1 d3 d4 d4 d4 d6 d4 d4 d1 d4 d1 d4 d3 d1 d5 d5 d3 d1 d4 d1 d3 d1 d4 d1
## [101] d3 d4 d5 d1 d3 d4 d4 d4 d1 d1 d2 d4 d1
## Levels: d1 d2 d3 d4 d5 d6
# Confusion Matrix
cm <- table(test_cl$class, classifier_knn)
cm
## classifier_knn
## d1 d2 d3 d4 d5 d6
## d1 34 1 0 1 0 0
## d2 2 8 0 3 0 0
## d3 0 0 21 0 0 0
## d4 0 0 0 17 0 0
## d5 2 1 0 1 15 0
## d6 0 1 0 5 0 1
sum(diag(cm))/sum(cm)
## [1] 0.8495575
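Instead of refitting by hand for each k, the accuracies can be computed in one pass. A minimal sketch over k = 1 to 15:
# Optional: test accuracy for a range of k values in a single loop.
accuracy_by_k <- sapply(1:15, function(k) {
  pred <- knn(train = train_scale, test = test_scale, cl = train_cl$class, k = k)
  mean(pred == test_cl$class)
})
accuracy_by_k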
Naive Bayes
#install.packages('tidyverse')
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v stringr 1.4.1
## v tidyr 1.2.1 v forcats 0.5.2
## v readr 2.1.2
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#install.packages('ggplot2')
library(ggplot2)
#install.packages('caret')
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
#install.packages('caretEnsemble')
library(caretEnsemble)
## Warning: package 'caretEnsemble' was built under R version 4.1.3
##
## Attaching package: 'caretEnsemble'
##
## The following object is masked from 'package:ggplot2':
##
## autoplot
#install.packages('psych')
library(psych)
## Warning: package 'psych' was built under R version 4.1.3
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
#install.packages('Amelia')
library(Amelia)
## Warning: package 'Amelia' was built under R version 4.1.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.1.3
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
#install.packages('mice')
library(mice)
## Warning: package 'mice' was built under R version 4.1.3
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
#install.packages('GGally')
library(GGally)
## Warning: package 'GGally' was built under R version 4.1.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
#install.packages('rpart')
library(rpart)
#install.packages('randomForest')
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.1.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:psych':
##
## outlier
##
## The following object is masked from 'package:ggplot2':
##
## margin
##
## The following object is masked from 'package:dplyr':
##
## combine
dataset<-read.csv("V:\\5th sem -3rd year\\dsp\\dermatology-2.csv")
#View(dataset)
## Replacing the null values with the mean
dataset$age[is.na(dataset$age)]<-mean(dataset$age,na.rm=TRUE)
#View(dataset)
#installing packages
#install.packages("e1071")
#install.packages("caTools")
#install.packages("caret")
library(e1071)
library(caTools)
library(caret)
#dataset$class<-factor(dataset$class,levels=c(1,2,3,4,5,6),
#                      labels=c("psoriasis","seboreic dermatitis","lichen planus",
#                               "pityriasis rosea","cronic dermatitis","pityriasis rubra pilaris"))
dataset$class<-factor(dataset$class,levels=c(1,2,3,4,5,6),labels=c("d1","d2","d3","d4","d5","d6"))
library(dplyr)
daset=dataset%>%select(1:2,11:17,32:35)
str(daset)
## 'data.frame': 366 obs. of 13 variables:
## $ erythema : int 2 3 2 2 2 2 2 2 2 2 ...
## $ scaling : int 2 3 1 2 3 3 1 2 2 2 ...
## $ family_history : int 0 1 0 0 0 0 0 0 0 0 ...
## $ melanin_incontinence : int 0 0 1 0 1 0 0 2 0 0 ...
## $ eosinophils_in_the_infiltrate : int 0 0 0 0 0 2 0 0 0 0 ...
## $ pnl_infiltrate : int 0 1 0 3 0 1 0 0 0 0 ...
## $ fibrosis_of_the_papillary_dermis : int 0 0 0 0 0 0 3 0 0 0 ...
## $ exocytosis : int 3 1 1 0 1 2 1 2 2 3 ...
## $ acanthosis : int 2 2 2 2 2 2 3 3 1 2 ...
## $ inflammatory_monoluclear_inflitrate: int 1 1 2 3 2 1 2 3 2 2 ...
## $ band.like_infiltrate : int 0 0 3 0 3 0 0 3 0 0 ...
## $ age : num 55 8 26 40 45 41 18 57 22 30 ...
## $ class : Factor w/ 6 levels "d1","d2","d3",..: 2 1 3 1 3 2 5 3 4 4 ...
dim(daset)
## [1] 366 13
# Understanding the data set - Naive Bayes in R
head(daset)
## erythema scaling family_history melanin_incontinence
## 1 2 2 0 0
## 2 3 3 1 0
## 3 2 1 0 1
## 4 2 2 0 0
## 5 2 3 0 1
## 6 2 3 0 0
## eosinophils_in_the_infiltrate pnl_infiltrate fibrosis_of_the_papillary_dermis
## 1 0 0 0
## 2 0 1 0
## 3 0 0 0
## 4 0 3 0
## 5 0 0 0
## 6 2 1 0
## exocytosis acanthosis inflammatory_monoluclear_inflitrate
## 1 3 2 1
## 2 1 2 1
## 3 1 2 2
## 4 0 2 3
## 5 1 2 2
## 6 2 2 1
## band.like_infiltrate age class
## 1 0 55 d2
## 2 0 8 d1
## 3 3 26 d3
## 4 0 40 d1
## 5 3 45 d3
## 6 0 41 d2
summary(daset)
## erythema scaling family_history melanin_incontinence
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2.000 Median :2.000 Median :0.0000 Median :0.0000
## Mean :2.068 Mean :1.795 Mean :0.1257 Mean :0.4044
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :3.000 Max. :3.000 Max. :1.0000 Max. :3.0000
## eosinophils_in_the_infiltrate pnl_infiltrate
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000
## Mean :0.1393 Mean :0.5464
## 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :3.0000
## fibrosis_of_the_papillary_dermis exocytosis acanthosis
## Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:2.000
## Median :0.0000 Median :2.000 Median :2.000
## Mean :0.3361 Mean :1.369 Mean :1.956
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :3.0000 Max. :3.000 Max. :3.000
## inflammatory_monoluclear_inflitrate band.like_infiltrate age
## Min. :0.000 Min. :0.0000 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:25.0
## Median :2.000 Median :0.0000 Median :36.0
## Mean :1.866 Mean :0.5546 Mean :36.3
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:48.0
## Max. :3.000 Max. :3.0000 Max. :75.0
## class
## d1:112
## d2: 61
## d3: 72
## d4: 49
## d5: 52
## d6: 20
missmap(dataset)
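The mice package loaded above is otherwise unused; as an alternative to the earlier mean imputation of age, missing values could be imputed by predictive mean matching. A sketch, assuming the same CSV path and starting again from the raw file:
# Optional: impute missing age values with mice (predictive mean matching)
# instead of replacing them with the column mean.
raw <- read.csv("V:\\5th sem -3rd year\\dsp\\dermatology-2.csv")
imp <- mice(raw, m = 5, method = "pmm", seed = 123, printFlag = FALSE)
raw_complete <- complete(imp, 1)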
set.seed(123)
split=sample.split(daset$class,SplitRatio = 0.65)
train_set=subset(daset,split==TRUE)
test_set=subset(daset,split==FALSE)
dim(train_set)
## [1] 239 13
dim(test_set)
## [1] 127 13
classifier=naiveBayes(x=train_set[-13],y=train_set$class)
summary(classifier)
## Length Class Mode
## apriori 6 table numeric
## tables 12 -none- list
## levels 6 -none- character
## isnumeric 12 -none- logical
## call 3 -none- call
y_pred=predict(classifier,newdata=test_set)
cm=table(test_set$class,y_pred)
cm
## y_pred
## d1 d2 d3 d4 d5 d6
## d1 17 6 0 14 0 2
## d2 0 12 0 9 0 0
## d3 0 0 25 0 0 0
## d4 0 0 0 17 0 0
## d5 0 0 0 0 18 0
## d6 0 0 0 3 0 4
sum(diag(cm))/sum(cm)
## [1] 0.7322835
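naiveBayes() treats the 0-3 feature scores as Gaussian. One optional variation, sketched here, converts them to factors with fixed levels and refits with Laplace smoothing; whether it helps can be checked on the same split:
# Optional: treat the ordinal scores as categorical (age stays numeric) and
# refit Naive Bayes with Laplace smoothing, then score on the same test set.
train_f <- train_set %>% mutate(across(1:11, ~ factor(.x, levels = 0:3)))
test_f <- test_set %>% mutate(across(1:11, ~ factor(.x, levels = 0:3)))
classifier_f <- naiveBayes(x = train_f[-13], y = train_f$class, laplace = 1)
cm_f <- table(test_f$class, predict(classifier_f, newdata = test_f))
sum(diag(cm_f)) / sum(cm_f)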
Result

The decision tree reached a test accuracy of about 0.856. KNN scored 0.814 at k = 1, 0.850 at k = 3, 0.867 at k = 5 and 0.850 at k = 7, so k = 5 performed best. Naive Bayes reached about 0.732, making KNN with k = 5 and the decision tree the stronger models on these splits.