library(RCurl)
## Loading required package: bitops
# Download the raw UCI mushroom data (comma-separated, no header row) as one
# character string.
Mushroom <- getURL('https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data')
library(plyr)
# Parse the downloaded text. read.csv() already returns a data.frame, so no
# extra data.frame() wrapper is needed.
# stringsAsFactors = TRUE is required on R >= 4.0, where the default became
# FALSE: the level recoding further down goes through levels(), which is NULL
# for character columns, so without it the recoding silently does nothing.
df <- read.csv(text = Mushroom, header = FALSE, stringsAsFactors = TRUE)
# Preview the first rows; the columns still carry the default V1..V23 names.
head(df)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Number of rows and columns in the raw data.
dim(df)
## [1] 8124 23
# Type and levels of every column.
str(df)
## 'data.frame': 8124 obs. of 23 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
library(psych) #Summary Statisitcs by Group
#https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names
#Change names for attributes and their values
# Replace the default V1..V23 headers with descriptive attribute names,
# grouped here by the part of the mushroom they describe. The order must match
# the column order of the raw file.
names(df) <- c(
  "classes",
  "cap_shape", "cap_surface", "cap_color",
  "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)
# Show the new column names.
names(df)
## [1] "classes" "cap_shape"
## [3] "cap_surface" "cap_color"
## [5] "bruises" "odor"
## [7] "gill_attachment" "gill_spacing"
## [9] "gill_size" "gill_color"
## [11] "stalk_shape" "stalk_root"
## [13] "stalk_surface_above_ring" "stalk_surface_below_ring"
## [15] "stalk_color_above_ring" "stalk_color_below_ring"
## [17] "veil_type" "veil_color"
## [19] "ring_number" "ring_type"
## [21] "spore_print_color" "population"
## [23] "habitat"
# Working copy of the data with the columns reordered so the attributes used
# by the rules below come first. Column names are spelled out explicitly; they
# are the same "df."-prefixed names data.frame() would derive from unnamed
# df$... arguments.
subDf <- data.frame(
  df.classes = df$classes,
  df.odor = df$odor,
  df.spore_print_color = df$spore_print_color,
  df.stalk_surface_below_ring = df$stalk_surface_below_ring,
  df.stalk_color_above_ring = df$stalk_color_above_ring,
  df.habitat = df$habitat,
  df.cap_color = df$cap_color,
  df.population = df$population,
  df.cap_shape = df$cap_shape,
  df.cap_surface = df$cap_surface,
  df.bruises = df$bruises,
  df.gill_attachment = df$gill_attachment,
  df.gill_spacing = df$gill_spacing,
  df.gill_size = df$gill_size,
  df.gill_color = df$gill_color,
  df.stalk_shape = df$stalk_shape,
  df.stalk_root = df$stalk_root,
  df.stalk_surface_above_ring = df$stalk_surface_above_ring,
  df.stalk_color_below_ring = df$stalk_color_below_ring,
  df.veil_type = df$veil_type,
  df.veil_color = df$veil_color,
  df.ring_number = df$ring_number,
  df.ring_type = df$ring_type
)
# Expand the one-letter codes of the columns used by the rules into
# descriptive labels. Columns not listed in `level_labels` keep their codes.

# Rename the levels of `x`.
#   x     : factor (or a vector coercible to one) holding one-letter codes.
#   codes : named character vector, c(old_code = "new label", ...).
# Returns a factor with the same values and the same level order as `x`, in
# which every level found in names(codes) is replaced by its label. Levels
# without an entry in `codes` are left untouched.
recode_levels <- function(x, codes) {
  # as.factor() is a no-op for factors; it makes the recoding work when the
  # column arrives as character (the read.csv() default on R >= 4.0).
  x <- as.factor(x)
  current <- levels(x)
  known <- current %in% names(codes)
  # Replace by position so the original level order is preserved.
  current[known] <- unname(codes[current[known]])
  levels(x) <- current
  x
}

# One entry per recoded column: code -> label.
level_labels <- list(
  df.classes = c(p = "poisonous", e = "edible"),
  df.odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  df.spore_print_color = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  ),
  df.stalk_surface_below_ring = c(
    f = "fibrous", y = "scaly", k = "silky", s = "smooth"
  ),
  df.stalk_color_above_ring = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  ),
  df.habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  ),
  df.population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  df.cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Apply each mapping to its column.
recoded <- names(level_labels)
subDf[recoded] <- Map(recode_levels, subDf[recoded], level_labels)

# Preview the recoded data.
head(subDf)
## df.classes df.odor df.spore_print_color df.stalk_surface_below_ring
## 1 poisonous pungent black smooth
## 2 edible almond brown smooth
## 3 edible anise brown smooth
## 4 poisonous pungent black smooth
## 5 edible none brown smooth
## 6 edible almond black smooth
## df.stalk_color_above_ring df.habitat df.cap_color df.population
## 1 white urban brown scattered
## 2 white grasses yellow numerous
## 3 white meadows white numerous
## 4 white urban white scattered
## 5 white grasses gray abundant
## 6 white grasses yellow numerous
## df.cap_shape df.cap_surface df.bruises df.gill_attachment
## 1 x s t f
## 2 x s t f
## 3 b s t f
## 4 x y t f
## 5 x s f f
## 6 x y t f
## df.gill_spacing df.gill_size df.gill_color df.stalk_shape df.stalk_root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## df.stalk_surface_above_ring df.stalk_color_below_ring df.veil_type
## 1 s w p
## 2 s w p
## 3 s w p
## 4 s w p
## 5 s w p
## 6 s w p
## df.veil_color df.ring_number df.ring_type
## 1 w o p
## 2 w o p
## 3 w o p
## 4 w o p
## 5 w o e
## 6 w o p
# Per-column frequency counts: the recoded columns show full labels, the
# remaining columns still show their one-letter codes.
summary(subDf)
## df.classes df.odor df.spore_print_color
## edible :4208 none :3528 white :2388
## poisonous:3916 foul :2160 brown :1968
## spicy : 576 black :1872
## fishy : 576 chocolate:1632
## almond : 400 green : 72
## anise : 400 buff : 48
## (Other): 484 (Other) : 144
## df.stalk_surface_below_ring df.stalk_color_above_ring df.habitat
## fibrous: 600 white :4464 woods :3148
## silky :2304 pink :1872 grasses:2148
## smooth :4936 gray : 576 leaves : 832
## scaly : 284 brown : 448 meadows: 292
## buff : 432 paths :1144
## orange : 192 urban : 368
## (Other): 140 waste : 192
## df.cap_color df.population df.cap_shape df.cap_surface df.bruises
## brown :2284 abundant : 384 b: 452 f:2320 f:4748
## gray :1840 clustered: 340 c: 4 g: 4 t:3376
## red :1500 numerous : 400 f:3152 s:2556
## yellow :1072 scattered:1248 k: 828 y:3244
## white :1040 several :4040 s: 32
## buff : 168 solitary :1712 x:3656
## (Other): 220
## df.gill_attachment df.gill_spacing df.gill_size df.gill_color
## a: 210 c:6812 b:5612 b :1728
## f:7914 w:1312 n:2512 p :1492
## w :1202
## n :1048
## g : 752
## h : 732
## (Other):1170
## df.stalk_shape df.stalk_root df.stalk_surface_above_ring
## e:3516 ?:2480 f: 552
## t:4608 b:3776 k:2372
## c: 556 s:5176
## e:1120 y: 24
## r: 192
##
##
## df.stalk_color_below_ring df.veil_type df.veil_color df.ring_number
## w :4384 p:8124 n: 96 n: 36
## p :1872 o: 96 o:7488
## g : 576 w:7924 t: 600
## n : 512 y: 8
## b : 432
## o : 192
## (Other): 156
## df.ring_type
## e:2776
## f: 48
## l:1296
## n: 36
## p:3968
##
##
#Disjunctive rules for poisonous mushrooms, from most general to most specific:
#P_1 odor=NOT(almond.OR.anise.OR.none) -> odor=(creosote=c,fishy=y,foul=f,musty=m,pungent=p,spicy=s)
#120 poisonous cases missed, 98.52% accuracy
# Full class-by-odor cross-tab, used to read off how each odor splits between
# the two classes.
odor_all <- subset(subDf, select = c(df.classes, df.odor))
ftable(odor_all)
## df.odor almond creosote foul anise musty none pungent spicy fishy
## df.classes
## edible 400 0 0 400 0 3408 0 0 0
## poisonous 0 192 2160 0 36 120 256 576 576
# Rows that P_1 flags as poisonous. A chain such as
# `odor != "almond" | odor != "anise" | ...` is TRUE for every row (no odor
# equals all three values at once), so it filters nothing; the rule is
# expressed with %in% instead.
odor_cyfmps <- subset(
  subDf,
  !(df.odor %in% c("almond", "anise", "none")),
  select = c(df.classes, df.odor)
)
#calculate the accuracy
total <- nrow(subDf) # accuracy is measured against the whole data set
# Poisonous mushrooms the rule does not flag ...
missed <- sum(subDf$df.classes == "poisonous") -
  sum(odor_cyfmps$df.classes == "poisonous")
# ... and edible mushrooms it flags by mistake.
false_alarms <- sum(odor_cyfmps$df.classes == "edible")
print(1 - (missed + false_alarms) / total) # share of correctly classified rows
## [1] 0.985229
library ('ggplot2')
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Plot every odor (not only the flagged ones) so both classes are visible.
qplot(df.odor, data = odor_all, fill = df.classes)

#P_2) spore-print-color=green
# 48 cases missed, 99.41% accuracy
# Full class-by-spore-print-color cross-tab.
sporeGreen <- subset(subDf, select = c(df.classes, df.spore_print_color))
ftable(sporeGreen) # frequency of attributes
## df.spore_print_color buff chocolate black brown orange green purple white yellow
## df.classes
## edible 48 48 1648 1744 48 0 48 576 48
## poisonous 0 1584 224 224 0 72 0 1812 0
# Rows flagged by P_2 on its own.
sporeGreen1 <- subset(
  subDf,
  df.spore_print_color == "green",
  select = c(df.classes, df.spore_print_color)
)
ftable(sporeGreen1) # frequency of attributes
## df.spore_print_color buff chocolate black brown orange green purple white yellow
## df.classes
## edible 0 0 0 0 0 0 0 0 0
## poisonous 0 0 0 0 0 72 0 0 0
# Every green-spored mushroom is poisonous, so P_2 alone raises no false alarm.
# The "48 cases missed" figure is cumulative: the rules are applied as a
# disjunction, so it counts the poisonous mushrooms flagged by neither P_1
# nor P_2.
sum(is.na(sporeGreen$df.spore_print_color))
## [1] 0
#calculate the accuracy of P_1 and P_2 combined
flagged <- !(subDf$df.odor %in% c("almond", "anise", "none")) |
  subDf$df.spore_print_color == "green"
poisonous <- subDf$df.classes == "poisonous"
total <- nrow(subDf) # accuracy is measured against the whole data set
print(1 - sum(flagged != poisonous) / total) # share of correctly classified rows
## [1] 0.9940916
library ('ggplot2')
qplot(df.spore_print_color, data = sporeGreen, fill = df.classes)

#P_3) odor=none.AND.stalk-surface-below-ring=scaly.AND.(stalk-color-above-ring=NOT.brown)
#8 cases missed, 99.90% accuracy
# Rows flagged as poisonous by P_3.
# The odor test must use `==`. Written with a single `=`, subset() receives
# `df.odor = ...` as a stray named argument instead of a row filter, the whole
# condition is ignored, and all 8124 rows come back.
odor_stalkFaceBelow_stalkColAbove <- subset(
  subDf,
  df.odor == "none" &
    df.stalk_surface_below_ring == "scaly" &
    df.stalk_color_above_ring != "brown",
  select = c(
    df.classes, df.odor,
    df.stalk_surface_below_ring, df.stalk_color_above_ring
  )
)
summary(odor_stalkFaceBelow_stalkColAbove)
# droplevels() restricts the table to the level combinations that occur in
# the selection instead of printing every (mostly empty) combination.
ftable(droplevels(odor_stalkFaceBelow_stalkColAbove))
# On the UCI data the selection holds 40 rows, all poisonous: 32 with a white
# and 8 with a yellow stalk color above the ring.
#calculate the accuracy of P_1, P_2 and P_3 combined
# (the "8 cases missed" figure is cumulative over the disjunction of rules)
flagged <- !(subDf$df.odor %in% c("almond", "anise", "none")) |
  subDf$df.spore_print_color == "green" |
  (subDf$df.odor == "none" &
    subDf$df.stalk_surface_below_ring == "scaly" &
    subDf$df.stalk_color_above_ring != "brown")
poisonous <- subDf$df.classes == "poisonous"
total <- nrow(subDf) # accuracy is measured against the whole data set
print(1 - sum(flagged != poisonous) / total) # share of correctly classified rows
## [1] 0.9990153
#P_4) habitat=leaves.AND.cap-color=white
#100% accuracy
# Rows flagged as poisonous by P_4.
# The habitat test must use `==`. Written with a single `=`, subset() receives
# `df.habitat = ...` as a stray named argument, ignores the condition and
# returns every row.
hab_capCol <- subset(
  subDf,
  df.habitat == "leaves" & df.cap_color == "white",
  select = c(df.classes, df.habitat, df.cap_color)
)
# droplevels() restricts the table to the combinations that occur.
ftable(droplevels(hab_capCol))
# On the UCI data the selection holds 8 rows, all poisonous.
# Check the "100% accuracy" claim for P_1 to P_4 combined; this should print 1
# if the four rules together flag exactly the poisonous mushrooms
# (NOTE(review): confirm on a fresh run).
flagged <- !(subDf$df.odor %in% c("almond", "anise", "none")) |
  subDf$df.spore_print_color == "green" |
  (subDf$df.odor == "none" &
    subDf$df.stalk_surface_below_ring == "scaly" &
    subDf$df.stalk_color_above_ring != "brown") |
  (subDf$df.habitat == "leaves" & subDf$df.cap_color == "white")
print(1 - sum(flagged != (subDf$df.classes == "poisonous")) / nrow(subDf))
library ('ggplot2')
# Drawn from the full data so that both TRUE and FALSE appear on each axis;
# within hab_capCol both conditions are TRUE for every row.
qplot(df.habitat=="leaves",df.cap_color=="white", data = subDf, fill=df.classes,color=df.classes)

#P_4') population=clustered.AND.cap_color=white
# Rows flagged as poisonous by the alternative rule P_4'.
# The population test must use `==`. Written with a single `=`, subset()
# receives `df.population = ...` as a stray named argument, ignores the
# condition and returns every row.
pop_capCol <- subset(
  subDf,
  df.population == "clustered" & df.cap_color == "white",
  select = c(df.classes, df.population, df.cap_color)
)
# droplevels() restricts the table to the combinations that occur.
ftable(droplevels(pop_capCol)) # frequency of attributes
# On the UCI data the selection holds 8 rows, all poisonous.
library ('ggplot2')
# Drawn from the full data so that both TRUE and FALSE appear on each axis;
# within pop_capCol both conditions are TRUE for every row.
qplot(df.population=="clustered",df.cap_color=="white", data = subDf, fill=df.classes,color=df.classes)

#Junctive rules: odor=(almond.OR.anise.OR.none).AND.spore-print-color=NOT.green
#gives 48 errors, or 99.41% accuracy on the whole dataset.
# Rows the rule declares edible.
# `&` binds tighter than `|`, so in `a | b | c & d` the green-spore exclusion
# only applies to the last odor test. %in% keeps the three odors together so
# the exclusion covers all of them.
odor_spore <- subset(
  subDf,
  df.odor %in% c("almond", "anise", "none") & df.spore_print_color != "green",
  select = c(df.classes, df.odor, df.spore_print_color)
)
ftable(odor_spore) # frequency of attributes
## df.spore_print_color buff chocolate black brown orange green purple white yellow
## df.classes df.odor
## edible almond 0 0 176 200 0 0 24 0 0
## creosote 0 0 0 0 0 0 0 0 0
## foul 0 0 0 0 0 0 0 0 0
## anise 0 0 176 200 0 0 24 0 0
## musty 0 0 0 0 0 0 0 0 0
## none 48 48 1296 1344 48 0 0 576 48
## pungent 0 0 0 0 0 0 0 0 0
## spicy 0 0 0 0 0 0 0 0 0
## fishy 0 0 0 0 0 0 0 0 0
## poisonous almond 0 0 0 0 0 0 0 0 0
## creosote 0 0 0 0 0 0 0 0 0
## foul 0 0 0 0 0 0 0 0 0
## anise 0 0 0 0 0 0 0 0 0
## musty 0 0 0 0 0 0 0 0 0
## none 0 0 0 0 0 0 0 48 0
## pungent 0 0 0 0 0 0 0 0 0
## spicy 0 0 0 0 0 0 0 0 0
## fishy 0 0 0 0 0 0 0 0 0
#calculate the accuracy of the rule on the whole dataset
# odor_spore holds the mushrooms the rule declares edible, so the errors are
# the poisonous rows inside it plus the edible rows left outside it.
total <- nrow(subDf) # whole data set, not just the rows selected by the rule
wrongly_edible <- sum(odor_spore$df.classes == "poisonous")
wrongly_poisonous <- sum(subDf$df.classes == "edible") -
  sum(odor_spore$df.classes == "edible")
print(1 - (wrongly_edible + wrongly_poisonous) / total) # share of correctly classified rows
## [1] 0.9940916
library ('ggplot2')
qplot(df.odor=="almond"|df.odor=="anise"|df.odor=="none",df.spore_print_color!="green", data = odor_spore, fill=df.classes,color=df.classes)
