Data source: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data
(a) Mushroom records drawn from The Audubon Society Field Guide to North
American Mushrooms (1981). G. H. Lincoff (Pres.), New York: Alfred
A. Knopf
(b) Donor: Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
(c) Date: 27 April 1987
retrieved: 2/1/2019
1. classes: edible=e, poisonous=p
2. cap-shape: bell=b,conical=c,convex=x,flat=f,
knobbed=k,sunken=s
3. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
4. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,
pink=p,purple=u,red=e,white=w,yellow=y
5. bruises?: bruises=t,no=f
6. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,
musty=m,none=n,pungent=p,spicy=s
7. gill-attachment: attached=a,descending=d,free=f,notched=n
8. gill-spacing: close=c,crowded=w,distant=d
9. gill-size: broad=b,narrow=n
10. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,
green=r,orange=o,pink=p,purple=u,red=e,
white=w,yellow=y
11. stalk-shape: enlarging=e,tapering=t
12. stalk-root: bulbous=b,club=c,cup=u,equal=e,
rhizomorphs=z,rooted=r,missing=?
13. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
14. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
15. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
pink=p,red=e,white=w,yellow=y
16. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
pink=p,red=e,white=w,yellow=y
17. veil-type: partial=p,universal=u
18. veil-color: brown=n,orange=o,white=w,yellow=y
19. ring-number: none=n,one=o,two=t
20. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,
none=n,pendant=p,sheathing=s,zone=z
21. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,
orange=o,purple=u,white=w,yellow=y
22. population: abundant=a,clustered=c,numerous=n,
scattered=s,several=v,solitary=y
23. habitat: grasses=g,leaves=l,meadows=m,paths=p,
urban=u,waste=w,woods=d
## Download the mushroom data set and load it into memory.
## The file has no header row (header = FALSE), so the columns receive the
## default names V1, V2, ..., V23.
## stringsAsFactors = TRUE is set explicitly: it was the default before
## R 4.0.0, and the rest of this script expects every column to be a factor.
data <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
## data preview
head(data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
## Keep five of the variables: columns V1, V2, V4, V9 and V11.
data.frame <- data[, c("V1", "V2", "V4", "V9", "V11"), drop = FALSE]
## Give the kept columns descriptive names.
names(data.frame) <- c(
  "class",
  "cap-shape",
  "cap-color",
  "gill-size",
  "stalk-shape"
)
## data preview
head(data.frame)
## class cap-shape cap-color gill-size stalk-shape
## 1 p x n n e
## 2 e x y b e
## 3 e b w b e
## 4 p x w n e
## 5 e x g b t
## 6 e x y b e
## Replace the one-letter codes in each column with descriptive labels.
## Each code is paired with its label explicitly (levels = codes,
## labels = names) instead of overwriting levels() by position, so a label
## can never land on the wrong code, and the conversion also works when a
## column is a plain character vector rather than a factor.
## Codes are listed in alphabetical order, so the resulting level order is
## the same one a default factor of these codes would have.
data.frame$class <- factor(
  data.frame$class,
  levels = c("e", "p"),
  labels = c("edible", "poisonous")
)
data.frame$`cap-shape` <- factor(
  data.frame$`cap-shape`,
  levels = c("b", "c", "f", "k", "s", "x"),
  labels = c("bell", "conical", "flat", "knobbed", "sunken", "convex")
)
data.frame$`cap-color` <- factor(
  data.frame$`cap-color`,
  levels = c("b", "c", "e", "g", "n", "p", "r", "u", "w", "y"),
  labels = c(
    "buff", "cinnamon", "red", "gray", "brown",
    "pink", "green", "purple", "white", "yellow"
  )
)
data.frame$`gill-size` <- factor(
  data.frame$`gill-size`,
  levels = c("b", "n"),
  labels = c("broad", "narrow")
)
data.frame$`stalk-shape` <- factor(
  data.frame$`stalk-shape`,
  levels = c("e", "t"),
  labels = c("enlarging", "tapering")
)
## data preview
head(data.frame)
## class cap-shape cap-color gill-size stalk-shape
## 1 poisonous convex brown narrow enlarging
## 2 edible convex yellow broad enlarging
## 3 edible bell white broad enlarging
## 4 poisonous convex white narrow enlarging
## 5 edible convex gray broad tapering
## 6 edible convex yellow broad enlarging
## data summary: for each column, the number of rows in each category
## (see the printed output below)
summary(data.frame)
## class cap-shape cap-color gill-size
## edible :4208 bell : 452 brown :2284 broad :5612
## poisonous:3916 conical: 4 gray :1840 narrow:2512
## flat :3152 red :1500
## knobbed: 828 yellow :1072
## sunken : 32 white :1040
## convex :3656 buff : 168
## (Other): 220
## stalk-shape
## enlarging:3516
## tapering :4608
##
##
##
##
##