The data set of this assignment is from https://archive.ics.uci.edu/ml/datasets/Mushroom

The table contains 23 columns, and the values are described as follows:

  1. edible-or-poisonous: edible=e,poisonous=p
  2. cap-shape: bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
  3. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
  4. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
  5. bruises?: bruises=t,no=f
  6. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
  7. gill-attachment: attached=a,descending=d,free=f,notched=n
  8. gill-spacing: close=c,crowded=w,distant=d
  9. gill-size: broad=b,narrow=n
  10. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y
  11. stalk-shape: enlarging=e,tapering=t
  12. stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?
  13. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
  14. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
  15. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
  16. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
  17. veil-type: partial=p,universal=u
  18. veil-color: brown=n,orange=o,white=w,yellow=y
  19. ring-number: none=n,one=o,two=t
  20. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l, none=n,pendant=p,sheathing=s,zone=z
  21. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y
  22. population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y
  23. habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d

The goal of this work is to transform the data so that it is more reader-friendly and easier to use in downstream analysis.

First, load all necessary libraries

library(plyr)

Import the data file from GitHub and take a quick look at the data set

# Read the raw mushroom data: comma-separated, with no header row, so the
# columns are named V1..V23 automatically.
# stringsAsFactors = TRUE is set explicitly because later steps call levels()
# on these columns; since R 4.0 read.table() keeps strings as character
# vectors by default, for which levels() returns NULL.
dataTable <- read.table(
  "https://raw.githubusercontent.com/ezaccountz/DATA_607-AS_1/master/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)
head(dataTable)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Next, we create a subset of the data, including the column indicating whether a mushroom is edible or poisonous.
Since colors are easy for people to recognize, we will analyze the mushrooms by their cap color, gill color, veil color and spore print color attributes.

# Keep only the class column and the four colour attributes, then give them
# descriptive names.
mushrooms <- dataTable[, c("V1", "V4", "V10", "V18", "V21"), drop = FALSE]
names(mushrooms) <- c(
  "edible_or_poisonous", "cap_color", "gill_color",
  "veil_color", "spore_print_color"
)

# One code -> label map per column, taken from the attribute list above.
code_labels <- list(
  edible_or_poisonous = c("e" = "edible", "p" = "poisonous"),
  cap_color = c(
    "n" = "brown", "b" = "buff", "c" = "cinnamon", "g" = "gray",
    "r" = "green", "p" = "pink", "u" = "purple", "e" = "red",
    "w" = "white", "y" = "yellow"
  ),
  gill_color = c(
    "k" = "black", "n" = "brown", "b" = "buff", "h" = "chocolate",
    "g" = "gray", "r" = "green", "o" = "orange", "p" = "pink",
    "u" = "purple", "e" = "red", "w" = "white", "y" = "yellow"
  ),
  veil_color = c("n" = "brown", "o" = "orange", "w" = "white", "y" = "yellow"),
  spore_print_color = c(
    "k" = "black", "n" = "brown", "b" = "buff", "h" = "chocolate",
    "r" = "green", "o" = "orange", "u" = "purple", "w" = "white",
    "y" = "yellow"
  )
)

# Replace the single-letter codes with readable labels, column by column.
for (column in names(code_labels)) {
  mushrooms[[column]] <- revalue(mushrooms[[column]], code_labels[[column]])
}
head(mushrooms)
##   edible_or_poisonous cap_color gill_color veil_color spore_print_color
## 1           poisonous     brown      black      white             black
## 2              edible    yellow      black      white             brown
## 3              edible     white      brown      white             brown
## 4           poisonous     white      brown      white             black
## 5              edible      gray      black      white             brown
## 6              edible    yellow      brown      white             black

Now let’s find out which cap colors indicate that a mushroom is safe to eat.

# Share of all mushrooms falling into each (edibility, cap color) combination:
# rows are the edibility classes, columns are the cap colors.
# table() counts every combination in a single pass and works for factor and
# character columns alike, so the result does not depend on levels() being
# available for these columns.
cap_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$cap_color)
# Divide by the total number of mushrooms and convert to a plain data frame.
m_cap_color <- as.data.frame.matrix(cap_color_counts / nrow(mushrooms))
m_cap_color
##                  buff    cinnamon        red       gray     brown
## edible    0.005908419 0.003938946 0.07680945 0.12703102 0.1555884
## poisonous 0.014771049 0.001477105 0.10782866 0.09945839 0.1255539
##                  pink       green      purple      white     yellow
## edible    0.006893156 0.001969473 0.001969473 0.08862629 0.04923683
## poisonous 0.010832102 0.000000000 0.000000000 0.03938946 0.08271787

It is clear that mushrooms with cap color green or purple are 100% safe to eat.

Next, we check the mushrooms’ gill color

# Share of all mushrooms falling into each (edibility, gill color) combination:
# rows are the edibility classes, columns are the gill colors.
# table() counts every combination in a single pass and works for factor and
# character columns alike, so the result does not depend on levels() being
# available for these columns.
gill_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$gill_color)
# Divide by the total number of mushrooms and convert to a plain data frame.
m_gill_color <- as.data.frame.matrix(gill_color_counts / nrow(mushrooms))
m_gill_color
##                buff        red       gray  chocolate       black
## edible    0.0000000 0.01181684 0.03052683 0.02511078 0.042343673
## poisonous 0.2127031 0.00000000 0.06203840 0.06499261 0.007877893
##                brown      orange       pink      green      purple
## edible    0.11521418 0.007877893 0.10487445 0.00000000 0.054652880
## poisonous 0.01378631 0.000000000 0.07877893 0.00295421 0.005908419
##                white      yellow
## edible    0.11767602 0.007877893
## poisonous 0.03028065 0.002708026

Mushrooms with gill color red or orange are 100% safe to eat.
Mushrooms with gill color buff or green are 100% poisonous.

We can also check the mushrooms’ veil color

# Share of all mushrooms falling into each (edibility, veil color) combination:
# rows are the edibility classes, columns are the veil colors.
# table() counts every combination in a single pass and works for factor and
# character columns alike, so the result does not depend on levels() being
# available for these columns.
veil_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$veil_color)
# Divide by the total number of mushrooms and convert to a plain data frame.
m_veil_color <- as.data.frame.matrix(veil_color_counts / nrow(mushrooms))
m_veil_color
##                brown     orange     white       yellow
## edible    0.01181684 0.01181684 0.4943378 0.0000000000
## poisonous 0.00000000 0.00000000 0.4810438 0.0009847366

Mushrooms with veil color brown or orange are 100% safe to eat.
Mushrooms with veil color yellow are 100% poisonous.

Finally, let’s look at the mushrooms’ spore print color

# Share of all mushrooms falling into each (edibility, spore print color)
# combination: rows are the edibility classes, columns are the spore print
# colors.
# table() counts every combination in a single pass and works for factor and
# character columns alike, so the result does not depend on levels() being
# available for these columns.
spore_print_color_counts <- table(
  mushrooms$edible_or_poisonous,
  mushrooms$spore_print_color
)
# Divide by the total number of mushrooms and convert to a plain data frame.
m_spore_print_color <- as.data.frame.matrix(
  spore_print_color_counts / nrow(mushrooms)
)
m_spore_print_color
##                  buff   chocolate      black      brown      orange
## edible    0.005908419 0.005908419 0.20285574 0.21467258 0.005908419
## poisonous 0.000000000 0.194977843 0.02757262 0.02757262 0.000000000
##                 green      purple      white      yellow
## edible    0.000000000 0.005908419 0.07090103 0.005908419
## poisonous 0.008862629 0.000000000 0.22304284 0.000000000

Mushrooms with spore print color buff, orange, purple or yellow are 100% safe to eat.
Mushrooms with spore print color green are 100% poisonous.