The data set of this assignment is from https://archive.ics.uci.edu/ml/datasets/Mushroom
The table contains 23 columns and the values are described as follows:
The goal of this work is to transform the data to be more reader-friendly and easier to use in downstream analysis.
First, load all necessary libraries
library(plyr)
Import the data file from GitHub and take a quick look at the data set
# Load the raw mushroom records. The file is comma-separated and has no header
# row, so read.table() names the columns V1, V2, ...
# stringsAsFactors is set explicitly because its default changed in R 4.0.0
# (TRUE -> FALSE); the summary tables built later call levels() on these
# columns, which only works when they are factors.
dataTable <- read.table(
  "https://raw.githubusercontent.com/ezaccountz/DATA_607-AS_1/master/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)
head(dataTable)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Next, we create a subset of the data, including the column indicating whether a mushroom is edible or poisonous.
Since colors are easy for people to recognize, we will analyze the mushrooms by their cap color, gill color, veil color and spore print color attributes.
# Keep the class label (V1) and the four color attributes (V4, V10, V18, V21),
# then give the columns descriptive names.
kept_columns <- c("V1", "V4", "V10", "V18", "V21")
mushrooms <- dataTable[, kept_columns]
colnames(mushrooms) <- c(
  "edible_or_poisonous",
  "cap_color",
  "gill_color",
  "veil_color",
  "spore_print_color"
)

# Lookup vectors: single-letter code (name) -> readable label (value).
class_labels <- c(e = "edible", p = "poisonous")
cap_labels <- c(
  n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
  p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
)
gill_labels <- c(
  k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
  r = "green", o = "orange", p = "pink", u = "purple", e = "red",
  w = "white", y = "yellow"
)
veil_labels <- c(n = "brown", o = "orange", w = "white", y = "yellow")
spore_labels <- c(
  k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
  o = "orange", u = "purple", w = "white", y = "yellow"
)

# Replace the codes in each column with their readable labels.
mushrooms[["edible_or_poisonous"]] <-
  revalue(mushrooms[["edible_or_poisonous"]], class_labels)
mushrooms[["cap_color"]] <- revalue(mushrooms[["cap_color"]], cap_labels)
mushrooms[["gill_color"]] <- revalue(mushrooms[["gill_color"]], gill_labels)
mushrooms[["veil_color"]] <- revalue(mushrooms[["veil_color"]], veil_labels)
mushrooms[["spore_print_color"]] <-
  revalue(mushrooms[["spore_print_color"]], spore_labels)
head(mushrooms)
## edible_or_poisonous cap_color gill_color veil_color spore_print_color
## 1 poisonous brown black white black
## 2 edible yellow black white brown
## 3 edible white brown white brown
## 4 poisonous white brown white black
## 5 edible gray black white brown
## 6 edible yellow brown white black
Now let’s find out which cap colors belong to mushrooms that are safe to eat.
# Cross-tabulate edibility (rows) against cap color (columns) and express each
# cell as a share of all mushrooms. table() derives the row/column labels from
# the data itself, so this works whether the columns are factors or character
# vectors (levels() would return NULL for the latter).
cap_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$cap_color)
m_cap_color <- as.data.frame.matrix(cap_color_counts / nrow(mushrooms))
m_cap_color
## buff cinnamon red gray brown
## edible 0.005908419 0.003938946 0.07680945 0.12703102 0.1555884
## poisonous 0.014771049 0.001477105 0.10782866 0.09945839 0.1255539
## pink green purple white yellow
## edible 0.006893156 0.001969473 0.001969473 0.08862629 0.04923683
## poisonous 0.010832102 0.000000000 0.000000000 0.03938946 0.08271787
It is clear that mushrooms with cap color green or purple are 100% safe to eat.
Next, we check the mushrooms’ gill color
# Cross-tabulate edibility (rows) against gill color (columns) and express each
# cell as a share of all mushrooms. table() derives the row/column labels from
# the data itself, so this works whether the columns are factors or character
# vectors (levels() would return NULL for the latter).
gill_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$gill_color)
m_gill_color <- as.data.frame.matrix(gill_color_counts / nrow(mushrooms))
m_gill_color
## buff red gray chocolate black
## edible 0.0000000 0.01181684 0.03052683 0.02511078 0.042343673
## poisonous 0.2127031 0.00000000 0.06203840 0.06499261 0.007877893
## brown orange pink green purple
## edible 0.11521418 0.007877893 0.10487445 0.00000000 0.054652880
## poisonous 0.01378631 0.000000000 0.07877893 0.00295421 0.005908419
## white yellow
## edible 0.11767602 0.007877893
## poisonous 0.03028065 0.002708026
Mushrooms with gill color red or orange are 100% safe to eat.
Mushrooms with gill color green are 100% poisonous.
We can also check the mushrooms’ veil color
# Cross-tabulate edibility (rows) against veil color (columns) and express each
# cell as a share of all mushrooms. table() derives the row/column labels from
# the data itself, so this works whether the columns are factors or character
# vectors (levels() would return NULL for the latter).
veil_color_counts <- table(mushrooms$edible_or_poisonous, mushrooms$veil_color)
m_veil_color <- as.data.frame.matrix(veil_color_counts / nrow(mushrooms))
m_veil_color
## brown orange white yellow
## edible 0.01181684 0.01181684 0.4943378 0.0000000000
## poisonous 0.00000000 0.00000000 0.4810438 0.0009847366
Mushrooms with veil color brown or orange are 100% safe to eat.
Mushrooms with veil color yellow are 100% poisonous.
Finally, let’s look at the mushrooms’ spore print color
# Cross-tabulate edibility (rows) against spore print color (columns) and
# express each cell as a share of all mushrooms. table() derives the row/column
# labels from the data itself, so this works whether the columns are factors or
# character vectors (levels() would return NULL for the latter).
spore_print_counts <- table(
  mushrooms$edible_or_poisonous,
  mushrooms$spore_print_color
)
m_spore_print_color <- as.data.frame.matrix(
  spore_print_counts / nrow(mushrooms)
)
m_spore_print_color
## buff chocolate black brown orange
## edible 0.005908419 0.005908419 0.20285574 0.21467258 0.005908419
## poisonous 0.000000000 0.194977843 0.02757262 0.02757262 0.000000000
## green purple white yellow
## edible 0.000000000 0.005908419 0.07090103 0.005908419
## poisonous 0.008862629 0.000000000 0.22304284 0.000000000
Mushrooms with spore print color buff, orange, purple or yellow are 100% safe to eat.
Mushrooms with spore print color green are 100% poisonous.