1. Original Mushroom Dataset

2. Transformation of Data

List of Attributes

For reference: New column names to be assigned.

  1. Class
  2. Cap Shape => C_shape
  3. Cap Surface => C_surface
  4. Cap Color => C_color
  5. Bruises
  6. Odor
  7. Gill Attachment => G_Attachment
  8. Gill Spacing => G_spacing
  9. Gill Size => G_size
  10. Gill Color => G_color
  11. Stalk Shape => S_shape
  12. Stalk Root => S_root
  13. Stalk Surface Above Ring => S_surfaceAR
  14. Stalk Surface Below Ring => S_surfaceBR
  15. Stalk Color Above Ring => S_colorAR
  16. Stalk Color Below Ring => S_colorBR
  17. Veil Type => V_type
  18. Veil Color => V_color
  19. Ring Number => R_number
  20. Ring Type => R_type
  21. Spore Print Color => SporeCol
  22. Population
  23. Habitat

Modified Mushroom Dataset

A more detailed display of attributes that categorizes the different types of Mushrooms.

# Work on a copy so the single-letter codes in originalMdf stay available.
Mdf <- originalMdf

# Renaming the column names for readability
colnames(Mdf) <- c(
  "Class", "C_shape", "C_surface", "C_color", "Bruises", "Odor",
  "G_Attachment", "G_spacing", "G_size", "G_color", "S_shape", "S_root",
  "S_surfaceAR", "S_surfaceBR", "S_colorAR", "S_colorBR", "V_type",
  "V_color", "R_number", "R_type", "SporeCol", "Population", "Habitat"
)

# Renaming values in the columns for readability.
# Each lookup is a named character vector: names are the single-letter codes
# found in the data, values are the labels that replace them.

# Shared by both stalk-surface columns so the two always agree.
stalk_surfaces <- c(f = "Fibrous", y = "Scaly", k = "Silky", s = "Smooth")

# Shared by both stalk-color columns. "b" and "c" are Buff and Cinnamon,
# distinct from "n" (Brown); collapsing them would merge three categories.
stalk_colors <- c(
  n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", o = "Orange",
  p = "Pink", e = "Red", w = "White", y = "Yellow"
)

value_labels <- list(
  Class = c(p = "Poisonous", e = "Edible"),
  C_shape = c(
    b = "Bell", c = "Conical", x = "Convex", f = "Flat", k = "Knobbed",
    s = "Sunken"
  ),
  C_surface = c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth"),
  C_color = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  ),
  Bruises = c(t = "Yes", f = "No"),
  Odor = c(
    a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
    m = "Musty", n = "None", p = "Pungent", s = "Spicy"
  ),
  G_Attachment = c(
    a = "Attached", d = "Descending", f = "Free", n = "Notched"
  ),
  G_spacing = c(c = "Closed", w = "Crowded", d = "Distant"),
  G_size = c(b = "Broad", n = "Narrow"),
  G_color = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", g = "Gray",
    r = "Green", o = "Orange", p = "Pink", u = "Purple", e = "Red",
    w = "White", y = "Yellow"
  ),
  S_shape = c(e = "Enlarging", t = "Tapering"),
  # "z", "r" and "?" complete the stalk-root code list so that no raw code
  # is left behind in the readable data set.
  S_root = c(
    b = "Bulbous", c = "Club", u = "Cup", e = "Equal", z = "Rhizomorphs",
    r = "Rooted", "?" = "Missing"
  ),
  S_surfaceAR = stalk_surfaces,
  S_surfaceBR = stalk_surfaces,
  S_colorAR = stalk_colors,
  S_colorBR = stalk_colors,
  V_type = c(p = "Partial", u = "Universal"),
  V_color = c(n = "Brown", o = "Orange", w = "White", y = "Yellow"),
  R_number = c(n = "None", o = "One", t = "Two"),
  R_type = c(
    c = "Cobwebby", e = "Evanescent", f = "Flaring", l = "Large",
    n = "None", p = "Pendant", s = "Sheathing", z = "Zone"
  ),
  SporeCol = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", r = "Green",
    o = "Orange", u = "Purple", w = "White", y = "Yellow"
  ),
  Population = c(
    a = "Abundant", c = "Clustered", n = "Numerous", s = "Scattered",
    v = "Several", y = "Solitary"
  ),
  Habitat = c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths", u = "Urban",
    w = "Waste", d = "Woods"
  )
)

# Replace every value that has an entry in `lookup` with its label.
# Values without an entry (including NA) are returned unchanged. The input is
# converted to character first so a factor column is relabelled instead of
# being filled with NA by an assignment of an unknown level.
recode_values <- function(values, lookup) {
  values <- as.character(values)
  known <- values %in% names(lookup)
  values[known] <- unname(lookup[values[known]])
  values
}

recode_cols <- names(value_labels)
Mdf[recode_cols] <- Map(recode_values, Mdf[recode_cols], value_labels)

head(Mdf)
##       Class C_shape C_surface C_color Bruises    Odor G_Attachment
## 1 Poisonous  Convex    Smooth   Brown     Yes Pungent         Free
## 2    Edible  Convex    Smooth  Yellow     Yes  Almond         Free
## 3    Edible    Bell    Smooth   White     Yes   Anise         Free
## 4 Poisonous  Convex     Scaly   White     Yes Pungent         Free
## 5    Edible  Convex    Smooth    Gray      No    None         Free
## 6    Edible  Convex     Scaly  Yellow     Yes  Almond         Free
##   G_spacing G_size G_color   S_shape S_root S_surfaceAR S_surfaceBR
## 1    Closed Narrow   Black Enlarging  Equal      Smooth      Smooth
## 2    Closed  Broad   Black Enlarging   Club      Smooth      Smooth
## 3    Closed  Broad   Brown Enlarging   Club      Smooth      Smooth
## 4    Closed Narrow   Brown Enlarging  Equal      Smooth      Smooth
## 5   Crowded  Broad   Black  Tapering  Equal      Smooth      Smooth
## 6    Closed  Broad   Brown Enlarging   Club      Smooth      Smooth
##   S_colorAR S_colorBR  V_type V_color R_number     R_type SporeCol
## 1     White     White Partial   White      One    Pendant    Black
## 2     White     White Partial   White      One    Pendant    Brown
## 3     White     White Partial   White      One    Pendant    Brown
## 4     White     White Partial   White      One    Pendant    Black
## 5     White     White Partial   White      One Evanescent    Brown
## 6     White     White Partial   White      One    Pendant    Black
##   Population Habitat
## 1  Scattered   Urban
## 2   Numerous Grasses
## 3   Numerous Meadows
## 4  Scattered   Urban
## 5   Abundant Grasses
## 6   Numerous Grasses

Subset

Displays a table of the 5 characteristics of mushrooms that mycologists most often observe when categorizing them. These characteristics are:

  • Class: Poisonous or Edible
  • C_shape: Shape of the mushroom cap
  • C_surface: Surface type of the cap
  • C_color: Color of Cap
  • Habitat: Home of the mushroom type
library(plyr)
library(dplyr)

# Keep only the class label plus the cap and habitat columns.
M2df <- Mdf %>%
  select(Class, C_shape, C_surface, C_color, Habitat)

# Preview the first 25 rows of the subset.
head(M2df, n = 25)
##        Class C_shape C_surface C_color Habitat
## 1  Poisonous  Convex    Smooth   Brown   Urban
## 2     Edible  Convex    Smooth  Yellow Grasses
## 3     Edible    Bell    Smooth   White Meadows
## 4  Poisonous  Convex     Scaly   White   Urban
## 5     Edible  Convex    Smooth    Gray Grasses
## 6     Edible  Convex     Scaly  Yellow Grasses
## 7     Edible    Bell    Smooth   White Meadows
## 8     Edible    Bell     Scaly   White Meadows
## 9  Poisonous  Convex     Scaly   White Grasses
## 10    Edible    Bell    Smooth  Yellow Meadows
## 11    Edible  Convex     Scaly  Yellow Grasses
## 12    Edible  Convex     Scaly  Yellow Meadows
## 13    Edible    Bell    Smooth  Yellow Grasses
## 14 Poisonous  Convex     Scaly   White   Urban
## 15    Edible  Convex   Fibrous   Brown Grasses
## 16    Edible  Sunken   Fibrous    Gray   Urban
## 17    Edible    Flat   Fibrous   White Grasses
## 18 Poisonous  Convex    Smooth   Brown Grasses
## 19 Poisonous  Convex     Scaly   White   Urban
## 20 Poisonous  Convex    Smooth   Brown   Urban
## 21    Edible    Bell    Smooth  Yellow Meadows
## 22 Poisonous  Convex     Scaly   Brown Grasses
## 23    Edible    Bell     Scaly  Yellow Meadows
## 24    Edible    Bell     Scaly   White Meadows
## 25    Edible    Bell    Smooth   White Meadows
# Count how often each value occurs in every column of the subset.
M2df$Class %>% factor() %>% summary()
##    Edible Poisonous 
##      4208      3916
M2df$C_shape %>% factor() %>% summary()
##    Bell Conical  Convex    Flat Knobbed  Sunken 
##     452       4    3656    3152     828      32
M2df$C_surface %>% factor() %>% summary()
## Fibrous Grooves   Scaly  Smooth 
##    2320       4    3244    2556
M2df$C_color %>% factor() %>% summary()
##    Brown     Buff Cinnamon     Gray    Green     Pink   Purple      Red 
##     2284      168       44     1840       16      144       16     1500 
##    White   Yellow 
##     1040     1072
M2df$Habitat %>% factor() %>% summary()
## Grasses  Leaves Meadows   Paths   Urban   Waste   Woods 
##    2148     832     292    1144     368     192    3148


Characteristics of Poisonous mushrooms

# Rows of the subset whose Class is "Poisonous", followed by a 20-row preview.
poison <- filter(M2df, Class == "Poisonous")
head(poison, n = 20)
##        Class C_shape C_surface C_color Habitat
## 1  Poisonous  Convex    Smooth   Brown   Urban
## 2  Poisonous  Convex     Scaly   White   Urban
## 3  Poisonous  Convex     Scaly   White Grasses
## 4  Poisonous  Convex     Scaly   White   Urban
## 5  Poisonous  Convex    Smooth   Brown Grasses
## 6  Poisonous  Convex     Scaly   White   Urban
## 7  Poisonous  Convex    Smooth   Brown   Urban
## 8  Poisonous  Convex     Scaly   Brown Grasses
## 9  Poisonous    Flat    Smooth   White Grasses
## 10 Poisonous  Convex     Scaly   White   Urban
## 11 Poisonous  Convex     Scaly   Brown   Urban
## 12 Poisonous  Convex     Scaly   White Grasses
## 13 Poisonous  Convex     Scaly   Brown   Urban
## 14 Poisonous  Convex    Smooth   White   Urban
## 15 Poisonous  Convex     Scaly   Brown   Urban
## 16 Poisonous  Convex     Scaly   White Grasses
## 17 Poisonous  Convex     Scaly   White   Urban
## 18 Poisonous  Convex    Smooth   White Grasses
## 19 Poisonous    Flat     Scaly   Brown Grasses
## 20 Poisonous  Convex     Scaly   White   Urban
# Number of poisonous mushrooms recorded in each habitat.
table(poison[["Habitat"]])
## 
## Grasses  Leaves Meadows   Paths   Urban   Woods 
##     740     592      36    1008     272    1268

Poisonous mushrooms are most often found in the Woods (1,268 of the 3,916 poisonous mushrooms).

Characteristics of Edible mushrooms

# Rows of the subset whose Class is "Edible", followed by a 20-row preview.
edible <- filter(M2df, Class == "Edible")
head(edible, n = 20)
##     Class C_shape C_surface C_color Habitat
## 1  Edible  Convex    Smooth  Yellow Grasses
## 2  Edible    Bell    Smooth   White Meadows
## 3  Edible  Convex    Smooth    Gray Grasses
## 4  Edible  Convex     Scaly  Yellow Grasses
## 5  Edible    Bell    Smooth   White Meadows
## 6  Edible    Bell     Scaly   White Meadows
## 7  Edible    Bell    Smooth  Yellow Meadows
## 8  Edible  Convex     Scaly  Yellow Grasses
## 9  Edible  Convex     Scaly  Yellow Meadows
## 10 Edible    Bell    Smooth  Yellow Grasses
## 11 Edible  Convex   Fibrous   Brown Grasses
## 12 Edible  Sunken   Fibrous    Gray   Urban
## 13 Edible    Flat   Fibrous   White Grasses
## 14 Edible    Bell    Smooth  Yellow Meadows
## 15 Edible    Bell     Scaly  Yellow Meadows
## 16 Edible    Bell     Scaly   White Meadows
## 17 Edible    Bell    Smooth   White Meadows
## 18 Edible  Convex     Scaly  Yellow Meadows
## 19 Edible  Convex     Scaly   White Meadows
## 20 Edible    Flat   Fibrous   Brown   Urban
# Number of edible mushrooms with each cap surface type.
table(edible[["C_surface"]])
## 
## Fibrous   Scaly  Smooth 
##    1560    1504    1144

Mushrooms with a Fibrous cap surface are mostly edible: 1,560 of the 2,320 Fibrous mushrooms in the dataset are edible.