# Set the knitr chunk default `echo = TRUE` so that each chunk's source code
# is shown alongside its output in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

Install Packages

#install.packages("plyr")
#install.packages("rprojroot")
#install.packages("rmarkdown")
#install.packages("rmarkdown", repos = "https://mran.revolutionanalytics.com/snapshot/2016-01-02")
#devtools::install_url("http://cran.r-project.org/src/contrib/rmarkdown0.5.1.tar.gz")

Import the Libraries & Files

library(data.table)
## Warning: package 'data.table' was built under R version 3.2.5
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.5
# Download the comma-separated mushroom records. The file has no header row,
# so read.table() assigns the default column names V1, V2, ...
original_file <- read.table(
  file = 'https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data',
  sep = ","
)
# Preview the first five records.
head(original_file, n = 5)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g

Create a Reformatted File With Column Name Changes

# Descriptive names for the 23 attributes, listed in the same order as the
# raw columns V1..V23 they replace.
attribute_names <- c(
  "type",
  "cap_shape", "cap_surface", "cap_color",
  "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)
raw_names <- paste0("V", seq_along(attribute_names))

# Rename with base `names<-` instead of data.table::setnames(). setnames()
# updates names by reference, and `reformatted_file <- original_file` does not
# make a copy, so setnames() would rename the columns of `original_file` too.
# Base assignment is copy-on-modify and leaves `original_file` untouched.
reformatted_file <- original_file

# Fail loudly (as setnames() would) if an expected raw column is absent.
stopifnot(all(raw_names %in% names(reformatted_file)))
names(reformatted_file)[match(raw_names, names(reformatted_file))] <-
  attribute_names

# Preview the first ten records under their new column names.
head(reformatted_file, 10)
##    type cap_shape cap_surface cap_color bruises odor gill_attachment
## 1     p         x           s         n       t    p               f
## 2     e         x           s         y       t    a               f
## 3     e         b           s         w       t    l               f
## 4     p         x           y         w       t    p               f
## 5     e         x           s         g       f    n               f
## 6     e         x           y         y       t    a               f
## 7     e         b           s         w       t    a               f
## 8     e         b           y         w       t    l               f
## 9     p         x           y         w       t    p               f
## 10    e         b           s         y       t    a               f
##    gill_spacing gill_size gill_color stalk_shape stalk_root
## 1             c         n          k           e          e
## 2             c         b          k           e          c
## 3             c         b          n           e          c
## 4             c         n          n           e          e
## 5             w         b          k           t          e
## 6             c         b          n           e          c
## 7             c         b          g           e          c
## 8             c         b          n           e          c
## 9             c         n          p           e          e
## 10            c         b          g           e          c
##    stalk_surface_above_ring stalk_surface_below_ring
## 1                         s                        s
## 2                         s                        s
## 3                         s                        s
## 4                         s                        s
## 5                         s                        s
## 6                         s                        s
## 7                         s                        s
## 8                         s                        s
## 9                         s                        s
## 10                        s                        s
##    stalk_color_above_ring stalk_color_below_ring veil_type veil_color
## 1                       w                      w         p          w
## 2                       w                      w         p          w
## 3                       w                      w         p          w
## 4                       w                      w         p          w
## 5                       w                      w         p          w
## 6                       w                      w         p          w
## 7                       w                      w         p          w
## 8                       w                      w         p          w
## 9                       w                      w         p          w
## 10                      w                      w         p          w
##    ring_number ring_type spore_print_color population habitat
## 1            o         p                 k          s       u
## 2            o         p                 n          n       g
## 3            o         p                 n          n       m
## 4            o         p                 k          s       u
## 5            o         e                 n          a       g
## 6            o         p                 k          n       g
## 7            o         p                 k          n       m
## 8            o         p                 n          s       m
## 9            o         p                 k          v       g
## 10           o         p                 k          s       m

Change the Contents of the Columns to Full Names

# Decode the class label. Any code other than "p" or "e" becomes "unknown".
reformatted_file$type <- ifelse(
  reformatted_file$type == "p", "poisonous",
  ifelse(reformatted_file$type == "e", "edible", "unknown")
)

# Lookup table: for every remaining attribute, a named vector whose names are
# the single-letter codes found in the raw data and whose values are the full
# labels. Labels deliberately have no leading or trailing whitespace, so that
# later comparisons such as `cap_color == "brown"` match as expected.
attribute_labels <- list(
  cap_shape = c(b = "bell", c = "conical", x = "convex", f = "flat",
                k = "knobbed", s = "sunken"),
  cap_surface = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  cap_color = c(n = "brown", b = "buff", c = "cinnamon", g = "gray",
                r = "green", p = "pink", u = "purple", e = "red",
                w = "white", y = "yellow"),
  bruises = c(t = "bruises", f = "no"),
  odor = c(a = "almond", l = "anise", c = "creosote", y = "fishy",
           f = "foul", m = "musty", n = "none", p = "pungent",
           s = "spicy"),
  gill_attachment = c(a = "attached", f = "free"),
  gill_spacing = c(c = "close", w = "crowded"),
  gill_size = c(b = "broad", n = "narrow"),
  gill_color = c(k = "black", n = "brown", b = "buff", h = "chocolate",
                 g = "gray", w = "white", y = "yellow", r = "green",
                 o = "orange", p = "pink", u = "purple", e = "red"),
  stalk_shape = c(e = "enlarging", t = "tapering"),
  stalk_root = c(b = "bulbous", c = "club", e = "equal", r = "rooted",
                 "?" = "missing"),
  stalk_surface_above_ring = c(f = "fibrous", y = "scaly", k = "silky",
                               s = "smooth"),
  stalk_surface_below_ring = c(f = "fibrous", y = "scaly", k = "silky",
                               s = "smooth"),
  stalk_color_above_ring = c(n = "brown", b = "buff", c = "cinnamon",
                             g = "gray", o = "orange", p = "pink",
                             e = "red", w = "white", y = "yellow"),
  stalk_color_below_ring = c(n = "brown", b = "buff", c = "cinnamon",
                             g = "gray", o = "orange", p = "pink",
                             e = "red", w = "white", y = "yellow"),
  veil_type = c(p = "partial", u = "universal"),
  veil_color = c(n = "brown", o = "orange", w = "white", y = "yellow"),
  ring_number = c(n = "none", o = "one", t = "two"),
  ring_type = c(c = "cobwebby", e = "evanescent", f = "flaring",
                l = "large", n = "none", p = "pendant", s = "sheathing",
                z = "zone"),
  spore_print_color = c(k = "black", n = "brown", b = "buff",
                        h = "chocolate", r = "green", o = "orange",
                        u = "purple", w = "white", y = "yellow"),
  population = c(a = "abundant", c = "clustered", n = "numerous",
                 s = "scattered", v = "several", y = "solitary"),
  habitat = c(g = "grasses", l = "leaves", m = "meadows", p = "paths",
              u = "urban", w = "waste", d = "woods")
)

# Replace the codes in each listed column with their full labels.
for (attribute in names(attribute_labels)) {
  code_labels <- attribute_labels[[attribute]]
  reformatted_file[[attribute]] <- mapvalues(
    reformatted_file[[attribute]],
    from = names(code_labels),
    to = unname(code_labels)
  )
}

# Preview the first five decoded records.
head(reformatted_file, 5)
##        type cap_shape cap_surface cap_color  bruises    odor
## 1 poisonous    convex      smooth     brown  bruises pungent
## 2    edible    convex      smooth    yellow  bruises  almond
## 3    edible      bell      smooth     white  bruises   anise
## 4 poisonous    convex       scaly     white  bruises pungent
## 5    edible    convex      smooth      gray       no    none
##   gill_attachment gill_spacing gill_size gill_color stalk_shape stalk_root
## 1            free        close    narrow      black   enlarging      equal
## 2            free        close     broad      black   enlarging       club
## 3            free        close     broad      brown   enlarging       club
## 4            free        close    narrow      brown   enlarging      equal
## 5            free      crowded     broad      black    tapering      equal
##   stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1                   smooth                   smooth                  white
## 2                   smooth                   smooth                  white
## 3                   smooth                   smooth                  white
## 4                   smooth                   smooth                  white
## 5                   smooth                   smooth                  white
##   stalk_color_below_ring veil_type veil_color ring_number  ring_type
## 1                  white   partial      white         one    pendant
## 2                  white   partial      white         one    pendant
## 3                  white   partial      white         one    pendant
## 4                  white   partial      white         one    pendant
## 5                  white   partial      white         one evanescent
##   spore_print_color population  habitat
## 1             black  scattered    urban
## 2             brown   numerous  grasses
## 3             brown   numerous  meadows
## 4             black  scattered    urban
## 5             brown   abundant  grasses

Poisonous Data Frame - filtered from the reformatted data frame

# Keep only the records whose decoded class label is "poisonous".
Poisionous_df <- reformatted_file[
  reformatted_file[["type"]] == "poisonous",
]
# Preview the first five poisonous records.
head(Poisionous_df, n = 5)
##         type cap_shape cap_surface cap_color  bruises    odor
## 1  poisonous    convex      smooth     brown  bruises pungent
## 4  poisonous    convex       scaly     white  bruises pungent
## 9  poisonous    convex       scaly     white  bruises pungent
## 14 poisonous    convex       scaly     white  bruises pungent
## 18 poisonous    convex      smooth     brown  bruises pungent
##    gill_attachment gill_spacing gill_size gill_color stalk_shape
## 1             free        close    narrow      black   enlarging
## 4             free        close    narrow      brown   enlarging
## 9             free        close    narrow       pink   enlarging
## 14            free        close    narrow      black   enlarging
## 18            free        close    narrow      brown   enlarging
##    stalk_root stalk_surface_above_ring stalk_surface_below_ring
## 1       equal                   smooth                   smooth
## 4       equal                   smooth                   smooth
## 9       equal                   smooth                   smooth
## 14      equal                   smooth                   smooth
## 18      equal                   smooth                   smooth
##    stalk_color_above_ring stalk_color_below_ring veil_type veil_color
## 1                   white                  white   partial      white
## 4                   white                  white   partial      white
## 9                   white                  white   partial      white
## 14                  white                  white   partial      white
## 18                  white                  white   partial      white
##    ring_number ring_type spore_print_color population  habitat
## 1          one   pendant             black  scattered    urban
## 4          one   pendant             black  scattered    urban
## 9          one   pendant             black    several  grasses
## 14         one   pendant             brown    several    urban
## 18         one   pendant             black  scattered  grasses

Edible Data Frame - filtered from the reformatted data frame

# Keep only the records whose decoded class label is "edible".
Edible_df <- reformatted_file[
  reformatted_file[["type"]] == "edible",
]
# Preview the first five edible records.
head(Edible_df, n = 5)
##     type cap_shape cap_surface cap_color  bruises    odor gill_attachment
## 2 edible    convex      smooth    yellow  bruises  almond            free
## 3 edible      bell      smooth     white  bruises   anise            free
## 5 edible    convex      smooth      gray       no    none            free
## 6 edible    convex       scaly    yellow  bruises  almond            free
## 7 edible      bell      smooth     white  bruises  almond            free
##   gill_spacing gill_size gill_color stalk_shape stalk_root
## 2        close     broad      black   enlarging       club
## 3        close     broad      brown   enlarging       club
## 5      crowded     broad      black    tapering      equal
## 6        close     broad      brown   enlarging       club
## 7        close     broad       gray   enlarging       club
##   stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 2                   smooth                   smooth                  white
## 3                   smooth                   smooth                  white
## 5                   smooth                   smooth                  white
## 6                   smooth                   smooth                  white
## 7                   smooth                   smooth                  white
##   stalk_color_below_ring veil_type veil_color ring_number  ring_type
## 2                  white   partial      white         one    pendant
## 3                  white   partial      white         one    pendant
## 5                  white   partial      white         one evanescent
## 6                  white   partial      white         one    pendant
## 7                  white   partial      white         one    pendant
##   spore_print_color population  habitat
## 2             brown   numerous  grasses
## 3             brown   numerous  meadows
## 5             brown   abundant  grasses
## 6             black   numerous  grasses
## 7             black   numerous  meadows