1 Prerequisites: Available Libraries

Mushrooms Dataset. A famous (if slightly moldy) dataset about mushrooms can be found in the UCI repository here: https://archive.ics.uci.edu/ml/datasets/Mushroom

2 Load data from CSV file

#mushroomsData <- read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")

#myURL<-"https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
#myDataURL <- getURL(myURL)
#mushroomsData <- read.csv(text = myDataURL)

# Load the local copy of the data set, expected in the current working
# directory.
myWorkingDir <- getwd()
mySourceFile <- file.path(myWorkingDir, "agaricus-lepiota.data.txt")
# The raw data file has no header row. With header = TRUE the first record
# would be consumed as column names and lost, so read it with header = FALSE.
localMushroomsData <- read.csv(file = mySourceFile, header = FALSE, sep = ",")

# Download the raw text of the data set from GitHub and parse it as CSV.
myGitHubURL <- "https://raw.githubusercontent.com/destination4debabrata/CUNY-Assignments/master/DATA%20607%2002%5B15961%5D/Week%201%20Assignment%20%5BJan%2028%20-%20Feb%2003%5D/agaricus-lepiota.data.txt"
myGitHubDataURL <- getURL(myGitHubURL)
# The file starts directly with data. read.csv() defaults to header = TRUE,
# which would turn the first record into column names and silently drop it
# from the data, so the header is switched off explicitly. Proper column
# names are assigned later from the data dictionary.
mushroomsData <- read.csv(text = myGitHubDataURL, header = FALSE)

3 Read the first 10 rows of the data

# Preview the first ten records of the freshly loaded data.
head(x = mushroomsData, n = 10L)
##    p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1  e x s y t   a f c   b k e   c   s   s w   w   p   w o   p   n   n g
## 2  e b s w t   l f c   b n e   c   s   s w   w   p   w o   p   n   n m
## 3  p x y w t   p f c   n n e   e   s   s w   w   p   w o   p   k   s u
## 4  e x s g f   n f w   b k t   e   s   s w   w   p   w o   e   n   a g
## 5  e x y y t   a f c   b n e   c   s   s w   w   p   w o   p   k   n g
## 6  e b s w t   a f c   b g e   c   s   s w   w   p   w o   p   k   n m
## 7  e b y w t   l f c   b n e   c   s   s w   w   p   w o   p   n   s m
## 8  p x y w t   p f c   n p e   e   s   s w   w   p   w o   p   k   v g
## 9  e b s y t   a f c   b g e   c   s   s w   w   p   w o   p   k   s m
## 10 e x y y t   l f c   b g e   c   s   s w   w   p   w o   p   n   n g

4 Add column headers to the data from the data dictionary

https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names

# Label the 23 columns with attribute names based on the data dictionary.
# NOTE(review): "StalkColorBeloRing" looks like a typo for
# "StalkColorBelowRing"; it is kept unchanged here because other code may
# refer to the column by this exact name.
names(mushroomsData) <- c(
  "Classes", "CapShape", "CapSurface", "CapColor", "Bruises", "Odor",
  "GillAttachment", "GillSpacing", "GillSize", "GillColor",
  "StalkShape", "Stalkroot",
  "StalkSurfaceAboveRing", "StalkSurfaceBelowRing",
  "StalkColorAboveRing", "StalkColorBeloRing",
  "VeilType", "VeilColor", "RingNumber", "RingType",
  "SporePrintColor", "Population", "Habitat"
)
# Show the first ten records again, now with the new column names.
head(x = mushroomsData, n = 10L)
##    Classes CapShape CapSurface CapColor Bruises Odor GillAttachment
## 1        e        x          s        y       t    a              f
## 2        e        b          s        w       t    l              f
## 3        p        x          y        w       t    p              f
## 4        e        x          s        g       f    n              f
## 5        e        x          y        y       t    a              f
## 6        e        b          s        w       t    a              f
## 7        e        b          y        w       t    l              f
## 8        p        x          y        w       t    p              f
## 9        e        b          s        y       t    a              f
## 10       e        x          y        y       t    l              f
##    GillSpacing GillSize GillColor StalkShape Stalkroot
## 1            c        b         k          e         c
## 2            c        b         n          e         c
## 3            c        n         n          e         e
## 4            w        b         k          t         e
## 5            c        b         n          e         c
## 6            c        b         g          e         c
## 7            c        b         n          e         c
## 8            c        n         p          e         e
## 9            c        b         g          e         c
## 10           c        b         g          e         c
##    StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
## 1                      s                     s                   w
## 2                      s                     s                   w
## 3                      s                     s                   w
## 4                      s                     s                   w
## 5                      s                     s                   w
## 6                      s                     s                   w
## 7                      s                     s                   w
## 8                      s                     s                   w
## 9                      s                     s                   w
## 10                     s                     s                   w
##    StalkColorBeloRing VeilType VeilColor RingNumber RingType
## 1                   w        p         w          o        p
## 2                   w        p         w          o        p
## 3                   w        p         w          o        p
## 4                   w        p         w          o        e
## 5                   w        p         w          o        p
## 6                   w        p         w          o        p
## 7                   w        p         w          o        p
## 8                   w        p         w          o        p
## 9                   w        p         w          o        p
## 10                  w        p         w          o        p
##    SporePrintColor Population Habitat
## 1                n          n       g
## 2                n          n       m
## 3                k          s       u
## 4                n          a       g
## 5                k          n       g
## 6                k          n       m
## 7                n          s       m
## 8                k          v       g
## 9                k          s       m
## 10               n          n       g

4.2 Get Summary of the mushrooms data

# Per-column overview of the values present in the data set.
summary(object = mushroomsData)
##  Classes  CapShape CapSurface    CapColor    Bruises       Odor     
##  e:4208   b: 452   f:2320     n      :2283   f:4748   n      :3528  
##  p:3915   c:   4   g:   4     g      :1840   t:3375   f      :2160  
##           f:3152   s:2555     e      :1500            s      : 576  
##           k: 828   y:3244     y      :1072            y      : 576  
##           s:  32              w      :1040            a      : 400  
##           x:3655              b      : 168            l      : 400  
##                               (Other): 220            (Other): 483  
##  GillAttachment GillSpacing GillSize   GillColor    StalkShape Stalkroot
##  a: 210         c:6811      b:5612   b      :1728   e:3515     ?:2480   
##  f:7913         w:1312      n:2511   p      :1492   t:4608     b:3776   
##                                      w      :1202              c: 556   
##                                      n      :1048              e:1119   
##                                      g      : 752              r: 192   
##                                      h      : 732                       
##                                      (Other):1169                       
##  StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
##  f: 552                f: 600                w      :4463       
##  k:2372                k:2304                p      :1872       
##  s:5175                s:4935                g      : 576       
##  y:  24                y: 284                n      : 448       
##                                              b      : 432       
##                                              o      : 192       
##                                              (Other): 140       
##  StalkColorBeloRing VeilType VeilColor RingNumber RingType SporePrintColor
##  w      :4383       p:8123   n:  96    n:  36     e:2776   w      :2388   
##  p      :1872                o:  96    o:7487     f:  48   n      :1968   
##  g      : 576                w:7923    t: 600     l:1296   k      :1871   
##  n      : 512                y:   8               n:  36   h      :1632   
##  b      : 432                                     p:3967   r      :  72   
##  o      : 192                                              b      :  48   
##  (Other): 156                                              (Other): 144   
##  Population Habitat 
##  a: 384     d:3148  
##  c: 340     g:2148  
##  n: 400     l: 832  
##  s:1247     m: 292  
##  v:4040     p:1144  
##  y:1712     u: 367  
##             w: 192

4.3 Replace acronyms with full forms

# Expand the single-letter codes of the first six columns into readable
# labels. All replacements are done in one mutate() call; each expression
# only reads the column it overwrites, so the order does not matter.
mushroomsData <- mutate(
  mushroomsData,
  # Anything that is not "e" is labelled "poisonous".
  Classes = ifelse(Classes == "e", "edible", "poisonous"),
  CapShape = plyr::mapvalues(
    CapShape,
    from = c("b", "c", "x", "f", "k", "s"),
    to = c("bell", "conical", "convex", "flat", "knobbed", "sunken")
  ),
  CapSurface = plyr::mapvalues(
    CapSurface,
    from = c("f", "g", "y", "s"),
    to = c("fibrous", "grooves", "scaly", "smooth")
  ),
  CapColor = plyr::mapvalues(
    CapColor,
    from = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
    to = c("brown", "buff", "cinnamon", "gray", "green", "pink", "purple",
           "red", "white", "yellow")
  ),
  Bruises = plyr::mapvalues(
    Bruises,
    from = c("t", "f"),
    to = c("bruises", "no")
  ),
  Odor = plyr::mapvalues(
    Odor,
    from = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
    to = c("almond", "anise", "creosote", "fishy", "foul", "musty", "none",
           "pungent", "spicy")
  )
)
# Inspect the first ten records after recoding.
head(mushroomsData, n = 10)
##      Classes CapShape CapSurface CapColor Bruises    Odor GillAttachment
## 1     edible   convex     smooth   yellow bruises  almond              f
## 2     edible     bell     smooth    white bruises   anise              f
## 3  poisonous   convex      scaly    white bruises pungent              f
## 4     edible   convex     smooth     gray      no    none              f
## 5     edible   convex      scaly   yellow bruises  almond              f
## 6     edible     bell     smooth    white bruises  almond              f
## 7     edible     bell      scaly    white bruises   anise              f
## 8  poisonous   convex      scaly    white bruises pungent              f
## 9     edible     bell     smooth   yellow bruises  almond              f
## 10    edible   convex      scaly   yellow bruises   anise              f
##    GillSpacing GillSize GillColor StalkShape Stalkroot
## 1            c        b         k          e         c
## 2            c        b         n          e         c
## 3            c        n         n          e         e
## 4            w        b         k          t         e
## 5            c        b         n          e         c
## 6            c        b         g          e         c
## 7            c        b         n          e         c
## 8            c        n         p          e         e
## 9            c        b         g          e         c
## 10           c        b         g          e         c
##    StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
## 1                      s                     s                   w
## 2                      s                     s                   w
## 3                      s                     s                   w
## 4                      s                     s                   w
## 5                      s                     s                   w
## 6                      s                     s                   w
## 7                      s                     s                   w
## 8                      s                     s                   w
## 9                      s                     s                   w
## 10                     s                     s                   w
##    StalkColorBeloRing VeilType VeilColor RingNumber RingType
## 1                   w        p         w          o        p
## 2                   w        p         w          o        p
## 3                   w        p         w          o        p
## 4                   w        p         w          o        e
## 5                   w        p         w          o        p
## 6                   w        p         w          o        p
## 7                   w        p         w          o        p
## 8                   w        p         w          o        p
## 9                   w        p         w          o        p
## 10                  w        p         w          o        p
##    SporePrintColor Population Habitat
## 1                n          n       g
## 2                n          n       m
## 3                k          s       u
## 4                n          a       g
## 5                k          n       g
## 6                k          n       m
## 7                n          s       m
## 8                k          v       g
## 9                k          s       m
## 10               n          n       g

4.4 Get record counts for each column value

# Frequency of each Classes value. The call is namespace-qualified because a
# bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "Classes")
##     Classes freq
## 1    edible 4208
## 2 poisonous 3915
# Frequency of each CapShape value. The call is namespace-qualified because a
# bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "CapShape")
##   CapShape freq
## 1     bell  452
## 2  conical    4
## 3     flat 3152
## 4  knobbed  828
## 5   sunken   32
## 6   convex 3655
# Frequency of each CapSurface value. The call is namespace-qualified because
# a bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "CapSurface")
##   CapSurface freq
## 1    fibrous 2320
## 2    grooves    4
## 3     smooth 2555
## 4      scaly 3244
# Frequency of each CapColor value. The call is namespace-qualified because a
# bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "CapColor")
##    CapColor freq
## 1      buff  168
## 2  cinnamon   44
## 3       red 1500
## 4      gray 1840
## 5     brown 2283
## 6      pink  144
## 7     green   16
## 8    purple   16
## 9     white 1040
## 10   yellow 1072
# Frequency of each Bruises value. The call is namespace-qualified because a
# bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "Bruises")
##   Bruises freq
## 1      no 4748
## 2 bruises 3375
# Frequency of each Odor value. The call is namespace-qualified because a
# bare count() resolves to plyr or dplyr depending on load order, and only
# plyr::count() accepts the column name as a string.
plyr::count(mushroomsData, vars = "Odor")
##       Odor freq
## 1   almond  400
## 2 creosote  192
## 3     foul 2160
## 4    anise  400
## 5    musty   36
## 6     none 3528
## 7  pungent  255
## 8    spicy  576
## 9    fishy  576

5 Subset of the data frame: first 5 columns / first 10 rows

# Keep the first 5 columns and the first 10 rows. The previous row index 2:10
# skipped row 1 and returned only nine rows. head() also avoids NA-filled
# rows if the data frame ever has fewer than 10 records.
newMushroomsData <- head(mushroomsData[, 1:5], n = 10)
# Print the subset.
newMushroomsData
##      Classes CapShape CapSurface CapColor Bruises
## 2     edible     bell     smooth    white bruises
## 3  poisonous   convex      scaly    white bruises
## 4     edible   convex     smooth     gray      no
## 5     edible   convex      scaly   yellow bruises
## 6     edible     bell     smooth    white bruises
## 7     edible     bell      scaly    white bruises
## 8  poisonous   convex      scaly    white bruises
## 9     edible     bell     smooth   yellow bruises
## 10    edible   convex      scaly   yellow bruises

6 Data Exploration via Visualization

# Bar chart of how many records fall into each class. The actual counts are
# plotted (previously they were divided by 100 with no axis label, so the
# bar heights did not match the "# of" in the title). table() orders the
# class labels alphabetically, so the first colour goes to the first label.
barplot(
  table(mushroomsData$Classes),
  main = "Comparison of # of Edible vs Poisonous Mushrooms",
  ylab = "Number of mushrooms",
  col = c("green", "red")
)