Description of the task

Take the data loaded from the Mushroom Dataset and create a data frame with a subset of the columns in the dataset. Include the column that indicates edible or poisonous and three or four other columns. Finally, add meaningful column names and replace the abbreviations used in the data.

1.- Locating the data file of the Mushroom Dataset

# Location of the raw Mushroom data file in the UCI Machine Learning Repository.
data <- paste0(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/",
  "mushroom/agaricus-lepiota.data"
)

2.- Loading the data file into a data frame

# Read the comma-separated file. It has no header row, so the columns receive
# the default names V1..V23.
# stringsAsFactors is set explicitly: its default became FALSE in R 4.0.0, and
# with character columns summary() would report only Length/Class/Mode instead
# of the per-level counts shown below.
datafile <- read.table(
  file = data,
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)
# Preview the first six rows.
head(datafile)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

The dimensions and a summary of the data frame are as follows:

# Number of rows and columns in the raw data frame.
c(nrow(datafile), ncol(datafile))
## [1] 8124   23
# Per-column overview of the values.
summary(object = datafile)
##  V1       V2       V3             V4       V5             V6      
##  e:4208   b: 452   f:2320   n      :2284   f:4748   n      :3528  
##  p:3916   c:   4   g:   4   g      :1840   t:3376   f      :2160  
##           f:3152   s:2556   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3656            b      : 168            l      : 400  
##                             (Other): 220            (Other): 484  
##  V7       V8       V9            V10       V11      V12      V13     
##  a: 210   c:6812   b:5612   b      :1728   e:3516   ?:2480   f: 552  
##  f:7914   w:1312   n:2512   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5176  
##                             n      :1048            e:1120   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1170                             
##  V14           V15            V16       V17      V18      V19     
##  f: 600   w      :4464   w      :4384   p:8124   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7488  
##  s:4936   g      : 576   g      : 576            w:7924   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  V20           V21       V22      V23     
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1872   n: 400   l: 832  
##  n:  36   h      :1632   s:1248   m: 292  
##  p:3968   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 368  
##           (Other): 144            w: 192

3.- Renaming the columns of the data frame

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Replace the default V1..V23 names with descriptive attribute names,
# listed in the same order as the columns of the raw file.
colnames(datafile) <- c(
  "class", "capshape", "capsurface", "capcolor", "bruises", "odor",
  "gillattachment", "gillspacing", "gillsize", "gillcolor",
  "stalkshape", "stalkroot",
  "stalksurface_above_ring", "stalksurface_below_ring",
  "stalkcolor_above_ring", "stalkcolor_below_ring",
  "veiltype", "veilcolor", "ringnumber", "ringtype",
  "sporeprint_color", "population", "habitat"
)

4.- Subsetting the data frame

# Keep the class column plus four descriptive attributes.
datafile <- select(
  datafile,
  class, capsurface, capcolor, population, habitat
)
# Frequency table of the codes in each retained column. The argument stays
# named `mushroom` because table() prints that name as the header.
lapply(datafile, function(mushroom) {
  table(mushroom)
})
## $class
## mushroom
##    e    p 
## 4208 3916 
## 
## $capsurface
## mushroom
##    f    g    s    y 
## 2320    4 2556 3244 
## 
## $capcolor
## mushroom
##    b    c    e    g    n    p    r    u    w    y 
##  168   44 1500 1840 2284  144   16   16 1040 1072 
## 
## $population
## mushroom
##    a    c    n    s    v    y 
##  384  340  400 1248 4040 1712 
## 
## $habitat
## mushroom
##    d    g    l    m    p    u    w 
## 3148 2148  832  292 1144  368  192

5.- Recoding the variables of the data frame

# Replace the single-letter codes in every retained column with readable
# labels. Cap-color code "c" maps to "cinnamon" (previously misspelled as
# "binnamon").
datafile <- datafile %>%
  mutate(
    class = recode(class, p = "poison", e = "edible"),
    capsurface = recode(
      capsurface,
      f = "fibrous", g = "grooves", y = "scaly", s = "smooth"
    ),
    capcolor = recode(
      capcolor,
      b = "buff", c = "cinnamon", g = "gray", n = "brown", p = "pink",
      r = "green", u = "purple", e = "red", w = "white", y = "yellow"
    ),
    population = recode(
      population,
      a = "abundant", c = "clustered", n = "numerous", s = "scattered",
      v = "several", y = "solitary"
    ),
    habitat = recode(
      habitat,
      g = "grasses", l = "leaves", m = "meadows", p = "paths",
      u = "urban", w = "waste", d = "woods"
    )
  )
# Show the per-label counts after recoding.
summary(datafile)
##     class        capsurface      capcolor        population  
##  edible:4208   fibrous:2320   brown  :2284   abundant : 384  
##  poison:3916   grooves:   4   gray   :1840   clustered: 340  
##                smooth :2556   red    :1500   numerous : 400  
##                scaly  :3244   yellow :1072   scattered:1248  
##                               white  :1040   several  :4040  
##                               buff   : 168   solitary :1712  
##                               (Other): 220                   
##     habitat    
##  woods  :3148  
##  grasses:2148  
##  leaves : 832  
##  meadows: 292  
##  paths  :1144  
##  urban  : 368  
##  waste  : 192