Assignment =>

Study the Mushrooms dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns (and if you like rows) in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data — for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.

Solution =>

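Read the CSV file with read.csv. The arguments are: [A] the file name (here a URL) to read; [B] header = FALSE, because the file has no header row; [C] “,” as the field separator; [D] na.strings, a character vector of strings which are to be interpreted as “NA” (not available, i.e. missing) — here “?”.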

# Load the raw mushroom records straight from the UCI repository.
# The file has no header row, fields are comma-separated, and "?" is
# read as a missing value (NA).
mushroom_full_dataFile <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  na.strings = "?"
)
# Preview the first rows; columns still carry the default names V1..V23.
head(mushroom_full_dataFile)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
# Load the local data dictionary. Each line is split at ":"; the part
# before the colon (the attribute name) becomes the row name and the part
# after it (the "label=code,label=code,..." list) is stored in column V2.
mushroom_dictionay <- read.table(
  file = "./data_dictionary.txt",
  sep = ":",
  row.names = 1
)
# Preview the first dictionary entries.
head(mushroom_dictionay)
##                                                                                          V2
## class                                                                     poison=p,edible=e
## cap-shape                               bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
## cap-surface                                            fibrous=f,grooves=g,scaly=y,smooth=s
## cap-color   brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
## bruises?                                                                     bruises=t,no=f
## odor            almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s

Take the row names of the dictionary (which came from the first column of the dictionary file) and use them as the column names of the data file

# Label the data columns with the attribute names held as the dictionary's
# row names; this relies on the dictionary rows being in column order.
colnames(mushroom_full_dataFile) <- rownames(mushroom_dictionay)
# Preview the data again, now with descriptive column names.
head(mushroom_full_dataFile)
##   class cap-shape cap-surface cap-color bruises? odor gill-attachment
## 1     p         x           s         n        t    p               f
## 2     e         x           s         y        t    a               f
## 3     e         b           s         w        t    l               f
## 4     p         x           y         w        t    p               f
## 5     e         x           s         g        f    n               f
## 6     e         x           y         y        t    a               f
##   gill-spacing gill-size gill-color stalk-shape stalk-root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g

Select the class column and four others

# Keep all rows but only the class column plus four descriptive attributes.
mushroom_sub_df <- mushroom_full_dataFile[
  ,
  c("class", "cap-color", "habitat", "ring-number", "odor"),
  drop = FALSE
]
# Preview the reduced data frame (values are still single-letter codes).
head(mushroom_sub_df)
##   class cap-color habitat ring-number odor
## 1     p         n       u           o    p
## 2     e         y       g           o    a
## 3     e         w       m           o    l
## 4     p         w       u           o    p
## 5     e         g       g           o    n
## 6     e         y       g           o    a

Create a function to replace each abbreviation with its translation from the data dictionary (uses gsub with regex)

# Translate every coded value in one column of `mushroom_sub_df` into its
# descriptive label from the data dictionary.
#
# Args:
#   a: name of a column of `mushroom_sub_df`; the same name must be a row
#      name of `mushroom_dictionay`, whose first column holds the entry in
#      the form "label=code,label=code,...".
#
# Returns:
#   An unnamed character vector with one label per row of `mushroom_sub_df`.
#   Codes that are missing (NA) or not listed in the dictionary entry yield
#   NA rather than the raw dictionary string.
#
# Note: reads the global objects `mushroom_sub_df` and `mushroom_dictionay`.
getTranslation <- function(a) {
  # Split the dictionary entry into its "label=code" pairs once, instead of
  # running a regex over the whole entry for every row.
  entry <- as.character(mushroom_dictionay[a, 1])
  pairs <- strsplit(entry, ",", fixed = TRUE)[[1]]

  # Text before "=" is the label, text after it is the code.
  labels <- trimws(gsub("=.*$", "", pairs))
  codes <- trimws(gsub("^.*=", "", pairs))

  # Exact, vectorised lookup: no regex is built from the data values, so
  # special characters or NA in the data cannot cause false matches.
  labels[match(as.character(mushroom_sub_df[, a]), codes)]
}
# Translate every selected column. sapply() over the column names binds the
# per-column results into a character matrix whose columns are named after
# the selected fields.
mushroom_translated_df <- sapply(colnames(mushroom_sub_df), getTranslation)
# Preview the translated values.
head(mushroom_translated_df)
##      class    cap-color habitat   ring-number odor     
## [1,] "poison" "brown"   "urban"   "one"       "pungent"
## [2,] "edible" "yellow"  "grasses" "one"       "almond" 
## [3,] "edible" "white"   "meadows" "one"       "anise"  
## [4,] "poison" "white"   "urban"   "one"       "pungent"
## [5,] "edible" "gray"    "grasses" "one"       "none"   
## [6,] "edible" "yellow"  "grasses" "one"       "almond"