## Reading mushroom data from the URL; since the data does not have a header row,
## the 'header=FALSE' argument is used
library(plyr)

msrmDS <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ","
)


## Dimension of the dataset
dim(msrmDS)
## [1] 8124   23
## so the dataset has 23 variables (columns) and 8124 rows

## Create a subset of the data with 1000 randomly selected rows and
## the 1st, 2nd, 6th and 20th through 23rd columns; the first column indicates
## whether a mushroom is edible or poisonous.
## NOTE(review): no seed is set before sample() in this script, so a different
## set of rows is drawn on every run.

## seq_len() is used instead of 1:nrow() so an empty data frame cannot produce
## the backwards sequence c(1, 0).
msrmDS <- msrmDS[sample(seq_len(nrow(msrmDS)), 1000, replace = FALSE),
                 c(1, 2, 6, 20:23)]

## View() opens a data viewer window; that only makes sense (and may fail
## otherwise) in an interactive session, so skip it when run as a batch script.
if (interactive()) {
  View(msrmDS)
}

## Naming the columns based on the original data dictionary.
## The order matches the selected source columns 1, 2, 6 and 20 through 23.
names(msrmDS) <- c("classes", "cap-shape", "odor", "ring-type",
                   "spore-print-color", "population", "habitat")

## Only open the data viewer in an interactive session; in a batch run
## (e.g. Rscript) there is nobody to look at it and the call may fail.
if (interactive()) {
  View(msrmDS)
}

## TRANSFORMATION OF DATA
## mapvalues and revalue, both functions of the 'plyr' package, achieve
## the same result, so both functions were used to compare their output
## Recode the single-letter "classes" codes into descriptive labels
msrmDS[["classes"]] <- mapvalues(
  msrmDS[["classes"]],
  from = c("e", "p"),
  to = c("edible", "poisonous")
)

## Recode the single-letter "cap-shape" codes into descriptive labels
msrmDS[["cap-shape"]] <- revalue(
  msrmDS[["cap-shape"]],
  c(
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
)

## Recode the single-letter "odor" codes into descriptive labels
msrmDS[["odor"]] <- revalue(
  msrmDS[["odor"]],
  c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
)

## Recode the single-letter "ring-type" codes into descriptive labels.
## The mapping lists every possible code for this column, even though the
## sampled subset does not necessarily contain all of them.

msrmDS[["ring-type"]] <- revalue(
  msrmDS[["ring-type"]],
  c(
    c = "cobwebby",
    e = "evanescent",
    f = "flaring",
    l = "large",
    n = "none",
    p = "pendant",
    s = "sheathing",
    z = "zone"
  )
)
## The following `from` values were not present in `x`: c, s, z
## Recode the single-letter "spore-print-color" codes into descriptive labels
msrmDS[["spore-print-color"]] <- revalue(
  msrmDS[["spore-print-color"]],
  c(
    k = "black",
    n = "brown",
    b = "buff",
    h = "chocolate",
    r = "green",
    o = "orange",
    u = "purple",
    w = "white",
    y = "yellow"
  )
)

## Transforming data for "population" variable
## (labels follow the data dictionary: 'y' stands for "solitary")
msrmDS$population <- revalue(msrmDS$population,
                             c("a" = "abundant", "c" = "clustered",
                               "n" = "numerous", "s" = "scattered",
                               "v" = "several", "y" = "solitary"))

## Transforming data for "habitat" variable
## (labels follow the data dictionary: 'w' stands for "waste")
msrmDS$habitat <- revalue(msrmDS$habitat,
                          c("g" = "grasses", "l" = "leaves",
                            "m" = "meadows", "p" = "paths",
                            "u" = "urban", "w" = "waste", "d" = "woods"))

## Preview the first rows of the recoded subset. The output below is an
## example from one run; the rows differ between runs because the subset is
## drawn at random.
head(msrmDS)
##        classes cap-shape    odor  ring-type spore-print-color population
## 7450    edible   knobbed    none    pendant             white   solitary
## 29      edible      flat    none    pendant             black   solitary
## 1778 poisonous      flat pungent    pendant             brown  scattered
## 99      edible      bell  almond    pendant             brown  scattered
## 1357    edible    convex    none evanescent             black  scattered
## 1415    edible      flat    none evanescent             brown   abundant
##      habitat
## 7450   paths
## 29     urban
## 1778   urban
## 99   grasses
## 1357 grasses
## 1415 grasses