if (!require("data.table")) install.packages('data.table')
## Loading required package: data.table
if (!require("plyr")) install.packages('plyr')
## Loading required package: plyr
library(data.table)
library(plyr)
# DATA607 Homework Assignment 1: Mushroom Data Set Transformation
# Step 1: Get the data set from the web.


# Read the headerless mushroom file straight from the UCI archive and hold it
# as a plain data.frame; fread() assigns the default column names V1, V2, ...
mydata <- data.frame(
  fread(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
    header = FALSE
  )
)
# Preview the first rows of the raw data.
head(mydata)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
# Step 2: Keep only the first six columns of the raw data.
# drop = FALSE guarantees the result is still a data.frame.
newdata <- mydata[, seq_len(6), drop = FALSE]
head(newdata)
##   V1 V2 V3 V4 V5 V6
## 1  p  x  s  n  t  p
## 2  e  x  s  y  t  a
## 3  e  b  s  w  t  l
## 4  p  x  y  w  t  p
## 5  e  x  s  g  f  n
## 6  e  x  y  y  t  a
# Step 3: Give the six retained columns meaningful names.
# plyr::rename() takes a named character vector of the form old = "new".
modata <- plyr::rename(
  newdata,
  c(
    V1 = "Classes",
    V2 = "Shape",
    V3 = "Surface",
    V4 = "Color",
    V5 = "Bruises",
    V6 = "Odor"
  )
)
head(modata)
##   Classes Shape Surface Color Bruises Odor
## 1       p     x       s     n       t    p
## 2       e     x       s     y       t    a
## 3       e     b       s     w       t    l
## 4       p     x       y     w       t    p
## 5       e     x       s     g       f    n
## 6       e     x       y     y       t    a
# Step 4: Lookup tables that translate the single-letter codes into readable
# labels (e.g. e = Edible, p = Poisonous). The tables live in one list keyed by
# the column they apply to, so each table is tied to its column explicitly and
# no single-letter global (such as `c`, which would mask base::c) is created.
value_labels <- list(
  Classes = c(e = "Edible", p = "Poisonous"),
  Shape = c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  ),
  Surface = c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth"),
  Color = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  ),
  Bruises = c(t = "Bruises", f = "No"),
  Odor = c(
    a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
    m = "Musty", n = "None", p = "Pungent", s = "Spicy"
  )
)

# Step 5: Apply plyr::revalue() to every column that has a lookup table.
# revalue() replaces only the codes listed in the table and leaves any other
# value untouched.
for (column in names(value_labels)) {
  modata[[column]] <- plyr::revalue(modata[[column]], value_labels[[column]])
}

head(modata)
##     Classes  Shape Surface  Color Bruises    Odor
## 1 Poisonous Convex  Smooth  Brown Bruises Pungent
## 2    Edible Convex  Smooth Yellow Bruises  Almond
## 3    Edible   Bell  Smooth  White Bruises   Anise
## 4 Poisonous Convex   Scaly  White Bruises Pungent
## 5    Edible Convex  Smooth   Gray      No    None
## 6    Edible Convex   Scaly Yellow Bruises  Almond
# The whole data set can be printed by uncommenting the line below.
#modata