Importing the data

# Download the UCI mushroom data set. The file is comma-separated and has no
# header row, so read.csv() names the columns V1, V2, ...
# stringsAsFactors = TRUE is spelled out because the default changed to FALSE
# in R 4.0.0; the levels()-based relabelling further down needs factor columns.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
Mushroom_data <- read.csv(
  file = url,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(Mushroom_data)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Creating a new data frame from only certain columns

# Keep five of the raw columns, picked by position (V1, V4, V10, V22, V23).
# Indexing a data frame with a single vector selects columns.
mushroom <- Mushroom_data[c(1, 4, 10, 22, 23)]
head(mushroom)
##   V1 V4 V10 V22 V23
## 1  p  n   k   s   u
## 2  e  y   k   n   g
## 3  e  w   n   n   m
## 4  p  w   n   s   u
## 5  e  g   k   a   g
## 6  e  y   n   n   g

Adding Headers

# Replace the automatic V* column names with descriptive ones, in column order.
rawColumns <- c("class", "capcolor", "gillcolor", "population", "Habitat")
names(mushroom) <- rawColumns
head(mushroom)
##   class capcolor gillcolor population Habitat
## 1     p        n         k          s       u
## 2     e        y         k          n       g
## 3     e        w         n          n       m
## 4     p        w         n          s       u
## 5     e        g         k          a       g
## 6     e        y         n          n       g

Replacing the abbreviations used in the data

# Build a copy of the selected columns as factors. stringsAsFactors = TRUE is
# explicit because data.frame() stopped converting strings to factors by
# default in R 4.0.0; without it levels() is NULL and no code gets replaced.
# The resulting columns are named mushroom.class, mushroom.capcolor, ...
df <- data.frame(
  mushroom$class,
  mushroom$capcolor,
  mushroom$gillcolor,
  mushroom$population,
  mushroom$Habitat,
  stringsAsFactors = TRUE
)

# Single-letter code -> readable label, one lookup vector per column of `df`.
code_labels <- list(
  mushroom.class = c(p = "poisonous", e = "edible"),
  mushroom.capcolor = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  mushroom.gillcolor = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  ),
  mushroom.population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  mushroom.Habitat = c(
    g = "grasses", l = "leaves", m = "meadow", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Rename the levels of factor `x` using the named character vector `labels`
# (names are the old codes, values the new labels). Levels without an entry
# in `labels` are kept as they are, and the level order is preserved.
relabel_levels <- function(x, labels) {
  stopifnot(is.factor(x))
  codes <- levels(x)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  levels(x) <- codes
  x
}

# Apply each lookup vector to its matching column.
df[names(code_labels)] <- Map(
  relabel_levels,
  df[names(code_labels)],
  code_labels
)

head(df)
##   mushroom.class mushroom.capcolor mushroom.gillcolor mushroom.population
## 1      poisonous             brown              black           scattered
## 2         edible            yellow              black            numerous
## 3         edible             white              brown            numerous
## 4      poisonous             white              brown           scattered
## 5         edible              gray              black            abundant
## 6         edible            yellow              brown            numerous
##   mushroom.Habitat
## 1            urban
## 2          grasses
## 3           meadow
## 4            urban
## 5          grasses
## 6          grasses

Summary of the data (the `mushroom` data frame, still using the single-letter codes)

# Summarise each column of `mushroom`, the renamed frame that still holds the
# single-letter codes (not `df`, which carries the readable labels).
summary(mushroom)
##  class       capcolor      gillcolor    population Habitat 
##  e:4208   n      :2284   b      :1728   a: 384     d:3148  
##  p:3916   g      :1840   p      :1492   c: 340     g:2148  
##           e      :1500   w      :1202   n: 400     l: 832  
##           y      :1072   n      :1048   s:1248     m: 292  
##           w      :1040   g      : 752   v:4040     p:1144  
##           b      : 168   h      : 732   y:1712     u: 368  
##           (Other): 220   (Other):1170              w: 192