Read mushroom data set

# Load the UCI mushroom data set. The file has no header row, so the columns
# arrive as V1..V23.
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# strings to factors by default in R 4.0.0; the Factor columns in the str()
# output and the later levels() calls both rely on factor columns.
df <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = TRUE
)
str(df)
## 'data.frame':    8124 obs. of  23 variables:
##  $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
##  $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
##  $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
##  $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
##  $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
##  $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
##  $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
##  $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
##  $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
##  $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
##  $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# Show the number of rows and columns that were read.
dim(df)
## [1] 8124   23

Rename the columns

# Replace the default V1..V23 names with descriptive attribute names.
# Several names contain "-" or "?", so they are not syntactic R names and have
# to be quoted or backticked when used with `$`.
names(df) <- c(
  "class",
  "cap-shape", "cap-surface", "cap-color",
  "bruises?", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root",
  "stalk-surface-above-ring", "stalk-surface-below-ring",
  "stalk-color-above-ring", "stalk-color-below-ring",
  "veil-type", "veil-color",
  "ring-number", "ring-type",
  "spore-print-color", "population", "habitat"
)
head(df, n = 6L)
##   class cap-shape cap-surface cap-color bruises? odor gill-attachment
## 1     p         x           s         n        t    p               f
## 2     e         x           s         y        t    a               f
## 3     e         b           s         w        t    l               f
## 4     p         x           y         w        t    p               f
## 5     e         x           s         g        f    n               f
## 6     e         x           y         y        t    a               f
##   gill-spacing gill-size gill-color stalk-shape stalk-root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g

Create a new data frame from a subset of the columns

# Keep five of the 23 columns; all rows are retained.
mushrooms <- subset(
  df,
  select = c("class", "odor", "gill-size", "population", "habitat")
)

Check that the new data frame has the same number of observations as the original

# Dimensions of the full data frame, for comparison with the subset.
dim(df)
## [1] 8124   23
# Fail loudly if the column selection changed the number of rows, instead of
# relying on a visual comparison of the printed dimensions. stopifnot() prints
# nothing when the condition holds.
stopifnot(nrow(mushrooms) == nrow(df))
dim(mushrooms)
## [1] 8124    5

Replace abbreviations with actual values

# Full names for the single-letter codes, keyed by column name and then by
# code.
level_labels <- list(
  class = c(e = "edible", p = "poisonous"),
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  "gill-size" = c(b = "broad", n = "narrow"),
  population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Rename the levels of `x` using `full_names` (a character vector whose names
# are the codes). Codes without an entry in `full_names` are left unchanged,
# and the order of the levels is preserved.
expand_levels <- function(x, full_names) {
  # as.factor() leaves factors untouched and converts character columns, so
  # the recoding also works when the data were read without
  # stringsAsFactors = TRUE (the read.csv() default since R 4.0.0).
  x <- as.factor(x)
  codes <- levels(x)
  known <- codes %in% names(full_names)
  codes[known] <- unname(full_names[codes[known]])
  levels(x) <- codes
  x
}

# Recode every mapped column in one pass; Map() pairs each column with its
# lookup vector by position.
recoded_columns <- names(level_labels)
mushrooms[recoded_columns] <- Map(
  expand_levels,
  mushrooms[recoded_columns],
  level_labels
)


# First six rows of the recoded data.
head(mushrooms, n = 6L)
##       class    odor gill-size population habitat
## 1 poisonous pungent    narrow  scattered   urban
## 2    edible  almond     broad   numerous grasses
## 3    edible   anise     broad   numerous meadows
## 4 poisonous pungent    narrow  scattered   urban
## 5    edible    none     broad   abundant grasses
## 6    edible  almond     broad   numerous grasses
# Last six rows of the recoded data.
tail(mushrooms, n = 6L)
##          class  odor gill-size population habitat
## 8119 poisonous  foul    narrow    several   woods
## 8120    edible  none     broad  clustered  leaves
## 8121    edible  none     broad    several  leaves
## 8122    edible  none     broad  clustered  leaves
## 8123 poisonous fishy    narrow    several  leaves
## 8124    edible  none     broad  clustered  leaves