Importing the data

library(readr)
# Download the UCI mushroom data set. The file has no header row, so
# `col_names = FALSE` makes readr generate the names X1, X2, ..., X23.
#
# Every field is a single-letter categorical code, so all columns are pinned
# to character up front. This keeps the import independent of readr's
# type guessing (a column of codes can never be re-typed) and avoids the
# column-specification message on load.
mushrooms <- read_csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  col_names = FALSE,
  col_types = cols(.default = col_character())
)

# Preview the first rows to confirm the import worked.
head(mushrooms)
## # A tibble: 6 x 23
##      X1    X2    X3    X4    X5    X6    X7    X8    X9   X10   X11   X12
##   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1     p     x     s     n     t     p     f     c     n     k     e     e
## 2     e     x     s     y     t     a     f     c     b     k     e     c
## 3     e     b     s     w     t     l     f     c     b     n     e     c
## 4     p     x     y     w     t     p     f     c     n     n     e     e
## 5     e     x     s     g     f     n     f     w     b     k     t     e
## 6     e     x     y     y     t     a     f     c     b     n     e     c
## # ... with 11 more variables: X13 <chr>, X14 <chr>, X15 <chr>, X16 <chr>,
## #   X17 <chr>, X18 <chr>, X19 <chr>, X20 <chr>, X21 <chr>, X22 <chr>,
## #   X23 <chr>

Creating a subset of the “mushrooms” data frame

After studying the attribute information, I decided to pick the columns “class”, “cap-shape”, “cap-color”, “odor”, “gill-size”, “population”, and “habitat”.

Narrowing down the columns

# Keep only the seven columns of interest, selected by position in the raw
# file; the remaining columns are dropped.
mushrooms <- mushrooms[c(
  1,  # class
  2,  # cap-shape
  4,  # cap-color
  6,  # odor
  9,  # gill-size
  22, # population
  23  # habitat
)]
head(mushrooms)
## # A tibble: 6 x 7
##      X1    X2    X4    X6    X9   X22   X23
##   <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1     p     x     n     p     n     s     u
## 2     e     x     y     a     b     n     g
## 3     e     b     w     l     b     n     m
## 4     p     x     w     p     n     s     u
## 5     e     x     g     n     b     a     g
## 6     e     x     y     a     b     n     g

Changing the column names

# Replace the generated X* names with descriptive snake_case names, in the
# same order as the columns kept above.
names(mushrooms) <- c(
  "class",
  "cap_shape",
  "cap_color",
  "odor",
  "gill_size",
  "population",
  "habitat"
)
head(mushrooms)
## # A tibble: 6 x 7
##   class cap_shape cap_color  odor gill_size population habitat
##   <chr>     <chr>     <chr> <chr>     <chr>      <chr>   <chr>
## 1     p         x         n     p         n          s       u
## 2     e         x         y     a         b          n       g
## 3     e         b         w     l         b          n       m
## 4     p         x         w     p         n          s       u
## 5     e         x         g     n         b          a       g
## 6     e         x         y     a         b          n       g

Changing the attribute names

I did this by using counters ‘i’ (used for rows) and ‘j’ (used for columns) and nested ‘while’ loops to run through each column, row by row. Inside the ‘while’ loops are ‘if’ statements that select the current column and use ‘switch’ statements to replace each attribute letter with the corresponding name.

I tried to find better ways to do this, using expressions like `mushrooms$columnname` and functions like `sub()` and `gsub()`, but couldn’t get them working right.

# Lookup tables for decoding the single-letter attribute codes.
#
# One named character vector per column, listed in column order: the names are
# the codes found in the raw data, the values are the human-readable labels.
# NOTE(review): "creasote" is kept exactly as originally written so any later
# code matching on that label keeps working; the usual spelling is "creosote".
code_labels <- list(
  class = c(e = "edible", p = "poisonous"),
  cap_shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  odor = c(
    a = "almond", l = "anise", c = "creasote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  gill_size = c(b = "broad", n = "narrow"),
  population = c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Decode the leading columns of a data frame using per-column lookup tables.
#
# Arguments:
#   df     - data frame / tibble whose first columns hold attribute codes.
#   labels - list of named character vectors; element j decodes column j
#            (matching is by position, not by column name).
#
# Returns `df` with each covered column replaced by its decoded labels.
# Columns beyond `length(labels)` are left untouched, and missing values stay
# missing. A code that has no entry in its lookup table raises an error naming
# the column and the offending code(s), instead of failing on an opaque
# assignment error part-way through.
decode_columns <- function(df, labels) {
  n_cols <- min(ncol(df), length(labels))
  for (j in seq_len(n_cols)) {
    lookup <- labels[[j]]
    codes <- as.character(df[[j]])

    # Whole-column translation: one vectorised match per column rather than
    # one switch() call and one cell assignment per row.
    decoded <- unname(lookup[match(codes, names(lookup))])

    unknown <- unique(codes[is.na(decoded) & !is.na(codes)])
    if (length(unknown) > 0) {
      stop(
        "Unrecognised code(s) in column ", j, " (", names(labels)[j], "): ",
        paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }

    df[[j]] <- decoded
  }
  df
}

mushrooms <- decode_columns(mushrooms, code_labels)

head(mushrooms)
## # A tibble: 6 x 7
##       class cap_shape cap_color    odor gill_size population habitat
##       <chr>     <chr>     <chr>   <chr>     <chr>      <chr>   <chr>
## 1 poisonous    convex     brown pungent    narrow  scattered   urban
## 2    edible    convex    yellow  almond     broad   numerous grasses
## 3    edible      bell     white   anise     broad   numerous meadows
## 4 poisonous    convex     white pungent    narrow  scattered   urban
## 5    edible    convex      gray    none     broad   abundant grasses
## 6    edible    convex    yellow  almond     broad   numerous grasses