I downloaded the mushrooms dataset from https://archive.ics.uci.edu/ml/datasets/Mushroom and uploaded the file to my GitHub repository. I then used the curl package to retrieve the dataset from that repository.

library(curl)
# Read the raw file through a curl connection. header = FALSE because the
# file has no header row, so the columns get the default names V1, V2, ...
y <- read.csv(
  curl("https://raw.githubusercontent.com/nabilahossain/Image/master/assignment%203.txt"),
  header = FALSE
)
# Preview the first 10 rows of the first 10 columns.
head(y[, seq_len(10)], n = 10)
##    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1   p  x  s  n  t  p  f  c  n   k
## 2   e  x  s  y  t  a  f  c  b   k
## 3   e  b  s  w  t  l  f  c  b   n
## 4   p  x  y  w  t  p  f  c  n   n
## 5   e  x  s  g  f  n  f  w  b   k
## 6   e  x  y  y  t  a  f  c  b   n
## 7   e  b  s  w  t  a  f  c  b   g
## 8   e  b  y  w  t  l  f  c  b   n
## 9   p  x  y  w  t  p  f  c  n   p
## 10  e  b  s  y  t  a  f  c  b   g

I chose to look at the columns that hold information for “classes”, “cap shape”, “cap surface”, “odor”, “population” and “habitat” from the mushrooms dataset. I select those columns and assign them descriptive names.

# Keep columns 1, 2, 3, 6, 22 and 23 of the raw data and give them
# descriptive names in a single step.
x <- setNames(
  data.frame(y[c(1:3, 6, 22:23)]),
  c("Classes", "Cap_Shape", "Cap_Surface", "Odor", "Population", "Habitat")
)
head(x, n = 3)
##   Classes Cap_Shape Cap_Surface Odor Population Habitat
## 1       p         x           s    p          s       u
## 2       e         x           s    a          n       g
## 3       e         b           s    l          n       m

I check if I got the right number of rows and columns.

# dim(x) is c(rows, columns); print the column count, then the row count.
dim(x)[2L]
## [1] 6
dim(x)[1L]
## [1] 8124

I will replace the abbreviations for “classes”, and check if they are correct.

# Translate the one-letter class codes into readable labels.
# Codes missing from the lookup table become "N/A"; NA codes stay NA.
x$Classes <- local({
  class_labels <- c(p = "Poisonous", e = "Edible")
  code <- as.character(x$Classes)
  label <- unname(class_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})
head(x, n = 5)
##     Classes Cap_Shape Cap_Surface Odor Population Habitat
## 1 Poisonous         x           s    p          s       u
## 2    Edible         x           s    a          n       g
## 3    Edible         b           s    l          n       m
## 4 Poisonous         x           y    p          s       u
## 5    Edible         x           s    n          a       g

Having replaced the abbreviations for “classes”, next I will replace the abbreviations for the remaining columns: “cap_shape”, “cap_surface”, “odor”, “population” and “habitat”.

For Cap_Shape:

# Translate the one-letter cap-shape codes into readable labels.
# Codes missing from the lookup table become "N/A"; NA codes stay NA.
x$Cap_Shape <- local({
  shape_labels <- c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  )
  code <- as.character(x$Cap_Shape)
  label <- unname(shape_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})
head(x, n = 3)
##     Classes Cap_Shape Cap_Surface Odor Population Habitat
## 1 Poisonous    Convex           s    p          s       u
## 2    Edible    Convex           s    a          n       g
## 3    Edible      Bell           s    l          n       m

For Cap_Surface and Odor:

# Translate the one-letter cap-surface codes into readable labels.
# Codes missing from the lookup table become "N/A"; NA codes stay NA.
x$Cap_Surface <- local({
  surface_labels <- c(
    f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth"
  )
  code <- as.character(x$Cap_Surface)
  label <- unname(surface_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})
# Same translation for the odor codes.
x$Odor <- local({
  odor_labels <- c(
    a = "Almond", l = "Anise", c = "Creosote",
    y = "Fishy", f = "Foul", m = "Musty",
    n = "None", p = "Pungent", s = "Spicy"
  )
  code <- as.character(x$Odor)
  label <- unname(odor_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})
tail(x, n = 3)
##        Classes Cap_Shape Cap_Surface  Odor Population Habitat
## 8122    Edible      Flat      Smooth  None          c       l
## 8123 Poisonous   Knobbed       Scaly Fishy          v       l
## 8124    Edible    Convex      Smooth  None          c       l

For Population and Habitat:

# Translate the one-letter population codes into readable labels.
# Codes missing from the lookup table become "N/A"; NA codes stay NA.
x$Population <- local({
  population_labels <- c(
    a = "Abundant", c = "Clustered", n = "Numerous",
    s = "Scattered", v = "Several", y = "Solitary"
  )
  code <- as.character(x$Population)
  label <- unname(population_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})

# Same translation for the habitat codes.
# NOTE(review): "grasses" is lowercase, unlike the other labels; it is kept
# as-is so the resulting values are unchanged.
x$Habitat <- local({
  habitat_labels <- c(
    g = "grasses", l = "Leaves", m = "Meadows", p = "Paths",
    u = "Urban", w = "Waste", d = "Woods"
  )
  code <- as.character(x$Habitat)
  label <- unname(habitat_labels[code])
  label[is.na(label) & !is.na(code)] <- "N/A"
  label
})

head(x, n = 3)
##     Classes Cap_Shape Cap_Surface    Odor Population Habitat
## 1 Poisonous    Convex      Smooth Pungent  Scattered   Urban
## 2    Edible    Convex      Smooth  Almond   Numerous grasses
## 3    Edible      Bell      Smooth   Anise   Numerous Meadows

I check whether the abbreviations in the subset were replaced correctly by looking at the first 30 rows (without row numbers) of the mushroom subset in a table format (using the knitr package):

library(knitr)
# Reset the row names to the default 1..n sequence before rendering.
row.names(x) <- NULL
# Render the first 30 rows as a table.
kable(head(x, n = 30))
Classes Cap_Shape Cap_Surface Odor Population Habitat
Poisonous Convex Smooth Pungent Scattered Urban
Edible Convex Smooth Almond Numerous grasses
Edible Bell Smooth Anise Numerous Meadows
Poisonous Convex Scaly Pungent Scattered Urban
Edible Convex Smooth None Abundant grasses
Edible Convex Scaly Almond Numerous grasses
Edible Bell Smooth Almond Numerous Meadows
Edible Bell Scaly Anise Scattered Meadows
Poisonous Convex Scaly Pungent Several grasses
Edible Bell Smooth Almond Scattered Meadows
Edible Convex Scaly Anise Numerous grasses
Edible Convex Scaly Almond Scattered Meadows
Edible Bell Smooth Almond Scattered grasses
Poisonous Convex Scaly Pungent Several Urban
Edible Convex Fibrous None Abundant grasses
Edible Sunken Fibrous None Solitary Urban
Edible Flat Fibrous None Abundant grasses
Poisonous Convex Smooth Pungent Scattered grasses
Poisonous Convex Scaly Pungent Scattered Urban
Poisonous Convex Smooth Pungent Scattered Urban
Edible Bell Smooth Almond Scattered Meadows
Poisonous Convex Scaly Pungent Several grasses
Edible Bell Scaly Anise Scattered Meadows
Edible Bell Scaly Almond Numerous Meadows
Edible Bell Smooth Anise Scattered Meadows
Poisonous Flat Smooth Pungent Several grasses
Edible Convex Scaly Almond Numerous Meadows
Edible Convex Scaly Anise Numerous Meadows
Edible Flat Fibrous None Solitary Urban
Edible Convex Smooth Almond Several Woods

If we wanted to look at only the first 15 edible mushrooms (without unique row number), in a table format, it would look like this:

# Keep only the rows classified as edible.
SetE <- subset(x, Classes == "Edible")
# Render the first 15 edible mushrooms as a table. subset() keeps the
# original row names of the selected rows, so row.names = FALSE is passed
# to leave them out of the table without modifying SetE.
knitr::kable(head(SetE, 15), row.names = FALSE)
Classes Cap_Shape Cap_Surface Odor Population Habitat
Edible Convex Smooth Almond Numerous grasses
Edible Bell Smooth Anise Numerous Meadows
Edible Convex Smooth None Abundant grasses
Edible Convex Scaly Almond Numerous grasses
Edible Bell Smooth Almond Numerous Meadows
Edible Bell Scaly Anise Scattered Meadows
Edible Bell Smooth Almond Scattered Meadows
Edible Convex Scaly Anise Numerous grasses
Edible Convex Scaly Almond Scattered Meadows
Edible Bell Smooth Almond Scattered grasses
Edible Convex Fibrous None Abundant grasses
Edible Sunken Fibrous None Solitary Urban
Edible Flat Fibrous None Abundant grasses
Edible Bell Smooth Almond Scattered Meadows
Edible Bell Scaly Anise Scattered Meadows