library(readr)
# Download the raw mushroom data. The file has no header row, so readr
# assigns the default column names X1, X2, ...
mushrooms <- read_csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  col_names = FALSE
)

# Preview the first six rows.
head(mushrooms, n = 6L)
## # A tibble: 6 x 23
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 p x s n t p f c n k e e
## 2 e x s y t a f c b k e c
## 3 e b s w t l f c b n e c
## 4 p x y w t p f c n n e e
## 5 e x s g f n f w b k t e
## 6 e x y y t a f c b n e c
## # ... with 11 more variables: X13 <chr>, X14 <chr>, X15 <chr>, X16 <chr>,
## # X17 <chr>, X18 <chr>, X19 <chr>, X20 <chr>, X21 <chr>, X22 <chr>,
## # X23 <chr>
After studying the attribute information, I decided to pick the columns “class”, “cap-shape”, “cap-color”, “odor”, “gill-size”, “population”, and “habitat”.
# Keep only the seven columns of interest, selected by position:
# X1, X2, X4, X6, X9, X22 and X23.
mushrooms <- mushrooms[c(1, 2, 4, 6, 9, 22, 23)]

# Preview the reduced table.
head(mushrooms, n = 6L)
## # A tibble: 6 x 7
## X1 X2 X4 X6 X9 X22 X23
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 p x n p n s u
## 2 e x y a b n g
## 3 e b w l b n m
## 4 p x w p n s u
## 5 e x g n b a g
## 6 e x y a b n g
# Give the seven retained columns descriptive names (in column order).
names(mushrooms) <- c(
  "class",
  "cap_shape",
  "cap_color",
  "odor",
  "gill_size",
  "population",
  "habitat"
)

# Preview the renamed table.
head(mushrooms, n = 6L)
## # A tibble: 6 x 7
## class cap_shape cap_color odor gill_size population habitat
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 p x n p n s u
## 2 e x y a b n g
## 3 e b w l b n m
## 4 p x w p n s u
## 5 e x g n b a g
## 6 e x y a b n g
I replaced the single-letter attribute codes with their full names by using counters ‘i’ (for rows) and ‘j’ (for columns) in nested ‘while’ loops that run through each column, row by row. Inside the loops, ‘if’ statements select the column and ‘switch’ statements replace each attribute letter with the corresponding name.
I tried to find better ways to do this by using statements like mushrooms$columnname, sub(), and gsub(), but couldn’t get them working right.
# Lookup tables, keyed by column name. Each named character vector maps a
# single-letter attribute code (the element name) to its full label.
code_labels <- list(
  class = c(e = "edible", p = "poisonous"),
  cap_shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  # NOTE(review): "creasote" looks like a misspelling of "creosote"; the
  # original spelling is kept so the output values are unchanged.
  odor = c(
    a = "almond", l = "anise", c = "creasote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  gill_size = c(b = "broad", n = "narrow"),
  population = c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Replace the coded values in each listed column with their full labels.
#
# data:             a data frame / tibble containing the coded columns.
# labels_by_column: a named list; each name is a column of `data` and each
#                   element is a named character vector (code -> label).
#
# Returns `data` with the listed columns recoded as character vectors.
# Stops with an informative error if a listed column is absent or if a column
# holds a non-missing code that has no entry in its lookup table. Missing
# values (NA) are left as NA.
decode_columns <- function(data, labels_by_column) {
  absent <- setdiff(names(labels_by_column), names(data))
  if (length(absent) > 0) {
    stop(
      "Column(s) not found: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  for (column in names(labels_by_column)) {
    lookup <- labels_by_column[[column]]
    codes <- as.character(data[[column]])

    # Indexing a named vector by a character vector recodes the whole column
    # in one step; codes without a matching name come back as NA.
    decoded <- unname(lookup[codes])

    unknown <- unique(codes[is.na(decoded) & !is.na(codes)])
    if (length(unknown) > 0) {
      stop(
        "Unknown code(s) in column '", column, "': ",
        paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }

    data[[column]] <- decoded
  }

  data
}

mushrooms <- decode_columns(mushrooms, code_labels)
head(mushrooms)
## # A tibble: 6 x 7
## class cap_shape cap_color odor gill_size population habitat
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 poisonous convex brown pungent narrow scattered urban
## 2 edible convex yellow almond broad numerous grasses
## 3 edible bell white anise broad numerous meadows
## 4 poisonous convex white pungent narrow scattered urban
## 5 edible convex gray none broad abundant grasses
## 6 edible convex yellow almond broad numerous grasses