DATA 607 - Assignment 1

Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data — for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.

Import the mushroom database

# Load the raw mushroom records from the UCI repository.
# header = FALSE: the columns are named by hand later in this script, so the
# first line must be read as an observation rather than used as column names
# (the read.csv() default, header = TRUE, would silently drop that record).
# stringsAsFactors = FALSE keeps the single-letter codes as plain character
# values regardless of the R version's default.
mushroomDB <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = FALSE
)
Narrow the database to include only the following characteristics:
Class, Cap Color, Bruised, Odor, Gill Color, Stalk Color, Veil Color, Spore Color, Population, and Habitat
# Keep ten of the raw columns, chosen by position. In order they are:
# class, cap color, bruised, odor, gill color, stalk color, veil color,
# spore color, population and habitat. All rows are retained.
mDB2 <- mushroomDB[, c(1, 4, 5, 6, 10, 15, 18, 21, 22, 23), drop = FALSE]

Using the information from the data dictionary (~/agaricus-lepiota.names), rename the columns/variables and replace the single-letter codes with descriptive labels

# Give the ten retained columns descriptive names (same order as the subset).
colnames(mDB2) <- c(
  "Class", "Cap Color", "Bruised", "Odor", "Gill Color",
  "Stalk Color", "Veil Color", "Spore Color", "Population", "Habitat"
)

# Translate a vector of single-letter codes into descriptive labels.
#
# codes   - vector (character or factor) of raw codes.
# lookup  - named character vector: names are codes, values are labels.
# default - label used for any non-missing code that is absent from `lookup`.
#
# Returns a character vector the same length as `codes`. Missing codes (NA)
# stay NA, matching what the earlier nested ifelse() chains produced.
decode_column <- function(codes, lookup, default = "N/A") {
  labels <- unname(lookup[as.character(codes)])
  labels[is.na(labels) & !is.na(codes)] <- default
  labels
}

# One code -> label table per column, keyed by the column name set above.
code_labels <- list(
  "Class" = c(e = "Edible", p = "Poisonous"),
  "Cap Color" = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  ),
  "Bruised" = c(t = "Yes", f = "No"),
  "Odor" = c(
    a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
    m = "Musty", n = "None", p = "Pungent", s = "Spicy"
  ),
  "Gill Color" = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", g = "Gray",
    r = "Green", o = "Orange", p = "Pink", u = "Purple", e = "Red",
    w = "White", y = "Yellow"
  ),
  "Stalk Color" = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", o = "Orange",
    p = "Pink", e = "Red", w = "White", y = "Yellow"
  ),
  "Veil Color" = c(n = "Brown", o = "Orange", w = "White", y = "Yellow"),
  "Spore Color" = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", r = "Green",
    o = "Orange", u = "Purple", w = "White", y = "Yellow"
  ),
  "Population" = c(
    a = "Abundant", c = "Clustered", n = "Numerous", s = "Scattered",
    v = "Several", y = "Solitary"
  ),
  "Habitat" = c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths", u = "Urban",
    w = "Waste", d = "Woods"
  )
)

# Recode every column in one pass. Unrecognised codes become "N/A" in all
# columns, including Class, which previously fell back to the number 99.
mDB2[names(code_labels)] <- Map(
  decode_column,
  mDB2[names(code_labels)],
  code_labels
)

Use the DT package (an R interface to the DataTables library) to simplify, tidy, and speed up the data presentation.

I prefer it over RStudio’s df_print.

 # Render the recoded data frame as an interactive table. The Scroller
 # extension is enabled with deferred rendering and a scrollY value of 200.
 library(DT)
 datatable(
   data = mDB2,
   extensions = "Scroller",
   options = list(deferRender = TRUE, scrollY = 200, scroller = TRUE)
 )