Description of the Assignment

Mushrooms Dataset. A famous—if slightly moldy—dataset about mushrooms can be found in the UCI repository here: https://archive.ics.uci.edu/ml/datasets/Mushroom.

Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns (and if you like rows) in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data—for example, in the appropriate column, “e” might become “edible”.

Read mushroom dataset from URL

# Location of the raw UCI mushroom file (comma-separated, no header row).
mushroom_Url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# The fields are categorical letter codes, so read every column as character.
# This skips read.table()'s type guessing and keeps the column types the same
# on R versions that would otherwise convert strings to factors.
mushrooms_data <- read.table(
  file = mushroom_Url,
  header = FALSE,
  sep = ",",
  colClasses = "character"
)

Select columns: Class, Cap Shape, Odor, Population, Habitat

# Positions of the kept columns in the raw file, in order:
# class, cap shape, odor, population, habitat.
cols_indx <- c(1, 2, 6, 22, 23)

Create a data frame with 10 rows and 5 columns. Also name the columns.

# Keep the first ten observations of the selected columns and label them with
# readable headers in a single step.
mushrooms_subset <- setNames(
  mushrooms_data[seq_len(10), cols_indx],
  c("Class", "Cap Shape", "Odor", "Population", "Habitat")
)

# Snapshot of the still-abbreviated values, kept for the side-by-side display
# and the "raw" export further down.
mushrooms_orig <- mushrooms_subset

Transformation functions for each selected column

#' Translate mushroom class codes into descriptive labels.
#'
#' @param val Class code(s): "p" or "e". Coerced with `as.character()`, so
#'   factor input works as well. May be a vector of any length.
#' @return A character vector the same length as `val` holding "poisonous" or
#'   "edible"; `NA_character_` for any code that is not in the table.
Describe.Class <- function(val) {
  labels <- c(
    p = "poisonous",
    e = "edible"
  )
  # Named-vector lookup: an unknown code indexes to NA rather than the
  # invisible NULL that a switch() without a default returns, so callers
  # always receive a character vector.
  unname(labels[as.character(val)])
}

#' Translate cap-shape codes into descriptive labels.
#'
#' @param val Cap-shape code(s): one of "b", "c", "x", "f", "k", "s". Coerced
#'   with `as.character()`, so factor input works as well. May be a vector.
#' @return A character vector the same length as `val` with the matching
#'   label; `NA_character_` for any code that is not in the table.
Describe.CapShape <- function(val) {
  labels <- c(
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
  # Named-vector lookup: an unknown code indexes to NA rather than the
  # invisible NULL that a switch() without a default returns.
  unname(labels[as.character(val)])
}

#' Translate odor codes into descriptive labels.
#'
#' @param val Odor code(s): one of "a", "l", "c", "y", "f", "m", "n", "p",
#'   "s". Coerced with `as.character()`, so factor input works as well. May
#'   be a vector.
#' @return A character vector the same length as `val` with the matching
#'   label; `NA_character_` for any code that is not in the table.
Describe.Odor <- function(val) {
  labels <- c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
  # Named-vector lookup: an unknown code indexes to NA rather than the
  # invisible NULL that a switch() without a default returns.
  unname(labels[as.character(val)])
}
#' Translate population codes into descriptive labels.
#'
#' @param val Population code(s): one of "a", "c", "n", "s", "v", "y".
#'   Coerced with `as.character()`, so factor input works as well. May be a
#'   vector.
#' @return A character vector the same length as `val` with the matching
#'   label; `NA_character_` for any code that is not in the table.
Describe.Population <- function(val) {
  labels <- c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
  # Named-vector lookup: an unknown code indexes to NA rather than the
  # invisible NULL that a switch() without a default returns.
  unname(labels[as.character(val)])
}

#' Translate habitat codes into descriptive labels.
#'
#' @param val Habitat code(s): one of "g", "l", "m", "p", "u", "w", "d".
#'   Coerced with `as.character()`, so factor input works as well. May be a
#'   vector.
#' @return A character vector the same length as `val` with the matching
#'   label; `NA_character_` for any code that is not in the table.
Describe.Habitat <- function(val) {
  labels <- c(
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste",
    d = "woods"
  )
  # Named-vector lookup: an unknown code indexes to NA rather than the
  # invisible NULL that a switch() without a default returns.
  unname(labels[as.character(val)])
}

Apply transformation functions on data frame elements

# Decode every column in place, one element at a time.
# vapply() with a character(1) template guarantees each column stays a plain
# character vector and stops with an error if a decoder returns anything else;
# sapply() would silently produce a list column in that case. USE.NAMES = FALSE
# keeps the original codes from being attached as element names.
mushrooms_subset$Class <- vapply(
  mushrooms_subset$Class, Describe.Class, character(1),
  USE.NAMES = FALSE
)
mushrooms_subset$`Cap Shape` <- vapply(
  mushrooms_subset$`Cap Shape`, Describe.CapShape, character(1),
  USE.NAMES = FALSE
)
mushrooms_subset$Odor <- vapply(
  mushrooms_subset$Odor, Describe.Odor, character(1),
  USE.NAMES = FALSE
)
mushrooms_subset$Population <- vapply(
  mushrooms_subset$Population, Describe.Population, character(1),
  USE.NAMES = FALSE
)
mushrooms_subset$Habitat <- vapply(
  mushrooms_subset$Habitat, Describe.Habitat, character(1),
  USE.NAMES = FALSE
)

Display original and transformed data

# Preview the untouched copy: the first six rows still show the one-letter codes.
head(mushrooms_orig)
##   Class Cap Shape Odor Population Habitat
## 1     p         x    p          s       u
## 2     e         x    a          n       g
## 3     e         b    l          n       m
## 4     p         x    p          s       u
## 5     e         x    n          a       g
## 6     e         x    a          n       g
# Preview the same six rows after decoding, for a side-by-side comparison.
head(mushrooms_subset)
##       Class Cap Shape    Odor Population Habitat
## 1 poisonous    convex pungent  scattered   urban
## 2    edible    convex  almond   numerous grasses
## 3    edible      bell   anise   numerous meadows
## 4 poisonous    convex pungent  scattered   urban
## 5    edible    convex    none   abundant grasses
## 6    edible    convex  almond   numerous grasses

Export to files

# Save the abbreviated ("raw") subset as CSV; the relative path resolves
# against the current working directory.
write.csv(
  x = mushrooms_orig,
  file = "Mushroom Subset Original.csv"
)

# Save the decoded subset alongside it, also as CSV.
write.csv(
  x = mushrooms_subset,
  file = "Mushroom Subset Transformed.csv"
)

References

Used the transform methods found in https://rpubs.com/arun_infy13/93236, with the following minor modifications:

  • Populate mushroom data frame from original link (mushroom dataset)
  • Use as.character() in switch logic
  • Export ‘raw’ and transformed data to files