Obtain Mushrooms data from the web

Mushrooms data is available from the following link: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data

Reference: Mushrooms Data Set

  1. Obtain the data from the link:
# Download the raw UCI file (comma-separated, read without a header row) and
# save a local CSV copy of it.
theURL <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushroomsDB <- read.table(theURL, header = FALSE, sep = ",", stringsAsFactors = FALSE)
# write.table() also writes the row names by default, so the header line of
# Mushroom.csv has one field fewer than the data rows; read.table() with
# header = TRUE then takes the first column as row names.
write.table(mushroomsDB, file = "Mushroom.csv", sep = ",")
  2. Upload the data to Github.
  3. Use the “raw” URL of the CSV file from Github; otherwise the data retrieved will be wrong.

  4. Obtain the data from the Github URL.

# Load the mushroom data from the raw Github copy of Mushroom.csv.
# The first line of that file is treated as the column header.
theURL <- "https://raw.githubusercontent.com/arunk13/MSDA-Assignments/master/BridgeCourse/Week3/Mushroom.csv"
mushroomsDB <- read.table(
  file = theURL,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

Overview of the mushroom dataset

Number of rows in the dataset : 8124
Number of columns in the dataset : 23

Subsetting the dataset

Subsetting to a smaller data frame of 10 rows and 7 columns. The columns chosen for this analysis are: “class”, “cap-shape”, “cap-surface”, “cap-color”, “odor”, “population”, “habitat”.

# Keep the first 10 rows and the seven columns used in this analysis
# (positions 1-4, 6, 22 and 23), then give those columns descriptive names.
mushroomsDB <- mushroomsDB[seq_len(10), c(1:4, 6, 22:23)]
names(mushroomsDB) <- c(
  "class",
  "cap-shape",
  "cap-surface",
  "cap-color",
  "odor",
  "population",
  "habitat"
)

A snapshot of the subset

# Preview the subset; head() prints the first six rows by default.
head(mushroomsDB)
##   class cap-shape cap-surface cap-color odor population habitat
## 1     p         x           s         n    p          s       u
## 2     e         x           s         y    a          n       g
## 3     e         b           s         w    l          n       m
## 4     p         x           y         w    p          s       u
## 5     e         x           s         g    n          a       g
## 6     e         x           y         y    a          n       g

Data transformation

To transform each key in the column data to its respective value, the functions below are used.

# Translate a one-letter "class" code into its descriptive label.
#   key: a single character code ("p" or "e").
# Returns "poisonous" or "edible"; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformClassData <- function(key) {
  switch(key,
    p = "poisonous",
    e = "edible"
  )
}

# Translate a one-letter "cap-shape" code into its descriptive label.
#   key: a single character code (b, c, x, f, k or s).
# Returns the matching shape name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformCapShapeData <- function(key) {
  switch(key,
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
}

# Translate a one-letter "cap-surface" code into its descriptive label.
#   key: a single character code (f, g, y or s).
# Returns the matching surface name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformCapSurfaceData <- function(key) {
  switch(key,
    f = "fibrous",
    g = "grooves",
    y = "scaly",
    s = "smooth"
  )
}

# Translate a one-letter "cap-color" code into its descriptive label.
#   key: a single character code (n, b, c, g, r, p, u, e, w or y).
# Returns the matching colour name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformCapColorData <- function(key) {
  switch(key,
    n = "brown",
    b = "buff",
    c = "cinnamon",
    g = "gray",
    r = "green",
    p = "pink",
    u = "purple",
    e = "red",
    w = "white",
    y = "yellow"
  )
}

# Translate a one-letter "odor" code into its descriptive label.
#   key: a single character code (a, l, c, y, f, m, n, p or s).
# Returns the matching odor name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformOdorData <- function(key) {
  switch(key,
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
}

# Translate a one-letter "population" code into its descriptive label.
#   key: a single character code (a, c, n, s, v or y).
# Returns the matching population name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformPopulationData <- function(key) {
  switch(key,
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary" # was misspelled "soletary"
  )
}

# Translate a one-letter "habitat" code into its descriptive label.
#   key: a single character code (g, l, m, p, u, w or d).
# Returns the matching habitat name; a code that is not listed below
# yields NULL (invisibly), because the switch has no default branch.
transformHabitatData <- function(key) {
  switch(key,
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste", # was misspelled "waster"
    d = "woods"
  )
}

Applying data transformation on the mushroom dataset

# Replace the one-letter codes in every column with their descriptive labels.
# vapply() with character(1) is used instead of sapply() so that each column is
# guaranteed to stay a plain character vector: a code without a label stops
# with an error instead of silently turning the column into a list.
# USE.NAMES = FALSE keeps the original codes from being attached as element
# names of the new column.
mushroomsDB$class <- vapply(
  mushroomsDB$class, transformClassData, character(1), USE.NAMES = FALSE
)
mushroomsDB$`cap-shape` <- vapply(
  mushroomsDB$`cap-shape`, transformCapShapeData, character(1), USE.NAMES = FALSE
)
mushroomsDB$`cap-color` <- vapply(
  mushroomsDB$`cap-color`, transformCapColorData, character(1), USE.NAMES = FALSE
)
mushroomsDB$`cap-surface` <- vapply(
  mushroomsDB$`cap-surface`, transformCapSurfaceData, character(1), USE.NAMES = FALSE
)
mushroomsDB$odor <- vapply(
  mushroomsDB$odor, transformOdorData, character(1), USE.NAMES = FALSE
)
mushroomsDB$population <- vapply(
  mushroomsDB$population, transformPopulationData, character(1), USE.NAMES = FALSE
)
mushroomsDB$habitat <- vapply(
  mushroomsDB$habitat, transformHabitatData, character(1), USE.NAMES = FALSE
)

A snapshot of transformed data:

# Print the whole transformed subset.
mushroomsDB
##        class cap-shape cap-surface cap-color    odor population habitat
## 1  poisonous    convex      smooth     brown pungent  scattered   urban
## 2     edible    convex      smooth    yellow  almond   numerous grasses
## 3     edible      bell      smooth     white   anise   numerous meadows
## 4  poisonous    convex       scaly     white pungent  scattered   urban
## 5     edible    convex      smooth      gray    none   abundant grasses
## 6     edible    convex       scaly    yellow  almond   numerous grasses
## 7     edible      bell      smooth     white  almond   numerous meadows
## 8     edible      bell       scaly     white   anise  scattered meadows
## 9  poisonous    convex       scaly     white pungent    several grasses
## 10    edible      bell      smooth    yellow  almond  scattered meadows