Mushrooms data is available from the following link: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data
Reference: Mushrooms Data Set
# Download the raw UCI mushroom data (the file has no header row) and save a
# local CSV copy of it as "Mushroom.csv".
theURL <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushroomsDB <- read.table(
  theURL,
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
)
write.table(mushroomsDB, file = "Mushroom.csv", sep = ",")
Use the “raw” URL of the CSV file on GitHub; otherwise the data retrieved will be wrong.
Obtain the data from the GitHub URL.
# Load the CSV copy hosted on GitHub; its first line supplies the column names.
theURL <- "https://raw.githubusercontent.com/arunk13/MSDA-Assignments/master/BridgeCourse/Week3/Mushroom.csv"
mushroomsDB <- read.table(
  file = theURL,
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
Number of rows in the dataset : 8124
Number of columns in the dataset : 23
Subsetting to a smaller data frame of 10 rows and 7 columns. The columns chosen for this analysis are: “class”, “cap-shape”, “cap-surface”, “cap-color”, “odor”, “population”, “habitat”.
# Keep the first 10 rows and columns 1-4, 6, 22 and 23, then give the retained
# columns descriptive names.
mushroomsDB <- mushroomsDB[seq_len(10), c(1:4, 6, 22, 23)]
colnames(mushroomsDB) <- c(
  "class",
  "cap-shape",
  "cap-surface",
  "cap-color",
  "odor",
  "population",
  "habitat"
)
A snapshot of the subset
# Show the first six rows of the subset.
head(mushroomsDB, n = 6L)
## class cap-shape cap-surface cap-color odor population habitat
## 1 p x s n p s u
## 2 e x s y a n g
## 3 e b s w l n m
## 4 p x y w p s u
## 5 e x s g n a g
## 6 e x y y a n g
To transform each key in the column data into its respective value, the functions below are used.
# Map a single-letter `class` code to its descriptive label.
#   key: one character string ("p" or "e").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformClassData <- function(key) {
  switch(key,
    p = "poisonous",
    e = "edible"
  )
}
# Map a single-letter `cap-shape` code to its descriptive label.
#   key: one character string (one of "b", "c", "x", "f", "k", "s").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformCapShapeData <- function(key) {
  switch(key,
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
}
# Map a single-letter `cap-surface` code to its descriptive label.
#   key: one character string (one of "f", "g", "y", "s").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformCapSurfaceData <- function(key) {
  switch(key,
    f = "fibrous",
    g = "grooves",
    y = "scaly",
    s = "smooth"
  )
}
# Map a single-letter `cap-color` code to its descriptive label.
#   key: one character string (one of "n", "b", "c", "g", "r", "p", "u",
#        "e", "w", "y").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformCapColorData <- function(key) {
  switch(key,
    n = "brown",
    b = "buff",
    c = "cinnamon",
    g = "gray",
    r = "green",
    p = "pink",
    u = "purple",
    e = "red",
    w = "white",
    y = "yellow"
  )
}
# Map a single-letter `odor` code to its descriptive label.
#   key: one character string (one of "a", "l", "c", "y", "f", "m", "n",
#        "p", "s").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformOdorData <- function(key) {
  switch(key,
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
}
# Map a single-letter `population` code to its descriptive label.
#   key: one character string (one of "a", "c", "n", "s", "v", "y").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformPopulationData <- function(key) {
  switch(key,
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary" # was misspelled "soletary"
  )
}
# Map a single-letter `habitat` code to its descriptive label.
#   key: one character string (one of "g", "l", "m", "p", "u", "w", "d").
# Returns the matching label; a string with no matching case yields NULL
# (invisibly), because the switch() has no default branch.
transformHabitatData <- function(key) {
  switch(key,
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste", # was misspelled "waster"
    d = "woods"
  )
}
# Replace the single-letter codes in every column with descriptive labels.
# vapply() with character(1) is used instead of sapply() so that each result
# must be exactly one string: a code with no matching label stops with an
# error instead of silently turning the column into a list.
mushroomsDB$class <- vapply(
  mushroomsDB$class, transformClassData, character(1)
)
mushroomsDB$`cap-shape` <- vapply(
  mushroomsDB$`cap-shape`, transformCapShapeData, character(1)
)
mushroomsDB$`cap-surface` <- vapply(
  mushroomsDB$`cap-surface`, transformCapSurfaceData, character(1)
)
mushroomsDB$`cap-color` <- vapply(
  mushroomsDB$`cap-color`, transformCapColorData, character(1)
)
mushroomsDB$odor <- vapply(
  mushroomsDB$odor, transformOdorData, character(1)
)
mushroomsDB$population <- vapply(
  mushroomsDB$population, transformPopulationData, character(1)
)
mushroomsDB$habitat <- vapply(
  mushroomsDB$habitat, transformHabitatData, character(1)
)
A snapshot of transformed data:
# Auto-print the transformed data frame.
mushroomsDB
## class cap-shape cap-surface cap-color odor population habitat
## 1 poisonous convex smooth brown pungent scattered urban
## 2 edible convex smooth yellow almond numerous grasses
## 3 edible bell smooth white anise numerous meadows
## 4 poisonous convex scaly white pungent scattered urban
## 5 edible convex smooth gray none abundant grasses
## 6 edible convex scaly yellow almond numerous grasses
## 7 edible bell smooth white almond numerous meadows
## 8 edible bell scaly white anise scattered meadows
## 9 poisonous convex scaly white pungent several grasses
## 10 edible bell smooth yellow almond scattered meadows