Mushrooms Dataset. A famous (if slightly moldy) dataset about mushrooms can be found in the UCI repository here: https://archive.ics.uci.edu/ml/datasets/Mushroom.
Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data and create a data frame with a subset of the columns (and, if you like, rows) in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data; for example, in the appropriate column, “e” might become “edible.”
# Source file in the UCI repository (comma separated, read without a header row).
mushroom_Url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushrooms_data <- read.table(mushroom_Url, header = FALSE, sep = ",")

# Keep the first ten rows and five of the columns, selected by position.
cols_indx <- c(1, 2, 6, 22, 23)
mushrooms_subset <- mushrooms_data[seq_len(10), cols_indx]
colnames(mushrooms_subset) <- c(
  "Class", "Cap Shape", "Odor", "Population", "Habitat"
)

# Snapshot of the still-abbreviated values, kept for displaying later on.
mushrooms_orig <- mushrooms_subset
# Translate mushroom class codes into readable labels.
#
# val: class code(s), "p" or "e"; character or factor, a single value or a
#      whole vector.
# Returns an unnamed character vector with one label per element of `val`.
# A code without a mapping yields NA_character_ instead of NULL, so the
# result stays a plain character vector when the function is used with
# sapply() and never turns a data frame column into a list.
Describe.Class <- function(val) {
  lookup <- c(
    p = "poisonous",
    e = "edible"
  )
  # Character indexing matches names exactly; unknown codes come back as NA.
  unname(lookup[as.character(val)])
}
# Translate cap-shape codes into readable labels.
#
# val: cap-shape code(s) ("b", "c", "x", "f", "k", "s"); character or factor,
#      a single value or a whole vector.
# Returns an unnamed character vector with one label per element of `val`.
# A code without a mapping yields NA_character_ instead of NULL, so the
# result stays a plain character vector when the function is used with
# sapply() and never turns a data frame column into a list.
Describe.CapShape <- function(val) {
  lookup <- c(
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
  # Character indexing matches names exactly; unknown codes come back as NA.
  unname(lookup[as.character(val)])
}
# Translate odor codes into readable labels.
#
# val: odor code(s) ("a", "l", "c", "y", "f", "m", "n", "p", "s"); character
#      or factor, a single value or a whole vector.
# Returns an unnamed character vector with one label per element of `val`.
# A code without a mapping yields NA_character_ instead of NULL, so the
# result stays a plain character vector when the function is used with
# sapply() and never turns a data frame column into a list.
Describe.Odor <- function(val) {
  lookup <- c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
  # Character indexing matches names exactly; unknown codes come back as NA.
  unname(lookup[as.character(val)])
}
# Translate population codes into readable labels.
#
# val: population code(s) ("a", "c", "n", "s", "v", "y"); character or
#      factor, a single value or a whole vector.
# Returns an unnamed character vector with one label per element of `val`.
# A code without a mapping yields NA_character_ instead of NULL, so the
# result stays a plain character vector when the function is used with
# sapply() and never turns a data frame column into a list.
Describe.Population <- function(val) {
  lookup <- c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
  # Character indexing matches names exactly; unknown codes come back as NA.
  unname(lookup[as.character(val)])
}
# Translate habitat codes into readable labels.
#
# val: habitat code(s) ("g", "l", "m", "p", "u", "w", "d"); character or
#      factor, a single value or a whole vector.
# Returns an unnamed character vector with one label per element of `val`.
# A code without a mapping yields NA_character_ instead of NULL, so the
# result stays a plain character vector when the function is used with
# sapply() and never turns a data frame column into a list.
Describe.Habitat <- function(val) {
  lookup <- c(
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste",
    d = "woods"
  )
  # Character indexing matches names exactly; unknown codes come back as NA.
  unname(lookup[as.character(val)])
}
# Replace the one-letter codes in every column with their descriptions.
# vapply() is used instead of sapply() so each column is guaranteed to be a
# plain character vector: if a Describe.* helper ever returns anything other
# than a single string, the script stops here instead of silently storing a
# list column. USE.NAMES = FALSE keeps the original codes from being attached
# to the result as element names.
mushrooms_subset$Class <- vapply(
  mushrooms_subset$Class, Describe.Class,
  FUN.VALUE = character(1), USE.NAMES = FALSE
)
mushrooms_subset$`Cap Shape` <- vapply(
  mushrooms_subset$`Cap Shape`, Describe.CapShape,
  FUN.VALUE = character(1), USE.NAMES = FALSE
)
mushrooms_subset$Odor <- vapply(
  mushrooms_subset$Odor, Describe.Odor,
  FUN.VALUE = character(1), USE.NAMES = FALSE
)
mushrooms_subset$Population <- vapply(
  mushrooms_subset$Population, Describe.Population,
  FUN.VALUE = character(1), USE.NAMES = FALSE
)
mushrooms_subset$Habitat <- vapply(
  mushrooms_subset$Habitat, Describe.Habitat,
  FUN.VALUE = character(1), USE.NAMES = FALSE
)
# First six rows of the untouched subset (abbreviated codes).
head(mushrooms_orig, n = 6L)
##   Class Cap Shape Odor Population Habitat
## 1     p         x    p          s       u
## 2     e         x    a          n       g
## 3     e         b    l          n       m
## 4     p         x    p          s       u
## 5     e         x    n          a       g
## 6     e         x    a          n       g
# First six rows after the codes were replaced by descriptions.
head(mushrooms_subset, n = 6L)
##       Class Cap Shape    Odor Population Habitat
## 1 poisonous    convex pungent  scattered   urban
## 2    edible    convex  almond   numerous grasses
## 3    edible      bell   anise   numerous meadows
## 4 poisonous    convex pungent  scattered   urban
## 5    edible    convex    none   abundant grasses
## 6    edible    convex  almond   numerous grasses
# Save the untransformed (abbreviated) subset as CSV in the working directory.
write.csv(x = mushrooms_orig, file = "Mushroom Subset Original.csv")
# Save the subset with descriptive values as CSV in the working directory.
write.csv(x = mushrooms_subset, file = "Mushroom Subset Transformed.csv")
This solution uses the transform methods found at https://rpubs.com/arun_infy13/93236, with the following minor modifications: