Mushroom dataset: https://archive.ics.uci.edu/ml/datasets/Mushroom. Mushroom records drawn from The Audubon Society Field Guide to North American Mushrooms (1981), G. H. Lincoff (Pres.). Study the dataset and the associated description of the data (i.e. the “data dictionary”). Take the data and create a data frame with a subset of the columns in the dataset. Add meaningful column names and replace the abbreviations used in the data. The deliverable is the R code that performs these transformation tasks.

Load the mushroom dataset

# Download the raw UCI mushroom data (comma-separated, no header row, so the
# columns get the default names V1, V2, ...).
# The values are single-letter codes, so every column is read as character.
# Making this explicit keeps the result identical across R versions: before
# R 4.0.0 read.table() turned strings into factors by default, afterwards it
# does not.
data <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushroom <- read.table(
  file = data,
  header = FALSE,
  sep = ",",
  colClasses = "character"
)

Get general information about the mushroom dataset

# Preview the first six rows of the raw data.
head(mushroom, n = 6L)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Dimensions of the dataset

# Size of the raw data: number of rows, then number of columns.
c(nrow(mushroom), ncol(mushroom))
## [1] 8124   23

column name

Subset the data to keep 5 columns, namely class (V1), odor (V6), stalk-shape (V11), ring-type (V20) and population (V22), together with all rows.

# Keep every row but only the five columns of interest.
subset <- mushroom[c("V1", "V6", "V11", "V20", "V22")]
colnames(subset)
## [1] "V1"  "V6"  "V11" "V20" "V22"
head(subset, n = 6L)
##   V1 V6 V11 V20 V22
## 1  p  p   e   p   s
## 2  e  a   e   p   n
## 3  e  l   e   p   n
## 4  p  p   e   p   s
## 5  e  n   t   e   a
## 6  e  a   e   p   n

Rename the columns

# Replace the default names V1, V6, V11, V20, V22 (in that order) with
# descriptive ones.
subset <- setNames(
  subset,
  c("Class", "Odor", "Stalk_Shape", "Ring_type", "Population")
)
head(subset, n = 6L)
##   Class Odor Stalk_Shape Ring_type Population
## 1     p    p           e         p          s
## 2     e    a           e         p          n
## 3     e    l           e         p          n
## 4     p    p           e         p          s
## 5     e    n           t         e          a
## 6     e    a           e         p          n

Replace the abbreviations with more detailed descriptions

Class: edible=e, poisonous=p

Odor: almond=a,anise=l,creosote=c,fishy=y,foul=f, musty=m,none=n,pungent=p,spicy=s

Stalk-Shape: enlarging=e,tapering=t

Ring-type: cobwebby=c,evanescent=e,flaring=f,large=l, none=n,pendant=p,sheathing=s,zone=z

Population: abundant=a,clustered=c,numerous=n, scattered=s,several=v,solitary=y

# Expand the single-letter codes of one column into descriptive labels.
#
# codes:      vector of abbreviations (character or factor).
# dictionary: named character vector; names are the abbreviations, values
#             are the descriptive labels.
#
# Returns a factor whose levels are the dictionary labels, in dictionary
# order. A value that is not a known abbreviation is kept unchanged (and
# appended as an extra level) instead of being turned into NA, so running
# the recoding on already-expanded data is harmless.
expand_codes <- function(codes, dictionary) {
  codes <- as.character(codes)
  expanded <- unname(dictionary)[match(codes, names(dictionary))]

  # match() yields NA for values outside the dictionary; fall back to the
  # original value there (a genuinely missing input stays NA).
  unmapped <- is.na(expanded)
  expanded[unmapped] <- codes[unmapped]

  factor(
    expanded,
    levels = union(unname(dictionary), expanded[!is.na(expanded)])
  )
}

# Abbreviation -> description for every selected column, taken from the data
# dictionary listed above. The list names must match the column names of
# `subset`.
code_dictionary <- list(
  Class = c(e = "edible", p = "poisonous"),
  Odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  Stalk_Shape = c(e = "enlarging", t = "tapering"),
  Ring_type = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  ),
  Population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  )
)

# Recode each column with its own dictionary. Map() pairs the columns and
# dictionaries by position, so both are selected in the same order.
subset[names(code_dictionary)] <- Map(
  expand_codes,
  subset[names(code_dictionary)],
  code_dictionary
)

Dataset after transformation

# Preview the first six rows of the transformed data.
head(subset, n = 6L)
##       Class    Odor Stalk_Shape  Ring_type Population
## 1 poisonous pungent   enlarging    pendant  scattered
## 2    edible  almond   enlarging    pendant   numerous
## 3    edible   anise   enlarging    pendant   numerous
## 4 poisonous pungent   enlarging    pendant  scattered
## 5    edible    none    tapering evanescent   abundant
## 6    edible  almond   enlarging    pendant   numerous