Load the mushroom dataset
# Source file: comma-separated and without a header row, so read.table()
# assigns the default column names V1, V2, ...
data <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushroom <- read.table(data, sep = ",", header = FALSE)
Dimensions of the dataset
# Number of rows followed by number of columns.
c(nrow(mushroom), ncol(mushroom))
## [1] 8124 23
Column names
Subset the data to keep all rows and 5 columns: class (V1), odor (V6), stalk-shape (V11), ring-type (V20), and population (V22).
# Keep every row but only the five columns of interest, then list the
# names of the columns that were kept.
subset <- mushroom[c("V1", "V6", "V11", "V20", "V22")]
colnames(subset)
## [1] "V1" "V6" "V11" "V20" "V22"
# Preview the first six rows of the reduced data frame.
head(subset, n = 6L)
## V1 V6 V11 V20 V22
## 1 p p e p s
## 2 e a e p n
## 3 e l e p n
## 4 p p e p s
## 5 e n t e a
## 6 e a e p n
Rename the columns
# Give the five columns descriptive names (positional, in column order),
# then preview the first six rows to confirm the new header.
colnames(subset) <- c(
  "Class",
  "Odor",
  "Stalk_Shape",
  "Ring_type",
  "Population"
)
head(subset, n = 6L)
## Class Odor Stalk_Shape Ring_type Population
## 1 p p e p s
## 2 e a e p n
## 3 e l e p n
## 4 p p e p s
## 5 e n t e a
## 6 e a e p n
Replace the abbreviations with more detailed descriptions
Class: edible=e, poisonous=p
Odor: almond=a, anise=l, creosote=c, fishy=y, foul=f, musty=m, none=n, pungent=p, spicy=s
Stalk-Shape: enlarging=e, tapering=t
Ring-type: cobwebby=c, evanescent=e, flaring=f, large=l, none=n, pendant=p, sheathing=s, zone=z
Population: abundant=a, clustered=c, numerous=n, scattered=s, several=v, solitary=y
# Replace the single-letter codes in each column of `subset` with
# descriptive labels.
#
# `code_labels` holds one named character vector per column, mapping
# code -> label. Keeping the legend in one table avoids repeating an
# assignment per code and keeps the mapping easy to audit.
code_labels <- list(
  Class = c(e = "edible", p = "poisonous"),
  Odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  Stalk_Shape = c(e = "enlarging", t = "tapering"),
  Ring_type = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  ),
  Population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  )
)

# Rebuild each column as a factor whose levels are the descriptive labels.
# This works whether read.table() produced factors (the default before
# R 4.0) or character vectors (the default since R 4.0), and it leaves no
# obsolete single-letter levels behind.
subset[names(code_labels)] <- Map(
  function(codes, key) {
    codes <- as.character(codes)
    # Codes that are missing from the legend are kept unchanged as their
    # own levels instead of silently turning into NA.
    unmapped <- setdiff(unique(codes[!is.na(codes)]), names(key))
    factor(
      codes,
      levels = c(names(key), unmapped),
      labels = c(unname(key), unmapped)
    )
  },
  subset[names(code_labels)],
  code_labels
)