IMPORT MUSHROOM DATASET
# Location of the UCI mushroom data: a comma-separated file with no header row.
geturl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Download and parse the file; with no header, columns get default V<n> names.
df <- read.table(geturl, sep = ",", header = FALSE)

# Preview the first six rows of the raw data.
head(df, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
CREATE THE DATASET
# Keep only the four columns of interest from the raw data:
# the first two (V1, V2) and the last two (V22, V23).
M1 <- df[1:2]
M2 <- df[22:23]

# Bind the two column subsets side by side into a single data frame.
mushroom <- cbind(M1, M2)

# Preview the first six rows of the reduced data set.
head(mushroom, n = 6L)
## V1 V2 V22 V23
## 1 p x s u
## 2 e x n g
## 3 e b n m
## 4 p x s u
## 5 e x a g
## 6 e x n g
RENAME COLUMNS
# Give the four retained columns descriptive names (in column order).
colnames(mushroom) <- c(
  "Edible/Poisonous",
  "Cap-Shape",
  "Population",
  "Habitat"
)
REPLACE ABBREVIATIONS
# Replace the single-letter codes in each column with descriptive labels.
#
# The mapping lives in one lookup table (code -> label) per column instead of
# one assignment per code. This also avoids two problems with appending to
# `levels()` and overwriting values in place:
#   * on factor columns the old codes ("e", "p", ...) stay behind as unused
#     levels and show up in `levels()`, `table()` and `summary()`;
#   * on character columns (the `read.table()` default since R 4.0.0)
#     `levels<-` merely attaches a stray "levels" attribute.

# Recode a vector of abbreviations using a named character vector.
#
# x:      factor or character vector of codes.
# lookup: named character vector; names are codes, values are labels.
#
# Returns a factor whose levels are the labels in `lookup` order. Values not
# found in `lookup` are kept unchanged (as additional levels) rather than being
# turned into NA; NA stays NA.
decode_codes <- function(x, lookup) {
  values <- as.character(x)
  known <- values %in% names(lookup)
  values[known] <- unname(lookup[values[known]])
  extra <- setdiff(unique(values[!known & !is.na(values)]), lookup)
  factor(values, levels = c(unname(lookup), extra))
}

code_labels <- list(
  "Edible/Poisonous" = c(e = "Edible", p = "Poisonous"),
  "Cap-Shape" = c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  ),
  "Population" = c(
    a = "Abundant", c = "Clustered", n = "Numerous",
    s = "Scattered", v = "Several", y = "Solitary"
  ),
  "Habitat" = c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths",
    u = "Urban", w = "Waste", d = "Woods"
  )
)

for (column in names(code_labels)) {
  mushroom[[column]] <- decode_codes(mushroom[[column]], code_labels[[column]])
}
FINAL DATASET
# Show the first six rows of the finished data set.
head(mushroom, n = 6L)
## Edible/Poisonous Cap-Shape Population Habitat
## 1 Poisonous Convex Scattered Urban
## 2 Edible Convex Numerous Grasses
## 3 Edible Bell Numerous Meadows
## 4 Poisonous Convex Scattered Urban
## 5 Edible Convex Abundant Grasses
## 6 Edible Convex Numerous Grasses