R Markdown

IMPORT MUSHROOM DATASET

# Location of the raw mushroom data: comma-separated, no header row.
geturl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Read every column as a factor explicitly. The recoding steps further down
# work on factor levels, and the read.table() default for `stringsAsFactors`
# changed to FALSE in R 4.0.0, so relying on the default would make the column
# type depend on the R version in use.
df <- read.table(
  file = geturl,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Preview the first rows; columns get the automatic names V1, V2, ...
head(df)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

CREATE THE WORKING DATASET (SUBSET OF COLUMNS)

# Keep only two pairs of columns from the raw data: the first two (V1, V2)
# and the last two (V22, V23).
M1 <- df[1:2]
M2 <- df[22:23]

# Place the two pairs side by side in a single data frame, keeping the
# original column names untouched.
mushroom <- data.frame(M1, M2, check.names = FALSE)

# Preview the first rows of the reduced data set.
head(mushroom)
##   V1 V2 V22 V23
## 1  p  x   s   u
## 2  e  x   n   g
## 3  e  b   n   m
## 4  p  x   s   u
## 5  e  x   a   g
## 6  e  x   n   g

RENAME COLUMNS

# Give the four retained columns descriptive names. The names are
# non-syntactic (they contain "/" and "-"), so later code has to refer to them
# with backticks.
colnames(mushroom) <- c(
  "Edible/Poisonous",
  "Cap-Shape",
  "Population",
  "Habitat"
)

REPLACE ABBREVIATIONS

# Translate one-letter codes into descriptive labels.
#
# Arguments:
#   x      - factor or character vector holding the codes.
#   lookup - named character vector; names are the codes, values the labels.
#
# Returns a factor whose levels are the labels in `lookup` order. Values that
# have no entry in `lookup` are kept as they are (appended as extra levels)
# rather than being turned into NA; NA stays NA.
#
# Building a fresh factor, instead of appending to levels() and assigning,
# avoids leaving the old one-letter codes behind as unused levels and gives the
# same result whether the column was read as a factor or as character.
recode_levels <- function(x, lookup) {
  values <- as.character(x)
  known <- values %in% names(lookup)
  values[known] <- unname(lookup[values[known]])
  extra <- setdiff(unique(values[!is.na(values)]), lookup)
  factor(values, levels = c(unname(lookup), extra))
}

mushroom$`Edible/Poisonous` <- recode_levels(
  mushroom$`Edible/Poisonous`,
  c(e = "Edible", p = "Poisonous")
)

mushroom$`Cap-Shape` <- recode_levels(
  mushroom$`Cap-Shape`,
  c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  )
)

mushroom$`Population` <- recode_levels(
  mushroom$`Population`,
  c(
    a = "Abundant", c = "Clustered", n = "Numerous",
    s = "Scattered", v = "Several", y = "Solitary"
  )
)

mushroom$`Habitat` <- recode_levels(
  mushroom$`Habitat`,
  c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths",
    u = "Urban", w = "Waste", d = "Woods"
  )
)

FINAL DATASET

# Show the first six rows of the cleaned data set to confirm the new column
# names and labels.
head(mushroom, n = 6L)
##   Edible/Poisonous Cap-Shape Population Habitat
## 1        Poisonous    Convex  Scattered   Urban
## 2           Edible    Convex   Numerous Grasses
## 3           Edible      Bell   Numerous Meadows
## 4        Poisonous    Convex  Scattered   Urban
## 5           Edible    Convex   Abundant Grasses
## 6           Edible    Convex   Numerous Grasses