# Location of the raw mushroom data: comma-separated values, no header row.
fileUrl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Optional: keep a local copy of the dataset instead of reading from the URL,
# then confirm that the file was written.
# download.file(fileUrl, destfile = "Mushrooms.csv")
# file.exists("Mushrooms.csv")
# Preview the first six raw rows. Because the file has no header row, the
# columns receive the default names V1, V2, ... (comma is read.csv's default
# separator, so it is not spelled out here).
head(read.csv(fileUrl, header = FALSE), n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
library(dplyr)
# Build `shrooms`: keep five of the raw columns, give them descriptive names,
# and expand their single-letter codes into readable labels.
#
# Raw column -> new name:
#   V1 -> Edible, V2 -> CapShape, V3 -> CapSurface, V6 -> Odor,
#   V22 -> Population
#
# NOTE(review): the poisonous label used to be misspelled "poinsonous". Any
# code elsewhere that compares against the old spelling must be updated.
shrooms <-
  read.csv(fileUrl, header = FALSE) %>%
  # select() renames while it subsets, so the columns that are dropped never
  # need descriptive names of their own.
  select(
    Edible = V1,
    CapShape = V2,
    CapSurface = V3,
    Odor = V6,
    Population = V22
  ) %>%
  # recode() replaces each listed code with its label; codes that are not
  # listed are left unchanged.
  mutate(
    Edible = recode(Edible,
      e = "edible",
      p = "poisonous"
    ),
    CapShape = recode(CapShape,
      b = "bell",
      c = "conical",
      x = "convex",
      f = "flat",
      k = "knobbed",
      s = "sunken"
    ),
    CapSurface = recode(CapSurface,
      f = "fibrous",
      g = "grooves",
      y = "scaly",
      s = "smooth"
    ),
    Odor = recode(Odor,
      a = "almond",
      l = "anise",
      c = "creosote",
      y = "fishy",
      f = "foul",
      m = "musty",
      n = "none",
      p = "pungent",
      s = "spicy"
    ),
    Population = recode(Population,
      a = "abundant",
      c = "clustered",
      n = "numerous",
      s = "scattered",
      v = "several",
      y = "solitary"
    )
  )
# Show the first rows of the cleaned data.
head(shrooms)
##      Edible CapShape CapSurface    Odor Population
## 1 poisonous   convex     smooth pungent  scattered
## 2    edible   convex     smooth  almond   numerous
## 3    edible     bell     smooth   anise   numerous
## 4 poisonous   convex      scaly pungent  scattered
## 5    edible   convex     smooth    none   abundant
## 6    edible   convex      scaly  almond   numerous