Let's begin by pulling in the data from the UCI Repository and assigning column names.
# Download the UCI mushroom data set and label its 23 columns.
# The raw file has no header row: without header = FALSE, read.csv() would
# treat the first mushroom record as column names and silently drop it.
mushdata <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"),
  header = FALSE
)
mushdf <- as.data.frame(mushdata)
# Column 1 is the edible/poisonous class; the remaining 22 are attributes.
names(mushdf) <- c(
  "Edible", "CapShape", "CapSurface", "CapColor", "Bruises", "Odor",
  "GillAttachment", "GillSpacing", "GillSize", "GillColor", "StalkShape",
  "StalkRoot", "StalkSurfaceAR", "StalkSurfaceBR", "StalkColorAR",
  "StalkColorBR", "VeilType", "VeilColor", "RingNumber", "RingType",
  "SporePrintColor", "population", "habitat"
)
head(mushdf)
## Edible CapShape CapSurface CapColor Bruises Odor GillAttachment
## 1 e x s y t a f
## 2 e b s w t l f
## 3 p x y w t p f
## 4 e x s g f n f
## 5 e x y y t a f
## 6 e b s w t a f
## GillSpacing GillSize GillColor StalkShape StalkRoot StalkSurfaceAR
## 1 c b k e c s
## 2 c b n e c s
## 3 c n n e e s
## 4 w b k t e s
## 5 c b n e c s
## 6 c b g e c s
## StalkSurfaceBR StalkColorAR StalkColorBR VeilType VeilColor RingNumber
## 1 s w w p w o
## 2 s w w p w o
## 3 s w w p w o
## 4 s w w p w o
## 5 s w w p w o
## 6 s w w p w o
## RingType SporePrintColor population habitat
## 1 p n n g
## 2 p n n m
## 3 p k s u
## 4 e n a g
## 5 p k n g
## 6 p k n m
Create a subset of the data frame that keeps the column indicating whether each mushroom is edible, along with a few others.
# Keep only the edibility label plus four descriptive attributes.
# drop = FALSE guarantees the result stays a data frame.
submush <- mushdf[
  ,
  c("Edible", "CapShape", "StalkShape", "GillAttachment", "RingNumber"),
  drop = FALSE
]
head(submush)
## Edible CapShape StalkShape GillAttachment RingNumber
## 1 e x e f o
## 2 e b e f o
## 3 p x e f o
## 4 e x t f o
## 5 e x e f o
## 6 e b e f o
Replace abbreviations with their full text.
library(magrittr)
# Expand the single-letter codes in each selected column to readable labels.
# Each column has its own code -> label table. Codes are matched as whole
# values (anchored regex), so cells holding anything else are left untouched.
code_labels <- list(
  Edible = c(e = "Edible", p = "Poisonous"),
  CapShape = c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  ),
  StalkShape = c(e = "Enlarging", t = "Tapering"),
  GillAttachment = c(
    a = "Attached", d = "Descending", f = "Free", n = "Notched"
  ),
  RingNumber = c(n = "None", o = "One", t = "Two")
)
for (column in names(code_labels)) {
  label_map <- code_labels[[column]]
  # Apply the substitutions one code at a time, in the listed order.
  for (code in names(label_map)) {
    pattern <- paste0("^[", code, "]$")
    submush[[column]] <- gsub(pattern, label_map[[code]], submush[[column]])
  }
}
head(submush)
## Edible CapShape StalkShape GillAttachment RingNumber
## 1 Edible Convex Enlarging Free One
## 2 Edible Bell Enlarging Free One
## 3 Poisonous Convex Enlarging Free One
## 4 Edible Convex Tapering Free One
## 5 Edible Convex Enlarging Free One
## 6 Edible Bell Enlarging Free One