# Load the required library
library(plyr)
# Load the data
# Read the raw mushroom records from the UCI repository. The file has no
# header row, so columns arrive as V1, V2, ... and are named in a later step.
# Every field is a single-letter category code; stringsAsFactors = TRUE is
# passed explicitly because the default became FALSE in R 4.0.0 and the
# relabelling, summary() counts and bar plots further down all need factors.
mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(mushroom)
# Rename the column headers
# Name the 23 columns: the class label followed by the 22 attributes.
# The names are hyphenated consistently here; data.frame() below rewrites
# them into syntactic names with dots (e.g. "cap-shape" becomes "cap.shape"),
# which is the form the rest of the script refers to.
colnames(mushroom) <- c(
  "class", "cap-shape", "cap-surface", "cap-color", "bruises", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root",
  "stalk-surface-above-ring", "stalk-surface-below-ring",
  "stalk-color-above-ring", "stalk-color-below-ring",
  "veil-type", "veil-color", "ring-number", "ring-type",
  "spore-print-color", "population", "habitats"
)
mush <- data.frame(mushroom)
head(mush)
# Select a subset of the columns
# Keep only the five columns used in the rest of the analysis. The names are
# the dotted versions that data.frame() produced from the hyphenated headers.
# drop = FALSE guarantees the result stays a data frame.
mush <- mush[
  ,
  c("class", "cap.shape", "cap.color", "odor", "habitats"),
  drop = FALSE
]
head(mush)
# Make the data more readable by replacing the abbreviated codes with full names
# Replace the single-letter codes with readable labels.
# Each code is paired explicitly with its label through factor(levels, labels)
# instead of overwriting levels() by position. The positional form only works
# when the column is already a factor and its codes happen to sort into the
# expected order; the explicit form also accepts character columns (the
# read.csv default since R 4.0.0) and cannot silently mislabel a category.
# Codes are listed alphabetically, so the resulting level order is unchanged.
mush$class <- factor(
  mush$class,
  levels = c("e", "p"),
  labels = c("edible", "poisonous")
)
levels(mush$class)
## [1] "edible" "poisonous"
mush$cap.shape <- factor(
  mush$cap.shape,
  levels = c("b", "c", "f", "k", "s", "x"),
  labels = c("bell", "conical", "flat", "knobbed", "sunken", "convex")
)
levels(mush$cap.shape)
## [1] "bell" "conical" "flat" "knobbed" "sunken" "convex"
mush$cap.color <- factor(
  mush$cap.color,
  levels = c("b", "c", "e", "g", "n", "p", "r", "u", "w", "y"),
  labels = c(
    "buff", "cinnamon", "red", "gray", "brown",
    "pink", "green", "purple", "white", "yellow"
  )
)
levels(mush$cap.color)
## [1] "buff" "cinnamon" "red" "gray" "brown" "pink"
## [7] "green" "purple" "white" "yellow"
mush$odor <- factor(
  mush$odor,
  levels = c("a", "c", "f", "l", "m", "n", "p", "s", "y"),
  labels = c(
    "almond", "creosote", "foul", "anise", "musty",
    "none", "pungent", "spicy", "fishy"
  )
)
levels(mush$odor)
## [1] "almond" "creosote" "foul" "anise" "musty" "none"
## [7] "pungent" "spicy" "fishy"
mush$habitats <- factor(
  mush$habitats,
  levels = c("d", "g", "l", "m", "p", "u", "w"),
  labels = c(
    "woods", "grasses", "leaves", "meadows", "paths", "urban", "waste"
  )
)
levels(mush$habitats)
## [1] "woods" "grasses" "leaves" "meadows" "paths" "urban" "waste"
head(mush)
# Subset the data by class
# Keep only the rows whose class is "edible".
# The condition must use `==`. With a single `=`, subset() received an unused
# argument named `class`, applied no filter, and returned all 8124 rows.
edible <- subset(mush, class == "edible")
dim(edible)
## [1] 4208 5
# Summarise the edible rows only. The output captured here earlier described
# the unfiltered data and has been removed; after the fix the class column
# should report edible: 4208 and poisonous: 0 (the unused level is kept).
summary(edible)
# Keep only the rows whose class is "poisonous".
# The condition must use `==`. With a single `=`, subset() received an unused
# argument named `class`, applied no filter, and returned all 8124 rows.
poisonous <- subset(mush, class == "poisonous")
dim(poisonous)
## [1] 3916 5
# Summarise the poisonous rows only. The output captured here earlier
# described the unfiltered data and has been removed; after the fix the class
# column should report edible: 0 and poisonous: 3916 (the unused level is
# kept).
summary(poisonous)
# Plot the habitat counts for each class
# Bar charts of habitat counts, one per class. Plotting a factor draws one
# bar per level; las = 3 turns the axis labels vertical so the habitat names
# do not overlap. Each chart carries a title because the two are drawn back
# to back and are otherwise indistinguishable.
plot(edible$habitats, las = 3, main = "Habitats of edible mushrooms")

plot(poisonous$habitats, las = 3, main = "Habitats of poisonous mushrooms")
