library(RCurl)
## Loading required package: bitops
library(ggplot2)
Get the data from UCI:
# Download the UCI mushroom data set (no header row, comma-separated).
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# strings to factors by default in R 4.0.0; the per-level counts printed by
# summary() and the levels() recoding further down both need factor columns.
Mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
Dimensions of Mushroom:
# Number of rows (observations) and columns (attributes) in the raw data.
c(nrow(Mushroom), ncol(Mushroom))
## [1] 8124 23
Summary of Mushroom:
# Per-column overview of all 23 raw attributes.
summary(object = Mushroom)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
Selecting the required data (classes, cap-shape, cap-color, odor, population, habitat):
# Keep only columns 1, 2, 4, 6, 22 and 23 of the raw data.
Mushroom_data <- Mushroom[c(1L, 2L, 4L, 6L, 22L, 23L)]
Summary of Mushroom_data:
# Per-column overview of the six selected attributes.
summary(object = Mushroom_data)
## V1 V2 V4 V6 V22 V23
## e:4208 b: 452 n :2284 n :3528 a: 384 d:3148
## p:3916 c: 4 g :1840 f :2160 c: 340 g:2148
## f:3152 e :1500 s : 576 n: 400 l: 832
## k: 828 y :1072 y : 576 s:1248 m: 292
## s: 32 w :1040 a : 400 v:4040 p:1144
## x:3656 b : 168 l : 400 y:1712 u: 368
## (Other): 220 (Other): 484 w: 192
Adding meaningful names to the columns:
# Build Mushroom_d: the six selected attributes with descriptive column names
# and with the single-letter codes replaced by readable labels.
Mushroom_d <- Mushroom[, c(1, 2, 4, 6, 22, 23)]
colnames(Mushroom_d) <- c(
  "classes", "cap_shape", "cap_color", "odor", "population", "habitat"
)

# Code -> label lookup, one named character vector per column.
level_labels <- list(
  classes = c(e = "Edible", p = "Poisonous"),
  cap_shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  population = c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Rename the levels of `x` according to `labels` (names = codes, values =
# labels). `x` is coerced to a factor first: on R >= 4.0 read.csv() yields
# character columns by default, for which levels() is NULL and a
# levels()-based rename would silently do nothing. Level order is kept, and
# codes without an entry in `labels` are left unchanged.
relabel_levels <- function(x, labels) {
  x <- as.factor(x)
  codes <- levels(x)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  levels(x) <- codes
  x
}

recoded_cols <- names(level_labels)
Mushroom_d[recoded_cols] <- Map(
  relabel_levels, Mushroom_d[recoded_cols], level_labels
)

head(Mushroom_d)
## classes cap_shape cap_color odor population habitat
## 1 Poisonous convex brown pungent scattered urban
## 2 Edible convex yellow almond numerous grasses
## 3 Edible bell white anise numerous meadows
## 4 Poisonous convex white pungent scattered urban
## 5 Edible convex gray none abundant grasses
## 6 Edible convex yellow almond numerous grasses
Summary of Mushroom_d after renaming:
# Per-column overview of the renamed and recoded data.
summary(object = Mushroom_d)
## classes cap_shape cap_color odor
## Edible :4208 bell : 452 brown :2284 none :3528
## Poisonous:3916 conical: 4 gray :1840 foul :2160
## flat :3152 red :1500 spicy : 576
## knobbed: 828 yellow :1072 fishy : 576
## sunken : 32 white :1040 almond : 400
## convex :3656 buff : 168 anise : 400
## (Other): 220 (Other): 484
## population habitat
## abundant : 384 woods :3148
## clustered: 340 grasses:2148
## numerous : 400 leaves : 832
## scattered:1248 meadows: 292
## several :4040 paths :1144
## solitary :1712 urban : 368
## waste : 192
Plot population distribution:
# Bar chart of observation counts per population type, stacked and coloured
# by class. Written with ggplot() + geom_bar() because qplot() is deprecated
# as of ggplot2 3.4.0; for a discrete x with no y, qplot() drew this same
# bar geom.
ggplot(Mushroom_d, aes(x = population, fill = classes)) +
  geom_bar()