# Load the mushroom data: comma-separated with no header row, so read.table()
# auto-names the columns V1, V2, ...
# stringsAsFactors = TRUE keeps every column a factor. Since R 4.0 the default
# is FALSE, which would yield character columns and break the factor-based
# summary()/str() output and the `levels<-` recoding further down.
mushroom <- read.table(
  "https://raw.githubusercontent.com/AlainKuiete/DATA607/master/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)
# Looking at the head of the mushroom dataset
# First six rows; six is head()'s default, written out explicitly.
head(mushroom, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# The tail of the dataset
# Last six rows; six is tail()'s default, written out explicitly.
tail(mushroom, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
## 8119 p k y n f f f c n b t ? k s p w p w o
## 8120 e k s n f n a c b y e ? s s o o p o o
## 8121 e x s n f n a c b y e ? s s o o p n o
## 8122 e f s n f n a c b n e ? s s o o p o o
## 8123 p k y n f y f c n b t ? s k w w p w o
## 8124 e x s n f n a c b y e ? s s o o p o o
## V20 V21 V22 V23
## 8119 e w v d
## 8120 p b c l
## 8121 p b v l
## 8122 p b c l
## 8123 e w v l
## 8124 p o c l
# The summary of the dataset
# Per-column summary of the raw data.
summary(object = mushroom)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
# The structure of the mushroom dataset
# Compact display of each column's type and first values.
str(object = mushroom)
## 'data.frame': 8124 obs. of 23 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# Build a smaller data frame from six of the raw columns (V1, V2, V4, V6,
# V20, V22), giving each a descriptive name.
# stringsAsFactors = TRUE guarantees factor columns even when `mushroom` holds
# character columns (data.frame() defaults to FALSE since R 4.0); the
# `levels<-` recoding applied to `md` later requires factors.
# NOTE: `cap.chape` (sic) is kept as-is because later code refers to
# md$cap.chape.
md <- data.frame(
  is.consm = mushroom$V1,
  cap.chape = mushroom$V2,
  cap.color = mushroom$V4,
  odor = mushroom$V6,
  ring.type = mushroom$V20,
  population = mushroom$V22,
  stringsAsFactors = TRUE
)
str(md)
## 'data.frame': 8124 obs. of 6 variables:
## $ is.consm : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.chape : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap.color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ ring.type : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ population: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
# Level counts for the selected columns, still using single-letter codes.
summary(object = md)
## is.consm cap.chape cap.color odor ring.type population
## e:4208 b: 452 n :2284 n :3528 e:2776 a: 384
## p:3916 c: 4 g :1840 f :2160 f: 48 c: 340
## f:3152 e :1500 s : 576 l:1296 n: 400
## k: 828 y :1072 y : 576 n: 36 s:1248
## s: 32 w :1040 a : 400 p:3968 v:4040
## x:3656 b : 168 l : 400 y:1712
## (Other): 220 (Other): 484
# Replace the single-letter codes with readable labels.
# Each entry maps new label -> old code for one column of `md`. Assigning a
# named list through `levels<-` renames the matching levels and orders them as
# listed; labels whose code never occurs in the data become empty levels.
level_maps <- list(
  is.consm = list(edible = "e", poisonous = "p"),
  cap.chape = list(
    bell = "b", conical = "c", convex = "x",
    flat = "f", knobbed = "k", sunken = "s"
  ),
  cap.color = list(
    brown = "n", buff = "b", cinnamon = "c", gray = "g", green = "r",
    pink = "p", purple = "u", red = "e", white = "w", yellow = "y"
  ),
  odor = list(
    almond = "a", anise = "l", creosote = "c", fishy = "y", foul = "f",
    musty = "m", none = "n", pungent = "p", spicy = "s"
  ),
  ring.type = list(
    cobwebby = "c", evanescent = "e", flaring = "f", large = "l",
    none = "n", pendant = "p", sheathing = "s", zone = "z"
  ),
  population = list(
    abundant = "a", clustered = "c", numerous = "n",
    scattered = "s", several = "v", solitary = "y"
  )
)
for (column in names(level_maps)) {
  levels(md[[column]]) <- level_maps[[column]]
}
str(md)
## 'data.frame': 8124 obs. of 6 variables:
## $ is.consm : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.chape : Factor w/ 6 levels "bell","conical",..: 3 3 1 3 3 3 1 1 3 1 ...
## $ cap.color : Factor w/ 10 levels "brown","buff",..: 1 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "almond","anise",..: 8 1 2 8 7 1 1 2 8 1 ...
## $ ring.type : Factor w/ 8 levels "cobwebby","evanescent",..: 6 6 6 6 2 6 6 6 6 6 ...
## $ population: Factor w/ 6 levels "abundant","clustered",..: 4 3 3 4 1 3 3 4 5 4 ...
# Level counts again, now that the levels carry descriptive labels.
summary(object = md)
## is.consm cap.chape cap.color odor
## edible :4208 bell : 452 brown :2284 none :3528
## poisonous:3916 conical: 4 gray :1840 foul :2160
## convex :3656 red :1500 fishy : 576
## flat :3152 yellow :1072 spicy : 576
## knobbed: 828 white :1040 almond : 400
## sunken : 32 buff : 168 anise : 400
## (Other): 220 (Other): 484
## ring.type population
## pendant :3968 abundant : 384
## evanescent:2776 clustered: 340
## large :1296 numerous : 400
## flaring : 48 scattered:1248
## none : 36 several :4040
## cobwebby : 0 solitary :1712
## (Other) : 0
library(ggplot2)
# Bar chart of the number of mushrooms per cap colour, with red bar outlines.
# qplot() is deprecated as of ggplot2 3.4.0, so the same plot is built with
# ggplot() + geom_bar(), which is what qplot() picks for a discrete x.
ggplot(md, aes(x = cap.color)) +
  geom_bar(colour = "red") +
  labs(
    title = "Mushroom Cap color Attribute",
    x = "Cap color",
    y = "Count"
  )