Mushrooms Dataset. A famous—if slightly moldy—dataset about mushrooms can be found in the UCI repository here: https://archive.ics.uci.edu/ml/datasets/Mushroom
#mushroomsData <- read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
#myURL<-"https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
#myDataURL <- getURL(myURL)
#mushroomsData <- read.csv(text = myDataURL)
# Load the copy of the dataset stored in the current working directory.
myWorkingDir <- getwd()
mySourceFile <- file.path(myWorkingDir, "agaricus-lepiota.data.txt")
# header = FALSE: agaricus-lepiota.data has no header row, its first line is
# already a mushroom record. Reading it with header = TRUE would silently turn
# that record into column names and drop it from the data.
localMushroomsData <- read.csv(file = mySourceFile, header = FALSE, sep = ",")
# Download the GitHub-hosted copy of the dataset as one text blob and parse it.
myGitHubURL <- "https://raw.githubusercontent.com/destination4debabrata/CUNY-Assignments/master/DATA%20607%2002%5B15961%5D/Week%201%20Assignment%20%5BJan%2028%20-%20Feb%2003%5D/agaricus-lepiota.data.txt"
myGitHubDataURL <- RCurl::getURL(myGitHubURL)
# header = FALSE: the file has no header row. With read.csv's default
# (header = TRUE) the first record "p,x,s,n,t,p,..." became the column names
# (p, x, s, n, t, p.1, ...) and was lost from the data.
# NOTE: output shown elsewhere in this file that was captured before this fix
# differs by that one record (e.g. 3915 poisonous instead of 3916).
mushroomsData <- read.csv(text = myGitHubDataURL, header = FALSE)
# Expected first ten rows after the fix (whitespace collapsed):
head(mushroomsData, n = 10)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23
## 1 p x s n t p f c n k e e s s w w p w o p k s u
## 2 e x s y t a f c b k e c s s w w p w o p n n g
## 3 e b s w t l f c b n e c s s w w p w o p n n m
## 4 p x y w t p f c n n e e s s w w p w o p k s u
## 5 e x s g f n f w b k t e s s w w p w o e n a g
## 6 e x y y t a f c b n e c s s w w p w o p k n g
## 7 e b s w t a f c b g e c s s w w p w o p k n m
## 8 e b y w t l f c b n e c s s w w p w o p n s m
## 9 p x y w t p f c n p e e s s w w p w o p k v g
## 10 e b s y t a f c b g e c s s w w p w o p k s m
Attribute names and value codes are documented here: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names
# Give the 23 columns descriptive names, listed in file order.
# NOTE(review): "StalkColorBeloRing" looks like a typo for
# "StalkColorBelowRing"; it is kept unchanged here because other code may
# already refer to the column by this spelling.
names(mushroomsData) <- c(
  "Classes",
  "CapShape", "CapSurface", "CapColor",
  "Bruises", "Odor",
  "GillAttachment", "GillSpacing", "GillSize", "GillColor",
  "StalkShape", "Stalkroot",
  "StalkSurfaceAboveRing", "StalkSurfaceBelowRing",
  "StalkColorAboveRing", "StalkColorBeloRing",
  "VeilType", "VeilColor",
  "RingNumber", "RingType",
  "SporePrintColor", "Population", "Habitat"
)
# Preview the first ten records under the new names.
head(mushroomsData, n = 10)
## Classes CapShape CapSurface CapColor Bruises Odor GillAttachment
## 1 e x s y t a f
## 2 e b s w t l f
## 3 p x y w t p f
## 4 e x s g f n f
## 5 e x y y t a f
## 6 e b s w t a f
## 7 e b y w t l f
## 8 p x y w t p f
## 9 e b s y t a f
## 10 e x y y t l f
## GillSpacing GillSize GillColor StalkShape Stalkroot
## 1 c b k e c
## 2 c b n e c
## 3 c n n e e
## 4 w b k t e
## 5 c b n e c
## 6 c b g e c
## 7 c b n e c
## 8 c n p e e
## 9 c b g e c
## 10 c b g e c
## StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## 7 s s w
## 8 s s w
## 9 s s w
## 10 s s w
## StalkColorBeloRing VeilType VeilColor RingNumber RingType
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o e
## 5 w p w o p
## 6 w p w o p
## 7 w p w o p
## 8 w p w o p
## 9 w p w o p
## 10 w p w o p
## SporePrintColor Population Habitat
## 1 n n g
## 2 n n m
## 3 k s u
## 4 n a g
## 5 k n g
## 6 k n m
## 7 n s m
## 8 k v g
## 9 k s m
## 10 n n g
# List the column names now attached to the data frame.
names(mushroomsData)
## [1] "Classes" "CapShape"
## [3] "CapSurface" "CapColor"
## [5] "Bruises" "Odor"
## [7] "GillAttachment" "GillSpacing"
## [9] "GillSize" "GillColor"
## [11] "StalkShape" "Stalkroot"
## [13] "StalkSurfaceAboveRing" "StalkSurfaceBelowRing"
## [15] "StalkColorAboveRing" "StalkColorBeloRing"
## [17] "VeilType" "VeilColor"
## [19] "RingNumber" "RingType"
## [21] "SporePrintColor" "Population"
## [23] "Habitat"
# Per-column tally of how often each code occurs.
summary(object = mushroomsData)
## Classes CapShape CapSurface CapColor Bruises Odor
## e:4208 b: 452 f:2320 n :2283 f:4748 n :3528
## p:3915 c: 4 g: 4 g :1840 t:3375 f :2160
## f:3152 s:2555 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3655 b : 168 l : 400
## (Other): 220 (Other): 483
## GillAttachment GillSpacing GillSize GillColor StalkShape Stalkroot
## a: 210 c:6811 b:5612 b :1728 e:3515 ?:2480
## f:7913 w:1312 n:2511 p :1492 t:4608 b:3776
## w :1202 c: 556
## n :1048 e:1119
## g : 752 r: 192
## h : 732
## (Other):1169
## StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
## f: 552 f: 600 w :4463
## k:2372 k:2304 p :1872
## s:5175 s:4935 g : 576
## y: 24 y: 284 n : 448
## b : 432
## o : 192
## (Other): 140
## StalkColorBeloRing VeilType VeilColor RingNumber RingType SporePrintColor
## w :4383 p:8123 n: 96 n: 36 e:2776 w :2388
## p :1872 o: 96 o:7487 f: 48 n :1968
## g : 576 w:7923 t: 600 l:1296 k :1871
## n : 512 y: 8 n: 36 h :1632
## b : 432 p:3967 r : 72
## o : 192 b : 48
## (Other): 156 (Other): 144
## Population Habitat
## a: 384 d:3148
## c: 340 g:2148
## n: 400 l: 832
## s:1247 m: 292
## v:4040 p:1144
## y:1712 u: 367
## w: 192
# Replace the single-letter codes in the first six columns with readable labels.
# Plain column assignment is used instead of an unqualified mutate(), which
# resolves to plyr's or dplyr's version depending on attach order.
mushroomsData$Classes <- ifelse(
  mushroomsData$Classes == "e", "edible", "poisonous"
)

# One named lookup (code = label) per column, so every code sits next to its
# label instead of relying on two parallel vectors staying in the same order.
codeLabels <- list(
  CapShape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  CapSurface = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  CapColor = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  Bruises = c(t = "bruises", f = "no"),
  Odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

for (columnName in names(codeLabels)) {
  lookup <- codeLabels[[columnName]]
  mushroomsData[[columnName]] <- plyr::mapvalues(
    mushroomsData[[columnName]],
    from = names(lookup),
    to = unname(lookup)
  )
}

# Preview the first ten records with the decoded columns.
head(mushroomsData, n = 10)
## Classes CapShape CapSurface CapColor Bruises Odor GillAttachment
## 1 edible convex smooth yellow bruises almond f
## 2 edible bell smooth white bruises anise f
## 3 poisonous convex scaly white bruises pungent f
## 4 edible convex smooth gray no none f
## 5 edible convex scaly yellow bruises almond f
## 6 edible bell smooth white bruises almond f
## 7 edible bell scaly white bruises anise f
## 8 poisonous convex scaly white bruises pungent f
## 9 edible bell smooth yellow bruises almond f
## 10 edible convex scaly yellow bruises anise f
## GillSpacing GillSize GillColor StalkShape Stalkroot
## 1 c b k e c
## 2 c b n e c
## 3 c n n e e
## 4 w b k t e
## 5 c b n e c
## 6 c b g e c
## 7 c b n e c
## 8 c n p e e
## 9 c b g e c
## 10 c b g e c
## StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## 7 s s w
## 8 s s w
## 9 s s w
## 10 s s w
## StalkColorBeloRing VeilType VeilColor RingNumber RingType
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o e
## 5 w p w o p
## 6 w p w o p
## 7 w p w o p
## 8 w p w o p
## 9 w p w o p
## 10 w p w o p
## SporePrintColor Population Habitat
## 1 n n g
## 2 n n m
## 3 k s u
## 4 n a g
## 5 k n g
## 6 k n m
## 7 n s m
## 8 k v g
## 9 k s m
## 10 n n g
# Frequency of every label in each decoded column.
# plyr::count is named explicitly: it accepts the column name as a string and
# returns a `freq` column. dplyr also exports count(); if dplyr is attached
# after plyr, an unqualified count(df, "Classes") would no longer tally the
# column named by the string.
plyr::count(mushroomsData, "Classes")
## Classes freq
## 1 edible 4208
## 2 poisonous 3915
plyr::count(mushroomsData, "CapShape")
## CapShape freq
## 1 bell 452
## 2 conical 4
## 3 flat 3152
## 4 knobbed 828
## 5 sunken 32
## 6 convex 3655
plyr::count(mushroomsData, "CapSurface")
## CapSurface freq
## 1 fibrous 2320
## 2 grooves 4
## 3 smooth 2555
## 4 scaly 3244
plyr::count(mushroomsData, "CapColor")
## CapColor freq
## 1 buff 168
## 2 cinnamon 44
## 3 red 1500
## 4 gray 1840
## 5 brown 2283
## 6 pink 144
## 7 green 16
## 8 purple 16
## 9 white 1040
## 10 yellow 1072
plyr::count(mushroomsData, "Bruises")
## Bruises freq
## 1 no 4748
## 2 bruises 3375
plyr::count(mushroomsData, "Odor")
## Odor freq
## 1 almond 400
## 2 creosote 192
## 3 foul 2160
## 4 anise 400
## 5 musty 36
## 6 none 3528
## 7 pungent 255
## 8 spicy 576
## 9 fishy 576
# Keep rows 2 through 10 and the first five columns as a small sample,
# then display it.
newMushroomsData <- mushroomsData[seq(2, 10), seq_len(5)]
newMushroomsData
## Classes CapShape CapSurface CapColor Bruises
## 2 edible bell smooth white bruises
## 3 poisonous convex scaly white bruises
## 4 edible convex smooth gray no
## 5 edible convex scaly yellow bruises
## 6 edible bell smooth white bruises
## 7 edible bell scaly white bruises
## 8 poisonous convex scaly white bruises
## 9 edible bell smooth yellow bruises
## 10 edible convex scaly yellow bruises
# Bar chart of how many mushrooms fall into each class.
# The counts are plotted as-is: the title promises "# of" mushrooms, and the
# previous division by 100 put unlabeled hundreds on the y-axis.
classCounts <- table(mushroomsData$Classes)
barplot(
  classCounts,
  main = "Comparison of # of Edible vs Poisonous Mushrooms",
  ylab = "Number of mushrooms",
  # table() orders the classes alphabetically: edible first, then poisonous.
  col = c("green", "red")
)