library(RCurl)
theUrl <- "https://raw.githubusercontent.com/bpersaud104/Data607/master/agaricus-lepiota.csv"
mushroom <- read.table(file = theUrl, header = TRUE, sep = ",")
head(mushroom)
## p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 e x s y t a f c b k e c s s w w p w o p n n g
## 2 e b s w t l f c b n e c s s w w p w o p n n m
## 3 p x y w t p f c n n e e s s w w p w o p k s u
## 4 e x s g f n f w b k t e s s w w p w o e n a g
## 5 e x y y t a f c b n e c s s w w p w o p k n g
## 6 e b s w t a f c b g e c s s w w p w o p k n m
summary(mushroom)
## p x s n t p.1
## e:4208 b: 452 f:2320 n :2283 f:4748 n :3528
## p:3915 c: 4 g: 4 g :1840 t:3375 f :2160
## f:3152 s:2555 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3655 b : 168 l : 400
## (Other): 220 (Other): 483
## f c n.1 k e e.1 s.1
## a: 210 c:6811 b:5612 b :1728 e:3515 ?:2480 f: 552
## f:7913 w:1312 n:2511 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5175
## n :1048 e:1119 y: 24
## g : 752 r: 192
## h : 732
## (Other):1169
## s.2 w w.1 p.2 w.2 o
## f: 600 w :4463 w :4383 p:8123 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7487
## s:4935 g : 576 g : 576 w:7923 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## p.3 k.1 s.3 u
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1871 n: 400 l: 832
## n: 36 h :1632 s:1247 m: 292
## p:3967 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 367
## (Other): 144 w: 192
x <- "https://raw.githubusercontent.com/bpersaud104/Data607/master/Mushroom%20Data%20Dictionary.txt"
dictionary <- read.table(file = x, row.names = 1, header = FALSE, sep = ":", quote = "")
row.names(dictionary)
## [1] " 1. classes" " 2. cap-shape"
## [3] " 3. cap-surface" " 4. cap-color"
## [5] " 5. bruises?" " 6. odor"
## [7] " 7. gill-attachment" " 8. gill-spacing"
## [9] " 9. gill-size" " 10. gill-color"
## [11] " 11. stalk-shape" " 12. stalk-root"
## [13] " 13. stalk-surface-above-ring" " 14. stalk-surface-below-ring"
## [15] " 15. stalk-color-above-ring" " 16. stalk-color-below-ring"
## [17] " 17. veil-type" " 18. veil-color"
## [19] " 19. ring-number" " 20. ring-type"
## [21] " 21. spore-print-color" " 22. population"
## [23] " 23. habitat"
head(dictionary, 23)
## V2
## 1. classes edible=e, poisonous=p
## 2. cap-shape bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
## 3. cap-surface fibrous=f,grooves=g,scaly=y,smooth=s
## 4. cap-color brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
## 5. bruises? bruises=t,no=f
## 6. odor almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
## 7. gill-attachment attached=a,descending=d,free=f,notched=n
## 8. gill-spacing close=c,crowded=w,distant=d
## 9. gill-size broad=b,narrow=n
## 10. gill-color black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y
## 11. stalk-shape enlarging=e,tapering=t
## 12. stalk-root bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?
## 13. stalk-surface-above-ring fibrous=f,scaly=y,silky=k,smooth=s
## 14. stalk-surface-below-ring fibrous=f,scaly=y,silky=k,smooth=s
## 15. stalk-color-above-ring brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
## 16. stalk-color-below-ring brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
## 17. veil-type partial=p,universal=u
## 18. veil-color brown=n,orange=o,white=w,yellow=y
## 19. ring-number none=n,one=o,two=t
## 20. ring-type cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z
## 21. spore-print-color black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y
## 22. population abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y
## 23. habitat grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d
mushroom_subset <- data.frame(subset(mushroom[c(1, 4, 6, 9, 22)]))
head(mushroom_subset)
## p n p.1 n.1 s.3
## 1 e y a b n
## 2 e w l b n
## 3 p w p n s
## 4 e g n b a
## 5 e y a b n
## 6 e w a b n
summary(mushroom_subset)
## p n p.1 n.1 s.3
## e:4208 n :2283 n :3528 b:5612 a: 384
## p:3915 g :1840 f :2160 n:2511 c: 340
## e :1500 s : 576 n: 400
## y :1072 y : 576 s:1247
## w :1040 a : 400 v:4040
## b : 168 l : 400 y:1712
## (Other): 220 (Other): 483
#Rename columns
names(mushroom_subset) <- c("p" = "Classes", "n" = "Cap-color", "p.1" = "Odor", "n.1" = "Gil-size", "s.3" = "Population")
head(mushroom_subset)
## Classes Cap-color Odor Gil-size Population
## 1 e y a b n
## 2 e w l b n
## 3 p w p n s
## 4 e g n b a
## 5 e y a b n
## 6 e w a b n
# Rename abbreviations in column one
levels(mushroom_subset$Classes) <- c(levels(mushroom_subset$Classes), "Edible", "Poisonous")
mushroom_subset$Classes[mushroom_subset$Classes == "e"] <- "Edible"
mushroom_subset$Classes[mushroom_subset$Classes == "p"] <- "Poisonous"
# Rename abbreviations in column two
levels(mushroom_subset$`Cap-color`) <- c(levels(mushroom_subset$`Cap-color`), "Brown", "Buff", "Cinnamon", "Gray", "Green", "Pink", "Purple", "Red", "White", "Yellow")
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "n"] <- "Brown"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "b"] <- "Buff"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "c"] <- "Cinnamon"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "g"] <- "Gray"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "r"] <- "Green"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "p"] <- "Pink"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "u"] <- "Purple"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "e"] <- "Red"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "w"] <- "White"
mushroom_subset$`Cap-color`[mushroom_subset$`Cap-color` == "y"] <- "Yellow"
#Rename abbreviations in column three
levels(mushroom_subset$Odor) <- c(levels(mushroom_subset$Odor), "Almond", "Anise", "Creosote", "Fishy", "Foul", "Musty", "None", "Pungent", "Spicy")
mushroom_subset$Odor[mushroom_subset$Odor == "a"] <- "Almond"
mushroom_subset$Odor[mushroom_subset$Odor == "l"] <- "Anise"
mushroom_subset$Odor[mushroom_subset$Odor == "c"] <- "Creosote"
mushroom_subset$Odor[mushroom_subset$Odor == "y"] <- "Fishy"
mushroom_subset$Odor[mushroom_subset$Odor == "f"] <- "Foul"
mushroom_subset$Odor[mushroom_subset$Odor == "m"] <- "Musty"
mushroom_subset$Odor[mushroom_subset$Odor == "n"] <- "None"
mushroom_subset$Odor[mushroom_subset$Odor == "p"] <- "Pungent"
mushroom_subset$Odor[mushroom_subset$Odor == "s"] <- "Spicy"
#Rename abbreviations in column four
levels(mushroom_subset$`Gil-size`) <- c(levels(mushroom_subset$`Gil-size`), "Broad", "Narrow")
mushroom_subset$`Gil-size`[mushroom_subset$`Gil-size` == "b"] <- "Broad"
mushroom_subset$`Gil-size`[mushroom_subset$`Gil-size` == "n"] <- "Narrow"
#Rename abbreviations in column five
levels(mushroom_subset$Population) <- c(levels(mushroom_subset$Population), "Abundant", "Clustered", "Numerous", "Scattered", "Several", "Solitary")
mushroom_subset$Population[mushroom_subset$Population == "a"] <- "Abundant"
mushroom_subset$Population[mushroom_subset$Population == "c"] <- "Clustered"
mushroom_subset$Population[mushroom_subset$Population == "n"] <- "Numerous"
mushroom_subset$Population[mushroom_subset$Population == "s"] <- "Scattered"
mushroom_subset$Population[mushroom_subset$Population == "v"] <- "Several"
mushroom_subset$Population[mushroom_subset$Population == "y"] <- "Solitary"
summary(mushroom_subset)
## Classes Cap-color Odor Gil-size
## e : 0 Brown :2283 None :3528 b : 0
## p : 0 Gray :1840 Foul :2160 n : 0
## Edible :4208 Red :1500 Fishy : 576 Broad :5612
## Poisonous:3915 Yellow :1072 Spicy : 576 Narrow:2511
## White :1040 Almond : 400
## Buff : 168 Anise : 400
## (Other): 220 (Other): 483
## Population
## Several :4040
## Solitary :1712
## Scattered:1247
## Numerous : 400
## Abundant : 384
## Clustered: 340
## (Other) : 0
head(mushroom_subset, 50)
## Classes Cap-color Odor Gil-size Population
## 1 Edible Yellow Almond Broad Numerous
## 2 Edible White Anise Broad Numerous
## 3 Poisonous White Pungent Narrow Scattered
## 4 Edible Gray None Broad Abundant
## 5 Edible Yellow Almond Broad Numerous
## 6 Edible White Almond Broad Numerous
## 7 Edible White Anise Broad Scattered
## 8 Poisonous White Pungent Narrow Several
## 9 Edible Yellow Almond Broad Scattered
## 10 Edible Yellow Anise Broad Numerous
## 11 Edible Yellow Almond Broad Scattered
## 12 Edible Yellow Almond Broad Scattered
## 13 Poisonous White Pungent Narrow Several
## 14 Edible Brown None Broad Abundant
## 15 Edible Gray None Narrow Solitary
## 16 Edible White None Broad Abundant
## 17 Poisonous Brown Pungent Narrow Scattered
## 18 Poisonous White Pungent Narrow Scattered
## 19 Poisonous Brown Pungent Narrow Scattered
## 20 Edible Yellow Almond Broad Scattered
## 21 Poisonous Brown Pungent Narrow Several
## 22 Edible Yellow Anise Broad Scattered
## 23 Edible White Almond Broad Numerous
## 24 Edible White Anise Broad Scattered
## 25 Poisonous White Pungent Narrow Several
## 26 Edible Yellow Almond Broad Numerous
## 27 Edible White Anise Broad Numerous
## 28 Edible Brown None Narrow Solitary
## 29 Edible Yellow Almond Narrow Several
## 30 Edible Yellow Anise Broad Numerous
## 31 Poisonous White Pungent Narrow Scattered
## 32 Edible Yellow Anise Broad Numerous
## 33 Edible Brown Anise Broad Solitary
## 34 Edible Yellow Anise Broad Scattered
## 35 Edible Yellow Anise Narrow Several
## 36 Edible Gray None Narrow Several
## 37 Poisonous Brown Pungent Narrow Scattered
## 38 Edible Yellow Almond Narrow Several
## 39 Edible Yellow Anise Broad Scattered
## 40 Edible Yellow Almond Broad Scattered
## 41 Edible Yellow Anise Broad Solitary
## 42 Edible Brown None Narrow Solitary
## 43 Poisonous White Pungent Narrow Several
## 44 Edible Yellow Almond Broad Numerous
## 45 Edible White Almond Broad Numerous
## 46 Edible Yellow Anise Broad Scattered
## 47 Edible White Anise Broad Numerous
## 48 Edible Yellow Anise Broad Scattered
## 49 Edible Yellow Anise Broad Scattered
## 50 Edible Brown Almond Broad Scattered