library(RCurl)

# Load Data into R

# Source CSV of the mushroom records. The file has no header row: its first
# line is already an observation.
theUrl <- "https://raw.githubusercontent.com/bpersaud104/Data607/master/agaricus-lepiota.csv"

# header = FALSE keeps that first record as data (header = TRUE turned it into
# the column names "p", "x", "s", ... and dropped it from the rows). Columns
# are therefore named V1, V2, ... and are addressed by position further down.
# stringsAsFactors = TRUE makes every column a factor regardless of the R
# version's default, which the later levels()/summary() calls rely on.
mushroom <- read.table(
  file = theUrl,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Preview the first six records.
head(mushroom)
##   p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 e x s y t   a f c   b k e   c   s   s w   w   p   w o   p   n   n g
## 2 e b s w t   l f c   b n e   c   s   s w   w   p   w o   p   n   n m
## 3 p x y w t   p f c   n n e   e   s   s w   w   p   w o   p   k   s u
## 4 e x s g f   n f w   b k t   e   s   s w   w   p   w o   e   n   a g
## 5 e x y y t   a f c   b n e   c   s   s w   w   p   w o   p   k   n g
## 6 e b s w t   a f c   b g e   c   s   s w   w   p   w o   p   k   n m
# Per-column summary of the raw data set.
summary(mushroom)
##  p        x        s              n        t             p.1      
##  e:4208   b: 452   f:2320   n      :2283   f:4748   n      :3528  
##  p:3915   c:   4   g:   4   g      :1840   t:3375   f      :2160  
##           f:3152   s:2555   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3655            b      : 168            l      : 400  
##                             (Other): 220            (Other): 483  
##  f        c        n.1            k        e        e.1      s.1     
##  a: 210   c:6811   b:5612   b      :1728   e:3515   ?:2480   f: 552  
##  f:7913   w:1312   n:2511   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5175  
##                             n      :1048            e:1119   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1169                             
##  s.2            w             w.1       p.2      w.2      o       
##  f: 600   w      :4463   w      :4383   p:8123   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7487  
##  s:4935   g      : 576   g      : 576            w:7923   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  p.3           k.1       s.3      u       
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1871   n: 400   l: 832  
##  n:  36   h      :1632   s:1247   m: 292  
##  p:3967   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 367  
##           (Other): 144            w: 192

# Data Dictionary

# Location of the plain-text data dictionary.
x <- "https://raw.githubusercontent.com/bpersaud104/Data607/master/Mushroom%20Data%20Dictionary.txt"

# Split each line at the colon: the text before it becomes the row name
# (row.names = 1) and the text after it lands in column V2. quote = "" turns
# off quote handling so the text is read literally.
dictionary <- read.table(
  x,
  sep = ":",
  quote = "",
  header = FALSE,
  row.names = 1
)

# List the attribute names that were read as row names.
row.names(dictionary)
##  [1] "     1. classes"                  "     2. cap-shape"               
##  [3] "     3. cap-surface"              "     4. cap-color"               
##  [5] "     5. bruises?"                 "     6. odor"                    
##  [7] "     7. gill-attachment"          "     8. gill-spacing"            
##  [9] "     9. gill-size"                "     10. gill-color"             
## [11] "    11. stalk-shape"              "    12. stalk-root"              
## [13] "    13. stalk-surface-above-ring" "    14. stalk-surface-below-ring"
## [15] "    15. stalk-color-above-ring"   "    16. stalk-color-below-ring"  
## [17] "    17. veil-type"                "    18. veil-color"              
## [19] "    19. ring-number"              "    20. ring-type"               
## [21] "    21. spore-print-color"        "    22. population"              
## [23] "    23. habitat"
# Print up to 23 dictionary rows (one per attribute listed above).
head(dictionary, n = 23)
##                                                                                                                                                V2
##      1. classes                                                                                                             edible=e, poisonous=p
##      2. cap-shape                                                                             bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
##      3. cap-surface                                                                                          fibrous=f,grooves=g,scaly=y,smooth=s
##      4. cap-color                                                 brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
##      5. bruises?                                                                                                                   bruises=t,no=f
##      6. odor                                                          almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
##      7. gill-attachment                                                                                  attached=a,descending=d,free=f,notched=n
##      8. gill-spacing                                                                                                  close=c,crowded=w,distant=d
##      9. gill-size                                                                                                                broad=b,narrow=n
##      10. gill-color                             black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y
##     11. stalk-shape                                                                                                        enlarging=e,tapering=t
##     12. stalk-root                                                                bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?
##     13. stalk-surface-above-ring                                                                               fibrous=f,scaly=y,silky=k,smooth=s
##     14. stalk-surface-below-ring                                                                               fibrous=f,scaly=y,silky=k,smooth=s
##     15. stalk-color-above-ring                                            brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
##     16. stalk-color-below-ring                                            brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y
##     17. veil-type                                                                                                           partial=p,universal=u
##     18. veil-color                                                                                              brown=n,orange=o,white=w,yellow=y
##     19. ring-number                                                                                                            none=n,one=o,two=t
##     20. ring-type                                                   cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z
##     21. spore-print-color                                           black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y
##     22. population                                                             abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y
##     23. habitat                                                                      grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d

# Creating a Subset

# Keep five attributes, selected by column position. Going by the numbering
# in the data dictionary these are 1 = classes, 4 = cap-color, 6 = odor,
# 9 = gill-size and 22 = population.
mushroom_subset <- mushroom[c(1, 4, 6, 9, 22)]

# Preview the first six rows of the reduced data frame.
head(mushroom_subset)
##   p n p.1 n.1 s.3
## 1 e y   a   b   n
## 2 e w   l   b   n
## 3 p w   p   n   s
## 4 e g   n   b   a
## 5 e y   a   b   n
## 6 e w   a   b   n
# Per-column summary of the subset while it still holds the one-letter codes.
summary(mushroom_subset)
##  p              n             p.1       n.1      s.3     
##  e:4208   n      :2283   n      :3528   b:5612   a: 384  
##  p:3915   g      :1840   f      :2160   n:2511   c: 340  
##           e      :1500   s      : 576            n: 400  
##           y      :1072   y      : 576            s:1247  
##           w      :1040   a      : 400            v:4040  
##           b      : 168   l      : 400            y:1712  
##           (Other): 220   (Other): 483

# Transforming Abbreviations in Subset

# Rename columns.
# The new names are assigned by position, in the same order as the columns
# were selected for the subset. Names containing "-" are not syntactic, so
# they must be quoted with backticks when used with `$`.
names(mushroom_subset) <- c(
  "Classes",
  "Cap-color",
  "Odor",
  "Gil-size",
  "Population"
)
head(mushroom_subset)
##   Classes Cap-color Odor Gil-size Population
## 1       e         y    a        b          n
## 2       e         w    l        b          n
## 3       p         w    p        n          s
## 4       e         g    n        b          a
## 5       e         y    a        b          n
## 6       e         w    a        b          n
# Rename abbreviations in column one.
# factor() maps each one-letter code in `levels` to the label at the same
# position in `labels`. The result carries only the descriptive levels, so no
# unused "e"/"p" levels are left behind, and it works whether the column is
# currently a factor or a character vector. Values not listed in `levels`
# become NA.
mushroom_subset$Classes <- factor(
  mushroom_subset$Classes,
  levels = c("e", "p"),
  labels = c("Edible", "Poisonous")
)

# Rename abbreviations in column two.
# factor() maps each one-letter code in `levels` to the label at the same
# position in `labels`. The result carries only the descriptive levels, so no
# unused one-letter levels are left behind, and it works whether the column
# is currently a factor or a character vector. Values not listed in `levels`
# become NA.
mushroom_subset[["Cap-color"]] <- factor(
  mushroom_subset[["Cap-color"]],
  levels = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  labels = c(
    "Brown", "Buff", "Cinnamon", "Gray", "Green",
    "Pink", "Purple", "Red", "White", "Yellow"
  )
)

# Rename abbreviations in column three.
# factor() maps each one-letter code in `levels` to the label at the same
# position in `labels`. The result carries only the descriptive levels, so no
# unused one-letter levels are left behind, and it works whether the column
# is currently a factor or a character vector. Values not listed in `levels`
# become NA.
mushroom_subset$Odor <- factor(
  mushroom_subset$Odor,
  levels = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  labels = c(
    "Almond", "Anise", "Creosote", "Fishy", "Foul",
    "Musty", "None", "Pungent", "Spicy"
  )
)

# Rename abbreviations in column four.
# factor() maps each one-letter code in `levels` to the label at the same
# position in `labels`. The result carries only the descriptive levels, so no
# unused "b"/"n" levels are left behind, and it works whether the column is
# currently a factor or a character vector. Values not listed in `levels`
# become NA.
mushroom_subset[["Gil-size"]] <- factor(
  mushroom_subset[["Gil-size"]],
  levels = c("b", "n"),
  labels = c("Broad", "Narrow")
)

# Rename abbreviations in column five.
# factor() maps each one-letter code in `levels` to the label at the same
# position in `labels`. The result carries only the descriptive levels, so no
# unused one-letter levels are left behind, and it works whether the column
# is currently a factor or a character vector. Values not listed in `levels`
# become NA.
mushroom_subset$Population <- factor(
  mushroom_subset$Population,
  levels = c("a", "c", "n", "s", "v", "y"),
  labels = c(
    "Abundant", "Clustered", "Numerous",
    "Scattered", "Several", "Solitary"
  )
)

# Updated Subset after Transformations

# Per-column summary of the subset after the codes were replaced by labels.
summary(mushroom_subset)
##       Classes       Cap-color         Odor        Gil-size   
##  e        :   0   Brown  :2283   None   :3528   b     :   0  
##  p        :   0   Gray   :1840   Foul   :2160   n     :   0  
##  Edible   :4208   Red    :1500   Fishy  : 576   Broad :5612  
##  Poisonous:3915   Yellow :1072   Spicy  : 576   Narrow:2511  
##                   White  :1040   Almond : 400                
##                   Buff   : 168   Anise  : 400                
##                   (Other): 220   (Other): 483                
##      Population  
##  Several  :4040  
##  Solitary :1712  
##  Scattered:1247  
##  Numerous : 400  
##  Abundant : 384  
##  Clustered: 340  
##  (Other)  :   0
# Show the first 50 recoded rows.
head(mushroom_subset, n = 50)
##      Classes Cap-color    Odor Gil-size Population
## 1     Edible    Yellow  Almond    Broad   Numerous
## 2     Edible     White   Anise    Broad   Numerous
## 3  Poisonous     White Pungent   Narrow  Scattered
## 4     Edible      Gray    None    Broad   Abundant
## 5     Edible    Yellow  Almond    Broad   Numerous
## 6     Edible     White  Almond    Broad   Numerous
## 7     Edible     White   Anise    Broad  Scattered
## 8  Poisonous     White Pungent   Narrow    Several
## 9     Edible    Yellow  Almond    Broad  Scattered
## 10    Edible    Yellow   Anise    Broad   Numerous
## 11    Edible    Yellow  Almond    Broad  Scattered
## 12    Edible    Yellow  Almond    Broad  Scattered
## 13 Poisonous     White Pungent   Narrow    Several
## 14    Edible     Brown    None    Broad   Abundant
## 15    Edible      Gray    None   Narrow   Solitary
## 16    Edible     White    None    Broad   Abundant
## 17 Poisonous     Brown Pungent   Narrow  Scattered
## 18 Poisonous     White Pungent   Narrow  Scattered
## 19 Poisonous     Brown Pungent   Narrow  Scattered
## 20    Edible    Yellow  Almond    Broad  Scattered
## 21 Poisonous     Brown Pungent   Narrow    Several
## 22    Edible    Yellow   Anise    Broad  Scattered
## 23    Edible     White  Almond    Broad   Numerous
## 24    Edible     White   Anise    Broad  Scattered
## 25 Poisonous     White Pungent   Narrow    Several
## 26    Edible    Yellow  Almond    Broad   Numerous
## 27    Edible     White   Anise    Broad   Numerous
## 28    Edible     Brown    None   Narrow   Solitary
## 29    Edible    Yellow  Almond   Narrow    Several
## 30    Edible    Yellow   Anise    Broad   Numerous
## 31 Poisonous     White Pungent   Narrow  Scattered
## 32    Edible    Yellow   Anise    Broad   Numerous
## 33    Edible     Brown   Anise    Broad   Solitary
## 34    Edible    Yellow   Anise    Broad  Scattered
## 35    Edible    Yellow   Anise   Narrow    Several
## 36    Edible      Gray    None   Narrow    Several
## 37 Poisonous     Brown Pungent   Narrow  Scattered
## 38    Edible    Yellow  Almond   Narrow    Several
## 39    Edible    Yellow   Anise    Broad  Scattered
## 40    Edible    Yellow  Almond    Broad  Scattered
## 41    Edible    Yellow   Anise    Broad   Solitary
## 42    Edible     Brown    None   Narrow   Solitary
## 43 Poisonous     White Pungent   Narrow    Several
## 44    Edible    Yellow  Almond    Broad   Numerous
## 45    Edible     White  Almond    Broad   Numerous
## 46    Edible    Yellow   Anise    Broad  Scattered
## 47    Edible     White   Anise    Broad   Numerous
## 48    Edible    Yellow   Anise    Broad  Scattered
## 49    Edible    Yellow   Anise    Broad  Scattered
## 50    Edible     Brown  Almond    Broad  Scattered