1. Load the selected dataset into R.
# Source of the mushroom data set.
# Original data location: "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Personal Git copy for this assignment: "https://raw.githubusercontent.com/ChadRyanBailey/607-Week1-Assignment/master/Mushroom%20Data.csv"
fileLocation <- "https://raw.githubusercontent.com/ChadRyanBailey/607-Week1-Assignment/master/Mushroom%20Data.csv"

# The file is comma-separated and has no header row.
# stringsAsFactors is set explicitly: R >= 4.0 reads strings as character by
# default, while the per-level counts reported by summary(mushrooms) require
# factor columns.
mushrooms <- read.table(
  file = fileLocation,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
  1. Add meaningful column names (using provided data dictionary).
# Label the 23 unnamed columns, in file order, with the attribute names
# from the data dictionary.
mushrooms <- setNames(
  mushrooms,
  c(
    "edible-or-poisonous",
    "cap-shape",
    "cap-surface",
    "cap-color",
    "bruises?",
    "odor",
    "gill-attachment",
    "gill-spacing",
    "gill-size",
    "gill-color",
    "stalk-shape",
    "stalk-root",
    "stalk-surface-above-ring",
    "stalk-surface-below-ring",
    "stalk-color-above-ring",
    "stalk-color-below-ring",
    "veil-type",
    "veil-color",
    "ring-number",
    "ring-type",
    "spore-print-color",
    "population",
    "habitat"
  )
)
  1. Review the first few rows of the data and a summary of the data.
# Preview the first six rows of the full data set.
head(mushrooms, n = 6L)
##   edible-or-poisonous cap-shape cap-surface cap-color bruises? odor
## 1                   p         x           s         n        t    p
## 2                   e         x           s         y        t    a
## 3                   e         b           s         w        t    l
## 4                   p         x           y         w        t    p
## 5                   e         x           s         g        f    n
## 6                   e         x           y         y        t    a
##   gill-attachment gill-spacing gill-size gill-color stalk-shape stalk-root
## 1               f            c         n          k           e          e
## 2               f            c         b          k           e          c
## 3               f            c         b          n           e          c
## 4               f            c         n          n           e          e
## 5               f            w         b          k           t          e
## 6               f            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g
# Per-column summary of the full data set.
summary(object = mushrooms)
##  edible-or-poisonous cap-shape cap-surface   cap-color    bruises?
##  e:4208              b: 452    f:2320      n      :2284   f:4748  
##  p:3916              c:   4    g:   4      g      :1840   t:3376  
##                      f:3152    s:2556      e      :1500           
##                      k: 828    y:3244      y      :1072           
##                      s:  32                w      :1040           
##                      x:3656                b      : 168           
##                                            (Other): 220           
##       odor      gill-attachment gill-spacing gill-size   gill-color  
##  n      :3528   a: 210          c:6812       b:5612    b      :1728  
##  f      :2160   f:7914          w:1312       n:2512    p      :1492  
##  s      : 576                                          w      :1202  
##  y      : 576                                          n      :1048  
##  a      : 400                                          g      : 752  
##  l      : 400                                          h      : 732  
##  (Other): 484                                          (Other):1170  
##  stalk-shape stalk-root stalk-surface-above-ring stalk-surface-below-ring
##  e:3516      ?:2480     f: 552                   f: 600                  
##  t:4608      b:3776     k:2372                   k:2304                  
##              c: 556     s:5176                   s:4936                  
##              e:1120     y:  24                   y: 284                  
##              r: 192                                                      
##                                                                          
##                                                                          
##  stalk-color-above-ring stalk-color-below-ring veil-type veil-color
##  w      :4464           w      :4384           p:8124    n:  96    
##  p      :1872           p      :1872                     o:  96    
##  g      : 576           g      : 576                     w:7924    
##  n      : 448           n      : 512                     y:   8    
##  b      : 432           b      : 432                               
##  o      : 192           o      : 192                               
##  (Other): 140           (Other): 156                               
##  ring-number ring-type spore-print-color population habitat 
##  n:  36      e:2776    w      :2388      a: 384     d:3148  
##  o:7488      f:  48    n      :1968      c: 340     g:2148  
##  t: 600      l:1296    k      :1872      n: 400     l: 832  
##              n:  36    h      :1632      s:1248     m: 292  
##              p:3968    r      :  72      v:4040     p:1144  
##                        b      :  48      y:1712     u: 368  
##                        (Other): 144                 w: 192
  1. Subset the data, keeping the column “edible-or-poisonous” plus four other columns (gill-spacing, gill-size, population, and habitat).
# Keep the class label together with four descriptive attributes.
mushrooms_subset <- mushrooms[c(
  "edible-or-poisonous",
  "gill-spacing",
  "gill-size",
  "population",
  "habitat"
)]

# Preview the first six rows of the subset.
head(mushrooms_subset, n = 6L)
##   edible-or-poisonous gill-spacing gill-size population habitat
## 1                   p            c         n          s       u
## 2                   e            c         b          n       g
## 3                   e            c         b          n       m
## 4                   p            c         n          s       u
## 5                   e            w         b          a       g
## 6                   e            c         b          n       g
# Per-column summary of the subset.
summary(object = mushrooms_subset)
##  edible-or-poisonous gill-spacing gill-size population habitat 
##  e:4208              c:6812       b:5612    a: 384     d:3148  
##  p:3916              w:1312       n:2512    c: 340     g:2148  
##                                             n: 400     l: 832  
##                                             s:1248     m: 292  
##                                             v:4040     p:1144  
##                                             y:1712     u: 368  
##                                                        w: 192
  1. Replace the single-letter abbreviations used in the data with the descriptive labels from the data dictionary.
# Translate single-letter codes into descriptive labels.
#
# codes:  vector (character or factor) of abbreviations.
# labels: named character vector; names are the abbreviations, values are
#         the full labels.
#
# Returns a character vector the same length as `codes`. Matching is exact on
# the whole value, so the result does not depend on the order of `labels` and
# an already-expanded label can never be rewritten by a later code. Values
# with no entry in `labels` (including NA) are returned unchanged.
recode_abbreviations <- function(codes, labels) {
  codes <- as.character(codes)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  codes
}

eop <- recode_abbreviations(
  mushrooms_subset[["edible-or-poisonous"]],
  c(e = "edible", p = "poisonous")
)
mushrooms_subset[["edible-or-poisonous"]] <- eop

gspacing <- recode_abbreviations(
  mushrooms_subset[["gill-spacing"]],
  c(c = "close", w = "crowded")
)
mushrooms_subset[["gill-spacing"]] <- gspacing

gsize <- recode_abbreviations(
  mushrooms_subset[["gill-size"]],
  c(b = "broad", n = "narrow")
)
mushrooms_subset[["gill-size"]] <- gsize

population <- recode_abbreviations(
  mushrooms_subset[["population"]],
  c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
)
mushrooms_subset[["population"]] <- population

habitat <- recode_abbreviations(
  mushrooms_subset[["habitat"]],
  c(
    d = "woods",
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste"
  )
)
mushrooms_subset[["habitat"]] <- habitat

# Convert the character columns back to factors. stringsAsFactors is set
# explicitly because as.data.frame() keeps strings as character by default
# on R >= 4.0. Rebuilding the data frame this way also turns the hyphenated
# column names into syntactic ones (e.g. edible.or.poisonous).
mushrooms_subset <- as.data.frame(
  unclass(mushrooms_subset),
  stringsAsFactors = TRUE
)

# Review the transformed data.
head(mushrooms_subset)
##   edible.or.poisonous gill.spacing gill.size population habitat
## 1           poisonous        close    narrow  scattered   urban
## 2              edible        close     broad   numerous grasses
## 3              edible        close     broad   numerous meadows
## 4           poisonous        close    narrow  scattered   urban
## 5              edible      crowded     broad   abundant grasses
## 6              edible        close     broad   numerous grasses
# Per-column summary of the relabelled subset.
summary(object = mushrooms_subset)
##  edible.or.poisonous  gill.spacing   gill.size        population  
##  edible   :4208      close  :6812   broad :5612   abundant : 384  
##  poisonous:3916      crowded:1312   narrow:2512   clustered: 340  
##                                                   numerous : 400  
##                                                   scattered:1248  
##                                                   several  :4040  
##                                                   solitary :1712  
##                                                                   
##     habitat    
##  grasses:2148  
##  leaves : 832  
##  meadows: 292  
##  paths  :1144  
##  urban  : 368  
##  waste  : 192  
##  woods  :3148