DATA 607 - ASSIGNMENT 1

Harpreet Shoker

Loading Data (csv file) into R

# Read the UCI mushroom data set straight from the archive.
#
# The file has NO header row. With read.csv()'s default header = TRUE the first
# record (p,x,s,n,t,p,...) was silently turned into the column names and lost
# from the data. header = FALSE keeps that record as row 1.
#
# col.names reproduces the names read.csv() used to derive from that first
# record, so the rename step that follows still finds every column.
# stringsAsFactors = TRUE keeps the columns as factors on R >= 4.0 as well,
# which the later levels() relabelling relies on.
#
# NOTE: printed tables further down in this document were produced before this
# fix; after re-running, their row numbers shift down by one.
mushroom_datafile <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"),
  header = FALSE,
  col.names = c(
    "p", "x", "s", "n", "t", "p.1", "f", "c", "n.1", "k", "e", "e.1",
    "s.1", "s.2", "w", "w.1", "p.2", "w.2", "o", "p.3", "k.1", "s.3", "u"
  ),
  stringsAsFactors = TRUE
)
head(mushroom_datafile)
##   p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 p x s n t   p f c   n k e   e   s   s w   w   p   w o   p   k   s u
## 2 e x s y t   a f c   b k e   c   s   s w   w   p   w o   p   n   n g
## 3 e b s w t   l f c   b n e   c   s   s w   w   p   w o   p   n   n m
## 4 p x y w t   p f c   n n e   e   s   s w   w   p   w o   p   k   s u
## 5 e x s g f   n f w   b k t   e   s   s w   w   p   w o   e   n   a g
## 6 e x y y t   a f c   b n e   c   s   s w   w   p   w o   p   k   n g

Loading the plyr library to use its rename() function

library(plyr)
# Replace the placeholder column names with descriptive attribute names.
# Each entry maps "old name" = "new name"; the order follows the columns of
# the data file.
#
# NOTE(review): "gill-attachement" looks like a misspelling of
# "gill-attachment", and the UCI attribute list appears to call the two
# "stalk color ... veil" columns stalk-color-above-ring / -below-ring. The
# names are kept exactly as they were so any code that already refers to
# them keeps working -- confirm before renaming.
mushroom_datafile1 <- rename(
  mushroom_datafile,
  replace = c(
    "p"   = "classtype",
    "x"   = "cap-shape",
    "s"   = "cap-surface",
    "n"   = "cap-color",
    "t"   = "bruises",
    "p.1" = "odor",
    "f"   = "gill-attachement",
    "c"   = "gill-spacing",
    "n.1" = "gill-size",
    "k"   = "gill-color",
    "e"   = "stalk shape",
    "e.1" = "stalk root",
    "s.1" = "stalk surface above ring",
    "s.2" = "stalk surface below ring",
    "w"   = "stalk color above veil",
    "w.1" = "stalk color below veil",
    "p.2" = "veil type",
    "w.2" = "veil-color",
    "o"   = "ring number",
    "p.3" = "ring type",
    "k.1" = "spore print color",
    "s.3" = "population",
    "u"   = "habitat"
  )
)
head(mushroom_datafile1)
##   classtype cap-shape cap-surface cap-color bruises odor gill-attachement
## 1         e         x           s         y       t    a                f
## 2         e         b           s         w       t    l                f
## 3         p         x           y         w       t    p                f
## 4         e         x           s         g       f    n                f
## 5         e         x           y         y       t    a                f
## 6         e         b           s         w       t    a                f
##   gill-spacing gill-size gill-color stalk shape stalk root
## 1            c         b          k           e          c
## 2            c         b          n           e          c
## 3            c         n          n           e          e
## 4            w         b          k           t          e
## 5            c         b          n           e          c
## 6            c         b          g           e          c
##   stalk surface above ring stalk surface below ring stalk color above veil
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk color below veil veil type veil-color ring number ring type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         e
## 5                      w         p          w           o         p
## 6                      w         p          w           o         p
##   spore print color population habitat
## 1                 n          n       g
## 2                 n          n       m
## 3                 k          s       u
## 4                 n          a       g
## 5                 k          n       g
## 6                 k          n       m

Creating a subset of the data. Here I am using classtype, odor, gill size, veil color and habitat.

# Keep all rows but only the five attributes used from here on.
# Indexing a data frame with a character vector returns a data frame
# containing just those columns, in the order given.
# (The spelling "mushroon" is kept because later code refers to this name.)
mushroon_subset <- mushroom_datafile1[
  c("classtype", "odor", "gill-size", "veil-color", "habitat")
]
head(mushroon_subset)
##   classtype odor gill-size veil-color habitat
## 1         e    a         b          w       g
## 2         e    l         b          w       m
## 3         p    p         n          w       u
## 4         e    n         b          w       g
## 5         e    a         b          w       g
## 6         e    a         b          w       m
# Translate the single-letter codes of each column into readable labels.
#
# Every code is paired with its label explicitly (code = "label"). The previous
# version assigned a bare label vector with levels(x) = ..., which pairs labels
# with the existing factor levels in alphabetical code order. For odor that
# order is a, c, f, l, m, n, p, s, y, so "l" (anise) was shown as "fishy",
# "p" (pungent) as "none", "n" (none) as "musty", and so on.
#
# Maps are listed in alphabetical code order so the resulting level order is
# the same as before for the columns that were already labelled correctly.

# Recode `codes` (factor or character) using a named character vector whose
# names are the raw codes and whose values are the labels. Returns a factor.
# Stops if the data contains a code missing from the map, rather than turning
# it into NA silently.
decode_codes <- function(codes, labels_by_code) {
  seen <- unique(as.character(codes[!is.na(codes)]))
  unknown <- setdiff(seen, names(labels_by_code))
  if (length(unknown) > 0) {
    stop("unmapped code(s): ", paste(unknown, collapse = ", "), call. = FALSE)
  }
  factor(codes, levels = names(labels_by_code), labels = unname(labels_by_code))
}

mushroon_subset$classtype <- decode_codes(
  mushroon_subset$classtype,
  c(e = "edible", p = "poisonous")
)
mushroon_subset$odor <- decode_codes(
  mushroon_subset$odor,
  c(a = "almond", c = "creosote", f = "foul", l = "anise", m = "musty",
    n = "none", p = "pungent", s = "spicy", y = "fishy")
)
mushroon_subset$`gill-size` <- decode_codes(
  mushroon_subset$`gill-size`,
  c(b = "broad", n = "narrow")
)
mushroon_subset$`veil-color` <- decode_codes(
  mushroon_subset$`veil-color`,
  c(n = "brown", o = "orange", w = "white", y = "yellow")
)
mushroon_subset$habitat <- decode_codes(
  mushroon_subset$habitat,
  c(d = "woods", g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste")
)
head(mushroon_subset)
##   classtype    odor gill-size veil-color habitat
## 1    edible  almond     broad      white grasses
## 2    edible   anise     broad      white meadows
## 3 poisonous pungent    narrow      white   urban
## 4    edible    none     broad      white grasses
## 5    edible  almond     broad      white grasses
## 6    edible  almond     broad      white meadows