DATA 607 - ASSIGNMENT 1
Harpreet Shoker
Loading Data (csv file) into R
# Load the UCI mushroom data set (one class column plus 22 attribute columns).
#
# The file has no header row, so it must be read with header = FALSE; with the
# default header = TRUE the first record is consumed as column names and
# silently dropped from the data.
#
# col.names reproduces the placeholder names read.csv used to derive from that
# first record, so the rename step that follows keeps working unchanged.
#
# stringsAsFactors = TRUE keeps the columns as factors on R >= 4.0, which the
# later `levels<-` recoding relies on.
#
# NOTE: the head() printouts recorded further down in this document were
# captured before this fix (when the first record was being dropped); re-run
# the document to refresh them.
mushroom_datafile <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"),
  header = FALSE,
  stringsAsFactors = TRUE,
  col.names = c(
    "p", "x", "s", "n", "t", "p.1", "f", "c", "n.1", "k", "e", "e.1",
    "s.1", "s.2", "w", "w.1", "p.2", "w.2", "o", "p.3", "k.1", "s.3", "u"
  )
)
head(mushroom_datafile)
## p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 p x s n t p f c n k e e s s w w p w o p k s u
## 2 e x s y t a f c b k e c s s w w p w o p n n g
## 3 e b s w t l f c b n e c s s w w p w o p n n m
## 4 p x y w t p f c n n e e s s w w p w o p k s u
## 5 e x s g f n f w b k t e s s w w p w o e n a g
## 6 e x y y t a f c b n e c s s w w p w o p k n g
Loading the plyr library to use its rename function
library(plyr)
# Give every column a descriptive name in place of the placeholder names
# produced when the file was read. One old = new pair per line.
# NOTE(review): "gill-attachement" looks like a misspelling of
# "gill-attachment"; it is kept as-is so existing references keep working.
mushroom_datafile1 <- rename(
  mushroom_datafile,
  replace = c(
    "p" = "classtype",
    "x" = "cap-shape",
    "s" = "cap-surface",
    "n" = "cap-color",
    "t" = "bruises",
    "p.1" = "odor",
    "f" = "gill-attachement",
    "c" = "gill-spacing",
    "n.1" = "gill-size",
    "k" = "gill-color",
    "e" = "stalk shape",
    "e.1" = "stalk root",
    "s.1" = "stalk surface above ring",
    "s.2" = "stalk surface below ring",
    "w" = "stalk color above veil",
    "w.1" = "stalk color below veil",
    "p.2" = "veil type",
    "w.2" = "veil-color",
    "o" = "ring number",
    "p.3" = "ring type",
    "k.1" = "spore print color",
    "s.3" = "population",
    "u" = "habitat"
  )
)
head(mushroom_datafile1)
## classtype cap-shape cap-surface cap-color bruises odor gill-attachement
## 1 e x s y t a f
## 2 e b s w t l f
## 3 p x y w t p f
## 4 e x s g f n f
## 5 e x y y t a f
## 6 e b s w t a f
## gill-spacing gill-size gill-color stalk shape stalk root
## 1 c b k e c
## 2 c b n e c
## 3 c n n e e
## 4 w b k t e
## 5 c b n e c
## 6 c b g e c
## stalk surface above ring stalk surface below ring stalk color above veil
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk color below veil veil type veil-color ring number ring type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o e
## 5 w p w o p
## 6 w p w o p
## spore print color population habitat
## 1 n n g
## 2 n n m
## 3 k s u
## 4 n a g
## 5 k n g
## 6 k n m
Creating a subset of the data; here I am using classtype, odor, gill size, veil color, and habitat
# Keep only the five columns of interest; all rows are retained.
subset_columns <- c("classtype", "odor", "gill-size", "veil-color", "habitat")
mushroon_subset <- mushroom_datafile1[subset_columns]
head(mushroon_subset)
## classtype odor gill-size veil-color habitat
## 1 e a b w g
## 2 e l b w m
## 3 p p n w u
## 4 e n b w g
## 5 e a b w g
## 6 e a b w m
# Recode the single-letter category codes as descriptive labels.
#
# Each code is paired explicitly with its label (following the UCI attribute
# dictionary) instead of assigning `levels<-` positionally. Factor levels are
# sorted by code, so a label vector sorted by label gets attached to the wrong
# codes: odor code "l" (anise) was previously shown as "fishy", "p" (pungent)
# as "none" and "n" (none) as "musty".
#
# Levels are listed in sorted-code order so the resulting level order matches
# what the positional assignment produced. factor() also works whether the
# columns arrive as factors or as character vectors.
mushroon_subset$classtype <- factor(
  mushroon_subset$classtype,
  levels = c("e", "p"),
  labels = c("edible", "poisonous")
)
mushroon_subset$odor <- factor(
  mushroon_subset$odor,
  levels = c("a", "c", "f", "l", "m", "n", "p", "s", "y"),
  labels = c(
    "almond", "creosote", "foul", "anise", "musty",
    "none", "pungent", "spicy", "fishy"
  )
)
mushroon_subset[["gill-size"]] <- factor(
  mushroon_subset[["gill-size"]],
  levels = c("b", "n"),
  labels = c("broad", "narrow")
)
mushroon_subset[["veil-color"]] <- factor(
  mushroon_subset[["veil-color"]],
  levels = c("n", "o", "w", "y"),
  labels = c("brown", "orange", "white", "yellow")
)
mushroon_subset$habitat <- factor(
  mushroon_subset$habitat,
  levels = c("d", "g", "l", "m", "p", "u", "w"),
  labels = c(
    "woods", "grasses", "leaves", "meadows", "paths", "urban", "waste"
  )
)
head(mushroon_subset)
## classtype odor gill-size veil-color habitat
## 1 edible almond broad white grasses
## 2 edible anise broad white meadows
## 3 poisonous pungent narrow white urban
## 4 edible none broad white grasses
## 5 edible almond broad white grasses
## 6 edible almond broad white meadows