Importing the data
# Download the UCI mushroom data set. The file has no header row, so read.csv()
# names the columns V1..V23.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Every column is a one-letter categorical code. Ask for factors explicitly:
# since R 4.0.0 the default is stringsAsFactors = FALSE, which would make the
# later levels()-based recoding and the per-code counts in summary() not work.
Mushroom_data <- read.csv(
  file = url,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(Mushroom_data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Creating a new data frame from only certain columns
# Positions of the five columns kept for the analysis (V1, V4, V10, V22, V23).
keep_cols <- c(1, 4, 10, 22, 23)
mushroom <- Mushroom_data[, keep_cols]
head(mushroom)
## V1 V4 V10 V22 V23
## 1 p n k s u
## 2 e y k n g
## 3 e w n n m
## 4 p w n s u
## 5 e g k a g
## 6 e y n n g
Replacing the abbreviations used in the data
# Replace the one-letter codes with readable labels.
#
# The subset still carries read.csv()'s default names (V1, V4, V10, V22, V23),
# so give the columns descriptive names first; without this, mushroom$class
# etc. are NULL and the data frame built below would be empty.
names(mushroom) <- c("class", "capcolor", "gillcolor", "population", "Habitat")

# Level renaming only works on factors. Since R 4.0.0 read.csv() returns
# character columns by default, so convert explicitly (a no-op for columns
# that are already factors).
mushroom[] <- lapply(mushroom, factor)

df <- data.frame(
  mushroom.class = mushroom$class,
  mushroom.capcolor = mushroom$capcolor,
  mushroom.gillcolor = mushroom$gillcolor,
  mushroom.population = mushroom$population,
  mushroom.Habitat = mushroom$Habitat
)

# One code -> label lookup per column of df.
code_labels <- list(
  mushroom.class = c(p = "poisonous", e = "edible"),
  mushroom.capcolor = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  mushroom.gillcolor = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  ),
  mushroom.population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  mushroom.Habitat = c(
    g = "grasses", l = "leaves", m = "meadow", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Rename factor levels in place: level order and the underlying integer codes
# are untouched, and any code without an entry in the lookup is left as is.
df[names(code_labels)] <- Map(
  function(x, labels) {
    known <- levels(x) %in% names(labels)
    levels(x)[known] <- unname(labels[levels(x)[known]])
    x
  },
  df[names(code_labels)],
  code_labels
)
head(df)
## mushroom.class mushroom.capcolor mushroom.gillcolor mushroom.population
## 1 poisonous brown black scattered
## 2 edible yellow black numerous
## 3 edible white brown numerous
## 4 poisonous white brown scattered
## 5 edible gray black abundant
## 6 edible yellow brown numerous
## mushroom.Habitat
## 1 urban
## 2 grasses
## 3 meadow
## 4 urban
## 5 grasses
## 6 grasses
Summary of data
# Per-column summary of `mushroom`, i.e. the frame that still holds the
# one-letter codes (the relabelled copy is `df`).
summary(object = mushroom)
## class capcolor gillcolor population Habitat
## e:4208 n :2284 b :1728 a: 384 d:3148
## p:3916 g :1840 p :1492 c: 340 g:2148
## e :1500 w :1202 n: 400 l: 832
## y :1072 n :1048 s:1248 m: 292
## w :1040 g : 752 v:4040 p:1144
## b : 168 h : 732 y:1712 u: 368
## (Other): 220 (Other):1170 w: 192