WEEK 1 ASSIGNMENT: MUSHROOM DATASET

Prepared for CUNY SPS DATA 607 by JMcEachern
Source: Mushroom Dataset (UCI Machine Learning Repository)

Load required libraries:

library(stringr)
library(RCurl)
## Loading required package: bitops

Create dataframe and view dimensions:

# Download the raw data file as a single string.
x <- getURL("http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
# Parse the downloaded text as CSV. The file has no header row, so read.csv()
# assigns the default column names (V1, V2, ...). read.csv() already returns a
# data frame, so no data.frame() wrapper is needed, and FALSE is spelled out
# because T and F are ordinary variables that can be reassigned.
y <- read.csv(text = x, header = FALSE)
# Report the number of rows and columns.
dim(y)
## [1] 8124   23

View beginning of dataframe:

# Preview the first six rows of the raw data frame.
head(y, n = 6L)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Subset data & rename columns:

# Keep only columns V1 through V4 of the raw data.
mushroom_db <- subset(y, select = V1:V4)
# Give the four retained columns descriptive names in a single assignment.
colnames(mushroom_db) <- c("classes", "shape", "surface", "color")
# Show the resulting column names.
colnames(mushroom_db)
## [1] "classes" "shape"   "surface" "color"

View beginning of subset:

# Preview the first six rows of the four-column subset.
head(mushroom_db, n = 6L)
##   classes shape surface color
## 1       p     x       s     n
## 2       e     x       s     y
## 3       e     b       s     w
## 4       p     x       y     w
## 5       e     x       s     g
## 6       e     x       y     y

Recode abbreviated values as descriptive labels & view beginning of subset:

# Code-to-label lookup tables, one per column. In each vector the names are
# the single-letter codes and the values are the labels that replace them.
code_labels <- list(
  classes = c(p = "poisonous", e = "edible"),
  shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  surface = c(f = "fibrous", g = "grooves", s = "smooth", y = "scaly"),
  color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Convert each listed column to character and swap every code that appears in
# its lookup table for the matching label. Values with no entry in the table
# (including NA) are left exactly as they were.
mushroom_db[names(code_labels)] <- Map(
  function(codes, labels) {
    codes <- as.character(codes)
    slot <- match(codes, names(labels))
    found <- !is.na(slot)
    codes[found] <- labels[slot[found]]
    codes
  },
  mushroom_db[names(code_labels)],
  code_labels
)

# Preview the recoded subset.
head(mushroom_db)
##     classes  shape surface  color
## 1 poisonous convex  smooth  brown
## 2    edible convex  smooth yellow
## 3    edible   bell  smooth  white
## 4 poisonous convex   scaly  white
## 5    edible convex  smooth   gray
## 6    edible convex   scaly yellow