Prepared for CUNY SPS DATA 607 by JMcEachern
Source: Mushroom Dataset
Load required libraries:
library(stringr)
library(RCurl)
## Loading required package: bitops
Create dataframe and view dimensions:
# Download the raw mushroom data as a single string.
# followlocation = TRUE lets curl follow an HTTP redirect (e.g. http -> https)
# instead of returning the empty redirect body.
x <- getURL(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  followlocation = TRUE
)
# Fail loudly here rather than with an obscure parse error further down.
if (!nzchar(x)) {
  stop("Download of the mushroom data set returned no content", call. = FALSE)
}
# The file has no header row, so columns get the default names V1, V2, ...
# read.csv() already returns a data.frame; no extra wrapping is needed.
y <- read.csv(text = x, header = FALSE)
dim(y)
## [1] 8124 23
View beginning of dataframe:
# Preview the first six rows of the raw data.
head(y, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Subset data & rename columns:
# Keep only the first four attributes (class plus the three cap features).
mushroom_db <- y[, c("V1", "V2", "V3", "V4")]
# Give all four columns descriptive names in a single assignment.
colnames(mushroom_db) <- c("classes", "shape", "surface", "color")
colnames(mushroom_db)
## [1] "classes" "shape" "surface" "color"
View beginning of subset:
# Preview the first six rows of the renamed subset.
head(mushroom_db, n = 6L)
## classes shape surface color
## 1 p x s n
## 2 e x s y
## 3 e b s w
## 4 p x y w
## 5 e x s g
## 6 e x y y
Recode abbreviated values as descriptive labels & view beginning of subset:
# Lookup tables, one per column: single-letter code -> descriptive label.
code_labels <- list(
  classes = c(p = "poisonous", e = "edible"),
  shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  surface = c(f = "fibrous", g = "grooves", s = "smooth", y = "scaly"),
  color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Convert `codes` to character and swap every value that has an entry in
# `labels` for its label. Values with no entry (including NA) are returned
# unchanged.
decode_column <- function(codes, labels) {
  decoded <- as.character(codes)
  slot <- match(decoded, names(labels))
  known <- !is.na(slot)
  # unname() keeps the result a plain character vector.
  decoded[known] <- unname(labels[slot[known]])
  decoded
}

# Apply each lookup table to the column of the same name.
for (field in names(code_labels)) {
  mushroom_db[[field]] <- decode_column(
    mushroom_db[[field]],
    code_labels[[field]]
  )
}
head(mushroom_db)
## classes shape surface color
## 1 poisonous convex smooth brown
## 2 edible convex smooth yellow
## 3 edible bell smooth white
## 4 poisonous convex scaly white
## 5 edible convex smooth gray
## 6 edible convex scaly yellow