Read in data

# Download the raw mushroom table; the file has no header row, so the columns
# arrive with default names and are renamed in a later step.
mydata <- read.csv(
  file = "https://raw.githubusercontent.com/ajbentley/cuny_ms_ds/master/607/agaricus-lepiota.csv",
  header = FALSE
)

Rename the columns of the original data frame

# Attach descriptive names to the 23 columns, in file order.
names(mydata) <- c(
  "poisonous",
  "cap_shape",
  "cap_surface",
  "cap_color",
  "bruises",
  "odor",
  "gill_attachment",
  "gill_spacing",
  "gill_size",
  "gill_color",
  "stalk_shape",
  "stalk_root",
  "stalk_surface_above_ring",
  "stalk_surface_below_ring",
  "stalk_color_above_ring",
  "stalk_color_below_ring",
  "veil_type",
  "veil_color",
  "ring_number",
  "ring_type",
  "spore_print_color",
  "population",
  "habitat"
)

Make a new data frame containing only the color variables

# Keep only the columns whose name contains the literal text "color".
# drop = FALSE guarantees the result is still a data frame even if the
# pattern ever matches a single column (matrix-style indexing would otherwise
# collapse it to a bare vector and break the column-wise code that follows).
df <- mydata[, grep("color", colnames(mydata), fixed = TRUE), drop = FALSE]

# Preview the first six rows of the colour-only table.
head(df, n = 6L)
##   cap_color gill_color stalk_color_above_ring stalk_color_below_ring
## 1         n          k                      w                      w
## 2         y          k                      w                      w
## 3         w          n                      w                      w
## 4         w          n                      w                      w
## 5         g          k                      w                      w
## 6         y          n                      w                      w
##   veil_color spore_print_color
## 1          w                 k
## 2          w                 n
## 3          w                 n
## 4          w                 k
## 5          w                 n
## 6          w                 k

Change color codes to color names in each variable

# library() stops immediately if plyr is not installed; require() would only
# warn and return FALSE, deferring the failure to the first revalue() call.
library(plyr)

# Single lookup table (one-letter code -> colour name) shared by every colour
# column, so the mapping is defined in exactly one place.
color_names <- c(
  w = "white",
  k = "black",
  n = "brown",
  b = "buff",
  h = "chocolate",
  c = "cinnamon",
  g = "gray",
  r = "green",
  o = "orange",
  p = "pink",
  u = "purple",
  e = "red",
  y = "yellow"
)

# Every column of df is a colour variable, so recode them all in one pass.
# `df[] <-` keeps df a data frame with its column names intact. revalue()
# emits one message per column listing the codes in the table that the column
# never uses; those messages are informational only.
df[] <- lapply(df, revalue, replace = color_names)
## The following `from` values were not present in `x`: k, h, o
## The following `from` values were not present in `x`: c
## The following `from` values were not present in `x`: k, h, r, u
## The following `from` values were not present in `x`: k, h, r, u
## The following `from` values were not present in `x`: k, b, h, c, g, r, p, u, e
## The following `from` values were not present in `x`: c, g, p, e

Add poison column to the new dataframe

# Put the edibility column in front of the recoded colour columns.
dfp <- cbind(mydata["poisonous"], df)

Change poison codes to edible / poisonous

# Expand the one-letter edibility codes into readable labels.
dfp[["poisonous"]] <- revalue(
  dfp[["poisonous"]],
  replace = c(e = "edible", p = "poisonous")
)

# Preview the first six rows of the final table.
head(dfp, n = 6L)
##   poisonous cap_color gill_color stalk_color_above_ring
## 1 poisonous     brown      black                  white
## 2    edible    yellow      black                  white
## 3    edible     white      brown                  white
## 4 poisonous     white      brown                  white
## 5    edible      gray      black                  white
## 6    edible    yellow      brown                  white
##   stalk_color_below_ring veil_color spore_print_color
## 1                  white      white             black
## 2                  white      white             brown
## 3                  white      white             brown
## 4                  white      white             black
## 5                  white      white             brown
## 6                  white      white             black

Final note: While coding, I check heads, tails, etc. obsessively, but I don’t include them in my submitted code unless requested.