# Read the agaricus-lepiota CSV; the file carries no header row, so the
# columns are named explicitly right after loading.
mydata <- read.csv(
  file = "https://raw.githubusercontent.com/ajbentley/cuny_ms_ds/master/607/agaricus-lepiota.csv",
  header = FALSE
)
names(mydata) <- c(
  "poisonous", "cap_shape", "cap_surface", "cap_color", "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color", "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)

# Keep only the columns whose name contains "color".
df <- mydata[, grepl("color", names(mydata))]
head(df)
## cap_color gill_color stalk_color_above_ring stalk_color_below_ring
## 1 n k w w
## 2 y k w w
## 3 w n w w
## 4 w n w w
## 5 g k w w
## 6 y n w w
## veil_color spore_print_color
## 1 w k
## 2 w n
## 3 w n
## 4 w k
## 5 w n
## 6 w k
# plyr supplies revalue(). library() stops immediately when the package is
# missing, whereas require() only returns FALSE and lets the script fail
# later at the first revalue() call.
library(plyr)

# Single-letter colour codes -> readable names. One shared lookup serves
# every colour column, so the mapping is written once rather than per column.
color_names <- c(
  w = "white", k = "black", n = "brown", b = "buff", h = "chocolate",
  c = "cinnamon", g = "gray", r = "green", o = "orange", p = "pink",
  u = "purple", e = "red", y = "yellow"
)

# Recode every colour column of df in a single pass. No column uses all 13
# codes, so warn_missing = FALSE silences the expected
# "values were not present" messages.
color_cols <- grep("color", names(df), value = TRUE)
df[color_cols] <- lapply(
  df[color_cols],
  revalue,
  replace = color_names,
  warn_missing = FALSE
)
# Put the class label column in front of the recoded colour columns.
dfp <- cbind(mydata["poisonous"], df)

# Expand the single-letter class codes into readable labels.
dfp[["poisonous"]] <- revalue(
  dfp[["poisonous"]],
  replace = c(e = "edible", p = "poisonous")
)
head(dfp)
## poisonous cap_color gill_color stalk_color_above_ring
## 1 poisonous brown black white
## 2 edible yellow black white
## 3 edible white brown white
## 4 poisonous white brown white
## 5 edible gray black white
## 6 edible yellow brown white
## stalk_color_below_ring veil_color spore_print_color
## 1 white white black
## 2 white white brown
## 3 white white brown
## 4 white white black
## 5 white white brown
## 6 white white black
# Final note: While coding, I check heads/tails/etc. obsessively, but I don't include them in my submitted code unless requested.