library(repmis)
library(pander)
library(stringr)
# Download the UCI mushroom data set. The file has no header row, so
# header = FALSE keeps the first mushroom record as data instead of letting it
# be consumed as the column names.
mushrooms <- source_data(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE
)
## Downloading data from: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data
## SHA-1 hash of the downloaded data file is:
## 7277e7bc03888ea5684b2281a62e0a9350caa00c
# Print the first rows of the raw data as a grid table. pander selects the
# table layout through `style`; `type` is not a pander option and was silently
# dropped, so the grid layout was never applied.
pander(head(mushrooms), style = "grid", caption = "Sample of Original Dataset")
Sample of Original Dataset
p x s n t p f c n k e e s s w w p w o p k s u
e x s y t a f c b k e c s s w w p w o p n n g
e b s w t l f c b n e c s s w w p w o p n n m
p x y w t p f c n n e e s s w w p w o p k s u
e x s g f n f w b k t e s s w w p w o e n a g
e x y y t a f c b n e c s s w w p w o p k n g
e b s w t a f c b g e c s s w w p w o p k n m
# Keep only the class label plus four descriptive attributes (selected by
# column position) and give them readable names.
sub <- mushrooms[, c(1, 4, 6, 18, 23)]
names(sub) <- c("Poisonous", "cap_color", "Odor", "Veil_color", "Habitat")

# One-letter class codes and the labels that replace them, in matching order.
haves <- c("e", "p")
wants <- c("edible", "poisonous")

# Translate every recognised code in a single vectorised pass; cells holding
# any other value are left as they are.
code_pos <- match(sub[["Poisonous"]], haves)
recognised <- !is.na(code_pos)
sub[["Poisonous"]][recognised] <- wants[code_pos[recognised]]
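## I tried to make this a function, and believe I was very close, but could not make it work.
## It was a local/global scope problem: the function changes only its own local copy of `data`
## and never returns it. Ending the body with `data` and assigning the result, e.g.
## sub <- convert(sub, "Poisonous"), makes it work.
##convert <- function ( data, col, have_vector = haves, want_vector = wants){
##  len <- length(have_vector)
##    for (i in 1:len){
##       data[,col][data[, col] == have_vector[i]] <- want_vector[i]
##    }
##  data
##}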
# Cap-colour codes, listed in the same order as the colour names appear in the
# data dictionary entry below.
haves <- c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y")
string <- "brown=n,buff=b,cinnamon=c,gray=g,green=r, pink=p,purple=u,red=e,white=w,yellow=y"
# The dictionary entry is pasted in as-is and every run of 2-16 letters is
# extracted as a colour name, which saves typing the labels by hand. The
# one-letter codes are too short for this pattern, so they are listed manually
# above.
wants <- str_extract_all(string, pattern = "[[:alpha:]]{2,16}")[[1]]

# Swap each recognised code for its colour name in one vectorised step;
# unrecognised values stay untouched.
colour_pos <- match(sub[["cap_color"]], haves)
is_code <- !is.na(colour_pos)
sub[["cap_color"]][is_code] <- wants[colour_pos[is_code]]

# Odor codes, in the same order as the names in the data dictionary entry.
haves <- c("a", "l", "c", "y", "f", "m", "n", "p", "s")
string <- "almond=a,anise=l,creosote=c,fishy=y,foul=f, musty=m,none=n,pungent=p,spicy=s"
# Every run of 2-16 letters in the dictionary entry is an odor name.
wants <- str_extract_all(string, pattern = "[[:alpha:]]{2,16}")[[1]]

# Build a code -> name lookup and apply it only to cells that hold a known
# code, leaving every other value unchanged.
odor_names <- setNames(wants, haves)
known <- sub[["Odor"]] %in% haves
sub[["Odor"]][known] <- unname(odor_names[sub[["Odor"]][known]])

# Veil-colour codes, in the same order as the names in the dictionary entry.
haves <- c("n", "o", "w", "y")
string <- "brown=n,orange=o,white=w,yellow=y"
# Pull the colour names (runs of 2-16 letters) out of the dictionary entry.
veil_labels <- str_extract_all(string, pattern = "[[:alpha:]]{2,16}")
wants <- veil_labels[[1]]

# Replace each code with the label at the same position; cells that do not
# hold one of the codes keep their current value.
veil_pos <- match(sub[["Veil_color"]], haves)
has_label <- !is.na(veil_pos)
sub[["Veil_color"]][has_label] <- wants[veil_pos[has_label]]

# Recode the one-letter Habitat codes into the names from the data dictionary.
string <- "habitat: grasses=g,leaves=l,meadows=m,paths=p, urban=u,waste=w,woods=d"
# Parse each "name=code" pair so every label stays attached to its own code.
# Extracting bare words instead also captures the leading "habitat" attribute
# label, which yields eight labels for seven codes and shifts every habitat
# name by one position (g -> "habitat", l -> "grasses", ...).
pairs <- str_match_all(string, "([[:alpha:]]+)=([[:alpha:]])")[[1]]
wants <- pairs[, 2]
haves <- pairs[, 3]
# Guard against a malformed dictionary string producing no or duplicate codes.
stopifnot(length(haves) > 0, !anyDuplicated(haves))

# Replace each recognised code with its habitat name in one vectorised step;
# cells that do not hold a known code are left unchanged.
habitat_pos <- match(sub[["Habitat"]], haves)
is_code <- !is.na(habitat_pos)
sub[["Habitat"]][is_code] <- wants[habitat_pos[is_code]]

# Print the first 20 recoded rows as a grid table. pander selects the table
# layout through `style`; `type` is not a pander option and was silently
# dropped, so the grid layout was never applied.
pander(head(sub, 20), style = "grid", caption = "Sample of Transformed Dataset")
Sample of Transformed Dataset
Poisonous cap_color Odor Veil_color Habitat
edible yellow almond white habitat
edible white anise white leaves
poisonous white pungent white paths
edible gray none white habitat
edible yellow almond white habitat
edible white almond white leaves
edible white anise white leaves
poisonous white pungent white habitat
edible yellow almond white leaves
edible yellow anise white habitat
edible yellow almond white leaves
edible yellow almond white habitat
poisonous white pungent white paths
edible brown none white habitat
edible gray none white paths
edible white none white habitat
poisonous brown pungent white habitat
poisonous white pungent white paths
poisonous brown pungent white paths
edible yellow almond white leaves