Generating Subset Data Frame

In this first step, we read the data from a local file into a data frame called mushrooms. The data frame is then subset so that it only contains mushrooms that live in an urban habitat. The variables kept are whether or not the mushroom is poisonous, the habitat, and characteristics that give us a better understanding of how the mushroom looks: cap shape, cap surface, and stalk shape.

# Read the raw, header-less, comma-separated mushroom records from disk.
mushrooms <- read.table("~/Documents/SCHOOL/SPS/FALL 2017/DATA 607/Mushrooms_HW1/agaricus-lepiota.data.txt", header = FALSE, sep = ",")
# Keep only the rows whose V23 value is "u" and the five columns of interest.
# `%in%` never returns NA, so rows with a missing V23 are dropped, exactly as
# subset() would drop them.
keep_columns <- c("V1", "V2", "V3", "V11", "V23")
mushrooms <- mushrooms[mushrooms$V23 %in% "u", keep_columns]
head(mushrooms, n = 10)
##    V1 V2 V3 V11 V23
## 1   p  x  s   e   u
## 4   p  x  y   e   u
## 14  p  x  y   e   u
## 16  e  s  f   e   u
## 19  p  x  y   e   u
## 20  p  x  s   e   u
## 29  e  f  f   e   u
## 32  p  x  y   e   u
## 37  e  s  f   e   u
## 38  p  x  y   e   u

Adding Column Names

The data above needs to be reworked so that others can understand its meaning. In this first part, we rename the columns.

# Map each raw column identifier to its descriptive name.
column_labels <- c(
  V1 = "edible",
  V2 = "cap-shape",
  V3 = "cap-surface",
  V11 = "stalk-shape",
  V23 = "habitat"
)
# Rename only the columns that are present; any other column keeps its name.
to_rename <- names(mushrooms) %in% names(column_labels)
old_names <- names(mushrooms)[to_rename]
names(mushrooms)[to_rename] <- unname(column_labels[old_names])

Converting Data to Characters, Renaming Data, Adding Back Levels

In this section, we replace each single-letter code with the name it represents. This is accomplished by first converting the values to characters and then replacing each code with the name listed in the agaricus-lepiota.names.txt file. A Stack Overflow question (linked below) was referenced to accomplish this task. Finally, the columns are converted back to factors and the final table is shown below.

https://stackoverflow.com/questions/5824173/replace-a-value-in-a-data-frame-based-on-a-conditional-if-statement-in-r

# Lookup tables: for each column, the single-letter codes (names) and the
# descriptive labels (values) they are replaced with.
code_labels <- list(
  edible = c(e = "edible", p = "poisonous"),
  `cap-shape` = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  `cap-surface` = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  `stalk-shape` = c(e = "enlarging", t = "tapering"),
  habitat = c(u = "urban")
)

# For every column: convert to character, swap each known code for its label
# (values without a lookup entry are left untouched), then convert back to a
# factor.
mushrooms[names(code_labels)] <- Map(
  function(codes, labels) {
    values <- as.character(codes)
    known <- values %in% names(labels)
    values[known] <- unname(labels[values[known]])
    as.factor(values)
  },
  mushrooms[names(code_labels)],
  code_labels
)

head(mushrooms, n = 10)
##       edible cap-shape cap-surface stalk-shape habitat
## 1  poisonous    convex      smooth   enlarging   urban
## 4  poisonous    convex       scaly   enlarging   urban
## 14 poisonous    convex       scaly   enlarging   urban
## 16    edible    sunken     fibrous   enlarging   urban
## 19 poisonous    convex       scaly   enlarging   urban
## 20 poisonous    convex      smooth   enlarging   urban
## 29    edible      flat     fibrous   enlarging   urban
## 32 poisonous    convex       scaly   enlarging   urban
## 37    edible    sunken     fibrous   enlarging   urban
## 38 poisonous    convex       scaly   enlarging   urban