# Open the UCI data dictionary (attribute names and the meaning of each
# single-letter code) in the default browser for reference.
browseURL(
  url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names"
)

# Download the raw data. The file has no header row and is comma-separated;
# character columns are converted to factors on read.
mushroom.master <- read.table(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Inspect the structure of the freshly loaded data frame.
str(object = mushroom.master)
## 'data.frame': 8124 obs. of 23 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# library(XML)
# dimensions.html = htmlTreeParse("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names")
# Replace the default V1..V23 column names with descriptive attribute names,
# listed in column order (the class label comes first).
names(mushroom.master) <- c(
  "class",
  "cap.shape",
  "cap.surface",
  "cap.color",
  "bruises",
  "odor",
  "gill.attachment",
  "gill.spacing",
  "gill.size",
  "gill.color",
  "stalk.shape",
  "stalk.root",
  "stalk.surface.above.ring",
  "stalk.surface.below.ring",
  "stalk.color.above.ring",
  "stalk.color.below.ring",
  "veil.type",
  "veil.color",
  "ring.number",
  "ring.type",
  "spore.print.color",
  "population",
  "habitat"
)

# Confirm the new names were applied.
str(object = mushroom.master)
## 'data.frame': 8124 obs. of 23 variables:
## $ class : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.shape : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap.surface : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ cap.color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ bruises : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill.attachment : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ gill.spacing : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ gill.size : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ gill.color : Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ stalk.shape : Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ stalk.root : Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ stalk.surface.above.ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk.surface.below.ring: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalk.color.above.ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ stalk.color.below.ring : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ veil.type : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ veil.color : Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ ring.number : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ ring.type : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ spore.print.color : Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ population : Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ habitat : Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# Columns kept for the analysis: the class label plus six descriptive
# attributes.
mushroom.subset.variables <- c(
  "class",
  "cap.shape",
  "cap.color",
  "odor",
  "gill.size",
  "spore.print.color",
  "habitat"
)

# Keep only the selected columns, in the order listed above.
mushroom.subset <- mushroom.master[, mushroom.subset.variables]

# Inspect the reduced data frame.
str(object = mushroom.subset)
## 'data.frame': 8124 obs. of 7 variables:
## $ class : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.shape : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap.color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill.size : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ spore.print.color: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ habitat : Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# Expand the single-letter UCI codes into readable labels.
#
# NOTE: `levels(f) <- c(code = "label", ...)` assigns labels purely by
# position and ignores the names of the vector. A factor's levels are sorted
# by code (e.g. "b", "c", "f", "k", "s", "x" for cap.shape), while the lookup
# tables below follow the order of the data dictionary, so a positional
# assignment mislabels most categories (e.g. "x" would become "sunken" instead
# of "convex"). The helper below therefore looks each existing code up by name.

# Relabel the levels of factor `x` using `labels`, a character vector of
# readable labels named by the original codes. The order of `labels` does not
# matter, and codes in `labels` that do not occur in `x` are ignored. Stops
# with an error if `x` has a level without an entry in `labels`. Returns `x`
# with its levels renamed; the underlying integer codes are unchanged.
relabel_levels <- function(x, labels) {
  codes <- levels(x)
  unknown <- setdiff(codes, names(labels))
  if (length(unknown) > 0) {
    # Fail loudly instead of silently creating NA levels.
    stop(
      "No label defined for level(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  levels(x) <- unname(labels[codes])
  x
}

mushroom.subset$class <- relabel_levels(
  mushroom.subset$class,
  c("e" = "edible", "p" = "poisonous")
)
mushroom.subset$cap.shape <- relabel_levels(
  mushroom.subset$cap.shape,
  c("b" = "bell", "c" = "conical", "x" = "convex", "f" = "flat",
    "k" = "knobbed", "s" = "sunken")
)
mushroom.subset$cap.color <- relabel_levels(
  mushroom.subset$cap.color,
  c("n" = "brown", "b" = "buff", "c" = "cinnamon", "g" = "gray",
    "r" = "green", "p" = "pink", "u" = "purple", "e" = "red",
    "w" = "white", "y" = "yellow")
)
mushroom.subset$odor <- relabel_levels(
  mushroom.subset$odor,
  c("a" = "almond", "l" = "anise", "c" = "creosote", "y" = "fishy",
    "f" = "foul", "m" = "musty", "n" = "none", "p" = "pungent",
    "s" = "spicy")
)
mushroom.subset$gill.size <- relabel_levels(
  mushroom.subset$gill.size,
  c("b" = "broad", "n" = "narrow")
)
mushroom.subset$spore.print.color <- relabel_levels(
  mushroom.subset$spore.print.color,
  c("k" = "black", "n" = "brown", "b" = "buff", "h" = "chocolate",
    "r" = "green", "o" = "orange", "u" = "purple", "w" = "white",
    "y" = "yellow")
)
mushroom.subset$habitat <- relabel_levels(
  mushroom.subset$habitat,
  c("g" = "grasses", "l" = "leaves", "m" = "meadows", "p" = "paths",
    "u" = "urban", "w" = "waste", "d" = "woods")
)
str(mushroom.subset)
## 'data.frame': 8124 obs. of 7 variables:
## $ class : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap.shape : Factor w/ 6 levels "bell","conical",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ cap.color : Factor w/ 10 levels "buff","cinnamon",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "almond","creosote",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill.size : Factor w/ 2 levels "broad","narrow": 2 1 1 2 1 1 1 1 2 1 ...
## $ spore.print.color: Factor w/ 9 levels "buff","chocolate",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ habitat : Factor w/ 7 levels "woods","grasses",..: 6 2 4 6 2 2 4 4 2 4 ...
head(mushroom.subset, 20)
## class cap.shape cap.color odor gill.size spore.print.color
## 1 poisonous convex brown pungent narrow black
## 2 edible convex yellow almond broad brown
## 3 edible bell white anise broad brown
## 4 poisonous convex white pungent narrow black
## 5 edible convex gray none broad brown
## 6 edible convex yellow almond broad black
## 7 edible bell white almond broad black
## 8 edible bell white anise broad brown
## 9 poisonous convex white pungent narrow black
## 10 edible bell yellow almond broad black
## 11 edible convex yellow anise broad brown
## 12 edible convex yellow almond broad black
## 13 edible bell yellow almond broad brown
## 14 poisonous convex white pungent narrow brown
## 15 edible convex brown none broad black
## 16 edible sunken gray none narrow brown
## 17 edible flat white none broad brown
## 18 poisonous convex brown pungent narrow black
## 19 poisonous convex white pungent narrow brown
## 20 poisonous convex brown pungent narrow brown
## habitat
## 1 urban
## 2 grasses
## 3 meadows
## 4 urban
## 5 grasses
## 6 grasses
## 7 meadows
## 8 meadows
## 9 grasses
## 10 meadows
## 11 grasses
## 12 meadows
## 13 grasses
## 14 urban
## 15 grasses
## 16 urban
## 17 grasses
## 18 grasses
## 19 urban
## 20 urban