Assignment 3

Assignment Task

The task is to study the dataset and the associated description of the data (i.e. the "data dictionary"). You may need to look around a bit, but it's there! You should take the data and create a data frame with a subset of the columns (and, if you like, rows) in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data — for example, in the appropriate column, "e" might become "edible." Your deliverable is the R code to perform these transformation tasks.

Get the data

Read the data from the CSV file at the provided URL, then parse the downloaded text and store the result in my_data.

# Attach RCurl, which provides getURL(). library() stops with an error if the
# package is not installed, whereas require() only warns and returns FALSE,
# which would defer the failure to the first getURL() call.
library(RCurl)
## Loading required package: RCurl

## Loading required package: bitops
# Download the raw mushroom data file as a single string.
myurl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
e_data <- getURL(myurl)
# Stop early on an empty download instead of failing obscurely in read.csv().
stopifnot(is.character(e_data), length(e_data) == 1L, nzchar(e_data))

# Parse the comma-separated text; the file has no header row, so columns are
# named V1, V2, ... by read.csv(). stringsAsFactors is set explicitly because
# its default changed from TRUE to FALSE in R 4.0.0 and the recoding further
# down works on factor levels.
my_data <- read.csv(
  text = e_data,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Columns are selected by position (up to 23) later in the script.
stopifnot(ncol(my_data) >= 23L)
head(my_data)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Construct the data dictionary

Create a subset containing the desired fields from the stored data.

# Positions of the columns to keep from the raw data frame.
keep_cols <- c(1, 4, 6, 9, 20, 23)
my_subset <- my_data[, keep_cols]

# Preview the first rows of the reduced data frame.
preview <- head(my_subset)
knitr::kable(preview)
V1 V4 V6 V9 V20 V23
p n p n p u
e y a b p g
e w l b p m
p w p n p u
e g n b e g
e y a b p g

Rename the fields

Give each column a meaningful name that represents its data.

# Descriptive names for the six selected columns, in column order.
field_names <- c(
  "class",
  "cap-color",
  "odor",
  "gill-size",
  "ring-type",
  "habitat"
)
colnames(my_subset) <- field_names

# Preview the first rows with the new column names.
knitr::kable(head(my_subset))
class cap-color odor gill-size ring-type habitat
p n p n p u
e y a b p g
e w l b p m
p w p n p u
e g n b e g
e y a b p g

Replace abbreviations

Use factor levels to replace each abbreviated code with its full attribute category name.

# Data dictionary for the selected columns: each entry maps the one-letter
# codes found in the raw data (names) to their descriptions (values).
# cap-color deliberately has no "t"/"f" entries: those codes belong to the
# separate bruises attribute and never occur in this column.
code_labels <- list(
  "class" = c(e = "edible", p = "poison"),
  "cap-color" = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  "odor" = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  "gill-size" = c(b = "broad", n = "narrow"),
  "ring-type" = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  ),
  "habitat" = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Recode a vector of abbreviations into a factor of descriptive labels.
#
# x:      character vector or factor of one-letter codes.
# lookup: named character vector, names = codes, values = labels.
#
# Returns a factor the same length as x whose levels are the labels in
# `lookup` (in dictionary order), so none of the raw codes linger as unused
# levels. Values with no entry in `lookup` are kept unchanged, with a warning,
# rather than being turned into NA.
recode_column <- function(x, lookup) {
  values <- as.character(x)
  unknown <- setdiff(values[!is.na(values)], names(lookup))
  if (length(unknown) > 0) {
    warning(
      "Codes without a dictionary entry left unchanged: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  factor(
    values,
    levels = c(names(lookup), unknown),
    labels = c(unname(lookup), unknown)
  )
}

# Apply each column's dictionary to the matching column of my_subset.
recode_cols <- names(code_labels)
my_subset[recode_cols] <- Map(
  recode_column,
  my_subset[recode_cols],
  code_labels
)

knitr::kable(head(my_subset))
class cap-color odor gill-size ring-type habitat
poison brown pungent narrow pendant urban
edible yellow almond broad pendant grasses
edible white anise broad pendant meadows
poison white pungent narrow pendant urban
edible gray none broad evanescent grasses
edible yellow almond broad pendant grasses