CUNY Data 607 - Week 1 Assignment – Loading Data into a Data Frame

R Code Performed to Transform Data

Load the .csv data into a dataframe

# Location of the raw mushroom data: a comma-separated file with no header
# row, so read.table() assigns the default column names V1, V2, ...
data <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# stringsAsFactors is spelled out because its default flipped from TRUE to
# FALSE in R 4.0.0; the recoding steps further down manipulate factor
# levels, so the columns are read as factors on every R version.
mushroom <- read.table(
  file = data,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

Subset the data

# Keep five of the columns (V1, V2, V4, V6 and V23), selected by name from
# the default V<n> names that read.table() assigned.
subset <- mushroom[paste0("V", c(1, 2, 4, 6, 23))]

Rename the headers

# Give the retained columns descriptive headers, in the same order in which
# they were selected. The hyphenated names are not syntactic R names, so
# they must be wrapped in backticks when accessed with `$`.
colnames(subset) <- c(
  "Class",
  "Cap-Shape",
  "Cap-Color",
  "Odor",
  "Habitat"
)

Replace abbreviations with descriptions*

# Translate one-letter codes into readable descriptions.
#
# values: a factor or character vector of codes.
# lookup: a named character vector; names are the codes, values are the
#         descriptions that replace them.
#
# Returns a vector of the same length and kind (factor in, factor out;
# character in, character out). Codes that are missing from `lookup`, and
# NA, are passed through unchanged.
recode_codes <- function(values, lookup) {
  was_factor <- is.factor(values)

  # Work on plain strings so the replacement does not depend on which
  # levels the input factor happens to have.
  values <- as.character(values)
  known <- values %in% names(lookup)
  values[known] <- unname(lookup[values[known]])

  # Rebuilding the factor keeps only the levels that are actually present,
  # so the original one-letter codes do not linger as unused levels.
  if (was_factor) {
    factor(values)
  } else {
    values
  }
}

subset$Class <- recode_codes(
  subset$Class,
  c(e = "edible", p = "poisonous")
)

subset$`Cap-Shape` <- recode_codes(
  subset$`Cap-Shape`,
  c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  )
)

subset$`Cap-Color` <- recode_codes(
  subset$`Cap-Color`,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

subset$Odor <- recode_codes(
  subset$Odor,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

subset$Habitat <- recode_codes(
  subset$Habitat,
  c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

Transformed Data Table

# Preview the first six rows of the transformed data frame.
head(subset, n = 6L)

##       Class Cap-Shape Cap-Color    Odor Habitat
## 1 poisonous    convex     brown pungent   urban
## 2    edible    convex    yellow  almond grasses
## 3    edible      bell     white   anise meadows
## 4 poisonous    convex     white pungent   urban
## 5    edible    convex      gray    none grasses
## 6    edible    convex    yellow  almond grasses

*Source for the approach used in “Replace abbreviations with descriptions”: https://stackoverflow.com/questions/11810605/replace-contents-of-factor-column-in-r-dataframe

CUNY Data 607 - Week 1 Assignment – Loading Data into a Data Frame

Emilie M Bolduc

9/2/2017

Assignment

R Code Performed to Transform Data

Load the .csv data into a dataframe

Subset the data

Rename the headers

Replace abbreviations with descriptions*

Transformed Data Table