R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Import CSV file locally

# Load the raw mushroom records from the working directory. The file has no
# header row, so read.csv() names the columns V1, V2, ...
# NOTE(review): with sep = "," any unquoted comma-separated fields are already
# split into separate columns here, while the later str_split_fixed() call
# expects V1 to hold comma-joined codes -- confirm how the file is quoted.
mushroom <- read.csv(
  file = "Mushroom_Data.csv",
  header = FALSE,
  sep = ","
)

Import packages for data transformation

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.3
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.3

Insert 4 new columns

# Append four placeholder columns to the raw data frame, each filled with NA.
mushroom[
  c("cap_shape", "cap_surface", "cap_color", "other")
] <- NA

Create new dataframe using existing dataframe, with a total of 6 columns

# Split each comma-joined record in V1 into six pieces; the sixth piece keeps
# whatever remains after the fifth comma.
# str_split_fixed() returns a character matrix, so convert it to a data frame
# (keeping the pieces as character, not factor). That way names<- labels the
# columns and the per-column recoding further down works on a real data frame.
mushroom.split <- as.data.frame(
  str_split_fixed(mushroom$V1, ",", 6),
  stringsAsFactors = FALSE
)

Rename all columns in new dataframe

# Label the six columns. colnames<- is used instead of names<- because on a
# matrix names<- attaches per-element names rather than column names, so the
# labels would silently never reach the columns. colnames<- sets the column
# labels for a matrix and a data frame alike.
colnames(mushroom.split) <- c(
  "edible_poison", "cap_shape", "cap_surface",
  "cap_color", "bruises_flag", "other"
)

Replace values in first column of new dataframe

# Expand the one-letter codes in column 1 into readable labels.
mushroom.split[, 1] <- replace(
  mushroom.split[, 1], mushroom.split[, 1] == "p", "poisonous"
)
mushroom.split[, 1] <- replace(
  mushroom.split[, 1], mushroom.split[, 1] == "e", "edible"
)

Replace values in second column of new dataframe

# Expand the one-letter codes in column 2 into readable labels.
# Each step rewrites only the cells that still hold the given single letter,
# so already-expanded labels are never touched again.
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "x", "convex"
)
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "b", "bell"
)
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "c", "conical"
)
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "f", "flat"
)
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "k", "knobbed"
)
mushroom.split[, 2] <- replace(
  mushroom.split[, 2], mushroom.split[, 2] == "s", "sunken"
)

Replace values in third column of new dataframe

# Expand the one-letter codes in column 3 into readable labels.
mushroom.split[, 3] <- replace(
  mushroom.split[, 3], mushroom.split[, 3] == "f", "fibrous"
)
mushroom.split[, 3] <- replace(
  mushroom.split[, 3], mushroom.split[, 3] == "g", "grooves"
)
mushroom.split[, 3] <- replace(
  mushroom.split[, 3], mushroom.split[, 3] == "y", "scaly"
)
mushroom.split[, 3] <- replace(
  mushroom.split[, 3], mushroom.split[, 3] == "s", "smooth"
)

Replace values in fourth column of new dataframe

# Expand the one-letter codes in column 4 into readable colour labels.
mushroom.split[, 4][mushroom.split[, 4] == "n"] <- "brown"
mushroom.split[, 4][mushroom.split[, 4] == "b"] <- "buff"
mushroom.split[, 4][mushroom.split[, 4] == "c"] <- "cinnamon"
mushroom.split[, 4][mushroom.split[, 4] == "g"] <- "gray"
# Green is coded "r" in the UCI Mushroom data dictionary, which every other
# code in this script follows. The script previously compared against "f",
# which is not a cap-colour code, so green caps were never relabelled.
# NOTE(review): assumes Mushroom_Data.csv uses the UCI coding -- confirm.
mushroom.split[, 4][mushroom.split[, 4] == "r"] <- "green"
mushroom.split[, 4][mushroom.split[, 4] == "p"] <- "pink"
mushroom.split[, 4][mushroom.split[, 4] == "u"] <- "purple"
mushroom.split[, 4][mushroom.split[, 4] == "e"] <- "red"
mushroom.split[, 4][mushroom.split[, 4] == "w"] <- "white"
mushroom.split[, 4][mushroom.split[, 4] == "y"] <- "yellow"

Replace values in fifth column of new dataframe

# Expand the one-letter codes in column 5 into readable labels.
mushroom.split[, 5] <- replace(
  mushroom.split[, 5], mushroom.split[, 5] == "t", "bruises"
)
mushroom.split[, 5] <- replace(
  mushroom.split[, 5], mushroom.split[, 5] == "f", "no"
)

Attempt to clear extra data from the new dataframe. This line throws an error, but the intention is to get rid of the sixth ("other") column.

# The original attempt (kept below for reference) fails when mushroom.split is
# a character matrix, which is what str_split_fixed() returns: assigning NULL
# only removes columns from lists / data frames.
#mushroom.split[6] <- NULL
# Keeping the first five columns by position drops the sixth for a matrix or a
# data frame alike, and is safe to re-run (it never removes a further column).
mushroom.split <- mushroom.split[, seq_len(5), drop = FALSE]