Importing the data and writing it to a file

# Download the raw mushroom data set from the UCI repository.
# The file is read as comma-separated text without a header row, and
# strings are kept as character values rather than factors.
theUrl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushroomsData <- read.table(
  file = theUrl,
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Save a local comma-separated copy of the data set.
write.table(mushroomsData, file = "mushroomsData.csv", sep = ",")

Once the data was written to the .csv file, I uploaded that file to GitHub; the data is now loaded from the GitHub link.

# Reload the data set from the copy hosted on GitHub, then preview it.
giturl <- "https://raw.githubusercontent.com/ahussan/DATA_607_CUNY_SPS/master/Week_1_Assignment/mushroomsData.csv"
mushroomsData <- read.table(file = giturl, sep = ",", stringsAsFactors = FALSE)
head(mushroomsData)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
# Report how many rows and columns were loaded.
cat("Number of rows in the dataset: ", nrow(mushroomsData), "\n")
## Number of rows in the dataset:  8124
cat("Number of columns in the dataset: ", ncol(mushroomsData), "\n")
## Number of columns in the dataset:  23

Select columns and create data sets

Let’s select a few columns to work with. We are going to select the following columns: Edible/Poisonous, Cap-Shape, Cap-Surface, Cap-Color, and Odor.

###
# Keep the first four columns as a data frame and pull the sixth column
# out as a plain vector.
firstDF <- mushroomsData[, seq_len(4)]
secondDf <- mushroomsData[[6]]
# Bind the extra column onto the first four; the new column takes the
# name of the variable it came from (secondDf).
mymushrooms <- cbind(firstDF, secondDf)
head(mymushrooms)
##   V1 V2 V3 V4 secondDf
## 1  p  x  s  n        p
## 2  e  x  s  y        a
## 3  e  b  s  w        l
## 4  p  x  y  w        p
## 5  e  x  s  g        n
## 6  e  x  y  y        a

Add column header

# Give the five selected columns descriptive names, then preview the result.
names(mymushrooms) <- c(
  "Edible/Poisonous", "Cap-Shape", "Cap-Surface", "Cap-Color", "Odor"
)
head(mymushrooms)
##   Edible/Poisonous Cap-Shape Cap-Surface Cap-Color Odor
## 1                p         x           s         n    p
## 2                e         x           s         y    a
## 3                e         b           s         w    l
## 4                p         x           y         w    p
## 5                e         x           s         g    n
## 6                e         x           y         y    a

Data transformation

Let’s write a few functions to transform the data in each column

# Translate a one-letter class code into its descriptive label.
#
# key: a single class code ('p' or 'e'). It is coerced with as.character()
#      so that a factor value is matched by its label; without the coercion
#      switch() would dispatch on the factor's integer code instead.
# Returns 'poisonous' or 'edible', or NA_character_ for any other code, so
# that mapping a whole column always yields a character vector.
transformClassData <- function(key){
  switch(as.character(key),
    'p' = 'poisonous',
    'e' = 'edible',
    NA_character_
  )
}

# Translate a one-letter cap-shape code into its descriptive label.
#
# key: a single cap-shape code. It is coerced with as.character() so that a
#      factor value is matched by its label; without the coercion switch()
#      would dispatch on the factor's integer code instead.
# Returns the matching label, or NA_character_ for a code that is not
# listed, so that mapping a whole column always yields a character vector.
transformCapShapeData <- function(key){
  switch(as.character(key),
         'b' = 'bell',
         'c' = 'conical',
         'x' = 'convex',
         'f' = 'flat',
         'k' = 'knobbed',
         's' = 'sunken',
         NA_character_
  )
}

# Translate a one-letter cap-surface code into its descriptive label.
#
# key: a single cap-surface code. It is coerced with as.character() so that
#      a factor value is matched by its label; without the coercion switch()
#      would dispatch on the factor's integer code instead.
# Returns the matching label, or NA_character_ for a code that is not
# listed, so that mapping a whole column always yields a character vector.
transformCapSurfaceData <- function(key){
  switch(as.character(key),
         'f' = 'fibrous',
         'g' = 'grooves',
         'y' = 'scaly',
         's' = 'smooth',
         NA_character_
  )
}

# Translate a one-letter cap-color code into its descriptive label.
#
# key: a single cap-color code. It is coerced with as.character() so that a
#      factor value is matched by its label; without the coercion switch()
#      would dispatch on the factor's integer code instead.
# Returns the matching label, or NA_character_ for a code that is not
# listed, so that mapping a whole column always yields a character vector.
transformCapColorData <- function(key){
  switch(as.character(key),
         'n' = 'brown',
         'b' = 'buff',
         'c' = 'cinnamon',
         'g' = 'gray',
         'r' = 'green',
         'p' = 'pink',
         'u' = 'purple',
         'e' = 'red',
         'w' = 'white',
         'y' = 'yellow',
         NA_character_
  )
}

# Translate a one-letter odor code into its descriptive label.
#
# key: a single odor code. It is coerced with as.character() so that a
#      factor value is matched by its label rather than its integer code.
# Returns the matching label, or NA_character_ for a code that is not
# listed, so that mapping a whole column always yields a character vector.
transformOdorData <- function(key){
  switch(as.character(key),
         'a' = 'almond',
         'l' = 'anise',
         'c' = 'creosote',
         'y' = 'fishy',
         'f' = 'foul',
         'm' = 'musty',
         'n' = 'none',
         'p' = 'pungent',
         's' = 'spicy',
         NA_character_
  )
}

Now that we have all the functions in place, we can replace the abbreviations with the corresponding values:

# Replace the abbreviations in each column with their descriptive labels.
# vapply() with character(1) requires every transform call to return exactly
# one string, so an unexpected result stops with an error; sapply() would
# instead change the column's type silently.
mymushrooms$`Edible/Poisonous` <- vapply(
  mymushrooms$`Edible/Poisonous`, transformClassData, character(1)
)
mymushrooms$`Cap-Shape` <- vapply(
  mymushrooms$`Cap-Shape`, transformCapShapeData, character(1)
)
mymushrooms$`Cap-Surface` <- vapply(
  mymushrooms$`Cap-Surface`, transformCapSurfaceData, character(1)
)
mymushrooms$`Cap-Color` <- vapply(
  mymushrooms$`Cap-Color`, transformCapColorData, character(1)
)
mymushrooms$Odor <- vapply(mymushrooms$Odor, transformOdorData, character(1))

Verify our data frame

Let’s take a look at the head of our data frame:

# Preview the first six rows of the transformed data frame.
head(mymushrooms, n = 6L)
##   Edible/Poisonous Cap-Shape Cap-Surface Cap-Color    Odor
## 1        poisonous    convex      smooth     brown pungent
## 2           edible    convex      smooth    yellow  almond
## 3           edible      bell      smooth     white   anise
## 4        poisonous    convex       scaly     white pungent
## 5           edible    convex      smooth      gray    none
## 6           edible    convex       scaly    yellow  almond

End of this file