Assignment Solution for the Data607 -Week 1

Load the data from the git repository (agaricus-lepiota.data) using rcurl package.

require(RCurl)
## Loading required package: RCurl
## Loading required package: bitops
# Download the raw agaricus-lepiota.data file from GitHub as a single string.
# NOTE(review): ssl.verifypeer = FALSE disables TLS certificate verification
# for this request; kept for compatibility, but consider removing it if the
# local CA bundle is available.
dataLink <- getURL(
  "https://raw.githubusercontent.com/charlsjoseph/CUNY-Data607/master/week1/607-week1-Assignment/agaricus-lepiota.data",
  ssl.verifypeer = FALSE
)

# Parse the downloaded text as CSV. The file has no header row, so
# header = FALSE is required: with the read.csv() default (header = TRUE) the
# first record is consumed as column names (seen as `p x s p.1`) and is lost
# from the data. Columns are therefore named V1, V2, ... until renamed; any
# printed output rendered before this fix will differ in its first row.
agaricus.lepiota <- read.csv(text = dataLink, header = FALSE)

Extracting the 4 columns Using slicing [] operator to fetch all rows and specific columns.

df[ rows, columns ]

  1. Type ( edible=e, poisonous=p )
  2. cap-shape (bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s)
  3. cap-surface (fibrous=f,grooves=g,scaly=y,smooth=s)
  4. odor (almond=a,anise=l,creosote=c,fishy=y,foul=f, musty=m,none=n,pungent=p,spicy=s)
# Keep every row but only columns 1, 2, 3 and 6 of the full data set
# (type, cap-shape, cap-surface and odor), then preview the first six rows.
edat1 <- agaricus.lepiota[, c(1, 2, 3, 6)]
head(edat1, n = 6L)
##   p x s p.1
## 1 e x s   a
## 2 e b s   l
## 3 p x y   p
## 4 e x s   n
## 5 e x y   a
## 6 e b s   a
# Give the four retained columns descriptive names, in positional order,
# and preview the first six rows again to confirm the rename.
names(edat1) <- c(
  "type",
  "capshape",
  "capsurface",
  "odor"
)
head(edat1, n = 6L)
##   type capshape capsurface odor
## 1    e        x          s    a
## 2    e        b          s    l
## 3    p        x          y    p
## 4    e        x          s    n
## 5    e        x          y    a
## 6    e        b          s    a

Tranforming data using sapply function. Below code transform the abbreviation into a meaningfull name.

# Decode the single-letter class codes in `type` into readable labels.
# A named-vector lookup is used instead of sapply() + switch(): it is
# vectorized and always produces a plain character vector, giving NA for any
# code that has no label. With switch(), an unmatched code returns NULL, which
# makes sapply() fall back to a list and silently turns the column into a
# list column.
type_labels <- c(
  e = "edible",
  p = "pois"
)
# as.character() makes the lookup go by code, also when the column is a factor;
# unname() drops the codes that the lookup would otherwise leave as names.
edat1$type <- unname(type_labels[as.character(edat1$type)])


# Decode the single-letter cap-shape codes in `capshape` into readable labels.
# A named-vector lookup is used instead of sapply() + switch(): it is
# vectorized and always produces a plain character vector, giving NA for any
# code that has no label. With switch(), an unmatched code returns NULL, which
# makes sapply() fall back to a list and silently turns the column into a
# list column.
capshape_labels <- c(
  b = "bell",
  c = "conical",
  f = "flat",
  k = "knobbed",
  s = "sunken",
  x = "convex"
)
# as.character() makes the lookup go by code, also when the column is a factor;
# unname() drops the codes that the lookup would otherwise leave as names.
edat1$capshape <- unname(capshape_labels[as.character(edat1$capshape)])

# Decode the single-letter cap-surface codes in `capsurface` into readable
# labels. A named-vector lookup is used instead of sapply() + switch(): it is
# vectorized and always produces a plain character vector, giving NA for any
# code that has no label. With switch(), an unmatched code returns NULL, which
# makes sapply() fall back to a list and silently turns the column into a
# list column.
capsurface_labels <- c(
  f = "fibrous",
  g = "grooves",
  y = "scaly",
  s = "smooth"
)
# as.character() makes the lookup go by code, also when the column is a factor;
# unname() drops the codes that the lookup would otherwise leave as names.
edat1$capsurface <- unname(capsurface_labels[as.character(edat1$capsurface)])

# Decode the single-letter odor codes in `odor` into readable labels.
# A named-vector lookup is used instead of sapply() + switch(): it is
# vectorized and always produces a plain character vector, giving NA for any
# code that has no label. With switch(), an unmatched code returns NULL, which
# makes sapply() fall back to a list and silently turns the column into a
# list column.
odor_labels <- c(
  a = "almond",
  l = "anise",
  c = "creosote",
  y = "fishy",
  f = "foul",
  m = "musty",
  n = "none",
  p = "pungent",
  s = "spicy"
)
# as.character() makes the lookup go by code, also when the column is a factor;
# unname() drops the codes that the lookup would otherwise leave as names.
edat1$odor <- unname(odor_labels[as.character(edat1$odor)])

Printing the head of transformed data.

# Show the first six rows of the decoded data frame.
head(edat1, n = 6L)
##     type capshape capsurface    odor
## 1 edible   convex     smooth  almond
## 2 edible     bell     smooth   anise
## 3   pois   convex      scaly pungent
## 4 edible   convex     smooth    none
## 5 edible   convex      scaly  almond
## 6 edible     bell     smooth  almond