Assignment Solution for Data607 - Week 1
Load the data (agaricus-lepiota.data) from the Git repository using the RCurl package.
require(RCurl)
## Loading required package: RCurl
## Loading required package: bitops
# Download the raw mushroom data set from GitHub as a single string.
# NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
# verification. It is kept here to preserve the existing behaviour, but it
# should be dropped once the local CA bundle is confirmed to work.
dataLink <- getURL(
  "https://raw.githubusercontent.com/charlsjoseph/CUNY-Data607/master/week1/607-week1-Assignment/agaricus-lepiota.data",
  ssl.verifypeer = FALSE
)
# The file has no header row. With the default header = TRUE the first record
# was consumed as column names (producing names such as "p", "x", "s", "p.1")
# and was therefore missing from the data. Read every line as data instead;
# columns get the default names V1, V2, ... and are renamed further below.
agaricus.lepiota <- read.csv(text = dataLink, header = FALSE)
Extracting the four columns of interest using the slicing operator [] to fetch all rows and specific columns.
df[ rows, columns ]
# Keep every row but only columns 1, 2, 3 and 6 of the raw data frame.
edat1 <- agaricus.lepiota[, c(1, 2, 3, 6)]
# Preview the first six rows of the subset.
head(edat1, n = 6L)
## p x s p.1
## 1 e x s a
## 2 e b s l
## 3 p x y p
## 4 e x s n
## 5 e x y a
## 6 e b s a
# Give the four selected columns descriptive names.
names(edat1) <- c(
  "type",
  "capshape",
  "capsurface",
  "odor"
)
# Preview the first six rows with the new column names.
head(edat1, n = 6L)
## type capshape capsurface odor
## 1 e x s a
## 2 e b s l
## 3 p x y p
## 4 e x s n
## 5 e x y a
## 6 e b s a
Transforming the data. The code below transforms each abbreviation into a meaningful name.
# Translate single-letter codes into descriptive labels.
#
# codes:  vector (character or factor) of abbreviations.
# labels: named character vector; names are the abbreviations, values are the
#         labels to substitute.
# Returns an unnamed character vector the same length as `codes`. A code that
# is missing from `labels` becomes NA. The previous sapply()/switch() form
# returned NULL for such a code, which silently turned the column into a list.
decode_codes <- function(codes, labels) {
  unname(labels[as.character(codes)])
}

# Mushroom class.
edat1$type <- decode_codes(
  edat1$type,
  c(e = "edible", p = "pois")
)

# Cap shape.
edat1$capshape <- decode_codes(
  edat1$capshape,
  c(
    b = "bell",
    c = "conical",
    f = "flat",
    k = "knobbed",
    s = "sunken",
    x = "convex"
  )
)

# Cap surface.
edat1$capsurface <- decode_codes(
  edat1$capsurface,
  c(
    f = "fibrous",
    g = "grooves",
    y = "scaly",
    s = "smooth"
  )
)

# Odor.
edat1$odor <- decode_codes(
  edat1$odor,
  c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
)
Printing the head of transformed data.
# Show the first six rows of the decoded data frame.
head(edat1, n = 6L)
## type capshape capsurface odor
## 1 edible convex smooth almond
## 2 edible bell smooth anise
## 3 pois convex scaly pungent
## 4 edible convex smooth none
## 5 edible convex scaly almond
## 6 edible bell smooth almond