Note: The solution below shows how to read and manipulate data in different ways.
#install.packages("RCurl")
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.2.5
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.2.5
Load Mushroom Data Set from UCI repository into an R data frame.
# Address of the raw Mushroom data file on the UCI repository.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Fetch the body of that URL and keep it in memory as text.
mushroom_dataSet <- getURL(url = url)
# mushroom_dataSet  # uncomment to print the raw downloaded text
Create a data table using the data at the URL
# Parse the comma-separated file straight from the URL. No header row is read,
# so the columns receive R's default names (V1, V2, ...).
mushrooms_tbl <- read.table(url, sep = ",", header = FALSE)
# Preview the first six rows.
head(mushrooms_tbl, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Write Mushroom Data Set to a local folder
# Save the downloaded text to disk exactly as received.
# `mushroom_dataSet` is the raw file body held as a character string, so
# write.csv() would coerce it to a one-cell table (header "x", the whole file
# quoted as a single field) instead of producing the comma-separated data.
# cat() with sep = "" writes the text unchanged.
# NOTE(review): the absolute path is machine-specific -- confirm the folder
# exists before running this elsewhere.
cat(
  mushroom_dataSet,
  file = "E:/0_MSC/github/MATS_DATA607_/W1/Mushroom.csv",
  sep = ""
)
Read the downloaded text (the same data that was written to the file above) into a data frame
# Parse the in-memory text into a data frame. No header row is read, and the
# single-letter codes are kept as character strings rather than factors.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change how the header is handled.
mushroome_df <- read.csv(
  text = mushroom_dataSet,
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Preview the first six rows.
head(mushroome_df)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
#head(mushroom_data)
Analysing the structure of the Mushroom data
# Shape of the data frame, reported as rows then columns.
dim(mushroome_df)
## [1] 8124 23
# Column names; no header row was read, so these are R's defaults.
colnames(mushroome_df)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11"
## [12] "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22"
## [23] "V23"
Create a data set containing only the columns selected for analysis. Three separate objects are created because the selected columns are not contiguous.
# Columns 1 to 4, kept together as a data frame.
stgMushRoom1 <- mushrooms_tbl[, seq_len(4)]
# Columns 6 and 23, each extracted as a plain vector.
stgMushRoom2 <- mushrooms_tbl[[6]]
stgMushRoom3 <- mushrooms_tbl[[23]]
The three pieces are combined into one data frame
# Bind the four-column frame and the two single columns side by side. The two
# vectors are named explicitly with the names cbind() would derive from the
# variable names.
stgMushRoom <- cbind(
  stgMushRoom1,
  stgMushRoom2 = stgMushRoom2,
  stgMushRoom3 = stgMushRoom3
)
# Preview the first six rows of the combined frame.
head(stgMushRoom, n = 6L)
## V1 V2 V3 V4 stgMushRoom2 stgMushRoom3
## 1 p x s n p u
## 2 e x s y a g
## 3 e b s w l m
## 4 p x y w p u
## 5 e x s g n g
## 6 e x y y a g
Adding Column Names
# Give the six selected columns descriptive names, in column order.
names(stgMushRoom) <- c(
  "EdiblePoisonous",
  "CapShape",
  "CapSurface",
  "CapColor",
  "Odor",
  "GrowsIn"
)
# Preview the renamed frame.
head(stgMushRoom, n = 6L)
## EdiblePoisonous CapShape CapSurface CapColor Odor GrowsIn
## 1 p x s n p u
## 2 e x s y a g
## 3 e b s w l m
## 4 p x y w p u
## 5 e x s g n g
## 6 e x y y a g
Replacing abbreviated values with descriptive values. 1..EdiblePoisonous
# EdiblePoisonous
# Recode the single-letter class codes to descriptive labels in one step.
# factor(levels =, labels =) works whether the column arrives as character
# (read.table default since R 4.0) or as a factor (older R), and it leaves no
# stale "e"/"p" levels behind. Any value not listed in `levels` becomes NA.
stgMushRoom$EdiblePoisonous <- factor(
  stgMushRoom$EdiblePoisonous,
  levels = c("e", "p"),
  labels = c("Edible", "Poisonous")
)
2..CapShape
# CapShape
# Recode the single-letter cap-shape codes to descriptive labels in one step.
# factor(levels =, labels =) works for both character and factor input and
# leaves no unused single-letter levels behind. Any value not listed in
# `levels` becomes NA.
stgMushRoom$CapShape <- factor(
  stgMushRoom$CapShape,
  levels = c("b", "c", "x", "f", "k", "s"),
  labels = c("Bell", "Conical", "Convex", "Flat", "Knobbed", "Sunken")
)
3..CapSurface
# CapSurface
# Recode the single-letter cap-surface codes to descriptive labels in one
# step. factor(levels =, labels =) works for both character and factor input
# and leaves no unused single-letter levels behind. Any value not listed in
# `levels` becomes NA.
stgMushRoom$CapSurface <- factor(
  stgMushRoom$CapSurface,
  levels = c("f", "g", "y", "s"),
  labels = c("Fibrous", "Grooves", "Scaly", "Smooth")
)
4..Odor
# Odor
# Recode the single-letter odor codes to descriptive labels in one step.
# factor(levels =, labels =) works for both character and factor input and
# leaves no unused single-letter levels behind. Any value not listed in
# `levels` becomes NA.
stgMushRoom$Odor <- factor(
  stgMushRoom$Odor,
  levels = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  labels = c(
    "Almond", "Anise", "Creosote", "Fishy", "Foul", "Musty", "None",
    "Pungent", "Spicy"
  )
)
5..CapColor
# CapColor
# Recode the single-letter cap-color codes to descriptive labels in one step.
# factor(levels =, labels =) works for both character and factor input and
# leaves no unused single-letter levels behind. Any value not listed in
# `levels` becomes NA.
stgMushRoom$CapColor <- factor(
  stgMushRoom$CapColor,
  levels = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  labels = c(
    "Brown", "Buff", "Cinnamon", "Gray", "Green", "Pink", "Purple", "Red",
    "White", "Yellow"
  )
)
6..GrowsIn
# GrowsIn
# Recode the single-letter habitat codes to descriptive labels in one step.
# factor(levels =, labels =) works for both character and factor input and
# leaves no unused single-letter levels behind. Any value not listed in
# `levels` becomes NA.
stgMushRoom$GrowsIn <- factor(
  stgMushRoom$GrowsIn,
  levels = c("g", "l", "m", "p", "u", "w", "d"),
  labels = c(
    "grasses", "leaves", "meadows", "paths", "urban", "waste", "woods"
  )
)
# Save the recoded data frame as CSV. row.names is left at its default, so the
# row numbers are written as a leading column.
# NOTE(review): the absolute path is machine-specific -- confirm the folder
# exists before running this elsewhere.
write.csv(
  x = stgMushRoom,
  file = "E:/0_MSC/github/MATS_DATA607_/W1/stgMushRoom.csv"
)
# Preview the final, human-readable data.
head(stgMushRoom, n = 6L)
## EdiblePoisonous CapShape CapSurface CapColor Odor GrowsIn
## 1 Poisonous Convex Smooth Brown Pungent urban
## 2 Edible Convex Smooth Yellow Almond grasses
## 3 Edible Bell Smooth White Anise meadows
## 4 Poisonous Convex Scaly White Pungent urban
## 5 Edible Convex Smooth Gray None grasses
## 6 Edible Convex Scaly Yellow Almond grasses