# Read the mushroom data set directly from the UCI repository. The file is
# comma-separated and has no header row, so read.table() assigns the default
# column names V1, V2, ...
Url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
Data <- read.table(Url, sep = ",", header = FALSE)
# Preview the first six rows to confirm the import.
head(Data, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Create a data frame from a subset of the columns (columns 1 to 4 and column 6).
# Columns 1 to 4 are kept together as a data frame; column 6 is pulled out
# on its own as a plain vector.
S1 <- Data[, seq_len(4L)]
S2 <- Data[[6L]]
The two pieces are combined into a single data frame.
# Bind the single vector onto the four-column frame. Naming the argument
# explicitly gives the new column the name "S2".
DataT <- cbind(S1, S2 = S2)
head(DataT, n = 6L)
## V1 V2 V3 V4 S2
## 1 p x s n p
## 2 e x s y a
## 3 e b s w l
## 4 p x y w p
## 5 e x s g n
## 6 e x y y a
Descriptive column names are added.
# Replace the default V*/S2 names with descriptive attribute names. Several
# names contain "/" or "-", so they must be quoted with backticks when used
# after `$`.
names(DataT) <- c(
  "Edible/Poisonous",
  "Cap-Shape",
  "Cap-Surface",
  "Cap-Color",
  "Odor"
)
head(DataT, n = 6L)
## Edible/Poisonous Cap-Shape Cap-Surface Cap-Color Odor
## 1 p x s n p
## 2 e x s y a
## 3 e b s w l
## 4 p x y w p
## 5 e x s g n
## 6 e x y y a
Edible/Poisonous
# Replace the single-letter class codes with readable labels.
# The work is done on a copy inside local() so the helper variables do not
# leak into the workspace; the finished column is then written back.
DataT$`Edible/Poisonous` <- local({
  labels <- c(e = "Edible", p = "Poisonous")
  column <- DataT$`Edible/Poisonous`
  # Append the labels to the existing levels first so that, when the column
  # is a factor, the assignments below refer to valid levels.
  levels(column) <- c(levels(column), unname(labels))
  for (code in names(labels)) {
    column[column == code] <- labels[[code]]
  }
  column
})
Cap-Shape
# Replace the single-letter cap-shape codes with readable labels.
# The work is done on a copy inside local() so the helper variables do not
# leak into the workspace; the finished column is then written back.
DataT$`Cap-Shape` <- local({
  labels <- c(
    b = "Bell",
    c = "Conical",
    x = "Convex",
    f = "Flat",
    k = "Knobbed",
    s = "Sunken"
  )
  column <- DataT$`Cap-Shape`
  # Append the labels to the existing levels first so that, when the column
  # is a factor, the assignments below refer to valid levels.
  levels(column) <- c(levels(column), unname(labels))
  for (code in names(labels)) {
    column[column == code] <- labels[[code]]
  }
  column
})
Cap-Surface
# Replace the single-letter cap-surface codes with readable labels.
# The work is done on a copy inside local() so the helper variables do not
# leak into the workspace; the finished column is then written back.
DataT$`Cap-Surface` <- local({
  labels <- c(
    f = "Fibrous",
    g = "Grooves",
    y = "Scaly",
    s = "Smooth"
  )
  column <- DataT$`Cap-Surface`
  # Append the labels to the existing levels first so that, when the column
  # is a factor, the assignments below refer to valid levels.
  levels(column) <- c(levels(column), unname(labels))
  for (code in names(labels)) {
    column[column == code] <- labels[[code]]
  }
  column
})
Cap-Color
# Replace the single-letter cap-color codes with readable labels.
# The work is done on a copy inside local() so the helper variables do not
# leak into the workspace; the finished column is then written back.
DataT$`Cap-Color` <- local({
  labels <- c(
    n = "Brown",
    b = "Buff",
    c = "Cinnamon",
    g = "Gray",
    r = "Green",
    p = "Pink",
    u = "Purple",
    e = "Red",
    w = "White",
    y = "Yellow"
  )
  column <- DataT$`Cap-Color`
  # Append the labels to the existing levels first so that, when the column
  # is a factor, the assignments below refer to valid levels.
  levels(column) <- c(levels(column), unname(labels))
  for (code in names(labels)) {
    column[column == code] <- labels[[code]]
  }
  column
})
Odor
# Replace the single-letter odor codes with readable labels.
# Each label added to the levels has a matching code in the lookup table,
# including f = "Foul", so no row is left holding a raw one-letter code.
# The work is done on a copy inside local() so the helper variables do not
# leak into the workspace; the finished column is then written back.
DataT$Odor <- local({
  labels <- c(
    a = "Almond",
    l = "Anise",
    c = "Creosote",
    y = "Fishy",
    f = "Foul",
    m = "Musty",
    n = "None",
    p = "Pungent",
    s = "Spicy"
  )
  column <- DataT$Odor
  # Append the labels to the existing levels first so that, when the column
  # is a factor, the assignments below refer to valid levels.
  levels(column) <- c(levels(column), unname(labels))
  for (code in names(labels)) {
    column[column == code] <- labels[[code]]
  }
  column
})
Check Results
# Show the first six rows to confirm the codes were replaced by labels.
head(DataT, n = 6L)
## Edible/Poisonous Cap-Shape Cap-Surface Cap-Color Odor
## 1 Poisonous Convex Smooth Brown Pungent
## 2 Edible Convex Smooth Yellow Almond
## 3 Edible Bell Smooth White Anise
## 4 Poisonous Convex Scaly White Pungent
## 5 Edible Convex Smooth Gray None
## 6 Edible Convex Scaly Yellow Almond