Elina Azrilyan

Assignment 1

Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data—for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.

Loading data from the following location into a data frame: https://archive.ics.uci.edu/ml/datasets/Mushroom

# Read the raw comma-separated file straight from the UCI repository.
# The file carries no header row, so the columns come back as V1, V2, ...
MushroomData <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ","
)
# Preview the first rows to confirm the load.
head(MushroomData)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Creating a subset of the mushroom data with only 5 columns: Class, Cap Shape, Bruises, Odor, and Population.

# Build the working subset from five of the raw columns (V1, V2, V5, V6, V22).
# The columns are named explicitly so the result carries the
# "MushroomData.<column>" names until they are renamed later.
SMushroomData <- data.frame(
  MushroomData.V1 = MushroomData[["V1"]],
  MushroomData.V2 = MushroomData[["V2"]],
  MushroomData.V5 = MushroomData[["V5"]],
  MushroomData.V6 = MushroomData[["V6"]],
  MushroomData.V22 = MushroomData[["V22"]]
)
head(SMushroomData)
##   MushroomData.V1 MushroomData.V2 MushroomData.V5 MushroomData.V6
## 1               p               x               t               p
## 2               e               x               t               a
## 3               e               b               t               l
## 4               p               x               t               p
## 5               e               x               f               n
## 6               e               x               t               a
##   MushroomData.V22
## 1                s
## 2                n
## 3                n
## 4                s
## 5                a
## 6                n

Adding column names according to the data dictionary

# Give the five columns descriptive names, in the order they were selected.
names(SMushroomData) <- c(
  "Class",
  "CapShape",
  "Bruises",
  "Odor",
  "Population"
)
head(SMushroomData)
##   Class CapShape Bruises Odor Population
## 1     p        x       t    p          s
## 2     e        x       t    a          n
## 3     e        b       t    l          n
## 4     p        x       t    p          s
## 5     e        x       f    n          a
## 6     e        x       t    a          n

Replacing abbreviations with actual values

# Recode Class: swap each one-letter code for its full label via a lookup
# vector. Values that are not in the lookup are left exactly as they are.
SMushroomData$Class <- local({
  labels <- c(e = "edible", p = "poisonous")
  values <- as.character(SMushroomData$Class)
  known <- values %in% names(labels)
  values[known] <- unname(labels[values[known]])
  values
})
head(SMushroomData)
##       Class CapShape Bruises Odor Population
## 1 poisonous        x       t    p          s
## 2    edible        x       t    a          n
## 3    edible        b       t    l          n
## 4 poisonous        x       t    p          s
## 5    edible        x       f    n          a
## 6    edible        x       t    a          n
# Recode CapShape: swap each one-letter code for its full label via a lookup
# vector. Values that are not in the lookup are left exactly as they are.
SMushroomData$CapShape <- local({
  labels <- c(
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
  values <- as.character(SMushroomData$CapShape)
  known <- values %in% names(labels)
  values[known] <- unname(labels[values[known]])
  values
})
head(SMushroomData)
##       Class CapShape Bruises Odor Population
## 1 poisonous   convex       t    p          s
## 2    edible   convex       t    a          n
## 3    edible     bell       t    l          n
## 4 poisonous   convex       t    p          s
## 5    edible   convex       f    n          a
## 6    edible   convex       t    a          n
# Recode Bruises: the t/f flags become "yes"/"no" via a lookup vector.
# Values that are not in the lookup are left exactly as they are.
SMushroomData$Bruises <- local({
  labels <- c(t = "yes", f = "no")
  values <- as.character(SMushroomData$Bruises)
  known <- values %in% names(labels)
  values[known] <- unname(labels[values[known]])
  values
})
head(SMushroomData)
##       Class CapShape Bruises Odor Population
## 1 poisonous   convex     yes    p          s
## 2    edible   convex     yes    a          n
## 3    edible     bell     yes    l          n
## 4 poisonous   convex     yes    p          s
## 5    edible   convex      no    n          a
## 6    edible   convex     yes    a          n
# Recode Odor: swap each one-letter code for its full label via a lookup
# vector. Values that are not in the lookup are left exactly as they are.
SMushroomData$Odor <- local({
  labels <- c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
  values <- as.character(SMushroomData$Odor)
  known <- values %in% names(labels)
  values[known] <- unname(labels[values[known]])
  values
})
head(SMushroomData)
##       Class CapShape Bruises    Odor Population
## 1 poisonous   convex     yes pungent          s
## 2    edible   convex     yes  almond          n
## 3    edible     bell     yes   anise          n
## 4 poisonous   convex     yes pungent          s
## 5    edible   convex      no    none          a
## 6    edible   convex     yes  almond          n
# Recode Population: swap each one-letter code for its full label via a lookup
# vector. Values that are not in the lookup are left exactly as they are.
SMushroomData$Population <- local({
  labels <- c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
  values <- as.character(SMushroomData$Population)
  known <- values %in% names(labels)
  values[known] <- unname(labels[values[known]])
  values
})
head(SMushroomData)
##       Class CapShape Bruises    Odor Population
## 1 poisonous   convex     yes pungent  scattered
## 2    edible   convex     yes  almond   numerous
## 3    edible     bell     yes   anise   numerous
## 4 poisonous   convex     yes pungent  scattered
## 5    edible   convex      no    none   abundant
## 6    edible   convex     yes  almond   numerous