Load the data into R

# Read the comma-separated mushroom records; the call relies on read.table's
# default header handling, so columns come back as V1, V2, ...
dat <- read.table(
  file = "agaricus-lepiota.data",
  sep = ","
)
# Preview the first six rows.
head(dat, n = 6L)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Compare the dataset to the dataset description

# Inspect the structure of the loaded data frame `dat`.
# The earlier call, str(data), described the built-in data() function rather
# than the mushroom data, so the output it produced has been removed.
# Re-knit to regenerate the structure listing for `dat`.
str(dat)

The data set contains 23 columns: the first column states whether the mushroom is poisonous (p) or edible (e), and the next 22 columns are the attributes listed in the data description. The data set also has 8124 observations, as indicated in the data description file.

Choose the columns to work on and name them

# Keep the class label and four attributes, selected by their column
# position in the raw data, then give the columns descriptive names.
mushroom_dataset <- dat[, c(1, 5, 11, 17, 19)]
names(mushroom_dataset) <- c(
  "Class",
  "bruises?",
  "stalk-shape",
  "veil-type",
  "ring-number"
)
# Show the column types of the reduced data frame.
str(mushroom_dataset)
## 'data.frame':    8124 obs. of  5 variables:
##  $ Class      : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
##  $ bruises?   : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
##  $ stalk-shape: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
##  $ veil-type  : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ring-number: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...

These are all factors; we will convert them to character so they are easier to work with.

# Convert every column of the reduced data frame to character in one pass;
# assigning into `[]` keeps the data frame structure and column names.
mushroom_dataset[] <- lapply(mushroom_dataset, as.character)

Rename the values in the dataset so they are easier to read, and convert the columns back to easier-to-use data types.

# Recode Class: "e" becomes "edible", every other code becomes "poisonous".
# Vectorised so it works for any number of rows instead of relying on a
# hard-coded row count, then stored as a factor.
mushroom_dataset$Class <- as.factor(
  ifelse(mushroom_dataset$Class == "e", "edible", "poisonous")
)

# Recode bruises?: "t" becomes "TRUE", every other code becomes "FALSE".
# The result stays a factor with string levels (not a logical vector).
# Vectorised so it works for any number of rows.
mushroom_dataset$`bruises?` <- as.factor(
  ifelse(mushroom_dataset$`bruises?` == "t", "TRUE", "FALSE")
)


# Recode stalk-shape: "e" becomes "enlarging", every other code becomes
# "tapering". Vectorised so it works for any number of rows, then stored as
# a factor.
mushroom_dataset$`stalk-shape` <- as.factor(
  ifelse(mushroom_dataset$`stalk-shape` == "e", "enlarging", "tapering")
)

# Recode veil-type: "p" becomes "partial", every other code becomes
# "universal". The non-"p" case is written to veil-type itself; the earlier
# loop wrote it into stalk-shape by mistake. Vectorised so it works for any
# number of rows, then stored as a factor.
mushroom_dataset$`veil-type` <- as.factor(
  ifelse(mushroom_dataset$`veil-type` == "p", "partial", "universal")
)

# Recode ring-number to a numeric count: "n" becomes 0, "o" becomes 1, and
# every other code becomes 2. Vectorised so it works for any number of rows;
# as.numeric() keeps the column numeric even when there are no rows.
mushroom_dataset$`ring-number` <- as.numeric(
  ifelse(
    mushroom_dataset$`ring-number` == "n",
    0,
    ifelse(mushroom_dataset$`ring-number` == "o", 1, 2)
  )
)

Check the new dataset

# Print the structure of the recoded data frame to confirm the column types
# and factor levels.
str(mushroom_dataset)
## 'data.frame':    8124 obs. of  5 variables:
##  $ Class      : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
##  $ bruises?   : Factor w/ 2 levels "FALSE","TRUE": 2 2 2 2 1 2 2 2 2 2 ...
##  $ stalk-shape: Factor w/ 2 levels "enlarging","tapering": 1 1 1 1 2 1 1 1 1 1 ...
##  $ veil-type  : Factor w/ 1 level "partial": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ring-number: num  1 1 1 1 1 1 1 1 1 1 ...
# Preview the first rows of the recoded data frame.
head(mushroom_dataset)
##       Class bruises? stalk-shape veil-type ring-number
## 1 poisonous     TRUE   enlarging   partial           1
## 2    edible     TRUE   enlarging   partial           1
## 3    edible     TRUE   enlarging   partial           1
## 4 poisonous     TRUE   enlarging   partial           1
## 5    edible    FALSE    tapering   partial           1
## 6    edible     TRUE   enlarging   partial           1