Your task
Study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there!
You should take the data, and create a data frame with a subset of the columns in the dataset.
You should also add meaningful column names and replace the abbreviations used in the data — for example, in the appropriate column, “e” might become “edible.”
Your deliverable is the R code to perform these transformation tasks.
(a) Mushroom records drawn from The Audubon Society Field Guide to North
American Mushrooms (1981). G. H. Lincoff (Pres.), New York: Alfred
A. Knopf
(b) Donor: Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
(c) Date: 27 April 1987
suppressWarnings(library(tidyverse))
suppressWarnings(library(stringr))
suppressWarnings(library(data.table))
library(plyr)
library(XML)
library(crayon)
library(lubridate)
library(rjson)
library(readxl)
# Locate the project directory. The machine-specific path is only used when it
# exists, so the script no longer aborts on other machines; there it falls back
# to the current working directory, which must contain a "data" folder.
project_dir <- "C:\\Users\\951250\\Documents\\myR\\MS\\607"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
workDir <- getwd()
filePath <- file.path(workDir, "data")
# fileName <- list.files(path=filePath)
# mushroom <- read_delim(paste0(filePath,"/","mushroom.data" ))
# coloumName <- c(" ")
# Read the attribute description file, splitting each line on "." into
# unnamed columns (X1, X2, X3 in the output below).
mushroom_meta <- read_delim(
  file.path(filePath, "atr.txt"),
  delim = ".",
  col_names = FALSE
)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_character()
## )
# Use the names from the metadata tibble above as the header of the mushroom
# dataset.
#
# Column types are given explicitly instead of being guessed. With guessing,
# `gill-attachment` was parsed as logical and every "a" value failed to parse
# (210 parsing failures, e.g. rows 6039, 6041, 6376), silently becoming NA.
# All columns are now read as character, except `bruises?`, which is kept
# logical so that it matches the previously guessed type.
mushroom <- read_delim(
  file.path(filePath, "mushroom.data"),
  delim = ",",
  col_names = trimws(mushroom_meta$X2),
  col_types = cols(
    .default = col_character(),
    `bruises?` = col_logical()
  )
)
Data of Mushroom Dataset
Data of Mushroom Dataset’s Attributes
# Give the attribute metadata descriptive column names and preview both tables.
names(mushroom_meta) <- c("ID", "Type", "Value")
head(mushroom_meta[, c(2, 3)])
head(mushroom)

# Replace the single-letter abbreviations with readable labels.
# Any class that is not "p" is labelled "edible".
mushroom <- mutate(
  mushroom,
  classes = ifelse(classes == "p", "poisonous", "edible")
)
mushroom$`cap-shape` <- revalue(
  mushroom$`cap-shape`,
  c(
    b = "bell", c = "conical", x = "convex", f = "flat",
    k = "knobbed", s = "sunken"
  )
)
mushroom$`cap-surface` <- revalue(
  mushroom$`cap-surface`,
  c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
)
mushroom$`cap-color` <- revalue(
  mushroom$`cap-color`,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Inspect the four recoded columns.
head(mushroom[, c(1:4)])
glimpse(mushroom[, c(1:4)])
## Observations: 8,124
## Variables: 4
## $ classes <chr> "poisonous", "edible", "edible", "poisonous", "e...
## $ `cap-shape` <chr> "convex", "convex", "bell", "convex", "convex", ...
## $ `cap-surface` <chr> "smooth", "smooth", "smooth", "scaly", "smooth",...
## $ `cap-color` <chr> "brown", "yellow", "white", "white", "gray", "ye...
# Data Subset
# Keep every row and only the first five columns of `mushroom`.
mushroom_edible <- mushroom[, 1:5, drop = FALSE]
head(mushroom_edible)