R Markdown

This assignment for the DATA 607 course in the CUNY School of Professional Studies MS Data Science program transforms semi-structured data into structured data using R methods and functions.

Loading dplyr package

Copying the mushroom dataframe to df dataframe

# Work on `df` rather than on `mushroom` directly.
# NOTE(review): `mushroom` is not created in the visible text; it must
# already exist in the session before this line runs.
df <- mushroom 

Selecting only the specific columns to work with and writing them to the df1 data frame

# Keep only columns X1 through X4; list-style indexing returns the same
# kind of table as `df`, never a bare vector.
df1 <- df[paste0("X", 1:4)]

Renaming the df1 columns to descriptive names and storing the result in df2

# Give the four generic columns descriptive names.
# dplyr::rename() takes `new_name = old_name` pairs. The previous
# c("X1" = "Classes", ...) form is plyr's `old = new` convention and fails
# under dplyr because it looks for existing columns called "Classes" etc.
# The call is namespace-qualified so the result does not depend on which
# package's rename() is currently masking the other.
df2 <- dplyr::rename(
  df1,
  Classes = X1,
  Cap_Shape = X2,
  Cap_Surface = X3,
  Cap_color = X4
)
df2
## # A tibble: 8,124 x 4
##    Classes Cap_Shape Cap_Surface Cap_color
##    <chr>   <chr>     <chr>       <chr>    
##  1 p       x         s           n        
##  2 e       x         s           y        
##  3 e       b         s           w        
##  4 p       x         y           w        
##  5 e       x         s           g        
##  6 e       x         y           y        
##  7 e       b         s           w        
##  8 e       b         y           w        
##  9 p       x         y           w        
## 10 e       b         s           y        
## # ... with 8,114 more rows

Replacing the single-letter codes in the above columns with descriptive labels

# Replace the single-letter codes in each column with readable labels.
# Each lookup maps code -> label; a value that matches no code is left
# unchanged. local() keeps the helper variables out of the workspace.
df2 <- local({
  code_labels <- list(
    Classes = c(e = "Edible", p = "Poisonous"),
    Cap_Shape = c(
      b = "Bell", c = "Conical", x = "Convex",
      f = "Flat", k = "Knobbed", s = "Sunken"
    ),
    Cap_Surface = c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth"),
    Cap_color = c(
      n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
      p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
    )
  )

  recoded <- df2
  for (column in names(code_labels)) {
    labels <- code_labels[[column]]
    # Position of each value's code in the lookup; NA when it is not a code.
    position <- match(recoded[[column]], names(labels))
    known <- !is.na(position)
    recoded[[column]][known] <- unname(labels[position[known]])
  }
  recoded
})

# Keep the fully labelled table under its final name and print it.
FinalMushroomData <- df2
FinalMushroomData
## # A tibble: 8,124 x 4
##    Classes   Cap_Shape Cap_Surface Cap_color
##    <chr>     <chr>     <chr>       <chr>    
##  1 Poisonous Convex    Smooth      Brown    
##  2 Edible    Convex    Smooth      Yellow   
##  3 Edible    Bell      Smooth      White    
##  4 Poisonous Convex    Scaly       White    
##  5 Edible    Convex    Smooth      Gray     
##  6 Edible    Convex    Scaly       Yellow   
##  7 Edible    Bell      Smooth      White    
##  8 Edible    Bell      Scaly       White    
##  9 Poisonous Convex    Scaly       White    
## 10 Edible    Bell      Smooth      Yellow   
## # ... with 8,114 more rows