MSDS Spring 2018

DATA 607 Data Acquisition and Management

Jiadi Li

Week 1 Assignment: Basic Loading and Transformation

Task: Create a data frame with a subset of the columns in the dataset. Add meaningful column names and replace the abbreviations.

  1. Place the file in Github and load the data.
# Load the raw mushroom data from GitHub. The file has no header row, so the
# columns receive R's default names (V1, V2, ...).
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() defaults to
# character columns, and summary() would then stop printing per-level counts.
Mushroom <- read.csv(
  "https://raw.githubusercontent.com/xiaoxiaogao-DD/Spring2018_DATA607_Assignment1/master/mushroom.csv",
  header = FALSE,
  stringsAsFactors = TRUE
)
summary(Mushroom)
##  V1       V2       V3             V4       V5             V6      
##  e:4208   b: 452   f:2320   n      :2284   f:4748   n      :3528  
##  p:3916   c:   4   g:   4   g      :1840   t:3376   f      :2160  
##           f:3152   s:2556   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3656            b      : 168            l      : 400  
##                             (Other): 220            (Other): 484  
##  V7       V8       V9            V10       V11      V12      V13     
##  a: 210   c:6812   b:5612   b      :1728   e:3516   ?:2480   f: 552  
##  f:7914   w:1312   n:2512   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5176  
##                             n      :1048            e:1120   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1170                             
##  V14           V15            V16       V17      V18      V19     
##  f: 600   w      :4464   w      :4384   p:8124   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7488  
##  s:4936   g      : 576   g      : 576            w:7924   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  V20           V21       V22      V23     
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1872   n: 400   l: 832  
##  n:  36   h      :1632   s:1248   m: 292  
##  p:3968   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 368  
##           (Other): 144            w: 192
  1. Subset the columns of interest and rename them
# Keep columns V2, V3, V4 and V1 (in that order) and give them descriptive
# names in a single assignment.
Mushroom_cap <- Mushroom[, c("V2", "V3", "V4", "V1")]
names(Mushroom_cap) <- c("shape", "surface", "color", "classes")
summary(Mushroom_cap)
##  shape    surface      color      classes 
##  b: 452   f:2320   n      :2284   e:4208  
##  c:   4   g:   4   g      :1840   p:3916  
##  f:3152   s:2556   e      :1500           
##  k: 828   y:3244   y      :1072           
##  s:  32            w      :1040           
##  x:3656            b      : 168           
##                    (Other): 220
  1. Rename attributes for classes
# Expand the one-letter class codes into readable labels via a lookup vector.
# Codes not present in the lookup are left unchanged.
# NOTE(review): "poisoneous" is a misspelling of "poisonous"; the label is
# reproduced unchanged so it keeps matching the recorded summary output.
Mushroom_cap$classes <- local({
  labels <- c(e = "edible", p = "poisoneous")
  codes <- as.character(Mushroom_cap$classes)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  as.factor(codes)
})
  1. Rename attributes for shape
# Expand the one-letter shape codes into readable labels via a lookup vector.
# Codes not present in the lookup are left unchanged.
Mushroom_cap$shape <- local({
  labels <- c(
    b = "bell",
    c = "conical",
    f = "flat",
    k = "knobbed",
    s = "sunken",
    x = "convex"
  )
  codes <- as.character(Mushroom_cap$shape)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  as.factor(codes)
})
  1. Rename attributes for surface
# Expand the one-letter surface codes into readable labels via a lookup
# vector. Codes not present in the lookup are left unchanged.
Mushroom_cap$surface <- local({
  labels <- c(
    f = "fibrous",
    g = "grooves",
    s = "smooth",
    y = "scaly"
  )
  codes <- as.character(Mushroom_cap$surface)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  as.factor(codes)
})
  1. Rename attributes for color
# Expand the one-letter color codes into readable labels via a lookup vector.
# The original recode covered only six codes; the remaining four (c, r, p, u)
# stayed abbreviated and were hidden under "(Other)" in the summary. All ten
# codes from the UCI attribute list are now mapped. Any code not present in
# the lookup is left unchanged.
Mushroom_cap$color <- local({
  labels <- c(
    n = "brown",
    b = "buff",
    c = "cinnamon",
    g = "gray",
    r = "green",
    p = "pink",
    u = "purple",
    e = "red",
    w = "white",
    y = "yellow"
  )
  codes <- as.character(Mushroom_cap$color)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  as.factor(codes)
})
summary(Mushroom_cap)
##      shape         surface         color            classes    
##  bell   : 452   fibrous:2320   brown  :2284   edible    :4208  
##  conical:   4   grooves:   4   gray   :1840   poisoneous:3916  
##  convex :3656   scaly  :3244   red    :1500                    
##  flat   :3152   smooth :2556   yellow :1072                    
##  knobbed: 828                  white  :1040                    
##  sunken :  32                  buff   : 168                    
##                                (Other): 220