Task: Create a data frame with a subset of the columns in the dataset. Add meaningful column names and replace the abbreviations.
- Place the file on GitHub and load the data.
# Load the mushroom data set straight from GitHub. The file has no header row,
# so read.csv() assigns the default column names (V1, V2, ...).
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, and summary() would then no longer
# print the per-level counts for each column.
Mushroom <- read.csv(
  "https://raw.githubusercontent.com/xiaoxiaogao-DD/Spring2018_DATA607_Assignment1/master/mushroom.csv",
  header = FALSE,
  stringsAsFactors = TRUE
)
summary(Mushroom)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
- Subset and rename columns
# Keep columns V2, V3, V4 and V1 (in that order) and give all four columns
# descriptive names in a single assignment.
Mushroom_cap <- Mushroom[, c("V2", "V3", "V4", "V1")]
names(Mushroom_cap) <- c("shape", "surface", "color", "classes")
summary(Mushroom_cap)
## shape surface color classes
## b: 452 f:2320 n :2284 e:4208
## c: 4 g: 4 g :1840 p:3916
## f:3152 s:2556 e :1500
## k: 828 y:3244 y :1072
## s: 32 w :1040
## x:3656 b : 168
## (Other): 220
- Rename attributes for classes
# Replace the one-letter class codes with readable labels and store the result
# as a factor again. Codes without an entry in the lookup are left unchanged.
# Fix: the label for "p" is now spelled "poisonous" (was "poisoneous"); output
# rendered before this change still shows the old spelling.
Mushroom_cap$classes <- local({
  class_labels <- c(e = "edible", p = "poisonous")
  values <- as.character(Mushroom_cap$classes)
  is_known <- values %in% names(class_labels)
  values[is_known] <- unname(class_labels[values[is_known]])
  as.factor(values)
})
- Rename attributes for shape
# Translate the one-letter shape codes into readable labels via a lookup
# table, then convert the column back to a factor. Codes that are not in the
# table are kept as they are.
Mushroom_cap$shape <- local({
  shape_labels <- c(
    b = "bell", c = "conical", f = "flat",
    k = "knobbed", s = "sunken", x = "convex"
  )
  values <- as.character(Mushroom_cap$shape)
  is_known <- values %in% names(shape_labels)
  values[is_known] <- unname(shape_labels[values[is_known]])
  as.factor(values)
})
- Rename attributes for surface
# Translate the one-letter surface codes into readable labels via a lookup
# table, then convert the column back to a factor. Codes that are not in the
# table are kept as they are.
Mushroom_cap$surface <- local({
  surface_labels <- c(
    f = "fibrous", g = "grooves", s = "smooth", y = "scaly"
  )
  values <- as.character(Mushroom_cap$surface)
  is_known <- values %in% names(surface_labels)
  values[is_known] <- unname(surface_labels[values[is_known]])
  as.factor(values)
})
- Rename attributes for color
# Translate the one-letter color codes into readable labels via a lookup
# table, then convert the column back to a factor.
# Fix: the earlier mapping covered only n, g, e, y, w and b, so the remaining
# codes (c, p, r, u) stayed as bare letters, hidden under "(Other)" in the
# summary. All ten codes are now translated; the labels follow the cap-color
# entry of the UCI Mushroom data set attribute description.
# Codes that are not in the table are kept as they are.
Mushroom_cap$color <- local({
  color_labels <- c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
  values <- as.character(Mushroom_cap$color)
  is_known <- values %in% names(color_labels)
  values[is_known] <- unname(color_labels[values[is_known]])
  as.factor(values)
})
summary(Mushroom_cap)
## shape surface color classes
## bell : 452 fibrous:2320 brown :2284 edible :4208
## conical: 4 grooves: 4 gray :1840 poisoneous:3916
## convex :3656 scaly :3244 red :1500
## flat :3152 smooth :2556 yellow :1072
## knobbed: 828 white :1040
## sunken : 32 buff : 168
## (Other): 220