# Data ----
# Build the example data set inline. Each row holds an ID, raw values
# (Age, Align, Weat) and pre-computed coded versions of them
# (Age1/Age2, Align1, Weat1/Weat2). The first line of the text is the header.
dat1 <- read.table(
  text = "
ID Age Age1 Age2 Align Align1 Weat Weat1 Weat2
8645 17 15-24 1 A 1 1 0 Clean
6228 21 15-24 1 B 0 1 0 Clean
5830 23 15-24 1 C 0 3 1 Rain
1844 27 25-34 0 B 0 3 1 Rain
4461 36 35-44 0 C 0 4 0 Snow
2119 59 55-64 0 C 0 2 0 Cloudy
2115 54 45-54 0 A 1 1 0 Clean",
  header = TRUE
)
library(DT)
## Warning: package 'DT' was built under R version 3.3.3
# Render the raw data as an interactive table, then list its column names.
DT::datatable(dat1)
colnames(dat1)
## [1] "ID" "Age" "Age1" "Age2" "Align" "Align1" "Weat" "Weat1"
## [9] "Weat2"
# Category to Binary ----
# Keep the ID plus the three source columns (positions 1, 3, 5, 7 of dat1)
# from which the 0/1 indicators are derived.
dat2 <- dat1[c("ID", "Age1", "Align", "Weat")]
DT::datatable(dat2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
# Add 0/1 indicator columns by converting logical tests with as.integer():
#   Age2   - 1 when the age band is "15-24"
#   Align1 - 1 when the alignment is "A"
#   Weat1  - 1 when the weather code equals 3
dat2_1 <- dplyr::mutate(
  dat2,
  Age2 = as.integer(Age1 %in% "15-24"),
  Align1 = as.integer(Align %in% "A"),
  Weat1 = as.integer(Weat == 3)
)
## Warning: package 'bindrcpp' was built under R version 3.3.3
# Show the data with the new indicator columns.
DT::datatable(dat2_1)
# Variant of the indicator derivation: adding 0L to a logical vector turns
# TRUE/FALSE into integer 1/0. The parentheses only spell out the order in
# which R already evaluates these expressions (%in% binds tighter than +).
dat2_1 <- dplyr::mutate(
  dat2,
  Age2 = (Age1 %in% "15-24") + 0L,
  Align1 = (Align %in% "A") + 0L,
  Weat1 = (Weat == 3) + 0L
)
DT::datatable(dat2_1)
# Integer to Categorical ----
# Keep only the ID and the integer weather code (positions 1 and 7 of dat1).
dat4 <- dat1[c("ID", "Weat")]
DT::datatable(dat4)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Translate the integer weather code into its label
# (1 = Clean, 2 = Cloudy, 3 = Rain, 4 = Snow).
# The call is namespace-qualified on purpose: dplyr also exports a recode()
# with a different interface, so a bare recode() only reaches car's version
# when car happens to be attached after dplyr.
dat4$Weat2 <- car::recode(
  dat4$Weat,
  "1= 'Clean'; 2= 'Cloudy';
3='Rain'; 4='Snow'"
)
datatable(dat4)
# Integer to Cluster Category ----
################
# Keep only the ID and the numeric age (positions 1 and 2 of dat1).
dat6 <- dat1[c("ID", "Age")]
DT::datatable(dat6)

# Interval edges for the age bands; cut() treats each interval as
# (lower, upper] by default, so 14 is the exclusive lower edge of "15-24".
brk <- c(14, 24, 34, 44, 54, 64, Inf)

# Label each band "<lower edge + 1>-<upper edge>", e.g. "15-24"; the last,
# open-ended band comes out as "65-Inf".
dat6$Age1 <- cut(
  dat6$Age,
  breaks = brk,
  labels = paste(brk[-length(brk)] + 1, brk[-1], sep = "-")
)
DT::datatable(dat6)
# ftable-like summary ----
# Keep the ID, weather label and alignment (positions 1, 9 and 5 of dat1).
dat3 <- dat1[c("ID", "Weat2", "Align")]
library(tidyr)

# Count rows per ID / Weat2 / Align combination, then spread the alignment
# values into one column each, holding the counts.
dat3_1 <- spread(
  summarise(group_by(dat3, ID, Weat2, Align), count = n()),
  Align,
  count
)
DT::datatable(dat3_1)
# A less elegant alternative for Category to Binary ----
# Keep the ID, age band and alignment (positions 1, 3 and 5 of dat1).
dat5 <- dat1[c("ID", "Age1", "Align")]
DT::datatable(dat5)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Derive the indicators by relabelling every category by hand: each existing
# value (the name) is replaced with 1 or 0 (the value).
# NOTE(review): revalue() relabels values in place, so the new columns are
# presumably not integer vectors like the mutate() versions - confirm.
dat5$Age2 <- plyr::revalue(
  dat5$Age1,
  c("15-24" = 1, "25-34" = 0, "35-44" = 0, "45-54" = 0, "55-64" = 0)
)
dat5$Align1 <- plyr::revalue(
  dat5$Align,
  c("A" = 1, "B" = 0, "C" = 0)
)
DT::datatable(dat5)