1 data$newCol <-

# Load the reign records and preview the first six rows.
# NOTE(review): the first column prints as `X.U.FEFF.name`; presumably the
# file starts with a UTF-8 byte-order mark that ends up in the header name.
# Reading with fileEncoding = "UTF-8-BOM" would likely strip it -- TODO confirm.
record <- read.csv(file = "./data/record.csv", encoding = "UTF-8")
head(record, n = 6)
##    X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 1          Wehha Wuffingas             NA          571 East Anglia
## 2          Wuffa Wuffingas            571          578 East Anglia
## 3         Tytila Wuffingas            578          616 East Anglia
## 4 R<U+00E6>dwald Wuffingas            616          627 East Anglia
## 5       Eorpwald Wuffingas            627          627 East Anglia
## 6       Ricberht Wuffingas            627          630 East Anglia
# Derive a new column as the difference between the end and start of each
# reign; rows with a missing start or end yield NA.
# NOTE(review): "regin" is presumably a typo for "reign"; the name is kept
# because the printed output uses it.
record[["time.of.regin"]] <- with(record, end.of.reign - start.of.reign)
head(record, n = 2)
##   X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 1         Wehha Wuffingas             NA          571 East Anglia
## 2         Wuffa Wuffingas            571          578 East Anglia
##   time.of.regin
## 1            NA
## 2             7

2 with & within

record2 <- read.csv(file = "./data/record.csv", encoding = "UTF-8")
# with() evaluates an expression using the data frame's columns and returns
# only the value of that expression, so each call can fill a single column.
record2[["length.time.of.regin"]] <- with(
    record2,
    end.of.reign - start.of.reign
)
head(record2, n = 2)
##   X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 1         Wehha Wuffingas             NA          571 East Anglia
## 2         Wuffa Wuffingas            571          578 East Anglia
##   length.time.of.regin
## 1                   NA
## 2                    7
# within() evaluates the braced expressions using the data frame's columns and
# returns a modified copy of the whole data frame, so several columns can be
# added in a single call.
record3 <- read.csv("./data/record.csv", encoding = "UTF-8")
record3_dataframe <- within(record3, {
    # Reign length: end.of.reign minus start.of.reign (NA if either is NA).
    length.time.of.regin <- end.of.reign - start.of.reign
    # Logical flag built from the column created on the previous line.
    regin.was.more.than.30.years <- length.time.of.regin > 30
})
# NOTE: in the printed result the two new columns appear in the reverse of the
# order in which they are assigned above.
head(record3_dataframe)
##    X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 1          Wehha Wuffingas             NA          571 East Anglia
## 2          Wuffa Wuffingas            571          578 East Anglia
## 3         Tytila Wuffingas            578          616 East Anglia
## 4 R<U+00E6>dwald Wuffingas            616          627 East Anglia
## 5       Eorpwald Wuffingas            627          627 East Anglia
## 6       Ricberht Wuffingas            627          630 East Anglia
##   regin.was.more.than.30.years length.time.of.regin
## 1                           NA                   NA
## 2                        FALSE                    7
## 3                         TRUE                   38
## 4                        FALSE                   11
## 5                        FALSE                    0
## 6                        FALSE                    3

3 ifelse(f(x), 1, 0)

一般用于创建factor列
3个类别: ifelse(dat$daily < 10000, "low", ifelse(dat$daily > 20000, "high", "med"))
# Label each row by whether the 30-year flag is missing: "NO" when the flag is
# NA, "YSE" otherwise.
# NOTE(review): "YSE" is presumably a typo for "YES"; it is left unchanged
# because the printed output below shows this exact label.
record3_dataframe[["newLevels"]] <- ifelse(
    is.na(record3_dataframe[["regin.was.more.than.30.years"]]),
    "NO", "YSE"
)
head(record3_dataframe, n = 6)
##    X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 1          Wehha Wuffingas             NA          571 East Anglia
## 2          Wuffa Wuffingas            571          578 East Anglia
## 3         Tytila Wuffingas            578          616 East Anglia
## 4 R<U+00E6>dwald Wuffingas            616          627 East Anglia
## 5       Eorpwald Wuffingas            627          627 East Anglia
## 6       Ricberht Wuffingas            627          630 East Anglia
##   regin.was.more.than.30.years length.time.of.regin newLevels
## 1                           NA                   NA        NO
## 2                        FALSE                    7       YSE
## 3                         TRUE                   38       YSE
## 4                        FALSE                   11       YSE
## 5                        FALSE                    0       YSE
## 6                        FALSE                    3       YSE
# Count how many rows received each label.
table(record3_dataframe[["newLevels"]])
## 
##  NO YSE 
##  18 241

4 cut()

cut(x, breaks, labels = NULL,
    include.lowest = FALSE, right = TRUE, dig.lab = 3,
    ordered_result = FALSE, ...)
# Drop every row that contains an NA in any column before binning.
record3_dataframe_noNA <- na.omit(record3_dataframe)
summary(record3_dataframe_noNA$length.time.of.regin)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.00    7.00   10.91   14.00   93.00
# Bin the reign lengths into four intervals. cut() builds right-closed
# intervals by default, so the lowest break (0) is excluded unless
# include.lowest = TRUE is given; without it every reign of length 0 would
# silently be turned into NA.
record3_dataframe_noNA$newLevels2 <- cut(record3_dataframe_noNA$length.time.of.regin, 
    breaks = c(0, 10, 50, 70, 100), include.lowest = TRUE)
head(record3_dataframe_noNA)
##          X.U.FEFF.name     house start.of.reign end.of.reign      domain
## 2                Wuffa Wuffingas            571          578 East Anglia
## 3               Tytila Wuffingas            578          616 East Anglia
## 4       R<U+00E6>dwald Wuffingas            616          627 East Anglia
## 5             Eorpwald Wuffingas            627          627 East Anglia
## 6             Ricberht Wuffingas            627          630 East Anglia
## 7 Sigeberht and Ecgric Wuffingas            630          634 East Anglia
##   regin.was.more.than.30.years length.time.of.regin newLevels newLevels2
## 2                        FALSE                    7       YSE     [0,10]
## 3                         TRUE                   38       YSE    (10,50]
## 4                        FALSE                   11       YSE    (10,50]
## 5                        FALSE                    0       YSE     [0,10]
## 6                        FALSE                    3       YSE     [0,10]
## 7                        FALSE                    4       YSE     [0,10]
# useNA = "always" keeps the NA column in the table even when its count is 0,
# which makes it easy to confirm that no value fell outside the breaks.
table(record3_dataframe_noNA$newLevels2, useNA = "always")
## 
##   [0,10]  (10,50]  (50,70] (70,100]     <NA> 
##      154       83        3        1        0

5 rbind/cbind

# Fill the integers 1..9 column by column (matrix()'s default fill order).
m1 <- matrix(seq_len(9), nrow = 3, ncol = 3)
m1
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
# The same values laid out row by row, which is exactly the transpose of m1.
m2 <- t(m1)
m2
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
# cbind() places the two matrices side by side (3 rows, 6 columns).
cbind(m1, m2)
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    4    7    1    2    3
## [2,]    2    5    8    4    5    6
## [3,]    3    6    9    7    8    9
# rbind() stacks them vertically (6 rows, 3 columns).
matrixData <- rbind(m1, m2)
matrixData
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
## [4,]    1    2    3
## [5,]    4    5    6
## [6,]    7    8    9

6 dplyr–mutate

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Name the rows "1".."6" and the columns "shi", "xiao", "kai", then convert
# the matrix to a data frame so the columns can be referred to by name.
dimnames(matrixData) <- list(
    as.character(1:6),
    c("shi", "xiao", "kai")
)
data_new_frame <- data.frame(matrixData)
# Add newcol = xiao - shi; the result is only printed, not assigned.
mutate(data_new_frame, newcol = xiao - shi)
##   shi xiao kai newcol
## 1   1    4   7      3
## 2   2    5   8      3
## 3   3    6   9      3
## 4   1    2   3      1
## 5   4    5   6      1
## 6   7    8   9      1

7 plyr–mutate

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
restData <- read.csv(file = "./data/restaurants.csv", encoding = "UTF-8")
# plyr was attached after dplyr, so the bare name `mutate` resolves to plyr's
# version at this point; cut2() comes from Hmisc. Both are namespace-qualified
# here to make that explicit. cut2(zipCode, g = 4) is asked for four groups.
restData2 <- plyr::mutate(
    restData,
    zipGroups = Hmisc::cut2(zipCode, g = 4)
)
head(restData2, n = 6)
##                    name zipCode neighborhood councilDistrict
## 1                   410   21206    Frankford               2
## 2                  1919   21231  Fells Point               1
## 3                 SAUTE   21224       Canton               1
## 4    #1 CHINESE KITCHEN   21211      Hampden              14
## 5 #1 chinese restaurant   21223     Millhill               9
## 6             19TH HOLE   21218 Clifton Park              14
##   policeDistrict                          Location.1      zipGroups
## 1   NORTHEASTERN   4509 BELAIR ROAD\nBaltimore, MD\n [ 21205,21220)
## 2   SOUTHEASTERN      1919 FLEET ST\nBaltimore, MD\n [ 21227,21287]
## 3   SOUTHEASTERN     2844 HUDSON ST\nBaltimore, MD\n [ 21220,21227)
## 4       NORTHERN    3998 ROLAND AVE\nBaltimore, MD\n [ 21205,21220)
## 5   SOUTHWESTERN 2481 frederick ave\nBaltimore, MD\n [ 21220,21227)
## 6   NORTHEASTERN    2722 HARFORD RD\nBaltimore, MD\n [ 21205,21220)