# cut ----
# Bucket a vector of dates by calendar month.
# as.Date() matters here: without it R would treat the values as plain
# character strings rather than Date objects.
vDates <- as.Date(
  c("2013-06-01", "2013-07-08", "2013-09-01", "2013-09-15"),
  format = "%Y-%m-%d"
)
# Each date is labelled with the first day of the month it falls in.
vDates.bymonth <- cut(vDates, "month")
Dates <- data.frame(vDates = vDates, vDates.bymonth = vDates.bymonth)
Dates
## vDates vDates.bymonth
## 1 2013-06-01 2013-06-01
## 2 2013-07-08 2013-07-01
## 3 2013-09-01 2013-09-01
## 4 2013-09-15 2013-09-01
# aggregate ----
# Sample order table used by the grouping examples.
orders <- data.frame(
  ORDERID = seq_len(5),
  CLIENT = c("WVF VIP", "UFS COM", "SWFR", "JFS PEP", "DSG"),
  SELLERID = c(5, 13, 2, 27, 15),
  AMOUNT = c(440, 1863.4, 1813, 670.8, 3730)
)
# To group by CLIENT first and then by SELLERID, the grouping columns have to
# be listed in the order c("SELLERID", "CLIENT").
result1 <- aggregate(
  x = orders$AMOUNT,
  by = orders[c("SELLERID", "CLIENT")],
  FUN = sum
)
result2 <- aggregate(
  x = orders$AMOUNT,
  by = orders[c("SELLERID", "CLIENT")],
  FUN = max
)
# Append the per-group maximum next to the per-group sum; the explicit name
# keeps the column header "result2$x" shown in the output below.
result <- cbind(result1, "result2$x" = result2$x)
result
## SELLERID CLIENT x result2$x
## 1 15 DSG 3730.0 3730.0
## 2 27 JFS PEP 670.8 670.8
## 3 2 SWFR 1813.0 1813.0
## 4 13 UFS COM 1863.4 1863.4
## 5 5 WVF VIP 440.0 440.0
# split + vapply: the same per-group sum and max, computed group by group.
# drop = TRUE discards SELLERID/CLIENT combinations that never occur in the
# data, so only observed groups remain.
sp <- split(orders, orders[, c("SELLERID", "CLIENT")], drop = TRUE)
# vapply() is used instead of sapply() so the result is always a numeric
# vector (one value per group), even if there happen to be no groups.
result1 <- vapply(sp, function(grp) sum(grp$AMOUNT), numeric(1))
result2 <- vapply(sp, function(grp) max(grp$AMOUNT), numeric(1))
# Row names of the result are the group labels ("SELLERID.CLIENT").
result <- data.frame(
  result1 = result1,
  result2 = result2
)
result
## result1 result2
## 15.DSG 3730.0 3730.0
## 27.JFS PEP 670.8 670.8
## 2.SWFR 1813.0 1813.0
## 13.UFS COM 1863.4 1863.4
## 5.WVF VIP 440.0 440.0
# subset ----
# Signatures:
#   subset(x, subset, ...)                        # default method (vectors)
#   subset(x, subset, select, drop = FALSE, ...)  # matrices
#   subset(x, subset, select, drop = FALSE, ...)  # data frames
# x is the object to filter; subset is a logical expression picking the
# elements or rows to keep (missing values are treated as FALSE).
# select is an expression naming the columns to keep.
x <- data.frame(matrix(1:30, nrow = 5, byrow = TRUE))
rownames(x) <- c("one", "two", "three", "four", "five")
colnames(x) <- c("a", "b", "c", "d", "e", "f")
new <- subset(x, a >= 14, select = a:f)
new  # rows where a >= 14, columns a through f
## a b c d e f
## four 19 20 21 22 23 24
## five 25 26 27 28 29 30
data <- data.frame(V1 = c(1, 3, 4, 5), V2 = c(1, 4, 5, 3))
# Pick the V1 values on the rows where V2 is below 5. which() returns row
# indices, so this index-based selection works somewhat like using order().
data[which(data$V2 < 5), "V1"]
## [1] 1 3 5
# Reference: "R语言︱数据集分组、筛选" (R: grouping and filtering data sets)