1 data$newCol <-
# Load the reign records from CSV.
# NOTE(review): the first column prints as `X.U.FEFF.name` in the output
# below, which suggests the file starts with a byte-order mark that
# `encoding = "UTF-8"` does not strip. `fileEncoding = "UTF-8-BOM"` would
# likely yield a clean `name` column -- confirm against the data file.
record <- utils::read.csv(file = "./data/record.csv", encoding = "UTF-8")
# Preview the first six rows.
head(record, n = 6L)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 1 Wehha Wuffingas NA 571 East Anglia
## 2 Wuffa Wuffingas 571 578 East Anglia
## 3 Tytila Wuffingas 578 616 East Anglia
## 4 R<U+00E6>dwald Wuffingas 616 627 East Anglia
## 5 Eorpwald Wuffingas 627 627 East Anglia
## 6 Ricberht Wuffingas 627 630 East Anglia
# Derive the reign length as end year minus start year; rows where either
# year is NA get NA.
# NOTE(review): `regin` looks like a misspelling of `reign`; the column name
# is kept because the printed output uses it.
record[["time.of.regin"]] <- with(record, end.of.reign - start.of.reign)
# Show the first two rows, including the new column.
head(record, n = 2L)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 1 Wehha Wuffingas NA 571 East Anglia
## 2 Wuffa Wuffingas 571 578 East Anglia
## time.of.regin
## 1 NA
## 2 7
2 with & within
record2 <- utils::read.csv(file = "./data/record.csv", encoding = "UTF-8")
# `with()` evaluates one expression against the data frame's columns and
# returns only that expression's value, so it creates a single column at a
# time.
record2[["length.time.of.regin"]] <- with(
  record2,
  end.of.reign - start.of.reign
)
# Show the first two rows, including the new column.
head(record2, n = 2L)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 1 Wehha Wuffingas NA 571 East Anglia
## 2 Wuffa Wuffingas 571 578 East Anglia
## length.time.of.regin
## 1 NA
## 2 7
# `within()` returns the whole data frame, with every variable assigned
# inside the braces attached as a column.
record3 <- utils::read.csv(file = "./data/record.csv", encoding = "UTF-8")
record3_dataframe <- within(
  data = record3,
  expr = {
    # Reign length in years; the second column is derived from the first,
    # so the statements must stay in this order.
    length.time.of.regin <- end.of.reign - start.of.reign
    regin.was.more.than.30.years <- length.time.of.regin > 30
  }
)
# Preview the first six rows.
head(record3_dataframe, n = 6L)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 1 Wehha Wuffingas NA 571 East Anglia
## 2 Wuffa Wuffingas 571 578 East Anglia
## 3 Tytila Wuffingas 578 616 East Anglia
## 4 R<U+00E6>dwald Wuffingas 616 627 East Anglia
## 5 Eorpwald Wuffingas 627 627 East Anglia
## 6 Ricberht Wuffingas 627 630 East Anglia
## regin.was.more.than.30.years length.time.of.regin
## 1 NA NA
## 2 FALSE 7
## 3 TRUE 38
## 4 FALSE 11
## 5 FALSE 0
## 6 FALSE 3
3 ifelse(f(x), 1, 0)
一般用于创建factor列
3个类别: ifelse(dat$daily < 10000, "low", ifelse(dat$daily > 20000, "high", "med"))
# Label each row by whether the 30-year flag is available: rows where the
# flag is NA get "NO", every other row gets "YSE".
# NOTE(review): "YSE" looks like a typo for "YES"; it is kept unchanged
# because the printed tables in this document show that label.
record3_dataframe[["newLevels"]] <- ifelse(
  test = is.na(record3_dataframe[["regin.was.more.than.30.years"]]),
  yes = "NO",
  no = "YSE"
)
# Preview the first six rows.
head(record3_dataframe, n = 6L)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 1 Wehha Wuffingas NA 571 East Anglia
## 2 Wuffa Wuffingas 571 578 East Anglia
## 3 Tytila Wuffingas 578 616 East Anglia
## 4 R<U+00E6>dwald Wuffingas 616 627 East Anglia
## 5 Eorpwald Wuffingas 627 627 East Anglia
## 6 Ricberht Wuffingas 627 630 East Anglia
## regin.was.more.than.30.years length.time.of.regin newLevels
## 1 NA NA NO
## 2 FALSE 7 YSE
## 3 TRUE 38 YSE
## 4 FALSE 11 YSE
## 5 FALSE 0 YSE
## 6 FALSE 3 YSE
# Count how many rows carry each label.
table(record3_dataframe[["newLevels"]])
##
## NO YSE
## 18 241
4 cut()
cut(x, breaks, labels = NULL,
include.lowest = FALSE, right = TRUE, dig.lab = 3,
ordered_result = FALSE, ...)
# Drop every row that contains an NA so that all reign lengths are defined.
record3_dataframe_noNA <- na.omit(record3_dataframe)
summary(record3_dataframe_noNA$length.time.of.regin)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.00 7.00 10.91 14.00 93.00
# Bin the reign lengths into four ranges. The minimum length is 0 and cut()
# builds left-open intervals by default, so without `include.lowest = TRUE`
# every zero-length reign falls outside the first bin and becomes NA.
record3_dataframe_noNA$newLevels2 <- cut(
  record3_dataframe_noNA$length.time.of.regin,
  breaks = c(0, 10, 50, 70, 100),
  include.lowest = TRUE
)
head(record3_dataframe_noNA)
## X.U.FEFF.name house start.of.reign end.of.reign domain
## 2 Wuffa Wuffingas 571 578 East Anglia
## 3 Tytila Wuffingas 578 616 East Anglia
## 4 R<U+00E6>dwald Wuffingas 616 627 East Anglia
## 5 Eorpwald Wuffingas 627 627 East Anglia
## 6 Ricberht Wuffingas 627 630 East Anglia
## 7 Sigeberht and Ecgric Wuffingas 630 634 East Anglia
## regin.was.more.than.30.years length.time.of.regin newLevels newLevels2
## 2 FALSE 7 YSE [0,10]
## 3 TRUE 38 YSE (10,50]
## 4 FALSE 11 YSE (10,50]
## 5 FALSE 0 YSE [0,10]
## 6 FALSE 3 YSE [0,10]
## 7 FALSE 4 YSE [0,10]
# `useNA = "always"` keeps the NA column visible so unbinned values would
# show up in the counts.
table(record3_dataframe_noNA$newLevels2, useNA = "always")
##
## [0,10] (10,50] (50,70] (70,100] <NA>
## 154 83 3 1 0
5 rbind/cbind
# Fill a 3 x 3 matrix column by column (the default fill order).
m1 <- matrix(seq_len(9), nrow = 3)
m1
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Same values, filled row by row instead.
m2 <- matrix(seq_len(9), ncol = 3, byrow = TRUE)
m2
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# cbind() places the two matrices side by side, giving 3 rows and 6 columns.
cbind(m1, m2)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 4 7 1 2 3
## [2,] 2 5 8 4 5 6
## [3,] 3 6 9 7 8 9
# rbind() stacks them vertically, giving 6 rows and 3 columns.
matrixData <- rbind(m1, m2)
matrixData
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
## [4,] 1 2 3
## [5,] 4 5 6
## [6,] 7 8 9
6 dplyr–mutate
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Label the six rows 1-6 and name the three columns, then convert the matrix
# to a data frame so the columns can be referenced by name.
rownames(matrixData) <- seq_len(6)
colnames(matrixData) <- c("shi", "xiao", "kai")
data_new_frame <- data.frame(matrixData)
# data_new_frame
# Add `newcol` as the difference between the `xiao` and `shi` columns. The
# call is namespace-qualified so it does not depend on which package's
# `mutate` is currently attached.
dplyr::mutate(data_new_frame, newcol = xiao - shi)
## shi xiao kai newcol
## 1 1 4 7 3
## 2 2 5 8 3
## 3 3 6 9 3
## 4 1 2 3 1
## 5 4 5 6 1
## 6 7 8 9 1
7 plyr–mutate
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
##
## is.discrete, summarize
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
restData <- utils::read.csv(file = "./data/restaurants.csv", encoding = "UTF-8")
# Add `zipGroups` by binning `zipCode` with Hmisc's cut2(), requesting four
# groups. Both calls are namespace-qualified because plyr was attached after
# dplyr above, so the bare name `mutate` would resolve by attach order.
restData2 <- plyr::mutate(restData, zipGroups = Hmisc::cut2(zipCode, g = 4))
# Preview the first six rows.
head(restData2, n = 6L)
## name zipCode neighborhood councilDistrict
## 1 410 21206 Frankford 2
## 2 1919 21231 Fells Point 1
## 3 SAUTE 21224 Canton 1
## 4 #1 CHINESE KITCHEN 21211 Hampden 14
## 5 #1 chinese restaurant 21223 Millhill 9
## 6 19TH HOLE 21218 Clifton Park 14
## policeDistrict Location.1 zipGroups
## 1 NORTHEASTERN 4509 BELAIR ROAD\nBaltimore, MD\n [ 21205,21220)
## 2 SOUTHEASTERN 1919 FLEET ST\nBaltimore, MD\n [ 21227,21287]
## 3 SOUTHEASTERN 2844 HUDSON ST\nBaltimore, MD\n [ 21220,21227)
## 4 NORTHERN 3998 ROLAND AVE\nBaltimore, MD\n [ 21205,21220)
## 5 SOUTHWESTERN 2481 frederick ave\nBaltimore, MD\n [ 21220,21227)
## 6 NORTHEASTERN 2722 HARFORD RD\nBaltimore, MD\n [ 21205,21220)