# Atomic classes: class() reports the class of any R value.
x <- "a"
class(x)
## [1] "character"

x <- 3.14
class(x)
## [1] "numeric"

# A bare whole number is stored as a double, so it is "numeric" as well.
x <- 3
class(x)
## [1] "numeric"

x <- 1 + 2i
class(x)
## [1] "complex"

x <- TRUE
class(x)
## [1] "logical"

# Vectors: an empty character vector of length 10, an integer sequence
# built with `:`, and a double vector built with c().
x <- vector(mode = "character", length = 10)
x1 <- 1:4
x2 <- c(1, 2, 3, 4)
# Mixing types in c() coerces every element to the most general type;
# here the result is the character vector "TRUE" "10" "a".
x3 <- c(TRUE, 10, "a")

# Matrices: matrix() fills the values column by column.
x <- matrix(1:6, nrow = 3, ncol = 2)
dim(x)
## [1] 3 2
attributes(x)
## $dim
## [1] 3 2

# Assigning dim() to a plain vector reshapes it into a 2 x 3 matrix.
y <- 1:6
dim(y) <- c(2, 3)

# rbind() stacks its arguments by rows, cbind() by columns.
rbind(x, x)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
## [4,]    1    4
## [5,]    2    5
## [6,]    3    6
cbind(x, x)
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    1    4
## [2,]    2    5    2    5
## [3,]    3    6    3    6
# Arrays: the same 24 values arranged as a 4 x 6 array, then as a
# three-dimensional 2 x 3 x 4 array.
x <- array(1:24, dim = c(4, 6))
x <- array(1:24, dim = c(2, 3, 4))

# Lists can hold elements of different types, optionally with names.
l <- list("a", 2, 10.3, 3 + 4i, TRUE)
l <- list(a = 1, b = 2)

# dimnames: a list with one character vector per dimension
# (row names first, then column names).
x <- matrix(1:6, nrow = 2, ncol = 3)
l <- list(c("a", "b"), c("a", "b", "c"))
dimnames(x) <- l

# Factors: table() counts the levels, unclass() exposes the underlying
# integer codes together with the levels attribute.
x <- factor(c("f", "f", "f", "t", "t"))
table(x)
## x
## f t
## 3 2
unclass(x)
## [1] 1 1 1 2 2
## attr(,"levels")
## [1] "f" "t"
# Missing values: NA is "not available", NaN is "not a number".
# is.na() is TRUE for both, is.nan() only for NaN.
x <- c(1, NA, 2, NA, 3)
is.na(x)
## [1] FALSE  TRUE FALSE  TRUE FALSE
is.nan(x)
## [1] FALSE FALSE FALSE FALSE FALSE

x <- c(1, NaN, 2, NaN, 3)
is.na(x)
## [1] FALSE  TRUE FALSE  TRUE FALSE
is.nan(x)
## [1] FALSE  TRUE FALSE  TRUE FALSE

# Data frames: columns may have different types.
df <- data.frame(
  id = c(1, 2, 3),
  name = c("a", "b", "c"),
  gender = c(TRUE, FALSE, TRUE)
)

# data.matrix() converts a data frame into a numeric matrix.
df <- data.frame(
  id = c(1, 2, 3),
  name = c(1, 2, 3)
)
data.matrix(df)
##      id name
## [1,]  1    1
## [2,]  2    2
## [3,]  3    3
# Dates: date() gives the current date-time as text, Sys.Date() gives
# today's date, and as.Date() parses an ISO "YYYY-MM-DD" string.
x <- date()
x1 <- Sys.Date()
x2 <- as.Date("2017-01-01")

# weekdays() and months() return names in the current locale. The recorded
# run used a Chinese locale and printed the Chinese names for Sunday and
# January.
weekdays(x2)
months(x2)

# julian() counts the days since the origin shown in the output.
julian(x2)
## [1] 17167
## attr(,"origin")
## [1] "1970-01-01"

# Times: Sys.time() returns a POSIXct value; as.POSIXlt() converts it to a
# list-like form whose components can be read by name.
x <- Sys.time()
class(x)
## [1] "POSIXct" "POSIXt"
p <- as.POSIXlt(x)
names(unclass(p))
##  [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"
##  [8] "yday"   "isdst"  "zone"   "gmtoff"
p$sec  # seconds component of the current time (12.33149 in the recorded run)

# strptime() parses text using a format string. %B matches the full month
# name in the current locale, so this Chinese month name only parses under a
# Chinese locale; in other locales the result is NA.
p <- "一月 1, 2015 02:36"
strptime(p, "%B %d, %Y %H:%M")
## [1] "2015-01-01 02:36:00 CST"
# Subsetting vectors by position and by logical condition.
x <- 1:10
x[1]
## [1] 1
x[1:5]
## [1] 1 2 3 4 5
x[6:10]
## [1]  6  7  8  9 10
x[x > 5]
## [1]  6  7  8  9 10
x[x > 5 & x < 7]
## [1] 6

# Named vectors can be subset by position or by name.
y <- 1:4
names(y) <- c("a", "b", "c", "d")
y
## a b c d
## 1 2 3 4
y[2]
## b
## 2
y["b"]
## b
## 2
# Subsetting matrices with [row, column].
x <- matrix(1:6, nrow = 2, ncol = 3)
x
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# Single elements.
x[1, 2]
## [1] 3
x[2, 3]
## [1] 6

# A whole row or a whole column (an empty index selects everything).
x[1, ]
## [1] 1 3 5
x[, 1]
## [1] 1 2

# Several columns of one row.
x[2, c(1, 3)]
## [1] 2 6

# drop = FALSE keeps the result as a matrix instead of a plain vector.
x[1, 2, drop = FALSE]
##      [,1]
## [1,]    3
# Subsetting data frames.
x <- data.frame(v1 = 1:5, v2 = 6:10, v3 = 11:15)
x
##   v1 v2 v3
## 1  1  6 11
## 2  2  7 12
## 3  3  8 13
## 4  4  9 14
## 5  5 10 15

# Overwrite entries 2 and 4 of column v3 with missing values.
x$v3[c(2, 4)] <- NA
x
##   v1 v2 v3
## 1  1  6 11
## 2  2  7 NA
## 3  3  8 13
## 4  4  9 NA
## 5  5 10 15

# One column, selected by position or by name.
x[, 2]
## [1]  6  7  8  9 10
x[, "v2"]
## [1]  6  7  8  9 10

# Rows selected by a logical condition on the columns.
x[(x$v1 < 4 & x$v2 >= 8), ]
##   v1 v2 v3
## 3  3  8 13
x[x$v1 > 2, ]
##   v1 v2 v3
## 3  3  8 13
## 4  4  9 NA
## 5  5 10 15

# which() converts the logical mask into row positions.
x[which(x$v1 > 2), ]
##   v1 v2 v3
## 3  3  8 13
## 4  4  9 NA
## 5  5 10 15
x$v1 > 2
## [1] FALSE FALSE  TRUE  TRUE  TRUE
which(x$v1 > 2)
## [1] 3 4 5

# subset() selects the same rows.
subset(x, x$v1 > 2)
##   v1 v2 v3
## 3  3  8 13
## 4  4  9 NA
## 5  5 10 15
# Subsetting lists.
x <- list(id = 1:4, height = 170, gender = "male")

# Single brackets (equivalent to x[1]) return a sub-list: name and content.
x["id"]
## $id
## [1] 1 2 3 4

# Double brackets and $ return the content only.
x[["id"]]
## [1] 1 2 3 4
x$id
## [1] 1 2 3 4

# Single brackets with several positions return the selected elements.
x[c(1, 3)]
## $id
## [1] 1 2 3 4
##
## $gender
## [1] "male"

# [[ ]] accepts a variable that holds the element name ...
y <- "id"
x[[y]]
## [1] 1 2 3 4

# ... but $ does not: x$y looks for an element literally named "y".
x$id
## [1] 1 2 3 4
x$y
## NULL

# Extracting elements from a nested list.
x <- list(a = list(1, 2, 3, 4), b = c("Monday", "Tuesdat"))
x[[1]][[2]]    # option 1: two consecutive double brackets
## [1] 2
x[[c(1, 3)]]   # option 2: one double bracket with an index vector
## [1] 3

# Partial matching of names.
# If a partial name matches more than one element, the result is NULL.
l <- list(asdfghj = 1:10)
l$asdfghj
##  [1]  1  2  3  4  5  6  7  8  9 10
l$a
##  [1]  1  2  3  4  5  6  7  8  9 10
l[["a", exact = FALSE]]   # [[ ]] needs exact = FALSE to match partially
##  [1]  1  2  3  4  5  6  7  8  9 10
# Dropping missing values from a single vector.
x <- c(1, NA, 2, NA, 3)
x[!is.na(x)]
## [1] 1 2 3

# complete.cases() is TRUE only at positions where none of the supplied
# vectors is missing.
x <- c(1, NA, 2, NA, 3)
y <- c("a", "b", NA, "c", NA)
z <- complete.cases(x, y)
x[z]
## [1] 1
y[z]
## [1] "a"

# The datasets package ships with R; head() shows the first rows.
library(datasets)
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

# Keep only the rows without any NA, then show the first 10 of them.
g <- complete.cases(airquality)
airquality[g, ][1:10, ]
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 12    16     256  9.7   69     5  12
## 13    11     290  9.2   66     5  13
## 14    14     274 10.9   68     5  14
# Vectorized arithmetic works element by element.
x <- 1:5
y <- 6:10
x + y
## [1]  7  9 11 13 15
x * y
## [1]  6 14 24 36 50
x / y
## [1] 0.1666667 0.2857143 0.3750000 0.4444444 0.5000000

# Matrices: byrow = TRUE fills row by row; rep(2, 4) repeats 2 four times.
x <- matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
y <- matrix(rep(2, 4), nrow = 2, ncol = 2)

# %*% is true matrix multiplication.
x %*% y
##      [,1] [,2]
## [1,]    6    6
## [2,]   14   14

# * only multiplies the corresponding elements.
x * y
##      [,1] [,2]
## [1,]    2    4
## [2,]    6    8
# lapply(): call a function on every element of a list; returns a list.
str(lapply)
## function (X, FUN, ...)

x <- list(a = 1:10, b = c(11, 21, 31, 41, 51))
lapply(x, mean)
## $a
## [1] 5.5
##
## $b
## [1] 31

# sapply(): like lapply(), but simplifies the result.
#   - every result has length 1             -> a vector
#   - all results share one length above 1  -> a matrix
sapply(x, mean)
##    a    b
##  5.5 31.0

# Each element of x is passed as the first argument of runif(), so the
# result is a list of 4 vectors with lengths 1, 2, 3 and 4.
# The values are random and change on every run.
x <- 1:4
lapply(x, runif)

# Further arguments are forwarded to the function: in the recorded run all
# values fell between 0 and 100.
lapply(x, runif, min = 0, max = 100)

# An anonymous function extracts the first row of each matrix.
x <- list(a = matrix(1:6, 2, 3), b = matrix(4:7, 2, 2))
lapply(x, function(m) m[1, ])
## $a
## [1] 1 3 5
##
## $b
## [1] 4 6
# apply(): process data along one dimension of an array, e.g. apply a
# function to the rows or the columns of a matrix. A single call replaces a
# for/while loop.
# Usage: apply(array, dimension, function or function name)
x <- matrix(1:16, 4, 4)
apply(x, 2, mean)   # column means
## [1]  2.5  6.5 10.5 14.5
apply(x, 2, sum)    # column sums
## [1] 10 26 42 58
apply(x, 1, mean)   # row means
## [1]  7  8  9 10
apply(x, 1, sum)    # row sums
## [1] 28 32 36 40

# quantile() computes percentiles; probs = c(0.25, 0.75) asks for the 25%
# and 75% points of every row. The recorded result was a 2 x 10 matrix with
# rows "25%" and "75%"; the values are random.
x <- matrix(rnorm(100), 10, 10)
apply(x, 1, quantile, probs = c(0.25, 0.75))

# On a 2 x 3 x 4 array, a pair of dimensions is kept and the remaining one
# is collapsed by the function (values are random).
x <- array(rnorm(2 * 3 * 4), c(2, 3, 4))
apply(x, c(1, 2), mean)   # collapses dimension 3 -> 2 x 3 matrix
apply(x, c(1, 3), mean)   # collapses dimension 2 -> 2 x 4 matrix
apply(x, c(2, 3), mean)   # collapses dimension 1 -> 3 x 4 matrix
# mapply(): the multivariate version of lapply().
# Usage: mapply(function or function name, data, further function arguments)
a <- list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))

# Builds the same structure as the list above: rep is the function, 1:4
# supplies the values and 4:1 supplies how often each value is repeated.
b <- mapply(rep, 1:4, 4:1)

# s(): draw n values from a normal distribution.
#   n    - number of values to draw
#   mean - mean of the distribution
#   std  - standard deviation of the distribution
s <- function(n, mean, std) {
  rnorm(n, mean, std)
}

# Four draws with mean 0 and standard deviation 1 (values are random).
s(4, 0, 1)

# Call s() five times: the counts run from 1 to 5, the means from 5 down to
# 1, and the standard deviation is always 2. The result is a list of 5
# vectors with lengths 1 to 5 (values are random).
mapply(s, 1:5, 5:1, 2)

# Writing the calls out by hand gives a list with the same structure.
list(s(1, 5, 2), s(2, 4, 2), s(3, 3, 2), s(4, 2, 2), s(5, 1, 2))

# The first element is one value with mean 5 and standard deviation 2;
# the second element is two values with mean 4 and standard deviation 2.
# ... and so on for the remaining elements.

# tapply(): apply a function to the groups of a vector defined by a factor.
# x holds 15 random values; gl(3, 5) builds a factor with 3 levels, each
# repeated 5 times, so every block of 5 values forms one group.
x <- c(rnorm(5), runif(5), rnorm(5, 1))
f <- gl(3, 5)

# One mean per level, returned as a vector named "1", "2", "3"
# (values are random).
tapply(x, f, mean)

# simplify = FALSE returns the same means as a list with one element per
# level.
tapply(x, f, mean, simplify = FALSE)
# split(): group a vector or another object by a factor or a list of
# factors. It is usually combined with lapply().
# Usage: split(vector / list / data frame, factor / list of factors)
x <- c(rnorm(5), runif(5), rnorm(5, 1))
f <- gl(3, 5)

# A list with one element per level ("1", "2", "3"), each holding the 5
# values of that group (values are random).
split(x, f)

# Apply a function to every group: group means, then group sums.
lapply(split(x, f), mean)
lapply(split(x, f), sum)
# First rows of the airquality data set.
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

# Split the data frame into one data frame per month and print them.
split(airquality, airquality$Month)
## $`5`
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## 11 7 NA 6.9 74 5 11
## 12 16 256 9.7 69 5 12
## 13 11 290 9.2 66 5 13
## 14 14 274 10.9 68 5 14
## 15 18 65 13.2 58 5 15
## 16 14 334 11.5 64 5 16
## 17 34 307 12.0 66 5 17
## 18 6 78 18.4 57 5 18
## 19 30 322 11.5 68 5 19
## 20 11 44 9.7 62 5 20
## 21 1 8 9.7 59 5 21
## 22 11 320 16.6 73 5 22
## 23 4 25 9.7 61 5 23
## 24 32 92 12.0 61 5 24
## 25 NA 66 16.6 57 5 25
## 26 NA 266 14.9 58 5 26
## 27 NA NA 8.0 57 5 27
## 28 23 13 12.0 67 5 28
## 29 45 252 14.9 81 5 29
## 30 115 223 5.7 79 5 30
## 31 37 279 7.4 76 5 31
##
## $`6`
## Ozone Solar.R Wind Temp Month Day
## 32 NA 286 8.6 78 6 1
## 33 NA 287 9.7 74 6 2
## 34 NA 242 16.1 67 6 3
## 35 NA 186 9.2 84 6 4
## 36 NA 220 8.6 85 6 5
## 37 NA 264 14.3 79 6 6
## 38 29 127 9.7 82 6 7
## 39 NA 273 6.9 87 6 8
## 40 71 291 13.8 90 6 9
## 41 39 323 11.5 87 6 10
## 42 NA 259 10.9 93 6 11
## 43 NA 250 9.2 92 6 12
## 44 23 148 8.0 82 6 13
## 45 NA 332 13.8 80 6 14
## 46 NA 322 11.5 79 6 15
## 47 21 191 14.9 77 6 16
## 48 37 284 20.7 72 6 17
## 49 20 37 9.2 65 6 18
## 50 12 120 11.5 73 6 19
## 51 13 137 10.3 76 6 20
## 52 NA 150 6.3 77 6 21
## 53 NA 59 1.7 76 6 22
## 54 NA 91 4.6 76 6 23
## 55 NA 250 6.3 76 6 24
## 56 NA 135 8.0 75 6 25
## 57 NA 127 8.0 78 6 26
## 58 NA 47 10.3 73 6 27
## 59 NA 98 11.5 80 6 28
## 60 NA 31 14.9 77 6 29
## 61 NA 138 8.0 83 6 30
##
## $`7`
## Ozone Solar.R Wind Temp Month Day
## 62 135 269 4.1 84 7 1
## 63 49 248 9.2 85 7 2
## 64 32 236 9.2 81 7 3
## 65 NA 101 10.9 84 7 4
## 66 64 175 4.6 83 7 5
## 67 40 314 10.9 83 7 6
## 68 77 276 5.1 88 7 7
## 69 97 267 6.3 92 7 8
## 70 97 272 5.7 92 7 9
## 71 85 175 7.4 89 7 10
## 72 NA 139 8.6 82 7 11
## 73 10 264 14.3 73 7 12
## 74 27 175 14.9 81 7 13
## 75 NA 291 14.9 91 7 14
## 76 7 48 14.3 80 7 15
## 77 48 260 6.9 81 7 16
## 78 35 274 10.3 82 7 17
## 79 61 285 6.3 84 7 18
## 80 79 187 5.1 87 7 19
## 81 63 220 11.5 85 7 20
## 82 16 7 6.9 74 7 21
## 83 NA 258 9.7 81 7 22
## 84 NA 295 11.5 82 7 23
## 85 80 294 8.6 86 7 24
## 86 108 223 8.0 85 7 25
## 87 20 81 8.6 82 7 26
## 88 52 82 12.0 86 7 27
## 89 82 213 7.4 88 7 28
## 90 50 275 7.4 86 7 29
## 91 64 253 7.4 83 7 30
## 92 59 254 9.2 81 7 31
##
## $`8`
## Ozone Solar.R Wind Temp Month Day
## 93 39 83 6.9 81 8 1
## 94 9 24 13.8 81 8 2
## 95 16 77 7.4 82 8 3
## 96 78 NA 6.9 86 8 4
## 97 35 NA 7.4 85 8 5
## 98 66 NA 4.6 87 8 6
## 99 122 255 4.0 89 8 7
## 100 89 229 10.3 90 8 8
## 101 110 207 8.0 90 8 9
## 102 NA 222 8.6 92 8 10
## 103 NA 137 11.5 86 8 11
## 104 44 192 11.5 86 8 12
## 105 28 273 11.5 82 8 13
## 106 65 157 9.7 80 8 14
## 107 NA 64 11.5 79 8 15
## 108 22 71 10.3 77 8 16
## 109 59 51 6.3 79 8 17
## 110 23 115 7.4 76 8 18
## 111 31 244 10.9 78 8 19
## 112 44 190 10.3 78 8 20
## 113 21 259 15.5 77 8 21
## 114 9 36 14.3 72 8 22
## 115 NA 255 12.6 75 8 23
## 116 45 212 9.7 79 8 24
## 117 168 238 3.4 81 8 25
## 118 73 215 8.0 86 8 26
## 119 NA 153 5.7 88 8 27
## 120 76 203 9.7 97 8 28
## 121 118 225 2.3 94 8 29
## 122 84 237 6.3 96 8 30
## 123 85 188 6.3 94 8 31
##
## $`9`
## Ozone Solar.R Wind Temp Month Day
## 124 96 167 6.9 91 9 1
## 125 78 197 5.1 92 9 2
## 126 73 183 2.8 93 9 3
## 127 91 189 4.6 93 9 4
## 128 47 95 7.4 87 9 5
## 129 32 92 15.5 84 9 6
## 130 20 252 10.9 80 9 7
## 131 23 220 10.3 78 9 8
## 132 21 230 10.9 75 9 9
## 133 24 259 9.7 73 9 10
## 134 44 236 14.9 81 9 11
## 135 21 259 15.5 76 9 12
## 136 28 238 6.3 77 9 13
## 137 9 24 10.9 71 9 14
## 138 13 112 11.5 71 9 15
## 139 46 237 6.9 78 9 16
## 140 18 224 13.8 67 9 17
## 141 13 27 10.3 76 9 18
## 142 24 238 10.3 68 9 19
## 143 16 201 8.0 82 9 20
## 144 13 238 12.6 64 9 21
## 145 23 14 9.2 71 9 22
## 146 36 139 10.3 81 9 23
## 147 7 49 10.3 69 9 24
## 148 14 20 16.6 63 9 25
## 149 30 193 6.9 70 9 26
## 150 NA 145 13.2 77 9 27
## 151 14 191 14.3 75 9 28
## 152 18 131 8.0 76 9 29
## 153 20 223 11.5 68 9 30
# One data frame per month.
s <- split(airquality, airquality$Month)

# Number of records in each month.
table(airquality$Month)
##
##  5  6  7  8  9
## 31 30 31 31 30

# Column means per month. Ozone contains missing values, so its mean is NA.
lapply(s, function(x) colMeans(x[, c("Ozone", "Wind", "Temp")]))
## $`5`
##    Ozone     Wind     Temp
##       NA 11.62258 65.54839
##
## $`6`
##    Ozone     Wind     Temp
##       NA 10.26667 79.10000
##
## $`7`
##     Ozone      Wind      Temp
##        NA  8.941935 83.903226
##
## $`8`
##     Ozone      Wind      Temp
##        NA  8.793548 83.967742
##
## $`9`
## Ozone  Wind  Temp
##    NA 10.18 76.90

# sapply() simplifies the same result into a matrix.
sapply(s, function(x) colMeans(x[, c("Ozone", "Wind", "Temp")]))
##              5        6         7         8     9
## Ozone       NA       NA        NA        NA    NA
## Wind  11.62258 10.26667  8.941935  8.793548 10.18
## Temp  65.54839 79.10000 83.903226 83.967742 76.90

# na.rm = TRUE drops the missing values before averaging.
sapply(
  s,
  function(x) colMeans(x[, c("Ozone", "Wind", "Temp")], na.rm = TRUE)
)
##              5        6         7         8        9
## Ozone 23.61538 29.44444 59.115385 59.961538 31.44828
## Wind  11.62258 10.26667  8.941935  8.793548 10.18000
## Temp  65.54839 79.10000 83.903226 83.967742 76.90000
# Sorting.
x <- data.frame(
  v1 = 1:5,
  v2 = c(10, 7, 9, 6, 8),
  v3 = 11:15,
  v4 = c(1, 1, 2, 2, 1)
)
x
##   v1 v2 v3 v4
## 1  1 10 11  1
## 2  2  7 12  1
## 3  3  9 13  2
## 4  4  6 14  2
## 5  5  8 15  1

# sort() returns the sorted values: ascending by default, descending on
# request.
sort(x$v2)
## [1]  6  7  8  9 10
sort(x$v2, decreasing = TRUE)
## [1] 10  9  8  7  6

# order() returns the positions of the values, not the values themselves.
order(x$v2)
## [1] 4 2 5 3 1

# Using those positions as row indices sorts the whole data frame by v2.
x[order(x$v2), ]
##   v1 v2 v3 v4
## 4  4  6 14  2
## 2  2  7 12  1
## 5  5  8 15  1
## 3  3  9 13  2
## 1  1 10 11  1

# Descending order with v4 as the primary key and v2 as the secondary key.
x[order(x$v4, x$v2, decreasing = TRUE), ]
##   v1 v2 v3 v4
## 3  3  9 13  2
## 4  4  6 14  2
## 1  1 10 11  1
## 5  5  8 15  1
## 2  2  7 12  1
# head() and tail() show the first / last six rows by default; the second
# argument changes the number of rows.
head(airquality, 10)
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 5     NA      NA 14.3   56     5   5
## 6     28      NA 14.9   66     5   6
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 10    NA     194  8.6   69     5  10
tail(airquality, 10)
##     Ozone Solar.R Wind Temp Month Day
## 144    13     238 12.6   64     9  21
## 145    23      14  9.2   71     9  22
## 146    36     139 10.3   81     9  23
## 147     7      49 10.3   69     9  24
## 148    14      20 16.6   63     9  25
## 149    30     193  6.9   70     9  26
## 150    NA     145 13.2   77     9  27
## 151    14     191 14.3   75     9  28
## 152    18     131  8.0   76     9  29
## 153    20     223 11.5   68     9  30

# summary() gives an overview of the distribution of every column.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00
##  NA's   :37       NA's   :7
##      Month            Day
##  Min.   :5.000   Min.   : 1.0
##  1st Qu.:6.000   1st Qu.: 8.0
##  Median :7.000   Median :16.0
##  Mean   :6.993   Mean   :15.8
##  3rd Qu.:8.000   3rd Qu.:23.0
##  Max.   :9.000   Max.   :31.0
##

# str() summarises the structure of the data compactly.
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

# table() counts how often each value of a column occurs.
table(airquality$Month)
##
##  5  6  7  8  9
## 31 30 31 31 30

# useNA = "ifany" adds a count for the missing values of Ozone.
table(airquality$Ozone, useNA = "ifany")
##
##    1    4    6    7    8    9   10   11   12   13   14   16   18   19   20
##    1    1    1    3    1    3    1    3    2    4    4    4    4    1    4
##   21   22   23   24   27   28   29   30   31   32   34   35   36   37   39
##    4    1    6    2    1    3    1    2    1    3    1    2    2    2    2
##   40   41   44   45   46   47   48   49   50   52   59   61   63   64   65
##    1    1    3    2    1    1    1    1    1    1    2    1    1    2    1
##   66   71   73   76   77   78   79   80   82   84   85   89   91   96   97
##    1    1    2    1    1    2    1    1    1    1    2    1    1    1    2
##  108  110  115  118  122  135  168 <NA>
##    1    1    1    1    1    1    1   37

# Is there any missing value? TRUE means there is at least one.
any(is.na(airquality$Ozone))
## [1] TRUE

# Number of missing values.
sum(is.na(airquality$Ozone))
## [1] 37

# Are all months smaller than 12?
all(airquality$Month < 12)
## [1] TRUE
# Coerce the Titanic table into a data frame.
t <- as.data.frame(Titanic)

# Cross-tabulate the frequencies by Class and Age.
x <- xtabs(Freq ~ Class + Age, data = t)
x
##       Age
## Class  Child Adult
##   1st      6   319
##   2nd     24   261
##   3rd     79   627
##   Crew     0   885

# ftable() prints the same table in a flattened layout.
ftable(x)
##       Age Child Adult
## Class
## 1st           6   319
## 2nd          24   261
## 3rd          79   627
## Crew          0   885

# Size of an object in memory: 5496 bytes in the recorded run.
object.size(airquality)
## 5496 bytes

# The same size printed in Kb.
print(object.size(airquality), units = "Kb")
## 5.4 Kb