在这一章里面,我们将介绍一些简单的R软件运算,包括基本数字运算、向量运算与统计运算,让读者们对R软件的基本计算功能先有一个初步的印象。
R软件的简单运算是通过程序语言中通用的运算符来完成的。
1+1
## [1] 2
1*3.4
## [1] 3.4
1/2
## [1] 0.5
1%/%2
## [1] 0
余数(modulus):
5 %% 2
## [1] 1
三角函数运算:
cos(1.0)
## [1] 0.5403023
幂次计算:
2 ^ 0.5
## [1] 1.414214
sqrt(2)
## [1] 1.414214
科学计数法:
x = 1.2e-5
x * 10000
## [1] 0.12
逻辑运算会产生逻辑向量:
x = c(1,2,3,4,5)
x>3
## [1] FALSE FALSE FALSE TRUE TRUE
使用uniroot()函数可以求一元n次方程在给定区间内的数值解;二元一次方程组则可以用solve()函数求解。
### 一元一次方程
# Linear function a * x + b; its root is the solution of a * x + b = 0.
f1 <- function(x, a, b) {
  a * x + b
}

# Coefficients of the equation 5 * x + 12 = 0.
a <- 5
b <- 12

# Search for the root inside [-10, 10]; a and b are forwarded to f1.
result <- uniroot(f1, interval = c(-10, 10), a = a, b = b, tol = 0.0001)
result$root
## [1] -2.4
# Quadratic function a * x^2 + b * x + c; its roots solve a * x^2 + b * x + c = 0.
f2 <- function(x, a, b, c) {
  a * x^2 + b * x + c
}

# Coefficients of x^2 + 5 * x + 6 = 0. The constant term is stored as `c0`
# rather than `c` so that the base function c() is not shadowed by a number.
a <- 1
b <- 5
c0 <- 6

# uniroot() returns a single root inside the given interval, so [-4, -3] only
# yields the root at -3.
result <- uniroot(f2, interval = c(-4, -3), a = a, b = b, c = c0, tol = 0.0001)
result$root
## [1] -3
lf<-matrix(c(3,5,1,2),nrow = 2,byrow = TRUE)
rf<-matrix(c(4,1),nrow = 2)
result<-solve(lf,rf)
result
## [,1]
## [1,] 3
## [2,] -1
在R软件中,如果想要构建规则性的数字或向量,可以使用以下函数:
1:9
## [1] 1 2 3 4 5 6 7 8 9
x = 1:9
x
## [1] 1 2 3 4 5 6 7 8 9
1.5:10
## [1] 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5
c(1.5:10,11)
## [1] 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 11.0
prod(1:8)
## [1] 40320
seq(1,5)
## [1] 1 2 3 4 5
seq(1,5,by=0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
seq(1,5,length = 7)
## [1] 1.000000 1.666667 2.333333 3.000000 3.666667 4.333333 5.000000
rep(10,5)
## [1] 10 10 10 10 10
rep(c("A","B","C","D"),2)
## [1] "A" "B" "C" "D" "A" "B" "C" "D"
rep(1:4,times = 3,each =2)
## [1] 1 1 2 2 3 3 4 4 1 1 2 2 3 3 4 4 1 1 2 2 3 3 4 4
rep(1:4,each =2,length = 12)
## [1] 1 1 2 2 3 3 4 4 1 1 2 2
matrix(rep(0,16),nrow = 4)
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
matrix(rep(0,16),nrow = 4)
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
matrix(0,nrow = 4,ncol = 4)
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
以下函数可用于vector变量:
x=c(1,2.0,3);x
## [1] 1 2 3
(x=c(1.0,2.3,3))
## [1] 1.0 2.3 3.0
x = c(1,2,3)
x + 1
## [1] 2 3 4
x - 1.2
## [1] -0.2 0.8 1.8
x * 2
## [1] 2 4 6
x * x
## [1] 1 4 9
y = c(4,5,6,7)
x * y
## Warning in x * y: longer object length is not a multiple of shorter object
## length
## [1] 4 10 18 7
x = c(1,2,3,4)
y = c(5,6,7,8)
y / x
## [1] 5.000000 3.000000 2.333333 2.000000
y - x
## [1] 4 4 4 4
x ^ y
## [1] 1 64 2187 65536
cos(x*pi)+cos(y*pi)
## [1] -2 2 -2 2
s = c(1,2,3,4,5,6)
length(s)
## [1] 6
sum(s)
## [1] 21
prod(s)
## [1] 720
cumsum(s)
## [1] 1 3 6 10 15 21
x = c(1,2,3,4)
y = c(5,6,7)
z = c(x,y)
z
## [1] 1 2 3 4 5 6 7
一个向量x的第i个元素可以用x[i]表示。
x = c(11,12,13)
x[2]
## [1] 12
x[4]
## [1] NA
x[c(1,3)]
## [1] 11 13
x[1:3]
## [1] 11 12 13
y = x[1:2]
y
## [1] 11 12
x = c(11,12,13)
mean(x)
## [1] 12
max(x)
## [1] 13
min(x)
## [1] 11
var(x)
## [1] 1
sd(x)
## [1] 1
sum(x)
## [1] 36
也可以不使用sd函数,而是用自定义函数计算标准差:
# Sample standard deviation of a numeric vector, computed without sd().
#
# y: numeric vector.
# Returns the square root of the sum of squared deviations from the mean,
# divided by (n - 1).
my.sd <- function(y) {
  n <- length(y)
  # Two-pass formula: subtract the mean before squaring. The one-pass form
  # sum(y^2) - n * mean(y)^2 subtracts two huge, nearly equal numbers when the
  # mean is large relative to the spread and loses all significant digits.
  sqrt(sum((y - mean(y))^2) / (n - 1))
}
my.sd(x)
## [1] 1
模拟100个人的身高体重数据(正态分布)
weight =rnorm(100,55,5)
height = rnorm(100,165,5)
plot(weight,height)
summary(lm(height~weight))
##
## Call:
## lm(formula = height ~ weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1174 -3.9101 -0.3632 3.4933 14.9925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 166.12281 5.08849 32.647 <2e-16 ***
## weight -0.02401 0.09197 -0.261 0.795
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.369 on 98 degrees of freedom
## Multiple R-squared: 0.0006951, Adjusted R-squared: -0.009502
## F-statistic: 0.06816 on 1 and 98 DF, p-value: 0.7946
# Floating-point pitfall: y[4] and 1 - x[3] both print as 0.6, yet the
# comparison at the end is TRUE because the two values are rounded differently
# in binary floating point. Printed output hides the difference; compare
# doubles with isTRUE(all.equal(a, b)) instead of ==, < or >.
x <- seq(0,1,by = 0.2)
y <- seq(0,1,by = 0.2)
y[4]
## [1] 0.6
x[3]
## [1] 0.4
1 - x[3]
## [1] 0.6
y[4] > 1 - x[3]
## [1] TRUE
x <- c(1,3,5,7,9)
x
## [1] 1 3 5 7 9
# Build the labels "x1" ... "x5"; paste0() concatenates without a separator.
v <- paste0("x", seq_len(5))
v
## [1] "x1" "x2" "x3" "x4" "x5"
x <- c(1,3,5,7,9)
y <- c(2,4,6,8,10)
x * y
## [1] 2 12 30 56 90
x %*% y
## [,1]
## [1,] 190
(t <- 1:10)
## [1] 1 2 3 4 5 6 7 8 9 10
(r <- 5:1)
## [1] 5 4 3 2 1
2 * 1:5
## [1] 2 4 6 8 10
seq(1,10,2)
## [1] 1 3 5 7 9
seq(1,by = 2,length = 10)
## [1] 1 3 5 7 9 11 13 15 17 19
x <- c(1,3,5,7,9)
length(x)
## [1] 5
y <- c(2,6,3,7,5)
sort(y)
## [1] 2 3 5 6 7
rev(y)
## [1] 5 7 3 6 2
append(y,10:15,after = 3)
## [1] 2 6 3 10 11 12 13 14 15 7 5
sum(x)
## [1] 25
max(y)
## [1] 7
x <- c(1,3,5,7,9)
x[2]
## [1] 3
x[c(1,3)] <- c(9,11)
x
## [1] 9 3 11 7 9
x[x < 9]
## [1] 3 7
y <- 1:10
y[-(1:5)]
## [1] 6 7 8 9 10
matrix(1:12,nrow = 4,ncol = 3)
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
matrix(1:12,nrow = 4,ncol = 3,byrow = T)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
(A <- matrix(1:12,nrow = 3,ncol = 4))
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
t(A)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
A * A
## [,1] [,2] [,3] [,4]
## [1,] 1 16 49 100
## [2,] 4 25 64 121
## [3,] 9 36 81 144
A %*% t(A)
## [,1] [,2] [,3]
## [1,] 166 188 210
## [2,] 188 214 240
## [3,] 210 240 270
diag(A)
## [1] 1 5 9
diag(diag(A))
## [,1] [,2] [,3]
## [1,] 1 0 0
## [2,] 0 5 0
## [3,] 0 0 9
diag(3)
## [,1] [,2] [,3]
## [1,] 1 0 0
## [2,] 0 1 0
## [3,] 0 0 1
(B <- matrix(rnorm(16),4,4))
## [,1] [,2] [,3] [,4]
## [1,] -1.9441703 -1.4811642 1.2187468 1.0028083
## [2,] 0.6886851 -0.3208272 0.3789500 0.9239378
## [3,] 1.7030543 -0.2826748 0.2768131 -1.1049455
## [4,] -1.3975218 -0.6632472 0.8276895 1.4652977
solve(B)
## [,1] [,2] [,3] [,4]
## [1,] -0.03313522 0.5688924 0.1142514 -0.2498819
## [2,] -2.06669506 -1.8009514 1.2105668 3.4628296
## [3,] -1.77181671 -1.9747337 2.0794344 4.0257930
## [4,] 0.03376588 0.8428525 -0.5176777 -0.2624789
# NOTE(review): B is filled with rnorm() values and is not symmetric. With
# symmetric = T, eigen() uses only the lower triangle of B, so the values
# printed below are not the eigenvalues of B itself (their product does not
# match det(B) shown further down). Drop the symmetric argument to get the
# eigen-decomposition of B.
(B.eigen <- eigen(B,symmetric = T))
## $values
## [1] 2.2962328 1.2039567 -0.5355768 -3.4874994
##
## $vectors
## [,1] [,2] [,3] [,4]
## [1,] 0.2730185 -0.48668143 0.01842590 0.8296159
## [2,] 0.3168695 -0.06025928 0.93287247 -0.1603481
## [3,] -0.1790906 -0.87148651 -0.07292924 -0.4506874
## [4,] -0.8904949 0.00461278 0.35226518 0.2879353
svd(B)
## $d
## [1] 3.946727 1.590731 1.189326 0.143115
##
## $u
## [,1] [,2] [,3] [,4]
## [1,] -0.71015075 -0.3175435 -0.4980918 -0.3830882
## [2,] -0.05382737 -0.5504837 0.7326792 -0.3965496
## [3,] 0.40938383 -0.7653272 -0.3594082 0.3427898
## [4,] -0.57025731 -0.1020202 0.2931066 0.7605833
##
## $v
## [,1] [,2] [,3] [,4]
## [1,] 0.7190095 -0.57996589 0.3794131 -0.05206411
## [2,] 0.3373981 0.58523258 0.3446369 0.65183640
## [3,] -0.3153411 -0.56068851 -0.1566321 0.74943632
## [4,] -0.5193726 -0.08228479 0.8442378 -0.10365246
dim(A)
## [1] 3 4
nrow(B)
## [1] 4
det(B)
## [1] 1.068612
A[row(A) < col(A)] = 0
A
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 2 5 0 0
## [3,] 3 6 9 0
apply(A,1,sum)
## [1] 1 7 18
apply(A,2,mean)
## [1] 2.000000 3.666667 3.000000 0.000000
矩阵只能是二维的,而数组可以是任意维的。一维数组类似于向量,二维数组就是矩阵。
(xx <- array(1:24,c(3,4,2)))
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
xx[2,3,2]
## [1] 20
xx[2,1:3,2]
## [1] 14 17 20
dim(xx)
## [1] 3 4 2
数组的运算和矩阵类似。
y <- c("male","female","female","male","female","male","male")
(f <- factor(y))
## [1] male female female male female male male
## Levels: female male
levels(f)
## [1] "female" "male"
如果一个数据对象需要包含不同的数据类型,则可以采用列表(List)
x <- c(1,1,2,2,3,3,3)
y <- c("male","female","female","male","female","male","male")
z <- c(80,85,92,76,61,95,83)
(stu <- list(class = x, sex = y, score = z))
## $class
## [1] 1 1 2 2 3 3 3
##
## $sex
## [1] "male" "female" "female" "male" "female" "male" "male"
##
## $score
## [1] 80 85 92 76 61 95 83
stu[[3]]
## [1] 80 85 92 76 61 95 83
stu$sex
## [1] "male" "female" "female" "male" "female" "male" "male"
数据框(data frame)是一种矩阵形式的数据,但各列可以是不同类型的数据,可以看做是矩阵的推广,类似于关系数据库的形式。
# Build the data frame from the class / sex / score vectors.
# stringsAsFactors = TRUE keeps `sex` a factor on R >= 4.0 as well (where the
# default became FALSE), matching the "Levels:" output printed for
# student[,2] below.
(student <- data.frame(class = x, sex = y, score = z, stringsAsFactors = TRUE))
## class sex score
## 1 1 male 80
## 2 1 female 85
## 3 2 female 92
## 4 2 male 76
## 5 3 female 61
## 6 3 male 95
## 7 3 male 83
row.names(student) <- c("zhao","qian","sun","li","zhou","wu","zhen")
student
## class sex score
## zhao 1 male 80
## qian 1 female 85
## sun 2 female 92
## li 2 male 76
## zhou 3 female 61
## wu 3 male 95
## zhen 3 male 83
student[,"score"]
## [1] 80 85 92 76 61 95 83
student[,2]
## [1] male female female male female male male
## Levels: female male
student$score
## [1] 80 85 92 76 61 95 83
student[["class"]]
## [1] 1 1 2 2 3 3 3
student[[3]]
## [1] 80 85 92 76 61 95 83
使用attach()函数可以把数据框绑定到搜索路径上,之后可以直接用列名访问各列;使用完毕后用detach()函数解除绑定。
#score
#Error: object 'score' not found
attach(student)
score
## [1] 80 85 92 76 61 95 83
detach()
#score
#Error: object 'score' not found