X=matrix(1:20,10,2)
X
## [,1] [,2]
## [1,] 1 11
## [2,] 2 12
## [3,] 3 13
## [4,] 4 14
## [5,] 5 15
## [6,] 6 16
## [7,] 7 17
## [8,] 8 18
## [9,] 9 19
## [10,] 10 20
x=rnorm(100)#generate 100 standard normal random numbers
x
## [1] 0.50004507 1.46773649 0.89908609 -0.44076450 -0.01943443
## [6] -0.21289645 -1.40586848 0.78254826 -0.22203210 0.96791077
## [11] 0.13402647 1.76918599 -0.91254099 0.74271781 0.70999072
## [16] -0.61818885 1.68648950 0.16834679 -1.04886828 -1.29707930
## [21] 1.13559290 -0.85227718 1.07760431 -1.24772250 -0.31498643
## [26] 0.37235728 -0.48250220 -2.18963544 -0.65130278 -0.54361045
## [31] 1.16003852 -0.09250449 -0.39585485 -0.86323445 -0.14611100
## [36] -0.65601751 -0.41133070 -0.84982965 -1.50257417 -0.60946256
## [41] -1.59371706 -0.38664190 0.92465777 -0.49111688 0.73551881
## [46] -0.61234433 -1.38849905 0.22220757 -0.44307734 0.64588686
## [51] -0.20535238 -1.01849811 0.98657954 -0.04260383 -0.44246464
## [56] 1.49241284 -0.07323067 -0.60024476 -1.38231590 -1.03977229
## [61] 0.09707323 0.67549212 -2.92765502 -1.14404648 1.03898238
## [66] -0.05616122 -0.94582967 1.83842730 -0.26522375 -0.20266148
## [71] 0.31541506 -0.23537082 -0.26907570 -0.41467273 1.61586293
## [76] -0.11457830 0.69736258 -0.39956188 -1.03299561 -0.03515792
## [81] -0.75708439 1.91107646 1.62754340 -0.81432850 1.32146026
## [86] 1.03751034 -0.53821456 0.24921928 0.71920170 -0.11678561
## [91] -2.19645217 0.53184495 2.07016109 0.35925740 0.22802105
## [96] 1.26762758 -1.40609909 0.22341295 0.46947248 0.17849808
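Note that rnorm() draws fresh random numbers on every call, so the values above will differ from run to run; an optional addition (not in the original script) is to fix the seed beforehand so the simulation is reproducible:
set.seed(1)#fix the random seed so that subsequent rnorm() calls give reproducible results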
hist(x,prob=T)#histogram of x, with the vertical axis on the density (relative frequency) scale
curve(dnorm(x),add=T)#overlay the standard normal density curve on the histogram
x1=c(171,175,159,155,152,158,154,164,168,166,159,164)#create a vector
x2=c(57,64,41,38,35,44,41,51,57,49,47,46)
length(x1)#length of the vector
## [1] 12
mode(x1)#storage mode (data type) of the data
## [1] "numeric"
rbind(x1,x2)#combine by rows
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## x1 171 175 159 155 152 158 154 164 168 166 159 164
## x2 57 64 41 38 35 44 41 51 57 49 47 46
cbind(x1,x2)#combine by columns
## x1 x2
## [1,] 171 57
## [2,] 175 64
## [3,] 159 41
## [4,] 155 38
## [5,] 152 35
## [6,] 158 44
## [7,] 154 41
## [8,] 164 51
## [9,] 168 57
## [10,] 166 49
## [11,] 159 47
## [12,] 164 46
matrix(x1,nrow=3,ncol=4)#create a matrix from the data in x1
## [,1] [,2] [,3] [,4]
## [1,] 171 155 154 166
## [2,] 175 152 164 159
## [3,] 159 158 168 164
matrix(x1,nrow=4,ncol=3)#create a matrix with the numbers of rows and columns changed
## [,1] [,2] [,3]
## [1,] 171 152 168
## [2,] 175 158 166
## [3,] 159 154 159
## [4,] 155 164 164
matrix(x1,nrow=4,ncol=3,byrow=T)#create a matrix filled by rows
## [,1] [,2] [,3]
## [1,] 171 175 159
## [2,] 155 152 158
## [3,] 154 164 168
## [4,] 166 159 164
A=matrix(1:12,nrow=3,ncol=4)#create a matrix
t(A)#matrix transpose
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
A=B=matrix(1:12,nrow=3,ncol=4)#create two identical matrices
A+B#matrix addition
## [,1] [,2] [,3] [,4]
## [1,] 2 8 14 20
## [2,] 4 10 16 22
## [3,] 6 12 18 24
A-B#matrix subtraction
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
A=matrix(1:12,nrow=3,ncol=4)#create a matrix
B=matrix(1:12,nrow=4,ncol=3)#create a matrix
A%*%B#matrix multiplication
## [,1] [,2] [,3]
## [1,] 70 158 246
## [2,] 80 184 288
## [3,] 90 210 330
A=matrix(1:16,nrow=4,ncol=4)#create a square matrix
diag(A)#extract the diagonal elements of the matrix
## [1] 1 6 11 16
diag(diag(A))#build a diagonal matrix from the diagonal elements
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 6 0 0
## [3,] 0 0 11 0
## [4,] 0 0 0 16
diag(3)#create a 3x3 identity matrix
## [,1] [,2] [,3]
## [1,] 1 0 0
## [2,] 0 1 0
## [3,] 0 0 1
A=matrix(rnorm(16),4,4)#create a matrix
A
## [,1] [,2] [,3] [,4]
## [1,] -0.7413759 -2.86248551 1.4534779 0.5867999069
## [2,] 0.1100988 2.04144332 -1.4332151 -0.5420695305
## [3,] 0.5326002 -2.26919486 0.2347451 0.0008461842
## [4,] 0.5958555 -0.02321432 -1.4678842 -0.8202258850
solve(A)#matrix inverse
## [,1] [,2] [,3] [,4]
## [1,] -1.1032243 -0.4707900 0.9730097 -0.4771218
## [2,] -0.3491463 -0.5572976 -0.5028235 0.1180035
## [3,] -0.8748148 -4.3459010 -2.8291284 2.2433398
## [4,] 0.7740181 7.4512325 5.7841123 -5.5838251
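As a quick sanity check (a sketch using the A and solve(A) above, not part of the original output), multiplying the inverse by A should give the identity matrix up to rounding error:
round(solve(A)%*%A,10)#should print the 4x4 identity matrix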
(A=diag(4)+1)#create a matrix
## [,1] [,2] [,3] [,4]
## [1,] 2 1 1 1
## [2,] 1 2 1 1
## [3,] 1 1 2 1
## [4,] 1 1 1 2
(A.e=eigen(A,symmetric=T))#eigenvalues and eigenvectors of the matrix
## $values
## [1] 5 1 1 1
##
## $vectors
## [,1] [,2] [,3] [,4]
## [1,] -0.5 0.8660254 0.0000000 0.0000000
## [2,] -0.5 -0.2886751 -0.5773503 -0.5773503
## [3,] -0.5 -0.2886751 -0.2113249 0.7886751
## [4,] -0.5 -0.2886751 0.7886751 -0.2113249
A.e$vectors%*%diag(A.e$values)%*%t(A.e$vectors)#relationship between the eigenvector matrix U, the eigenvalue matrix D and the original matrix: A=UDU'
## [,1] [,2] [,3] [,4]
## [1,] 2 1 1 1
## [2,] 1 2 1 1
## [3,] 1 1 2 1
## [4,] 1 1 1 2
(A.c=chol(A))#Cholesky decomposition of the matrix
## [,1] [,2] [,3] [,4]
## [1,] 1.414214 0.7071068 0.7071068 0.7071068
## [2,] 0.000000 1.2247449 0.4082483 0.4082483
## [3,] 0.000000 0.0000000 1.1547005 0.2886751
## [4,] 0.000000 0.0000000 0.0000000 1.1180340
t(A.c)%*%A.c#relationship between the Cholesky factor V=A.c and the original matrix: A=V'V
## [,1] [,2] [,3] [,4]
## [1,] 2 1 1 1
## [2,] 1 2 1 1
## [3,] 1 1 2 1
## [4,] 1 1 1 2
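A related base-R shortcut (an optional sketch, not in the original script): once the Cholesky factor A.c is available, chol2inv() inverts A directly from it.
chol2inv(A.c)%*%A#chol2inv(A.c) equals solve(A), so this product is the 4x4 identity matrix up to rounding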
(A=matrix(1:18,3,6))#create a matrix
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 4 7 10 13 16
## [2,] 2 5 8 11 14 17
## [3,] 3 6 9 12 15 18
(A.s=svd(A))#singular value decomposition of the matrix
## $d
## [1] 4.589453e+01 1.640705e+00 1.366522e-15
##
## $u
## [,1] [,2] [,3]
## [1,] -0.5290354 0.74394551 0.4082483
## [2,] -0.5760715 0.03840487 -0.8164966
## [3,] -0.6231077 -0.66713577 0.4082483
##
## $v
## [,1] [,2] [,3]
## [1,] -0.07736219 -0.71960032 -0.4076688
## [2,] -0.19033085 -0.50893247 0.5745647
## [3,] -0.30329950 -0.29826463 -0.0280114
## [4,] -0.41626816 -0.08759679 0.2226621
## [5,] -0.52923682 0.12307105 -0.6212052
## [6,] -0.64220548 0.33373889 0.2596585
A.s$u%*%diag(A.s$d)%*%t(A.s$v)#relationship between the SVD factors and the original matrix: A=UDV'
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 4 7 10 13 16
## [2,] 2 5 8 11 14 17
## [3,] 3 6 9 12 15 18
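Because the third singular value above is numerically zero, A has rank 2; as a brief check (a sketch based on the A.s object above), the first two singular triplets alone already reproduce A:
A.s$u[,1:2]%*%diag(A.s$d[1:2])%*%t(A.s$v[,1:2])#rank-2 reconstruction, equal to A up to rounding error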
(A=matrix(1:16,4,4))#create a matrix
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
qr(A)#QR decomposition of the matrix
## $qr
## [,1] [,2] [,3] [,4]
## [1,] -5.4772256 -12.7801930 -2.008316e+01 -2.738613e+01
## [2,] 0.3651484 -3.2659863 -6.531973e+00 -9.797959e+00
## [3,] 0.5477226 -0.3781696 1.601186e-15 2.217027e-15
## [4,] 0.7302967 -0.9124744 -5.547002e-01 -1.478018e-15
##
## $rank
## [1] 2
##
## $qraux
## [1] 1.182574e+00 1.156135e+00 1.832050e+00 1.478018e-15
##
## $pivot
## [1] 1 2 3 4
##
## attr(,"class")
## [1] "qr"
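The compact object returned by qr() can be unpacked with qr.Q() and qr.R(); as a quick check (a sketch not shown in the original output), their product recovers A:
A.qr=qr(A)#store the QR decomposition
qr.Q(A.qr)%*%qr.R(A.qr)#Q times R reproduces the original matrix A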
(A=matrix(1:4,2,2))#create a matrix
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
(B=matrix(rep(1,4),2,2))#create a matrix
## [,1] [,2]
## [1,] 1 1
## [2,] 1 1
kronecker(A,B)#Kronecker product of the matrices
## [,1] [,2] [,3] [,4]
## [1,] 1 1 3 3
## [2,] 1 1 3 3
## [3,] 2 2 4 4
## [4,] 2 2 4 4
(X=data.frame(x1,x2))#build a data frame from x1 and x2
## x1 x2
## 1 171 57
## 2 175 64
## 3 159 41
## 4 155 38
## 5 152 35
## 6 158 44
## 7 154 41
## 8 164 51
## 9 168 57
## 10 166 49
## 11 159 47
## 12 164 46
(X=data.frame('身高'=x1,'体重'=x2))#give the data frame new column labels (身高 = height, 体重 = weight)
## 身高 体重
## 1 171 57
## 2 175 64
## 3 159 41
## 4 155 38
## 5 152 35
## 6 158 44
## 7 154 41
## 8 164 51
## 9 168 57
## 10 166 49
## 11 159 47
## 12 164 46
apply(X,MARGIN,FUN,...)#X is a matrix; MARGIN=1 applies the computation over rows and MARGIN=2 over columns; FUN specifies the function to apply
A=matrix(1:12,3,4)#create a matrix
dim(A)#dimensions of the matrix
## [1] 3 4
nrow(A)#number of rows
## [1] 3
ncol(A)#number of columns
## [1] 4
rowSums(A)#row sums of the matrix
## [1] 22 26 30
rowMeans(A)#row means of the matrix
## [1] 5.5 6.5 7.5
colSums(A)#column sums of the matrix
## [1] 6 15 24 33
colMeans(A)#column means of the matrix
## [1] 2 5 8 11
apply(A,1,sum)#row sums via apply
## [1] 22 26 30
apply(A,1,mean)#row means via apply
## [1] 5.5 6.5 7.5
apply(A,2,sum)#column sums via apply
## [1] 6 15 24 33
apply(A,2,mean)#column means via apply
## [1] 2 5 8 11
A=matrix(rnorm(100),20,5)#create a matrix
apply(A,2,var)#column variances of the matrix
## [1] 0.8380330 1.0014935 1.6928737 1.1800251 0.6701847
apply(A,2,function(x,a)x*a,a=2)#apply a user-defined function (multiply by a=2) to each column
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.6849429 -0.4705260 1.7202792 1.11696729 1.1895191
## [2,] 2.2467405 -0.1679803 0.2850114 0.70654885 0.2211689
## [3,] -0.8005214 3.0963894 -2.1206206 -0.57409623 -1.5118071
## [4,] -2.2196454 1.7805190 -0.4658638 0.21723989 3.1251585
## [5,] -2.8726198 1.7046809 5.1375556 3.41780925 1.3096301
## [6,] -0.3690779 3.0400686 5.7055706 2.36362412 1.7530150
## [7,] -3.1351343 2.3625934 0.5885855 -0.08682032 -1.9947425
## [8,] -0.1023500 0.4797240 3.4513281 0.26224780 0.8777756
## [9,] -2.5504198 1.0925203 -1.6792587 1.18982543 0.8248051
## [10,] 0.1610013 -1.9089909 1.2408879 -1.45690524 -0.3419066
## [11,] 0.9678919 -0.2212908 -1.0023266 -3.16162704 2.8794111
## [12,] 0.1345222 3.8792559 -0.6621181 1.29030125 -2.6970918
## [13,] 0.3435619 -1.4836508 -2.9163719 0.63825222 0.2638939
## [14,] 0.4611490 1.4811701 -3.5187995 -0.63330142 0.4933913
## [15,] -0.5067010 -0.2271658 -2.0478149 -0.71247620 -0.5878192
## [16,] 0.1306488 -0.5617005 2.9284372 -2.35807615 0.8060922
## [17,] 3.6815116 3.3292026 2.9298623 5.44525945 1.6418616
## [18,] -0.6502591 -4.0517691 -2.1130174 0.02383377 -1.7659428
## [19,] -3.2965374 1.1335201 0.8974002 -3.26297045 1.4564526
## [20,] 0.8752568 -0.8393604 0.6941933 3.28867428 -2.0086394
X=read.csv("textdata.csv")#read the csv file named textdata
library(RODBC)#load the RODBC package
Rcode<-odbcConnectExcel("Rcode.xls")#open the Excel workbook named Rcode
(data<-sqlFetch(Rcode,"data"))#show the data in the worksheet named data of Rcode
(codedata<-sqlFetch(Rcode,"codedata"))#show the data in the worksheet named codedata of Rcode
x1=c(171,175,159,155,152,158,154,164,168,166,159,164)#height
x2=c(57,64,41,38,35,44,41,51,57,49,47,46)#weight
hist(x1)#histogram of height
plot(x1,x2)#scatter plot of height against weight
table(年龄)#one-way contingency table of 年龄 (age)
barplot(table(年龄),col=1:7)#bar chart
pie(table(结果))#pie chart of 结果 (outcome)
table(年龄,性别)#two-way contingency table of age by 性别 (sex)
barplot(table(年龄,性别),beside=T,col=1:7)#bar chart of age grouped by sex
barplot(table(性别,年龄),beside=T,col=1:2)#bar chart of sex grouped by age
ftable(年龄,性别,结果)#three-way table of outcome frequencies arranged by age and sex
ftable(性别,年龄,结果)#three-way table of outcome frequencies arranged by sex and age
(ft=ftable(性别,结果,年龄))#three-way table of age frequencies arranged by sex and outcome
rowSums(ft)#row totals
colSums(ft)#column totals
sum(ft)#grand total
setwd("C:/Users/lenovo/Desktop")
d3.1=read.table("d3.1.txt",header=T)
barplot(apply(d3.1,1,mean),las=3)#bar chart of the row means
barplot(apply(d3.1,2,mean))#bar chart of the column means
barplot(apply(d3.1[,2:8],2,mean))#bar chart of the column means after dropping the first column
barplot(apply(d3.1,2,median))#bar chart of the column medians
pie(apply(d3.1,2,mean))#pie chart of the column means
boxplot(d3.1)#box plots by column
boxplot(d3.1,horizontal=T)#box plots laid out horizontally
stars(d3.1,full=T,key.loc=c(13,1.5))#360-degree star plot with a legend
stars(d3.1,full=F,key.loc=c(13,1.5))#180-degree star plot with a legend
stars(d3.1,full=T,draw.segments=T,key.loc=c(13,1.5))#360-degree segment (colored) star plot with a legend
stars(d3.1,full=F,draw.segments=T,key.loc=c(13,1.5))#180-degree segment (colored) star plot with a legend
library(aplpack)#load the aplpack package
## Warning: package 'aplpack' was built under R version 3.1.3
## Loading required package: tcltk
faces(d3.1,ncol.plot=7)#Chernoff faces plot, 7 faces per row
## effect of variables:
## modified item Var
## "height of face " "食品"
## "width of face " "衣着"
## "structure of face" "设备"
## "height of mouth " "医疗"
## "width of mouth " "交通"
## "smiling " "教育"
## "height of eyes " "居住"
## "width of eyes " "杂项"
## "height of hair " "食品"
## "width of hair " "衣着"
## "style of hair " "设备"
## "height of nose " "医疗"
## "width of nose " "交通"
## "width of ear " "教育"
## "height of ear " "居住"
faces(d3.1[,2:8],ncol.plot=7)#faces plot after dropping the first variable, 7 faces per row
## effect of variables:
## modified item Var
## "height of face " "衣着"
## "width of face " "设备"
## "structure of face" "医疗"
## "height of mouth " "交通"
## "width of mouth " "教育"
## "smiling " "居住"
## "height of eyes " "杂项"
## "width of eyes " "衣着"
## "height of hair " "设备"
## "width of hair " "医疗"
## "style of hair " "交通"
## "height of nose " "教育"
## "width of nose " "居住"
## "width of ear " "杂项"
## "height of ear " "衣着"
faces(d3.1[c(1,9,19,28,29,30),])#faces plot for observations 1, 9, 19, 28, 29 and 30
## effect of variables:
## modified item Var
## "height of face " "食品"
## "width of face " "衣着"
## "structure of face" "设备"
## "height of mouth " "医疗"
## "width of mouth " "交通"
## "smiling " "教育"
## "height of eyes " "居住"
## "width of eyes " "杂项"
## "height of hair " "食品"
## "width of hair " "衣着"
## "style of hair " "设备"
## "height of nose " "医疗"
## "width of nose " "交通"
## "width of ear " "教育"
## "height of ear " "居住"
library(mvstats)#load the mvstats package
plot.andrews(d3.1)#Andrews (harmonic) curves plot
plot.andrews(d3.1[c(1,9,19,28,29,30),])#Andrews curves for observations 1, 9, 19, 28, 29 and 30
plot(x1,x2)#scatter plot
lxy<-function(x,y){n=length(x);sum(x*y)-sum(x)*sum(y)/n}#function for the corrected (mean-centered) sum of cross products
lxy(x1,x1)#corrected sum of squares of x1
## [1] 556.9167
lxy(x2,x2)#corrected sum of squares of x2
## [1] 813
lxy(x1,x2)#corrected sum of cross products of x1 and x2
## [1] 645.5
(r=lxy(x1,x2)/sqrt(lxy(x1,x1)*lxy(x2,x2)))#correlation coefficient computed from the corrected sums
## [1] 0.9593031
cor(x1,x2)#correlation coefficient computed directly
## [1] 0.9593031
n=length(x1)#length of the vector
tr=r/sqrt((1-r^2)/(n-2))#t statistic for testing the correlation coefficient
tr
## [1] 10.74298
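As a check on this t statistic (a sketch using tr and n from above), the two-sided p-value follows from the t distribution with n-2 degrees of freedom and should match the cor.test() output below:
2*pt(-abs(tr),n-2)#two-sided p-value, approximately 8.21e-07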
cor.test(x1,x2)#hypothesis test for the correlation coefficient
##
## Pearson's product-moment correlation
##
## data: x1 and x2
## t = 10.743, df = 10, p-value = 8.21e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8574875 0.9888163
## sample estimates:
## cor
## 0.9593031
setwd("C:/Users/lenovo/Desktop")
yx=read.table("d4.3.txt",header=T)#load the data of Example 4.3
fm=lm(y~x,data=yx)#simple linear regression model
fm
##
## Call:
## lm(formula = y ~ x, data = yx)
##
## Coefficients:
## (Intercept) x
## -1.197 1.116
plot(y~x,data=yx)#scatter plot
abline(fm)#add the fitted regression line
anova(fm)#analysis of variance for the model
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 712077 712077 27428 < 2.2e-16 ***
## Residuals 29 753 26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fm)#t tests of the regression coefficients
##
## Call:
## lm(formula = y ~ x, data = yx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.630 -3.692 -1.535 5.338 11.432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.19656 1.16125 -1.03 0.311
## x 1.11623 0.00674 165.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.095 on 29 degrees of freedom
## Multiple R-squared: 0.9989, Adjusted R-squared: 0.9989
## F-statistic: 2.743e+04 on 1 and 29 DF, p-value: < 2.2e-16
yX=read.table("d4.4.txt",header=T)#load the data of Example 4.4
(fm=lm(y~x1+x2+x3+x4,data=yX))#multiple linear regression model
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = yX)
##
## Coefficients:
## (Intercept) x1 x2 x3 x4
## 23.5321088 -0.0033866 1.1641150 0.0002919 -0.0437416
coef.sd(fm)#standardized partial regression coefficients
## $coef.sd
## x1 x2 x3 x4
## -0.0174513678 1.0423522972 0.0009628564 -0.0371053994
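coef.sd() is not in base R (it appears to come from the mvstats package loaded earlier); as a cross-check (a sketch assuming the usual definition of a standardized coefficient, b_j*s_xj/s_y), the same values can be computed from the raw coefficients and the sample standard deviations:
coef(fm)[-1]*apply(yX[,c("x1","x2","x3","x4")],2,sd)/sd(yX$y)#standardized partial regression coefficients computed by hand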
anova(fm)#analysis of variance for the multiple regression model
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 694627 694627 89259.0016 < 2.2e-16 ***
## x2 1 17803 17803 2287.6286 < 2.2e-16 ***
## x3 1 24 24 3.0569 0.0922 .
## x4 1 174 174 22.2954 7.005e-05 ***
## Residuals 26 202 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fm)#t tests of the multiple regression coefficients
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = yX)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0229 -2.1354 0.3297 1.2639 6.9690
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.5321088 4.5990714 5.117 2.47e-05 ***
## x1 -0.0033866 0.0080749 -0.419 0.678
## x2 1.1641150 0.0404889 28.751 < 2e-16 ***
## x3 0.0002919 0.0085527 0.034 0.973
## x4 -0.0437416 0.0092638 -4.722 7.00e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.79 on 26 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 2.289e+04 on 4 and 26 DF, p-value: < 2.2e-16
cor(yX)#correlation matrix of the multivariate data
## t y x1 x2 x3 x4
## t 1.0000000 0.8141349 0.8842687 0.8308056 0.8234883 0.9742470
## y 0.8141349 1.0000000 0.9871498 0.9994718 0.9912053 0.6956619
## x1 0.8842687 0.9871498 1.0000000 0.9907018 0.9867664 0.7818066
## x2 0.8308056 0.9994718 0.9907018 1.0000000 0.9917094 0.7154297
## x3 0.8234883 0.9912053 0.9867664 0.9917094 1.0000000 0.7073820
## x4 0.9742470 0.6956619 0.7818066 0.7154297 0.7073820 1.0000000
pairs(yX)#scatter plot matrix of the multivariate data
corr.test(yX)#tests of the pairwise correlation coefficients
## corr test:
## t y x1 x2 x3 x4
## t 0.000 0.000 0.000 0.000 0.000 0
## y 7.550 0.000 0.000 0.000 0.000 0
## x1 10.197 33.267 0.000 0.000 0.000 0
## x2 8.039 165.614 39.214 0.000 0.000 0
## x3 7.817 40.336 32.772 41.560 0.000 0
## x4 23.268 5.215 6.752 5.514 5.389 0
## lower is t value,upper is p value
(R2=summary(fm)$r.sq)#coefficient of determination of the multiple regression model
## [1] 0.9997
(R=sqrt(R2))#multiple correlation coefficient
## [1] 0.9999
library(leaps)#load the leaps package
varsel=regsubsets(y~x1+x2+x3+x4,data=yX)#all-subsets variable selection for the regression model
result=summary(varsel)#summary of the variable selection results
data.frame(result$outmat,RSS=result$rss,R2=result$rsq)#results under the RSS and R-squared criteria
## x1 x2 x3 x4 RSS R2
## 1 ( 1 ) * 752.9 0.9989
## 2 ( 1 ) * * 203.9 0.9997
## 3 ( 1 ) * * * 202.3 0.9997
## 4 ( 1 ) * * * * 202.3 0.9997
data.frame(result$outmat,adjR2=result$adjr2,Cp=result$cp,BIC=result$bic)#results under the adjusted R-squared, Cp and BIC criteria
## x1 x2 x3 x4 adjR2 Cp BIC
## 1 ( 1 ) * 0.9989 69.745 -205.6
## 2 ( 1 ) * * 0.9997 1.199 -242.6
## 3 ( 1 ) * * * 0.9997 3.001 -239.4
## 4 ( 1 ) * * * * 0.9997 5.000 -236.0
fm.step=step(fm,direction="forward")#forward selection (starting from the full model, so no terms can be added)
## Start: AIC=68.15
## y ~ x1 + x2 + x3 + x4
fm.step=step(fm,direction="backward")#backward elimination
## Start: AIC=68.15
## y ~ x1 + x2 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x3 1 0 202 66.2
## - x1 1 1 204 66.4
## <none> 202 68.2
## - x4 1 174 376 85.4
## - x2 1 6433 6635 174.4
##
## Step: AIC=66.16
## y ~ x1 + x2 + x4
##
## Df Sum of Sq RSS AIC
## - x1 1 2 204 64.4
## <none> 202 66.2
## - x4 1 197 400 85.3
## - x2 1 7382 7585 176.5
##
## Step: AIC=64.39
## y ~ x2 + x4
##
## Df Sum of Sq RSS AIC
## <none> 204 64.4
## - x4 1 549 753 102.9
## - x2 1 367655 367859 294.8
fm.step=step(fm,direction="both")#stepwise (both-direction) selection
## Start: AIC=68.15
## y ~ x1 + x2 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x3 1 0 202 66.2
## - x1 1 1 204 66.4
## <none> 202 68.2
## - x4 1 174 376 85.4
## - x2 1 6433 6635 174.4
##
## Step: AIC=66.16
## y ~ x1 + x2 + x4
##
## Df Sum of Sq RSS AIC
## - x1 1 2 204 64.4
## <none> 202 66.2
## + x3 1 0 202 68.2
## - x4 1 197 400 85.3
## - x2 1 7382 7585 176.5
##
## Step: AIC=64.39
## y ~ x2 + x4
##
## Df Sum of Sq RSS AIC
## <none> 204 64.4
## + x1 1 2 202 66.2
## + x3 1 0 204 66.4
## - x4 1 549 753 102.9
## - x2 1 367655 367859 294.8
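The backward and stepwise searches both settle on y ~ x2 + x4, which agrees with the Cp and BIC results above; as a brief follow-up (a sketch, not part of the original script), the selected model can be refitted and inspected directly:
fm.sel=lm(y~x2+x4,data=yX)#refit the model chosen by the stepwise search
summary(fm.sel)#coefficient estimates and tests for the selected model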
par(mar=c(4,4,2,1),cex=0.75)
x=-10:10
plot(x,1+2*x+3*x^2,"o",ylab="y=1+2x+3x^2")#polynomial curve
x=1:20
par(mfrow=c(1,2),cex=0.75)
plot(x,3+2*log(x),"o",ylab="y=3+2log(x)")#logarithmic curve with b>0
plot(x,3-2*log(x),"o",ylab="y=3-2log(x)")#logarithmic curve with b<0
par(mfrow=c(1,1))
par(mfrow=c(1,2),cex=0.75)
plot(x,3*exp(0.2*x),"o",ylab="y=3exp(0.2x)")#increasing exponential curve
plot(x,3*exp(0.2/x),"o",ylab="y=3*exp(0.2/x)")#decreasing exponential curve
par(mfrow=c(1,1))
par(mfrow=c(1,2),cex=0.75)
plot(x,3*x^2,"o",ylab="y=3x^2")#power curve with b>0
plot(x,3*x^-2,"o",ylab="y=3x^-2")#power curve with b<0
par(mfrow=c(1,1))
par(mfrow=c(1,2),cex=0.75)
plot(x,3+2/x,"o",ylab="y=3+2/x")#hyperbolic curve with b>0
plot(x,3-2/x,"o",ylab="y=3-2/x")#hyperbolic curve with b<0
par(mfrow=c(1,1))
x=c(1.5,2.8,4.5,7.5,10.5,13.5,15.1,16.5,19.5,22.5,24.5,26.5)#data of Example 4.6
y=c(7.0,5.5,4.6,3.6,2.9,2.7,2.5,2.4,2.2,2.1,1.9,1.8)
plot(x,y)#scatter plot
lm.1=lm(y~x)#simple linear regression model
summary(lm.1)$coef#coefficients of the linear model
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.603 0.43474 12.889 1.488e-07
## x -0.170 0.02719 -6.254 9.456e-05
summary(lm.1)$r.sq#R squared of the linear model
## [1] 0.7964
plot(x,y)#scatter plot
abline(lm.1)#add the fitted straight line
x1=x
x2=x^2
lm.2=lm(y~x1+x2)#quadratic regression model
summary(lm.2)$coef#coefficients of the quadratic model
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.91469 0.331987 20.828 6.346e-09
## x1 -0.46563 0.056969 -8.173 1.864e-05
## x2 0.01076 0.002009 5.353 4.604e-04
summary(lm.2)$r.sq#R squared of the quadratic model
## [1] 0.9513
plot(x,y)#scatter plot
lines(x,fitted(lm.2))#add the fitted quadratic curve
lm.log=lm(y~log(x))#logarithmic regression model
summary(lm.log)$coef#coefficients of the logarithmic model
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.364 0.1688 43.64 9.596e-13
## log(x) -1.757 0.0677 -25.95 1.660e-10
summary(lm.log)$r.sq#R squared of the logarithmic model
## [1] 0.9854
plot(x,y)#scatter plot
lines(x,fitted(lm.log))#add the fitted logarithmic curve
lm.exp=lm(log(y)~x)#exponential regression model (fitted on the log scale)
summary(lm.exp)$coef#coefficients of the exponential model
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.75966 0.075101 23.43 4.543e-10
## x -0.04881 0.004697 -10.39 1.116e-06
summary(lm.exp)$r.sq#R squared of the exponential model
## [1] 0.9153
plot(x,y)#scatter plot
lines(x,exp(fitted(lm.exp)))#add the fitted exponential curve
lm.pow=lm(log(y)~log(x))#power regression model (fitted on the log-log scale)
summary(lm.pow)$coef#coefficients of the power model
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1907 0.02951 74.23 4.806e-15
## log(x) -0.4724 0.01184 -39.90 2.337e-12
summary(lm.pow)$r.sq#R squared of the power model
## [1] 0.9938
plot(x,y)#scatter plot
lines(x,exp(fitted(lm.pow)))#add the fitted power curve
(S1=nls(y~a+b*x,start=list(a=5,b=-0.1)))#straight-line model fitted with the nonlinear least-squares function nls()
## Nonlinear regression model
## model: y ~ a + b * x
## data: parent.frame()
## a b
## 5.60 -0.17
## residual sum-of-squares: 5.93
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 3.32e-09
plot(x,y)
lines(x,fitted(S1))
(S2=nls(y~a+b*log(x),start=list(a=5,b=-0.1)))#logarithmic model fitted with nls()
## Nonlinear regression model
## model: y ~ a + b * log(x)
## data: parent.frame()
## a b
## 7.36 -1.76
## residual sum-of-squares: 0.426
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 1.47e-07
plot(x,y)
lines(x,fitted(S2))#add the fitted logarithmic curve
(S3=nls(y~a*exp(b*x),start=list(a=5,b=-0.1)))#exponential model fitted with nls()
## Nonlinear regression model
## model: y ~ a * exp(b * x)
## data: parent.frame()
## a b
## 6.6312 -0.0613
## residual sum-of-squares: 2.49
##
## Number of iterations to convergence: 8
## Achieved convergence tolerance: 1.91e-06
plot(x,y)
lines(x,fitted(S3))#add the fitted exponential curve
(S4=nls(y~a*(x^b),start=list(a=5,b=-0.1)))#power model fitted with nls()
## Nonlinear regression model
## model: y ~ a * (x^b)
## data: parent.frame()
## a b
## 8.609 -0.452
## residual sum-of-squares: 0.164
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 6.07e-07
plot(x,y)
lines(x,fitted(S4))#add the fitted power curve
d4.8=read.table("d4.8.txt",header=T)#load the data of Example 4.8
model=nls(Y~A0*(exp(m*t))*(L^a)*(K^b),data=d4.8,start=list(A0=0.45,m=0,a=0.5,b=0.5))#production function regression model
model#regression results
## Nonlinear regression model
## model: Y ~ A0 * (exp(m * t)) * (L^a) * (K^b)
## data: d4.8
## A0 m a b
## 0.7199 0.0437 0.4080 0.7119
## residual sum-of-squares: 8.92
##
## Number of iterations to convergence: 22
## Achieved convergence tolerance: 9.37e-07
summary(model)#tests of the fitted model
##
## Formula: Y ~ A0 * (exp(m * t)) * (L^a) * (K^b)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## A0 0.7199 0.3461 2.08 0.0711 .
## m 0.0437 0.0111 3.92 0.0044 **
## a 0.4080 0.1720 2.37 0.0451 *
## b 0.7119 0.0428 16.65 1.7e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.06 on 8 degrees of freedom
##
## Number of iterations to convergence: 22
## Achieved convergence tolerance: 9.37e-07
0.71987*exp(0.04369*13)*75^0.40798*135^0.71187#prediction computed from the fitted formula
## [1] 242.9
predict(model,data.frame(t=13,L=75,K=135))#prediction from the fitted model
## [1] 242.9
b=lxy(x,y)/lxy(x,x)#slope of the regression line
a=mean(y)-b*mean(x)#intercept of the regression line
c(a=a,b=b)#show the estimated coefficients
plot(x,y)#scatter plot
lines(x,a+b*x)#add the estimated regression line
SST=lxy(y,y)#total (corrected) sum of squares of y
SSR=b*lxy(x,y)#regression sum of squares
SSE=SST-SSR#error sum of squares
MSR=SSR/1#regression mean square
MSE=SSE/(n-2)#error mean square
F=MSR/MSE#F statistic
c(SST=SST,SSR=SSR,SSE=SSE,MSR=MSR,MSE=MSE,F=F)#show the results
sy.x=sqrt(MSE)#residual standard error
sb=sy.x/sqrt(lxy(x,x))#standard error of the slope
t=b/sb#t statistic
ta=qt(1-0.05/2,n-2)#critical value from the t distribution
c(sy.x=sy.x,sb=sb,t=t,ta=ta)#show the results
d5.1=read.table("d5.1.txt",header=T)#load the data of Example 5.1
logit.glm<-glm(y~x1+x2+x3,family=binomial,data=d5.1)#logistic regression model
summary(logit.glm)#results of the logistic regression model
##
## Call:
## glm(formula = y ~ x1 + x2 + x3, family = binomial, data = d5.1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.564 -0.913 -0.789 0.964 1.600
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5976 0.8948 0.67 0.504
## x1 -1.4961 0.7049 -2.12 0.034 *
## x2 -0.0016 0.0168 -0.10 0.924
## x3 0.3159 0.7011 0.45 0.652
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 62.183 on 44 degrees of freedom
## Residual deviance: 57.026 on 41 degrees of freedom
## AIC: 65.03
##
## Number of Fisher Scoring iterations: 4
logit.step<-step(logit.glm,direction="both")#stepwise variable selection
## Start: AIC=65.03
## y ~ x1 + x2 + x3
##
## Df Deviance AIC
## - x2 1 57.0 63.0
## - x3 1 57.2 63.2
## <none> 57.0 65.0
## - x1 1 61.9 67.9
##
## Step: AIC=63.03
## y ~ x1 + x3
##
## Df Deviance AIC
## - x3 1 57.2 61.2
## <none> 57.0 63.0
## + x2 1 57.0 65.0
## - x1 1 62.0 66.0
##
## Step: AIC=61.24
## y ~ x1
##
## Df Deviance AIC
## <none> 57.2 61.2
## + x3 1 57.0 63.0
## + x2 1 57.2 63.2
## - x1 1 62.2 64.2
summary(logit.step)#results after stepwise variable selection
##
## Call:
## glm(formula = y ~ x1, family = binomial, data = d5.1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.449 -0.878 -0.878 0.928 1.510
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.619 0.469 1.32 0.187
## x1 -1.373 0.635 -2.16 0.031 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 62.183 on 44 degrees of freedom
## Residual deviance: 57.241 on 43 degrees of freedom
## AIC: 61.24
##
## Number of Fisher Scoring iterations: 4
pre1<-predict(logit.step,data.frame(x1=1))#linear predictor for drivers with normal vision (x1=1)
p1<-exp(pre1)/(1+exp(pre1))#predicted probability of an accident for drivers with normal vision
pre2<-predict(logit.step,data.frame(x1=0))#linear predictor for drivers with impaired vision (x1=0)
p2<-exp(pre2)/(1+exp(pre2))#predicted probability of an accident for drivers with impaired vision
c(p1,p2)#show the results
## 1 1
## 0.32 0.65
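The same probabilities can be obtained in a single step (an equivalent sketch) by asking predict() for predictions on the response scale:
predict(logit.step,data.frame(x1=c(1,0)),type="response")#predicted accident probabilities, about 0.32 and 0.65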
d5.2=read.table("d5.2.txt",header=T)#load the data of Example 5.2
log.glm<-glm(y~x1+x2,family=poisson(link=log),data=d5.2)#log-linear (Poisson regression) model
summary(log.glm)#results of the log-linear model
##
## Call:
## glm(formula = y ~ x1 + x2, family = poisson(link = log), data = d5.2)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## -10.78 14.44 -8.47 -2.62 4.96 -3.14
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.1569 0.1420 43.37 <2e-16 ***
## x1 0.1291 0.0437 2.96 0.0031 **
## x2 -1.1257 0.0826 -13.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 662.84 on 5 degrees of freedom
## Residual deviance: 437.97 on 3 degrees of freedom
## AIC: 482
##
## Number of Fisher Scoring iterations: 5
d5.3=read.table("d5.3.txt",header=T)#load the data of Example 5.3
anova(lm(Y~factor(A),data=d5.3))#analysis of variance for a completely randomized design
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(A) 2 0.1222 0.0611 40.5 8.9e-07 ***
## Residuals 15 0.0226 0.0015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
d5.4=read.table("d5.4.txt",header=T)#load the data of Example 5.4
anova(lm(Y~factor(A)+factor(B),data=d5.4))#analysis of variance for a randomized block design
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(A) 3 15759 5253 0.43 0.74
## factor(B) 2 22385 11192 0.92 0.45
## Residuals 6 73198 12200
d5.5=read.table("d5.5.txt",header=T)#load the data of Example 5.5
anova(lm(Y~A+B+A:B,data=d5.5))#analysis of variance for a factorial design
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 1600 1600 28.4 0.00018 ***
## B 1 2500 2500 44.4 2.3e-05 ***
## A:B 1 729 729 12.9 0.00366 **
## Residuals 12 676 56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
d5.6=read.table("d5.6.txt",header=T)#load the data of Example 5.6
anova(lm(Y~A+B+A*B+C+D,data=d5.6))#analysis of variance for an orthogonal experimental design
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 8.0 8.0 3.2 0.216
## B 1 18.0 18.0 7.2 0.115
## C 1 60.5 60.5 24.2 0.039 *
## D 1 4.5 4.5 1.8 0.312
## A:B 1 50.0 50.0 20.0 0.047 *
## Residuals 2 5.0 2.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1