# Sample of 100 numeric observations analysed throughout this section.
dat <- c(
  74.3, 78.8, 68.8, 78.0, 70.4, 80.5, 80.5, 69.7, 71.2, 73.5,
  79.5, 75.6, 75.0, 78.8, 72.0, 72.0, 72.0, 74.3, 71.2, 72.0,
  75.0, 73.5, 78.8, 74.3, 75.8, 65.0, 74.3, 71.2, 69.7, 68.0,
  73.5, 75.0, 72.0, 64.3, 75.8, 80.3, 69.7, 74.3, 73.5, 73.5,
  75.8, 75.8, 68.8, 76.5, 70.4, 71.2, 81.2, 75.0, 70.4, 68.0,
  70.4, 72.0, 76.5, 74.3, 76.5, 77.6, 67.3, 72.0, 75.0, 74.3,
  73.5, 79.5, 73.5, 74.7, 65.0, 76.5, 81.6, 75.4, 72.7, 72.7,
  67.2, 76.5, 72.7, 70.4, 77.2, 68.8, 67.3, 67.3, 67.3, 72.7,
  75.8, 73.5, 75.0, 73.5, 73.5, 73.5, 72.7, 81.6, 70.3, 74.3,
  73.5, 79.5, 70.4, 76.5, 72.7, 77.2, 84.3, 75.0, 76.5, 70.4
)
# Compute the mean, variance, standard deviation, range, standard error,
# coefficient of variation, skewness and kurtosis.
# (This heading used to be a bare, uncommented line, which is not valid R
# and prevented the script from parsing.)

# Mean
m <- mean(dat)
m
## [1] 73.67
# Variance (sample variance, denominator n - 1)
v <- var(dat)
v
## [1] 15.52
# Standard deviation
s <- sd(dat)
s
## [1] 3.939
# Range (max - min)
diff(range(dat))
## [1] 20
# Standard error of the mean
n <- length(dat)
s / sqrt(n)
## [1] 0.3939
# Coefficient of variation, in percent
100 * s / m
## [1] 5.347
# Skewness (bias-adjusted sample skewness)
n / ((n - 1) * (n - 2)) * sum((dat - m)^3) / s^3
## [1] 0.05406
# Kurtosis (bias-adjusted sample excess kurtosis; 0 for a normal distribution)
(n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)) * sum((dat - m)^4) / s^4 -
  (3 * (n - 1)^2) / ((n - 2) * (n - 3))
## [1] 0.03702
# Histogram on the density scale, with a kernel density estimate overlaid.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
hist(dat, main = "Histogram", freq = FALSE)
lines(density(dat), col = "blue")
# Empirical distribution function drawn as a step function, with the CDF of
# a normal distribution with mean m and standard deviation s overlaid.
xqdb <- ecdf(dat)
plot(xqdb, verticals = TRUE, do.points = FALSE)
# The integer grid 60:90 spans the observed data (64.3 to 84.3).
lines(60:90, pnorm(60:90, m, s))
# Normal Q-Q plot with reference line
qqnorm(dat)
qqline(dat)
# Stem-and-leaf display
stem(dat)
##
## The decimal point is at the |
##
## 64 | 300
## 66 | 23333
## 68 | 00888777
## 70 | 344444442222
## 72 | 0000000777777555555555555
## 74 | 33333333700000004688888
## 76 | 5555555226
## 78 | 0888555
## 80 | 355266
## 82 |
## 84 | 3
# Box plot
boxplot(dat)
# Five-number summary (plus the mean)
summary(dat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 64.3 71.2 73.5 73.7 75.8 84.3
# Shapiro-Wilk (W) test of normality
shapiro.test(dat)
##
## Shapiro-Wilk normality test
##
## data: dat
## W = 0.9901, p-value = 0.6708
# One-sample Kolmogorov-Smirnov test against a normal distribution with
# mean m and standard deviation s.
# NOTE(review): m and s are the sample mean and sd computed from `dat`
# earlier in the script; the ks.test documentation says the parameters must
# be pre-specified rather than estimated from the data, so this p-value
# should be read with caution. The data also contain ties (see the warning).
ks.test(dat, "pnorm", mean = m, sd = s)
## Warning: ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: dat
## D = 0.073, p-value = 0.6611
## alternative hypothesis: two-sided
# Data set of 19 subjects: the first 9 rows are coded "F", the last 10 "M".
# NOTE: this reuses the name `dat`, replacing the numeric vector used above.
dat <- data.frame(
  id = seq_len(19),
  name = c(
    "Alice", "Becka", "Gail", "Karen", "Kathy", "Mary", "Sandy", "Sharon",
    "Tammy", "Alfred", "Duke", "Guido", "James", "Jeffrey", "John",
    "Philip", "Robert", "Thomas", "William"
  ),
  sex = rep(c("F", "M"), times = c(9, 10)),
  age = c(
    13, 13, 14, 12, 12, 15, 11, 15, 14, 14,
    14, 15, 12, 13, 12, 16, 12, 11, 15
  ),
  height = c(
    56.5, 65.3, 64.3, 56.3, 59.8, 66.5, 51.3, 62.5, 62.8, 69.0,
    63.5, 67.0, 57.3, 62.5, 59.0, 72.0, 64.8, 57.5, 66.5
  ),
  weight = c(
    84.0, 98.0, 90.0, 77.0, 84.5, 112.0, 50.5, 112.5, 102.5, 112.5,
    102.5, 133.0, 83.0, 84.0, 99.5, 150.0, 128.0, 85.0, 112.0
  )
)
# Pearson product-moment correlation test between weight and height
cor.test(dat$weight, dat$height, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: dat$weight and dat$height
## t = 7.555, df = 17, p-value = 7.887e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7044 0.9523
## sample estimates:
## cor
## 0.8778
# Predictor and response for a simple linear regression (10 paired values)
x <- c(5.1, 3.5, 7.1, 6.2, 8.8, 7.8, 4.5, 5.6, 8.0, 6.4)
y <- c(1907, 1287, 2700, 2373, 3260, 3000, 1947, 2273, 3113, 2493)
# Scatter plot of y against x
plot(x, y)
# Least-squares fit of y on x with an explicit intercept term
lm.sol <- lm(y ~ 1 + x)
summary(lm.sol)
##
## Call:
## lm(formula = y ~ 1 + x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -128.59 -70.98 -3.73 49.26 167.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 141.0 125.1 1.13 0.29
## x 364.2 19.3 18.91 6.3e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 96.4 on 8 degrees of freedom
## Multiple R-squared: 0.978, Adjusted R-squared: 0.975
## F-statistic: 358 on 1 and 8 DF, p-value: 6.33e-08
# Fitted regression equation: Y = 140.95 + 364.18 X
# Point prediction and prediction interval for a new observation at x = 7
new <- data.frame(x = 7)
lm.pred <- predict(lm.sol, newdata = new, interval = "prediction")
lm.pred
## fit lwr upr
## 1 2690 2455 2925
# Predicted Y at x = 7 is 2690, with prediction interval [2455, 2925]