R语言第三周作业

3.1

# Sample of 100 observations analysed in exercises 3.1-3.4.
dat <- c(
  74.3, 78.8, 68.8, 78.0, 70.4, 80.5, 80.5, 69.7, 71.2, 73.5,
  79.5, 75.6, 75.0, 78.8, 72.0, 72.0, 72.0, 74.3, 71.2, 72.0,
  75.0, 73.5, 78.8, 74.3, 75.8, 65.0, 74.3, 71.2, 69.7, 68.0,
  73.5, 75.0, 72.0, 64.3, 75.8, 80.3, 69.7, 74.3, 73.5, 73.5,
  75.8, 75.8, 68.8, 76.5, 70.4, 71.2, 81.2, 75.0, 70.4, 68.0,
  70.4, 72.0, 76.5, 74.3, 76.5, 77.6, 67.3, 72.0, 75.0, 74.3,
  73.5, 79.5, 73.5, 74.7, 65.0, 76.5, 81.6, 75.4, 72.7, 72.7,
  67.2, 76.5, 72.7, 70.4, 77.2, 68.8, 67.3, 67.3, 67.3, 72.7,
  75.8, 73.5, 75.0, 73.5, 73.5, 73.5, 72.7, 81.6, 70.3, 74.3,
  73.5, 79.5, 70.4, 76.5, 72.7, 77.2, 84.3, 75.0, 76.5, 70.4
)

计算均值、方差、标准差、极差、标准误、变异系数、偏度、峰度:

# Mean
m <- mean(dat)
m
## [1] 73.67
# Variance (sample variance, as computed by var())
v <- var(dat)
v
## [1] 15.52
# Standard deviation
s <- sd(dat)
s
## [1] 3.939
# Range (max - min)
diff(range(dat))
## [1] 20
# Standard error of the mean
n <- length(dat)
s / sqrt(n)
## [1] 0.3939
# Coefficient of variation, in percent
100 * s / m
## [1] 5.347
# Skewness (bias-adjusted sample skewness)
n / ((n - 1) * (n - 2)) * sum((dat - m)^3) / s^3
## [1] 0.05406
# Kurtosis (bias-adjusted sample excess kurtosis)
(n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)) * sum((dat - m)^4) / s^4 -
  (3 * (n - 1)^2) / ((n - 2) * (n - 3))
## [1] 0.03702

3.2

# Histogram on the density scale, with a kernel density estimate overlaid.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
hist(dat, main = "Histogram", freq = FALSE)
lines(density(dat), col = "blue")

plot of chunk unnamed-chunk-3

# Empirical CDF of the sample, drawn as a step function with vertical
# segments and without point markers.
# TRUE/FALSE are spelled out: `T` and `F` are ordinary, reassignable variables.
xqdb <- ecdf(dat)
plot(xqdb, verticals = TRUE, do.points = FALSE)
# Overlay the normal CDF with mean m and sd s on a fine grid so the curve is
# smooth (the earlier commented-out grid applied min()/max() to the ecdf
# object itself rather than to the data).
x_grid <- seq(60, 90, by = 0.1)
lines(x_grid, pnorm(x_grid, mean = m, sd = s))

plot of chunk unnamed-chunk-3

# Normal Q-Q plot of the sample with its reference line
qqnorm(y = dat)
qqline(y = dat)

plot of chunk unnamed-chunk-3

3.3

# Stem-and-leaf display
stem(x = dat)
## 
##   The decimal point is at the |
## 
##   64 | 300
##   66 | 23333
##   68 | 00888777
##   70 | 344444442222
##   72 | 0000000777777555555555555
##   74 | 33333333700000004688888
##   76 | 5555555226
##   78 | 0888555
##   80 | 355266
##   82 | 
##   84 | 3
# Box plot
boxplot(x = dat)

plot of chunk unnamed-chunk-4

# Five-number summary (summary() also reports the mean)
summary(object = dat)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    64.3    71.2    73.5    73.7    75.8    84.3

3.4

# Shapiro-Wilk W test for normality
shapiro.test(x = dat)
## 
##  Shapiro-Wilk normality test
## 
## data:  dat
## W = 0.9901, p-value = 0.6708
# Kolmogorov-Smirnov test against a normal distribution with mean m and sd s.
# NOTE(review): m and s are estimated from this same sample; the ks.test
# documentation asks for pre-specified parameters, so treat the p-value as
# approximate -- confirm whether a Lilliefors-type correction is wanted.
ks.test(x = dat, y = "pnorm", mean = m, sd = s)
## Warning: ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  dat
## D = 0.073, p-value = 0.6611
## alternative hypothesis: two-sided

3.9

# Exercise 3.9: records for 19 students (9 female followed by 10 male).
# Note: this rebinds `dat`, replacing the numeric sample used earlier.
dat <- data.frame(
  id = 1:19,
  name = c(
    "Alice", "Becka", "Gail", "Karen", "Kathy", "Mary", "Sandy", "Sharon",
    "Tammy", "Alfred", "Duke", "Guido", "James", "Jeffrey", "John", "Philip",
    "Robert", "Thomas", "William"
  ),
  sex = rep(c("F", "M"), c(9, 10)),
  age = c(13, 13, 14, 12, 12, 15, 11, 15, 14, 14,
          14, 15, 12, 13, 12, 16, 12, 11, 15),
  height = c(56.5, 65.3, 64.3, 56.3, 59.8, 66.5, 51.3, 62.5, 62.8, 69.0,
             63.5, 67.0, 57.3, 62.5, 59.0, 72.0, 64.8, 57.5, 66.5),
  weight = c(84.0, 98.0, 90.0, 77.0, 84.5, 112.0, 50.5, 112.5, 102.5, 112.5,
             102.5, 133.0, 83.0, 84.0, 99.5, 150.0, 128.0, 85.0, 112.0)
)

# Pearson correlation test between weight and height
cor.test(x = dat$weight, y = dat$height, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  dat$weight and dat$height
## t = 7.555, df = 17, p-value = 7.887e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7044 0.9523
## sample estimates:
##    cor 
## 0.8778

6.1

# Exercise 6.1: predictor x and response y, shown as a scatter plot
x <- c(5.1, 3.5, 7.1, 6.2, 8.8, 7.8, 4.5, 5.6, 8.0, 6.4)
y <- c(1907, 1287, 2700, 2373, 3260, 3000, 1947, 2273, 3113, 2493)
plot(x, y)

plot of chunk unnamed-chunk-7

# Simple linear regression of y on x with an explicit intercept term
lm.sol <- lm(formula = y ~ 1 + x)
summary(object = lm.sol)
## 
## Call:
## lm(formula = y ~ 1 + x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -128.59  -70.98   -3.73   49.26  167.23 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    141.0      125.1    1.13     0.29    
## x              364.2       19.3   18.91  6.3e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.4 on 8 degrees of freedom
## Multiple R-squared:  0.978,  Adjusted R-squared:  0.975 
## F-statistic:  358 on 1 and 8 DF,  p-value: 6.33e-08
# Fitted regression equation (from the summary above): Y = 140.95 + 364.18 X

# Predict Y at x = 7 together with its prediction interval
# (predict.lm uses level = 0.95 unless told otherwise).
new <- data.frame(x = 7)
lm.pred <- predict(lm.sol, newdata = new, interval = "prediction")
lm.pred
##    fit  lwr  upr
## 1 2690 2455 2925
# Y(7) = 2690, prediction interval [2455, 2925]