读入数据
w <- as.vector(unlist(read.table("exec0301.data")))
w
## [1] 4.3 79.5 75.0 73.5 75.8 70.4 73.5 67.2 75.8 73.5 78.8 75.6 73.5 75.0
## [15] 75.8 72.0 79.5 76.5 73.5 79.5 68.8 75.0 78.8 72.0 68.8 76.5 73.5 72.7
## [29] 75.0 70.4 78.0 78.8 74.3 64.3 76.5 74.3 74.7 70.4 73.5 76.5 70.4 72.0
## [43] 75.8 75.8 70.4 76.5 65.0 77.2 73.5 72.7 80.5 72.0 65.0 80.3 71.2 77.6
## [57] 76.5 68.8 73.5 77.2 80.5 72.0 74.3 69.7 81.2 67.3 81.6 67.3 72.7 84.3
## [71] 69.7 74.3 71.2 74.3 75.0 72.0 75.4 67.3 81.6 75.0 71.2 71.2 69.7 73.5
## [85] 70.4 75.0 72.7 67.3 70.3 76.5 73.5 72.0 68.0 73.5 68.0 74.3 72.7 72.7
## [99] 74.3 70.4
计算均值(mean)、方差标准差(std_dev)、极差®、标准误(sm)、变异系数(cv)、偏度(Skewness)、峰度(Kurtosis)。
source("../R-Book-Demo/ch3/data_outline.R", echo = TRUE, max.deparse.length = 3000)
##
## > data_outline <- function(x) {
## + n <- length(x)
## + m <- mean(x)
## + v <- var(x)
## + s <- sd(x)
## + me <- median(x)
## + cv <- 100 * s/m
## + css <- sum((x - m)^2)
## + uss <- sum(x^2)
## + R <- max(x) - min(x)
## + R1 <- quantile(x, 3/4) - quantile(x, 1/4)
## + sm <- s/sqrt(n)
## + g1 <- n/((n - 1) * (n - 2)) * sum((x - m)^3)/s^3
## + g2 <- ((n * (n + 1))/((n - 1) * (n - 2) * (n - 3)) * sum((x -
## + m)^4)/s^4 - (3 * (n - 1)^2)/((n - 2) * (n - 3)))
## + data.frame(N = n, Mean = m, Var = v, std_dev = s, Median = me,
## + std_mean = sm, CV = cv, CSS = css, USS = uss, R = R,
## + R1 = R1, Skewness = g1, Kurtosis = g2, row.names = 1)
## + }
data_outline(w)
## N Mean Var std_dev Median std_mean CV CSS USS R R1
## 1 100 72.97 63.62 7.976 73.5 0.7976 10.93 6299 538731 80 4.8
## Skewness Kurtosis
## 1 -6.504 56.06
输入数据
dm <- data.frame(J1 = c(2, 4, 3, 2, 4, 7, 7, 2, 2, 5, 4, NA), J2 = c(5, 6, 8,
5, 10, 7, 12, 12, 6, 6, NA, NA), J3 = c(7, 11, 6, 6, 7, 9, 5, 5, 10, 6,
3, 10))
t(dm)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## J1 2 4 3 2 4 7 7 2 2 5 4 NA
## J2 5 6 8 5 10 7 12 12 6 6 NA NA
## J3 7 11 6 6 7 9 5 5 10 6 3 10
使用boxplot画箱线图
boxplot(dm$J1, dm$J2, dm$J3, names = c("J1", "J2", "J3"))
使用plot画箱线图
plot(factor(c(rep(1, length(dm$J1)), rep(2, length(dm$J2)), rep(3, length(dm$J3)))),
unlist(dm))
结论,J1菌型平均存活天数明显短于J2,J3菌型。
读入数据
# 从exam0203.txt直接读取数据,有表头
dt <- read.table("../R-Book-Demo/ch2/exam0203.txt", head = TRUE)
dt
## Name Sex Age Height Weight
## 1 Alice F 13 56.5 84.0
## 2 Becka F 13 65.3 98.0
## 3 Gail F 14 64.3 90.0
## 4 Karen F 12 56.3 77.0
## 5 Kathy F 12 59.8 84.5
## 6 Mary F 15 66.5 112.0
## 7 Sandy F 11 51.3 50.5
## 8 Sharon F 15 62.5 112.5
## 9 Tammy F 14 62.8 102.5
## 10 Alfred M 14 69.0 112.5
## 11 Duke M 14 63.5 102.5
## 12 Guido M 15 67.0 133.0
## 13 James M 12 57.3 83.0
## 14 Jeffrey M 13 62.5 84.0
## 15 John M 12 59.0 99.5
## 16 Philip M 16 72.0 150.0
## 17 Robert M 12 64.8 128.0
## 18 Thomas M 11 57.5 85.0
## 19 William M 15 66.5 112.0
体重对于身高的散点图
plot(dt$Weight, dt$Height, xlab = "Weight", ylab = "Height")
不同性别情况下,体重对于身高的散点图
coplot(dt$Weight ~ dt$Height | dt$Sex, xlab = "Weight", ylab = "Height")
不同年龄段下,体重对于身高的散点图
coplot(dt$Weight ~ dt$Height | dt$Age, xlab = "Weight", ylab = "Height")
不同性别,不同年龄段下,体重对于身高的散点图
coplot(dt$Weight ~ dt$Height | dt$Age + dt$Sex, xlab = "Weight", ylab = "Height")
定义函数
f <- function(x, y) x^4 - 2 * (x^2) * y + x^2 - 2 * x * y + 2 * (y^2) + 9/2 *
x - 4 * y + 4
定义x,y
x <- seq(-2, 3, 0.05)
y <- seq(-1, 7, 0.05)
计算函数值
z <- outer(x, y, f)
函数的二维等值线图
contour(x, y, z, levels = c(0, 1, 2, 3, 4, 5, 10, 15, 20, 30, 40, 50, 60, 80,
100))
函数的三维网格曲面图
persp(x, y, z, theta = -30, phi = 25, col = "yellow")
读入求职者数据
pushd = getwd()
setwd("../R-Book-Demo/ch3")
source("group.R", echo = TRUE)
##
## > rt <- read.table("applicant.data")
##
## > attach(rt)
##
## > rt$G1 <- (SC + LC + SMS + DRV + AMB + GSP + POT)/7
##
## > rt$G2 <- (FL + EXP + SUIT)/3
##
## > rt$G3 <- (LA + HON + KJ)/3
##
## > rt$G4 <- AA
##
## > rt$G5 <- APP
##
## > AVG <- apply(rt[, 16:20], 1, mean)
##
## > sort(AVG, decreasing = TRUE)
## 8 40 39 7 23 9 2 22 24 16 46 5
## 9.000 8.971 8.914 8.619 8.390 8.210 8.067 8.057 8.038 7.571 7.533 7.314
## 10 20 13 44 17 14 12 6 27 45 3 21
## 7.305 7.219 7.210 7.095 7.076 7.048 7.029 7.019 6.914 6.905 6.733 6.695
## 15 11 4 26 18 25 38 19 37 1 36 41
## 6.619 6.495 6.390 6.352 6.219 6.200 6.152 5.952 5.924 5.886 5.686 5.362
## 43 31 30 32 47 33 35 42 48 34 28 29
## 5.314 5.048 5.010 4.648 4.524 4.438 4.257 4.257 4.190 4.048 3.267 2.514
setwd(pushd)
以15项自变量画星图
stars(rt[1:15], draw.segments = TRUE)
以G1,G2,G3,G4,G5为轴画星图
stars(rt[16:20], draw.segments = TRUE)
由图可看到明显7,8,9,2,39,40较佳,但是还是没有按照sort结果精确。
定义调和曲线函数
pushd = getwd()
setwd("../R-Book-Demo/ch3")
source("unison.R", echo = TRUE, max.deparse.length = 3000)
##
## > unison <- function(x) {
## + if (is.data.frame(x) == TRUE)
## + x <- as.matrix(x)
## + t <- seq(-pi, pi, pi/30)
## + m <- nrow(x)
## + n <- ncol(x)
## + f <- array(0, c(m, length(t)))
## + for (i in 1:m) {
## + f[i, ] <- x[i, 1]/sqrt(2)
## + for (j in 2:n) {
## + if (j%%2 == 0)
## + f[i, ] <- f[i, ] + x[i, j] * sin(j/2 * t)
## + else f[i, ] <- f[i, ] + x[i, j] * cos(j%/%2 * t)
## + }
## + }
## + plot(c(-pi, pi), c(min(f), max(f)), type = "n", main = "The Unison graph of Data",
## + xlab = "t", ylab = "f(t)")
## + for (i in 1:m) lines(t, f[i, ], col = i)
## + }
setwd(pushd)
以G1,G2,G3,G4,G5为自变量画调和曲线图
unison(rt[16:20])