讀入資料
library(MASS)
dta <- cats
使用原本lm()裡的dummy coding,截距是參照組(此例為女性)的平均數。
coef(lm(Bwt ~ Sex, data = cats))
(Intercept) SexM
2.3595745 0.5404255
換成effect coding。讓contrasts的和為零,截距則是總體的平均數。
coef(lm(Bwt ~ Sex, data = cats, contrasts = list(Sex = "contr.sum")))
(Intercept) Sex1
2.6297872 -0.2702128
讀入資料
dta3 <- read.table("D:/201802/data_management/w3/readingtimes.txt", h = T)
使用rank,將各行句子的閱讀速度由低至高排序。
subject <- dta3[,5:14]
for (i in 1:7) {
print(rank(subject[i,]))
}
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
6 2 9 3 7 4 5 10 1 8
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
7 5 3 4 10 6 9 2 1 8
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
2 4 7 6 10 5 9 8 1 3
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
5 6 3 2 8 9 7 10 1 4
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
9 3 4 1 8 6 5 10 2 7
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
3 7 2 4 8 10 9 1 6 5
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
6 3 5 7 8 9 4 2 1 10
#mean speed of each sentence
dta3$mean <- rowMeans(subject)
#mean speed of each word
dta3$estimate <- dta3$mean / dta3$Wrds
#whole mean per word
mean(dta3$estimate)
[1] 0.3783395
找出t-test的function寫法
getAnywhere(t.test.default)
A single object matching 't.test.default' was found
It was found in the following places
registered S3 method for t.test from namespace stats
namespace:stats
with value
function (x, y = NULL, alternative = c("two.sided", "less", "greater"),
mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95,
...)
{
alternative <- match.arg(alternative)
if (!missing(mu) && (length(mu) != 1 || is.na(mu)))
stop("'mu' must be a single number")
if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) ||
conf.level < 0 || conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
if (!is.null(y)) {
dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
if (paired)
xok <- yok <- complete.cases(x, y)
else {
yok <- !is.na(y)
xok <- !is.na(x)
}
y <- y[yok]
}
else {
dname <- deparse(substitute(x))
if (paired)
stop("'y' is missing for paired test")
xok <- !is.na(x)
yok <- NULL
}
x <- x[xok]
if (paired) {
x <- x - y
y <- NULL
}
nx <- length(x)
mx <- mean(x)
vx <- var(x)
if (is.null(y)) {
if (nx < 2)
stop("not enough 'x' observations")
df <- nx - 1
stderr <- sqrt(vx/nx)
if (stderr < 10 * .Machine$double.eps * abs(mx))
stop("data are essentially constant")
tstat <- (mx - mu)/stderr
method <- if (paired)
"Paired t-test"
else "One Sample t-test"
estimate <- setNames(mx, if (paired)
"mean of the differences"
else "mean of x")
}
else {
ny <- length(y)
if (nx < 1 || (!var.equal && nx < 2))
stop("not enough 'x' observations")
if (ny < 1 || (!var.equal && ny < 2))
stop("not enough 'y' observations")
if (var.equal && nx + ny < 3)
stop("not enough observations")
my <- mean(y)
vy <- var(y)
method <- paste(if (!var.equal)
"Welch", "Two Sample t-test")
estimate <- c(mx, my)
names(estimate) <- c("mean of x", "mean of y")
if (var.equal) {
df <- nx + ny - 2
v <- 0
if (nx > 1)
v <- v + (nx - 1) * vx
if (ny > 1)
v <- v + (ny - 1) * vy
v <- v/df
stderr <- sqrt(v * (1/nx + 1/ny))
}
else {
stderrx <- sqrt(vx/nx)
stderry <- sqrt(vy/ny)
stderr <- sqrt(stderrx^2 + stderry^2)
df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny -
1))
}
if (stderr < 10 * .Machine$double.eps * max(abs(mx),
abs(my)))
stop("data are essentially constant")
tstat <- (mx - my - mu)/stderr
}
if (alternative == "less") {
pval <- pt(tstat, df)
cint <- c(-Inf, tstat + qt(conf.level, df))
}
else if (alternative == "greater") {
pval <- pt(tstat, df, lower.tail = FALSE)
cint <- c(tstat - qt(conf.level, df), Inf)
}
else {
pval <- 2 * pt(-abs(tstat), df)
alpha <- 1 - conf.level
cint <- qt(1 - alpha/2, df)
cint <- tstat + c(-cint, cint)
}
cint <- mu + cint * stderr
names(tstat) <- "t"
names(df) <- "df"
names(mu) <- if (paired || !is.null(y))
"difference in means"
else "mean"
attr(cint, "conf.level") <- conf.level
rval <- list(statistic = tstat, parameter = df, p.value = pval,
conf.int = cint, estimate = estimate, null.value = mu,
alternative = alternative, method = method, data.name = dname)
class(rval) <- "htest"
return(rval)
}
<bytecode: 0x0000000027255b48>
<environment: namespace:stats>
(1)combn(,3):在給予的c()串裡任意挑選3個數字。
(2)apply():依列排序。
(3)unique():依每列為一組,列出不重複者。
combo <- unique(apply(combn(c(1:6, 1:6, 1:6), 3), 2, sort), MARGIN = 2)
計算各組的總和。
# all possible sum without duplicate
combo.sum <- unique(colSums(combo))
# rank the sum of dice from low to high
sort(combo.sum)
[1] 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18