Q2 Explain using contrast

讀入資料

library(MASS)
dta <- cats

使用原本lm()裡的dummy coding,截距是參照組(此例為女性)的平均數。

coef(lm(Bwt ~ Sex, data = cats)) 
(Intercept)        SexM 
  2.3595745   0.5404255 

換成effect coding。讓contrasts的和為零,截距則是總體的平均數。

coef(lm(Bwt ~ Sex, data = cats, contrasts = list(Sex = "contr.sum")))
(Intercept)        Sex1 
  2.6297872  -0.2702128 

Q3 Reading speed question

  1. 依照各受試者閱讀速度排序。

讀入資料

dta3 <- read.table("D:/201802/data_management/w3/readingtimes.txt", h = T)

使用rank,將各行句子的閱讀速度由低至高排序。

subject <- dta3[,5:14]
for (i in 1:7) {
  print(rank(subject[i,]))
}
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  6   2   9   3   7   4   5  10   1   8 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  7   5   3   4  10   6   9   2   1   8 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  2   4   7   6  10   5   9   8   1   3 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  5   6   3   2   8   9   7  10   1   4 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  9   3   4   1   8   6   5  10   2   7 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  3   7   2   4   8  10   9   1   6   5 
S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
  6   3   5   7   8   9   4   2   1  10 
  1. 計算平均閱讀一個字所需的時間。
#mean speed of each sentence
dta3$mean <- rowMeans(subject)

#mean speed of each word
dta3$estimate <- dta3$mean / dta3$Wrds

#whole mean per word
mean(dta3$estimate)
[1] 0.3783395

Q4 Extract source code

找出t-test的function寫法

getAnywhere(t.test.default)
A single object matching 't.test.default' was found
It was found in the following places
  registered S3 method for t.test from namespace stats
  namespace:stats
with value

function (x, y = NULL, alternative = c("two.sided", "less", "greater"), 
    mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95, 
    ...) 
{
    alternative <- match.arg(alternative)
    if (!missing(mu) && (length(mu) != 1 || is.na(mu))) 
        stop("'mu' must be a single number")
    if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || 
        conf.level < 0 || conf.level > 1)) 
        stop("'conf.level' must be a single number between 0 and 1")
    if (!is.null(y)) {
        dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
        if (paired) 
            xok <- yok <- complete.cases(x, y)
        else {
            yok <- !is.na(y)
            xok <- !is.na(x)
        }
        y <- y[yok]
    }
    else {
        dname <- deparse(substitute(x))
        if (paired) 
            stop("'y' is missing for paired test")
        xok <- !is.na(x)
        yok <- NULL
    }
    x <- x[xok]
    if (paired) {
        x <- x - y
        y <- NULL
    }
    nx <- length(x)
    mx <- mean(x)
    vx <- var(x)
    if (is.null(y)) {
        if (nx < 2) 
            stop("not enough 'x' observations")
        df <- nx - 1
        stderr <- sqrt(vx/nx)
        if (stderr < 10 * .Machine$double.eps * abs(mx)) 
            stop("data are essentially constant")
        tstat <- (mx - mu)/stderr
        method <- if (paired) 
            "Paired t-test"
        else "One Sample t-test"
        estimate <- setNames(mx, if (paired) 
            "mean of the differences"
        else "mean of x")
    }
    else {
        ny <- length(y)
        if (nx < 1 || (!var.equal && nx < 2)) 
            stop("not enough 'x' observations")
        if (ny < 1 || (!var.equal && ny < 2)) 
            stop("not enough 'y' observations")
        if (var.equal && nx + ny < 3) 
            stop("not enough observations")
        my <- mean(y)
        vy <- var(y)
        method <- paste(if (!var.equal) 
            "Welch", "Two Sample t-test")
        estimate <- c(mx, my)
        names(estimate) <- c("mean of x", "mean of y")
        if (var.equal) {
            df <- nx + ny - 2
            v <- 0
            if (nx > 1) 
                v <- v + (nx - 1) * vx
            if (ny > 1) 
                v <- v + (ny - 1) * vy
            v <- v/df
            stderr <- sqrt(v * (1/nx + 1/ny))
        }
        else {
            stderrx <- sqrt(vx/nx)
            stderry <- sqrt(vy/ny)
            stderr <- sqrt(stderrx^2 + stderry^2)
            df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny - 
                1))
        }
        if (stderr < 10 * .Machine$double.eps * max(abs(mx), 
            abs(my))) 
            stop("data are essentially constant")
        tstat <- (mx - my - mu)/stderr
    }
    if (alternative == "less") {
        pval <- pt(tstat, df)
        cint <- c(-Inf, tstat + qt(conf.level, df))
    }
    else if (alternative == "greater") {
        pval <- pt(tstat, df, lower.tail = FALSE)
        cint <- c(tstat - qt(conf.level, df), Inf)
    }
    else {
        pval <- 2 * pt(-abs(tstat), df)
        alpha <- 1 - conf.level
        cint <- qt(1 - alpha/2, df)
        cint <- tstat + c(-cint, cint)
    }
    cint <- mu + cint * stderr
    names(tstat) <- "t"
    names(df) <- "df"
    names(mu) <- if (paired || !is.null(y)) 
        "difference in means"
    else "mean"
    attr(cint, "conf.level") <- conf.level
    rval <- list(statistic = tstat, parameter = df, p.value = pval, 
        conf.int = cint, estimate = estimate, null.value = mu, 
        alternative = alternative, method = method, data.name = dname)
    class(rval) <- "htest"
    return(rval)
}
<bytecode: 0x0000000027255b48>
<environment: namespace:stats>

Q5 Dice question

(1)combn(,3):在給予的c()串裡任意挑選3個數字。
(2)apply():依列排序。
(3)unique():依每列為一組,列出不重複者。

combo <- unique(apply(combn(c(1:6, 1:6, 1:6), 3), 2, sort), MARGIN = 2)

計算各組的總和。

# all possible sum without duplicate
combo.sum <- unique(colSums(combo))

# rank the sum of dice from low to high
sort(combo.sum)
 [1]  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18