Exercise 2
Read in the data and do the linear regression
dta<-cats
knitr::kable(head(cats, 10)) | Sex | Bwt | Hwt |
|---|---|---|
| F | 2.0 | 7.0 |
| F | 2.0 | 7.4 |
| F | 2.0 | 9.5 |
| F | 2.1 | 7.2 |
| F | 2.1 | 7.3 |
| F | 2.1 | 7.6 |
| F | 2.1 | 8.1 |
| F | 2.1 | 8.2 |
| F | 2.1 | 8.3 |
| F | 2.1 | 8.5 |
coef(lm(Bwt ~ Sex, data = cats))## (Intercept) SexM
## 2.3595745 0.5404255
coef(lm(Bwt ~ Sex, data = cats, contrasts = list(Sex = "contr.sum")))## (Intercept) Sex1
## 2.6297872 -0.2702128
在第一條回歸式中,R默認使用Dummy Coding,但若加上contr.sum則表示Deviation Coding,This coding system compares the mean of the dependent variable for a given level to the overall mean of the dependent variable.
Reference : R Library Contrast Coding Systems for categorical variables
Exercise 3
Data
Read the data
readingtimes <- read.csv("C:/Users/user/Dropbox/1062-Data_manage/0312/readingtimes.txt", sep="")
knitr::kable(head(readingtimes, 10)) | Snt | Sp | Wrds | New | S01 | S02 | S03 | S04 | S05 | S06 | S07 | S08 | S09 | S10 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 13 | 1 | 3.429 | 2.795 | 4.161 | 3.071 | 3.625 | 3.161 | 3.232 | 7.161 | 1.536 | 4.063 |
| 2 | 2 | 16 | 3 | 6.482 | 5.411 | 4.491 | 5.063 | 9.295 | 5.643 | 8.357 | 4.313 | 2.946 | 6.652 |
| 3 | 3 | 9 | 2 | 1.714 | 2.339 | 3.018 | 2.464 | 6.045 | 2.455 | 4.920 | 3.366 | 1.375 | 2.179 |
| 4 | 4 | 9 | 2 | 3.679 | 3.714 | 2.866 | 2.732 | 4.205 | 6.241 | 3.723 | 6.330 | 1.152 | 3.661 |
| 5 | 5 | 10 | 3 | 4.000 | 2.902 | 2.991 | 2.670 | 3.884 | 3.223 | 3.143 | 6.143 | 2.759 | 3.330 |
| 6 | 6 | 18 | 4 | 6.973 | 8.018 | 6.625 | 7.571 | 8.795 | 13.188 | 11.170 | 6.071 | 7.964 | 7.866 |
| 7 | 7 | 6 | 1 | 2.634 | 1.750 | 2.268 | 2.884 | 3.491 | 3.688 | 2.054 | 1.696 | 1.455 | 3.705 |
Rank subjects by their reading speeed
rank(apply(readingtimes[,5:14],2,mean))## S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
## 5 4 2 3 10 9 8 7 1 6
Estimate, on average, how long does it take to read a word
先計算每個學生的平均閱讀速度,以及平均字數,再計算平均一個字要花多少時間
matrixmean<-apply(readingtimes[,c(3,5:14)],2,mean)
mean(matrixmean[2:11])/matrixmean[1]## Wrds
## 0.3802173
Exercise 4
source code of the t.test function
getAnywhere(t.test.default)## A single object matching 't.test.default' was found
## It was found in the following places
## registered S3 method for t.test from namespace stats
## namespace:stats
## with value
##
## function (x, y = NULL, alternative = c("two.sided", "less", "greater"),
## mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95,
## ...)
## {
## alternative <- match.arg(alternative)
## if (!missing(mu) && (length(mu) != 1 || is.na(mu)))
## stop("'mu' must be a single number")
## if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) ||
## conf.level < 0 || conf.level > 1))
## stop("'conf.level' must be a single number between 0 and 1")
## if (!is.null(y)) {
## dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
## if (paired)
## xok <- yok <- complete.cases(x, y)
## else {
## yok <- !is.na(y)
## xok <- !is.na(x)
## }
## y <- y[yok]
## }
## else {
## dname <- deparse(substitute(x))
## if (paired)
## stop("'y' is missing for paired test")
## xok <- !is.na(x)
## yok <- NULL
## }
## x <- x[xok]
## if (paired) {
## x <- x - y
## y <- NULL
## }
## nx <- length(x)
## mx <- mean(x)
## vx <- var(x)
## if (is.null(y)) {
## if (nx < 2)
## stop("not enough 'x' observations")
## df <- nx - 1
## stderr <- sqrt(vx/nx)
## if (stderr < 10 * .Machine$double.eps * abs(mx))
## stop("data are essentially constant")
## tstat <- (mx - mu)/stderr
## method <- if (paired)
## "Paired t-test"
## else "One Sample t-test"
## estimate <- setNames(mx, if (paired)
## "mean of the differences"
## else "mean of x")
## }
## else {
## ny <- length(y)
## if (nx < 1 || (!var.equal && nx < 2))
## stop("not enough 'x' observations")
## if (ny < 1 || (!var.equal && ny < 2))
## stop("not enough 'y' observations")
## if (var.equal && nx + ny < 3)
## stop("not enough observations")
## my <- mean(y)
## vy <- var(y)
## method <- paste(if (!var.equal)
## "Welch", "Two Sample t-test")
## estimate <- c(mx, my)
## names(estimate) <- c("mean of x", "mean of y")
## if (var.equal) {
## df <- nx + ny - 2
## v <- 0
## if (nx > 1)
## v <- v + (nx - 1) * vx
## if (ny > 1)
## v <- v + (ny - 1) * vy
## v <- v/df
## stderr <- sqrt(v * (1/nx + 1/ny))
## }
## else {
## stderrx <- sqrt(vx/nx)
## stderry <- sqrt(vy/ny)
## stderr <- sqrt(stderrx^2 + stderry^2)
## df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny -
## 1))
## }
## if (stderr < 10 * .Machine$double.eps * max(abs(mx),
## abs(my)))
## stop("data are essentially constant")
## tstat <- (mx - my - mu)/stderr
## }
## if (alternative == "less") {
## pval <- pt(tstat, df)
## cint <- c(-Inf, tstat + qt(conf.level, df))
## }
## else if (alternative == "greater") {
## pval <- pt(tstat, df, lower.tail = FALSE)
## cint <- c(tstat - qt(conf.level, df), Inf)
## }
## else {
## pval <- 2 * pt(-abs(tstat), df)
## alpha <- 1 - conf.level
## cint <- qt(1 - alpha/2, df)
## cint <- tstat + c(-cint, cint)
## }
## cint <- mu + cint * stderr
## names(tstat) <- "t"
## names(df) <- "df"
## names(mu) <- if (paired || !is.null(y))
## "difference in means"
## else "mean"
## attr(cint, "conf.level") <- conf.level
## rval <- list(statistic = tstat, parameter = df, p.value = pval,
## conf.int = cint, estimate = estimate, null.value = mu,
## alternative = alternative, method = method, data.name = dname)
## class(rval) <- "htest"
## return(rval)
## }
## <bytecode: 0x00000000145918f0>
## <environment: namespace:stats>
Exercise 5
Use R to list all possible sums from rolling three dice
d1 <- round(runif(10000)*5)+1
d2 <- round(runif(10000)*5)+1
d3 <- round(runif(10000)*5)+1
sumdice<- d1+d2+d3
sumdice %>% table## .
## 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 4 73 179 370 669 1016 1306 1411 1429 1291 944 654 403 177 68
## 18
## 6
Exercise 6
An R script for IQ_Beh data set
dta <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0312/IQ_Beh.txt", header = T, row.names = 1)str(dta)## 'data.frame': 94 obs. of 3 variables:
## $ Dep: Factor w/ 2 levels "D","N": 2 2 2 2 1 2 2 2 2 2 ...
## $ IQ : int 103 124 124 104 96 92 124 99 92 116 ...
## $ BP : int 4 12 9 3 3 3 6 4 3 9 ...
head(dta)## Dep IQ BP
## 1 N 103 4
## 2 N 124 12
## 3 N 124 9
## 4 N 104 3
## 5 D 96 3
## 6 N 92 3
class(dta)## [1] "data.frame"
dim(dta)## [1] 94 3
names(dta)## [1] "Dep" "IQ" "BP"
is.vector(dta$BP)## [1] TRUE
dta[1, ]## Dep IQ BP
## 1 N 103 4
dta[1:3, "IQ"]## [1] 103 124 124
tail(dta[order(dta$BP), ])## Dep IQ BP
## 16 N 89 11
## 58 N 117 11
## 66 N 126 11
## 2 N 124 12
## 73 D 99 13
## 12 D 22 17
tail(dta[order(-dta$BP), ], 4)## Dep IQ BP
## 77 N 124 1
## 80 N 121 1
## 24 N 106 0
## 75 N 122 0
histogram of IQ
with(dta, hist(IQ, xlab = "IQ", main = ""))boxplot of behavior problem by depression status
boxplot(BP ~ Dep, data = dta,
xlab = "Depression",
ylab = "Behavior problem score")scatter plot
plot(IQ ~ BP, data = dta, pch = 20, col = dta$Dep,
xlab = "Behavior problem score", ylab = "IQ")
grid()two regression lines
plot(BP ~ IQ, data = dta, type = "n",
ylab = "Behavior problem score", xlab = "IQ")
text(dta$IQ, dta$BP, labels = dta$Dep, cex = 0.5)
abline(lm(BP ~ IQ, data = dta, subset = Dep == "D"))
abline(lm(BP ~ IQ, data = dta, subset = Dep == "N"), lty = 2)