HW2.Split the data for each chick.
dta2 <- ChickWeight
str(dta2)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
head(dta2)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
sapply(split(dta2, dta2$Chick),
function(x) lm(weight ~ Time, data = x)$coef)
## 18 16 15 13 9 20 10
## (Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
## Time -2 1.053571 1.89881 2.239601 2.663137 3.732718 4.066102
## 8 17 19 4 6 11
## (Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948
## Time 4.827273 4.531538 5.08743 6.08864 6.378006 7.510967
## 3 1 12 2 5 14
## (Intercept) 23.17955 24.465436 21.939797 24.724853 16.89563 20.52488
## Time 8.48737 7.987899 8.440629 8.719861 10.05536 11.98245
## 7 24 30 22 23 27
## (Intercept) 5.842535 53.067766 39.109666 40.082590 38.428074 29.858569
## Time 13.205264 1.207533 5.898351 5.877931 6.685978 7.379368
## 28 26 25 29 21 33
## (Intercept) 23.984874 20.70715 19.65119 5.882771 15.56330 45.830283
## Time 9.703676 10.10316 11.30676 12.453487 15.47512 5.855241
## 37 36 31 39 38 32
## (Intercept) 29.608834 25.85403 19.13099 17.03661 10.67282 13.69173
## Time 6.677053 9.99047 10.02617 10.73710 12.06051 13.18091
## 40 34 35 44 45 43
## (Intercept) 10.83830 5.081682 4.757979 44.909091 35.673121 52.185751
## Time 13.44229 15.000151 17.258811 6.354545 7.686432 8.318863
## 41 47 49 46 50 42
## (Intercept) 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
## Time 8.159885 8.374981 9.717894 9.738466 11.33293 11.83679
## 48
## (Intercept) 7.947663
## Time 13.714718
HW3.Create a function to experiment with this relationship between t and normal distribution.
t_z <- function(df){
curve(dnorm(x), -4, 4, col = 2, ylab = "dnorm(x)", lwd = 2)
for(i in 1:length(df)) curve(dt(x, df[[i]]), col = 3, lty = 2, add = TRUE)
}
t_z(df<-c(2,4,8,16,32))

HW4.Explain the advantages or disadvantages of each method.
library(pacman)
pacman::p_load(MASS, tidyverse)
method 1:將dataframe分為不同組別並取其mean,此種方法最簡潔有力
str(Cushings)
## 'data.frame': 27 obs. of 3 variables:
## $ Tetrahydrocortisone: num 3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
## $ Pregnanetriol : num 11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
## $ Type : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
head(Cushings)
## Tetrahydrocortisone Pregnanetriol Type
## a1 3.1 11.70 a
## a2 3.0 1.30 a
## a3 1.9 0.10 a
## a4 3.8 0.04 a
## a5 4.1 1.10 a
## a6 1.9 0.40 a
aggregate( . ~ Type, data = Cushings, mean)
## Type Tetrahydrocortisone Pregnanetriol
## 1 a 2.966667 2.44
## 2 b 8.180000 1.12
## 3 c 19.720000 5.50
## 4 u 14.016667 1.20
method 3:將Cushings依不同組別因子(即Type),計算mean
do.call("rbind", as.list(
by(Cushings, list(Cushings$Type), function(x) {
y <- subset(x, select = -Type)
apply(y, 2, mean)
}
)))
## Tetrahydrocortisone Pregnanetriol
## a 2.966667 2.44
## b 8.180000 1.12
## c 19.720000 5.50
## u 14.016667 1.20
method 4:依照類別變數(即Type)分組,搭配summarize,取分組mean結論,另一種簡潔的方式,初學者可能較易懂
Cushings %>%
group_by(Type) %>%
summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))
## # A tibble: 4 x 3
## Type t_m p_m
## <fct> <dbl> <dbl>
## 1 a 2.97 2.44
## 2 b 8.18 1.12
## 3 c 19.7 5.50
## 4 u 14.0 1.20
method 5:以現有的column資料做運算,形成新的column
Cushings %>%
nest(-Type) %>%
mutate(avg = map(data, ~ apply(., 2, mean)),
res_1 = map_dbl(avg, "Tetrahydrocortisone"),
res_2 = map_dbl(avg, "Pregnanetriol"))
## # A tibble: 4 x 5
## Type data avg res_1 res_2
## <fct> <list> <list> <dbl> <dbl>
## 1 a <data.frame [6 x 2]> <dbl [2]> 2.97 2.44
## 2 b <data.frame [10 x 2]> <dbl [2]> 8.18 1.12
## 3 c <data.frame [5 x 2]> <dbl [2]> 19.7 5.50
## 4 u <data.frame [6 x 2]> <dbl [2]> 14.0 1.20
HW6.compute the c-statistic illustrated with the data set in the article…
#Read Data
dta6 <- read.table("cstat.txt", header=T)
str(dta6)
## 'data.frame': 42 obs. of 1 variable:
## $ nc: int 28 46 39 45 24 20 35 37 36 40 ...
head(dta6)
## nc
## 1 28
## 2 46
## 3 39
## 4 45
## 5 24
## 6 20
#Function
c.stat <- function(data, n = length(data)){
cden <- 1-(sum(diff(dta6[1:n,1])^2)/(2*(n-1)*var(dta6[1:n,1])))
sc <- sqrt((n-2)/((n-1)*(n+1)))
pval <- 1-pnorm(cden/sc)
return(list(c = cden, z = cden/sc, pvalue = pval))
}
# Put Data number in Function
c.stat(dta6, 42)
## $c
## [1] 0.6450652
##
## $z
## [1] 4.282524
##
## $pvalue
## [1] 9.239272e-06
HW7.Use a vectorized version of the ssq function in the lecture note…
#Read Data
dta7 <- read.table("hs0.txt", header=T)
str(dta7$math)
## int [1:200] 41 53 54 47 57 51 42 45 54 52 ...
# 設定ssq function
ssq <- function(mu, sigma, y) {sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))
# 設定最小化的vssq
m_vssq <- with(dta7, vssq(mu = quantile(math, 1/4):quantile(math, 3/4),
sigma = var(math), y = math))
f <- function(data = NA){
z <- vssq(mu = quantile(data, 1/4):quantile(data, 3/4),
sigma = var(data),
y = data)
require(plot3D)
image3D(x = quantile(data, 1/4):quantile(data, 3/4),
y = var(data),
z = data,
xlab = "mu", ylab = "sigma", zlab = "score",
col = "steelblue")
}
f(data = dta7$math)
## Loading required package: plot3D
## Warning: package 'plot3D' was built under R version 3.4.4
