HW1.Input an amoun in Taiwanese dollars (NT) and return output in US dollars (US$).

NT_exchange_US <- function(){
  NT <- readline(prompt = "Please Enter your NT$:")
  cat("You can exchange $", round(as.numeric(NT)/29.32,1), " US dollars\n", sep = "")
}
NT_exchange_US()
## Please Enter your NT$:
## You can exchange $NA US dollars

HW2.Split the data for each chick.

dta2 <- ChickWeight
str(dta2)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"
head(dta2)
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
sapply(split(dta2, dta2$Chick), 
       function(x) lm(weight ~ Time, data = x)$coef) 
##             18        16       15        13         9        20        10
## (Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
## Time        -2  1.053571  1.89881  2.239601  2.663137  3.732718  4.066102
##                     8        17       19        4         6        11
## (Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948
## Time         4.827273  4.531538  5.08743  6.08864  6.378006  7.510967
##                    3         1        12         2        5       14
## (Intercept) 23.17955 24.465436 21.939797 24.724853 16.89563 20.52488
## Time         8.48737  7.987899  8.440629  8.719861 10.05536 11.98245
##                     7        24        30        22        23        27
## (Intercept)  5.842535 53.067766 39.109666 40.082590 38.428074 29.858569
## Time        13.205264  1.207533  5.898351  5.877931  6.685978  7.379368
##                    28       26       25        29       21        33
## (Intercept) 23.984874 20.70715 19.65119  5.882771 15.56330 45.830283
## Time         9.703676 10.10316 11.30676 12.453487 15.47512  5.855241
##                    37       36       31       39       38       32
## (Intercept) 29.608834 25.85403 19.13099 17.03661 10.67282 13.69173
## Time         6.677053  9.99047 10.02617 10.73710 12.06051 13.18091
##                   40        34        35        44        45        43
## (Intercept) 10.83830  5.081682  4.757979 44.909091 35.673121 52.185751
## Time        13.44229 15.000151 17.258811  6.354545  7.686432  8.318863
##                    41        47        49        46       50       42
## (Intercept) 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
## Time         8.159885  8.374981  9.717894  9.738466 11.33293 11.83679
##                    48
## (Intercept)  7.947663
## Time        13.714718

HW3.Create a function to experiment with this relationship between t and normal distribution.

t_z <- function(df){
  curve(dnorm(x), -4, 4, col = 2, ylab = "dnorm(x)", lwd = 2)
  for(i in 1:length(df)) curve(dt(x, df[[i]]), col = 3, lty = 2, add = TRUE)
}
t_z(df<-c(2,4,8,16,32))

HW4.Explain the advantages or disadvantages of each method.

library(pacman)
pacman::p_load(MASS, tidyverse)

method 1:將dataframe分為不同組別並取其mean,此種方法最簡潔有力

str(Cushings)
## 'data.frame':    27 obs. of  3 variables:
##  $ Tetrahydrocortisone: num  3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
##  $ Pregnanetriol      : num  11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
##  $ Type               : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
head(Cushings)
##    Tetrahydrocortisone Pregnanetriol Type
## a1                 3.1         11.70    a
## a2                 3.0          1.30    a
## a3                 1.9          0.10    a
## a4                 3.8          0.04    a
## a5                 4.1          1.10    a
## a6                 1.9          0.40    a
aggregate( . ~ Type, data = Cushings, mean)
##   Type Tetrahydrocortisone Pregnanetriol
## 1    a            2.966667          2.44
## 2    b            8.180000          1.12
## 3    c           19.720000          5.50
## 4    u           14.016667          1.20

method 2:把資料分割成wide format,並依第2維度(即Type)分別取mean

sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))
##                            a    b     c        u
## Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
## Pregnanetriol       2.440000 1.12  5.50  1.20000

method 3:將Cushings依不同組別因子(即Type),計算mean

do.call("rbind", as.list(
  by(Cushings, list(Cushings$Type), function(x) {
    y <- subset(x, select =  -Type)
    apply(y, 2, mean)
  }
)))
##   Tetrahydrocortisone Pregnanetriol
## a            2.966667          2.44
## b            8.180000          1.12
## c           19.720000          5.50
## u           14.016667          1.20

method 4:依照類別變數(即Type)分組,搭配summarize,取分組mean結論,另一種簡潔的方式,初學者可能較易懂

Cushings %>%
 group_by(Type) %>%
 summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))
## # A tibble: 4 x 3
##   Type    t_m   p_m
##   <fct> <dbl> <dbl>
## 1 a      2.97  2.44
## 2 b      8.18  1.12
## 3 c     19.7   5.50
## 4 u     14.0   1.20

method 5:以現有的column資料做運算,形成新的column

Cushings %>%
 nest(-Type) %>%
 mutate(avg = map(data, ~ apply(., 2, mean)), 
        res_1 = map_dbl(avg, "Tetrahydrocortisone"), 
        res_2 = map_dbl(avg, "Pregnanetriol")) 
## # A tibble: 4 x 5
##   Type  data                  avg       res_1 res_2
##   <fct> <list>                <list>    <dbl> <dbl>
## 1 a     <data.frame [6 x 2]>  <dbl [2]>  2.97  2.44
## 2 b     <data.frame [10 x 2]> <dbl [2]>  8.18  1.12
## 3 c     <data.frame [5 x 2]>  <dbl [2]> 19.7   5.50
## 4 u     <data.frame [6 x 2]>  <dbl [2]> 14.0   1.20

HW5.Write a function to take input from users to create a plot…

dta5 <- function(n, mu, s){
  set.seed(0221)
  random.sample <- rnorm(n, mu, s)
  plot(x = 1:n, y = cumsum(random.sample)/1:n, type = "l", col = 3,
       xlab = "Sample Size", ylab = "Running Average")
  abline(h = mu, col = 2, lty = 2)
  grid()
}

dta5(4000, 100, 10)

HW6.compute the c-statistic illustrated with the data set in the article…

#Read Data
dta6 <- read.table("cstat.txt", header=T)
str(dta6)
## 'data.frame':    42 obs. of  1 variable:
##  $ nc: int  28 46 39 45 24 20 35 37 36 40 ...
head(dta6)
##   nc
## 1 28
## 2 46
## 3 39
## 4 45
## 5 24
## 6 20
#Function
c.stat <- function(data, n = length(data)){
  cden <- 1-(sum(diff(dta6[1:n,1])^2)/(2*(n-1)*var(dta6[1:n,1])))
  sc <- sqrt((n-2)/((n-1)*(n+1)))
  pval <- 1-pnorm(cden/sc)
  return(list(c = cden, z = cden/sc, pvalue = pval))
}
# Put Data number in Function
c.stat(dta6, 42) 
## $c
## [1] 0.6450652
## 
## $z
## [1] 4.282524
## 
## $pvalue
## [1] 9.239272e-06

HW7.Use a vectorized version of the ssq function in the lecture note…

#Read Data
dta7 <- read.table("hs0.txt", header=T)
str(dta7$math)
##  int [1:200] 41 53 54 47 57 51 42 45 54 52 ...
# 設定ssq function
ssq <- function(mu, sigma, y) {sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))

# 設定最小化的vssq 
m_vssq <- with(dta7, vssq(mu = quantile(math, 1/4):quantile(math, 3/4),
               sigma = var(math), y = math))
f <- function(data = NA){
  z <- vssq(mu = quantile(data, 1/4):quantile(data, 3/4),
            sigma = var(data),
            y = data)
  require(plot3D)
  image3D(x = quantile(data, 1/4):quantile(data, 3/4),
          y = var(data),
          z = data,
          xlab = "mu", ylab = "sigma", zlab = "score",
          col = "steelblue")
}
f(data = dta7$math)
## Loading required package: plot3D
## Warning: package 'plot3D' was built under R version 3.4.4