Q1 NT to USD

library(quantmod)
#get current currency
Currency <- getQuote("USDTWD=x")[,2]

NT <- function() {
  NTdollar <- as.numeric(readline(prompt="Enter Taiwanese dollar: "))
  cat("US$", NTdollar/Currency, sep = "")
}

NT()

Enter Taiwanese dollar: 
US$NA

Q2 Chickweight

library(datasets)
dta2 <- ChickWeight
str(dta2)

Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':  578 obs. of  4 variables:
 $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
 $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
 $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
 $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
 - attr(*, "outer")=Class 'formula'  language ~Diet
  .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
 - attr(*, "labels")=List of 2
  ..$ x: chr "Time"
  ..$ y: chr "Body weight"
 - attr(*, "units")=List of 2
  ..$ x: chr "(days)"
  ..$ y: chr "(gm)"

with(dta2, by(dta2, Chick, 
             function(x) {lm(weight ~ Time, data = x)} )) %>%
  sapply(., coef)

            18        16       15        13         9        20        10
(Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
Time        -2  1.053571  1.89881  2.239601  2.663137  3.732718  4.066102
                    8        17       19        4         6        11
(Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948
Time         4.827273  4.531538  5.08743  6.08864  6.378006  7.510967
                   3         1        12         2        5       14
(Intercept) 23.17955 24.465436 21.939797 24.724853 16.89563 20.52488
Time         8.48737  7.987899  8.440629  8.719861 10.05536 11.98245
                    7        24        30        22        23        27
(Intercept)  5.842535 53.067766 39.109666 40.082590 38.428074 29.858569
Time        13.205264  1.207533  5.898351  5.877931  6.685978  7.379368
                   28       26       25        29       21        33
(Intercept) 23.984874 20.70715 19.65119  5.882771 15.56330 45.830283
Time         9.703676 10.10316 11.30676 12.453487 15.47512  5.855241
                   37       36       31       39       38       32
(Intercept) 29.608834 25.85403 19.13099 17.03661 10.67282 13.69173
Time         6.677053  9.99047 10.02617 10.73710 12.06051 13.18091
                  40        34        35        44        45        43
(Intercept) 10.83830  5.081682  4.757979 44.909091 35.673121 52.185751
Time        13.44229 15.000151 17.258811  6.354545  7.686432  8.318863
                   41        47        49        46       50       42
(Intercept) 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
Time         8.159885  8.374981  9.717894  9.738466 11.33293 11.83679
                   48
(Intercept)  7.947663
Time        13.714718

Q3 t-curve

normplot <- function() {
  DF <- as.numeric(readline(prompt="Enter degrees of freedom : "))
  plotting <- function(DF){
  curve(dnorm(x), -4, 4, col = 2, ylab = "dnorm(x)", lwd = 1)
  curve(dt(x, DF), col = 3, lty = 2, add = TRUE)
  }
  plotting(DF)
}

normplot()

Enter degrees of freedom :

Q4 Cushing

str(Cushings)

'data.frame':   27 obs. of  3 variables:
 $ Tetrahydrocortisone: num  3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
 $ Pregnanetriol      : num  11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
 $ Type               : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...

head(Cushings)

   Tetrahydrocortisone Pregnanetriol Type
a1                 3.1         11.70    a
a2                 3.0          1.30    a
a3                 1.9          0.10    a
a4                 3.8          0.04    a
a5                 4.1          1.10    a
a6                 1.9          0.40    a

# method 1
# 簡單，內容也直覺易讀
aggregate( . ~ Type, data = Cushings, mean)

  Type Tetrahydrocortisone Pregnanetriol
1    a            2.966667          2.44
2    b            8.180000          1.12
3    c           19.720000          5.50
4    u           14.016667          1.20

# method 2
# 行列與其他的方法相反
sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))

                           a    b     c        u
Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
Pregnanetriol       2.440000 1.12  5.50  1.20000

# method 3
# 複雜
do.call("rbind", as.list(
  by(Cushings, list(Cushings$Type), function(x) {
    y <- subset(x, select =  -Type)
    apply(y, 2, mean)
  }
  )))

  Tetrahydrocortisone Pregnanetriol
a            2.966667          2.44
b            8.180000          1.12
c           19.720000          5.50
u           14.016667          1.20

# method 4 
# 利用pipe直接把類型grouping，操作上比較直覺。
Cushings %>%
  group_by(Type) %>%
  summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))

# A tibble: 4 x 3
  Type    t_m   p_m
  <fct> <dbl> <dbl>
1 a      2.97  2.44
2 b      8.18  1.12
3 c     19.7   5.50
4 u     14.0   1.20

# method 5
# 內容比較不直覺，提供資料屬性。
Cushings %>%
  nest(-Type) %>%
  mutate(avg = map(data, ~ apply(., 2, mean)), 
         res_1 = map_dbl(avg, "Tetrahydrocortisone"), 
         res_2 = map_dbl(avg, "Pregnanetriol"))

# A tibble: 4 x 5
  Type  data                  avg       res_1 res_2
  <fct> <list>                <list>    <dbl> <dbl>
1 a     <data.frame [6 x 2]>  <dbl [2]>  2.97  2.44
2 b     <data.frame [10 x 2]> <dbl [2]>  8.18  1.12
3 c     <data.frame [5 x 2]>  <dbl [2]> 19.7   5.50
4 u     <data.frame [6 x 2]>  <dbl [2]> 14.0   1.20

Q5 Moving average

moving <- function(n, mu, sig, FUN = rnorm, ...){
  x <- 1:n
  y <- cumsum(FUN(n, mean = mu, sd = sig))/1:n   # moving average
  plot(x, y, type = "l", col = 3,
       xlab = "Sample size", ylab = "Running average")
  abline(h = mu, col = 2, lty = 2)
  grid()
}

set.seed(333)
moving(4000,100,10)

Q6 c-statistic

dta6 <- read.table("cstat.txt", h=T)

dta6a <- as.data.frame(dta6[1:10,])
dta6b <- as.data.frame(dta6[1:32,])
dta6c <- as.data.frame(dta6[23:42,])

# sc equation is wrong
c.stat <- function(data) {
  n <- length(data[,1])
  c <- 1-(sum(diff(data[,1])^2)/(2 *var(data[,1]) *(n - 1)))
  sc <- sqrt((n-2)/((n-1)*(n+1)))
  z <- c/sc
  pval <- pnorm(-abs(z))
  return(list(C = c, sc = sc, Z = z, pvalue = pval))
}

# refer to Table 2
c.stat(dta6a)

$C
[1] 0.1601208

$sc
[1] 0.2842676

$Z
[1] 0.563275

$pvalue
[1] 0.2866238

c.stat(dta6b)

$C
[1] 0.6642762

$sc
[1] 0.1712469

$Z
[1] 3.879054

$pvalue
[1] 5.243167e-05

c.stat(dta6c)

$C
[1] 0.5705596

$sc
[1] 0.2123977

$Z
[1] 2.68628

$pvalue
[1] 0.00361263

Q7 3d plot

knitr::knit_hooks$set(webgl = hook_webgl)
dta7 <- read.table("hs0.txt", h = T)

ssq <- function(mu, sigma, y) {sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))

# mean has the minimal of ssq (show+/- 5)
x <- seq(mean(dta7$math) - 5, mean(dta7$math) + 5, by = 0.1)
y <- sd(dta7$math)
z <- vssq(x, y, dta7$math)
rgl::plot3d(x, y, z, 
            xlab = "mu", ylab = "sigma(control)", zlab = "ssq")

You must enable Javascript to view this page properly.

w7 Data Management Q1 - Q7

Pei Jun

2018-04-16

Q1 NT to USD

Q2 Chickweight

Q3 t-curve

Q4 Cushing

Q5 Moving average

Q6 c-statistic

Q7 3d plot