In class exercise

1

lapply(lapply(search(), ls), length) 
#把search裡面有的object分類後計算每個有幾個object在裡面

2

#設定年、貸款金額、利率
year <- seq(10,30,5)
loan <- c(5000000, 10000000, 15000000)
rate <- c(0.02, 0.05, 0.07)
#用outer來計算array之間的相乘
bank <- function(y,l,r) {
  payment = outer(l, (r/(1-outer((1+r),(-y*12), "^"))), "*")
  return(payment)
}
#給出不同年底下不同貸款金額跟利率的paymeny
bank(year,loan,rate)
## , , 1
## 
##        [,1]   [,2]    [,3]
## [1,] 110240 250719  350104
## [2,] 220481 501437  700209
## [3,] 330721 752156 1050313
## 
## , , 2
## 
##        [,1]   [,2]    [,3]
## [1,] 102914 250038  350002
## [2,] 205827 500077  700004
## [3,] 308741 750115 1050005
## 
## , , 3
## 
##        [,1]   [,2]    [,3]
## [1,] 100870 250002  350000
## [2,] 201741 500004  700000
## [3,] 302611 750006 1050000
## 
## , , 4
## 
##        [,1]   [,2]    [,3]
## [1,] 100264 250000  350000
## [2,] 200527 500000  700000
## [3,] 300791 750000 1050000
## 
## , , 5
## 
##        [,1]   [,2]    [,3]
## [1,] 100080 250000  350000
## [2,] 200160 500000  700000
## [3,] 300241 750000 1050000

3

#先讀進資料看一下資料形式
dt3 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)
knitr::kable(head(dt3))
id female race ses schtyp prog read write math science socst
70 male white low public general 57 52 41 47 57
121 female white middle public vocation 68 59 53 63 61
86 male white high public general 44 33 54 58 31
141 male white high public vocation 63 44 47 53 56
172 male white middle public academic 47 52 57 53 61
113 male white middle public academic 44 52 51 63 61
#3a 看兩兩之間的t.test結果
outer(7:11, 7:11, 
      Vectorize(
        function (i,j) t.test(dt3[,i], dt3[,j])$p.value
      ) 
)
##        [,1]   [,2]   [,3]   [,4]   [,5]
## [1,] 1.0000 0.5813 0.6728 0.7572 0.8677
## [2,] 0.5813 1.0000 0.8903 0.3776 0.7150
## [3,] 0.6728 0.8903 1.0000 0.4516 0.8118
## [4,] 0.7572 0.3776 0.4516 1.0000 0.6378
## [5,] 0.8677 0.7150 0.8118 0.6378 1.0000
#由結果知道read、write、math、science、socst倆倆之間沒有顯著差異

#3b
#先看這五個變項在race上有沒有差異
manova(cbind(read, write, math, science, socst) ~ race - 1, data = dt3) %>% summary(., test = "Wilks")
##            Df  Wilks approx F num Df den Df Pr(>F)
## race        4 0.0174     74.4     20    621 <2e-16
## Residuals 191
#有差異所以來畫圖看看差異在哪裡
dt3 %>%
  tidyr::gather(subject, score, 7:11) %>% 
  ggplot(., aes(race, score, color = subject, group = subject))+
  stat_summary(fun.data = mean_se,
               position = position_dodge(.5),
               na.rm = TRUE)+
  theme_bw()

#3c
#用mean_cl_boot來直接檢視Math score在不同SES底下的平均及其拔靴後的95%CI
ggplot(dt3, aes(ses, math))+
  stat_summary(fun.data = mean_cl_boot, na.rm = TRUE)+
  scale_x_discrete(limits = c("low", "middle", "high"))+
  scale_y_continuous(breaks = seq(40, 70, by = 2.5))+
  labs(x = "SES", y = "Average Math Score")+
  theme_bw()

4

#pi/4=0.7854
#把圓的方程式替換成四分之一圓的一元方程式
fcube <- function(x) {sqrt(1-x^2)}
N <- 10000; 
x <- runif(N, 0, 1)
y <- runif(N, 0, 1)
#sum的結果接近pi/4
sum(fcube(runif(N, 0, 1)) > runif(N, 0, 1))/N*(1*1)
## [1] 0.7858
curve(fcube, 0, 1, ylim = c(0, 1), ylab = "f(x)")
points(x, y, col = ifelse(fcube(x) > y, 3, 2), pch = '.')

Homework

1

在R裡面可以讓使用者自行輸入的函數:

trans <- function( ) {
  NT <- readline(prompt="Enter NT dollars: ")
  NT <- NT %>% as.numeric()
  USD<-round(NT*0.034145,1)
  cat("USD:",USD,"\n")
}

這是寫好數值示範的版本:

trans_inputdefault <- function(NT) {
  NT <- NT %>% as.numeric()
  USD<-round(NT*0.034145,1)
  cat("USD:",USD,"\n")
}
trans_inputdefault(10000)
## USD: 341.5

2

#先讀進資料
dt2<- ChickWeight
knitr::kable(head(dt2))
weight Time Chick Diet
42 0 1 1
51 2 1 1
59 4 1 1
64 6 1 1
76 8 1 1
93 10 1 1
#製造一個空的v等等用來放coef
v<-c()

chick<-seq(1,50,1)
getslope <- function(x){
  tmp<-dt2 %>% filter(Chick==x) %>% lm(weight~Time,data=.) %>% coef()
  v[x]<-tmp[2]
}
#把slope的係數抓出來
slope<-lapply(chick,getslope) %>% unlist() 
#x軸是chick 1-50,y軸是weight~time的斜率
plot(chick,slope)

3

#先畫出常態分配的曲線
z <- seq(-4, 4, .05)
plot(z, dnorm(z), type = 'l',lty=1,col=2, ylab = "Density",
     main = "Probability density function")
#設定t分配的function
tcurve<-function(x) {
  lines(z,dt(z,x), type = 'l',lty=3,col=3)
}
#輸入不同的自由度之後畫圖
df<-c(2,4,8,16,32)
lapply(df,tcurve)

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL

4 Cushings example

最簡潔的寫法,但以我自己的程度應該想不到

# method 1
aggregate( . ~ Type, data = Cushings, mean)
##   Type Tetrahydrocortisone Pregnanetriol
## 1    a               2.967          2.44
## 2    b               8.180          1.12
## 3    c              19.720          5.50
## 4    u              14.017          1.20

比較複雜,要熟悉apply家族的函數,也要知道資料裡面的結構

# method 2
sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))
##                         a    b     c     u
## Tetrahydrocortisone 2.967 8.18 19.72 14.02
## Pregnanetriol       2.440 1.12  5.50  1.20

比method2更長一點,但是比較可以理解這段程式的邏輯,但也滿長的

# method 3
do.call("rbind", as.list(
  by(Cushings, list(Cushings$Type), function(x) {
    y <- subset(x, select =  -Type)
    apply(y, 2, mean)
  }
  )))
##   Tetrahydrocortisone Pregnanetriol
## a               2.967          2.44
## b               8.180          1.12
## c              19.720          5.50
## u              14.017          1.20

需要去命名平均後的變數名稱,但我覺得比method2與3容易被想到,但同樣的也要熟悉dplyr的功能

# method 4
Cushings %>%
  group_by(Type) %>%
  summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))
## # A tibble: 4 x 3
##   Type    t_m   p_m
##   <fct> <dbl> <dbl>
## 1 a      2.97  2.44
## 2 b      8.18  1.12
## 3 c     19.7   5.50
## 4 u     14.0   1.20

除了平均出各Type的數據外,甚至還提供了資料屬性,但寫法看起來很複雜

# method 5
Cushings %>%
  tidyr::nest(-Type) %>%
  mutate(avg = map(data, ~ apply(., 2, mean)), 
         res_1 = map_dbl(avg, "Tetrahydrocortisone"), 
         res_2 = map_dbl(avg, "Pregnanetriol")) 
## # A tibble: 4 x 5
##   Type  data                  avg       res_1 res_2
##   <fct> <list>                <list>    <dbl> <dbl>
## 1 a     <data.frame [6 x 2]>  <dbl [2]>  2.97  2.44
## 2 b     <data.frame [10 x 2]> <dbl [2]>  8.18  1.12
## 3 c     <data.frame [5 x 2]>  <dbl [2]> 19.7   5.50
## 4 u     <data.frame [6 x 2]>  <dbl [2]> 14.0   1.20

5

#設亂數種子
set.seed(832751)
#設定function
hw5<-function(n,mu,sigme){
  sample<-rnorm(n,mu,sigma)
  plot(x=1:n,y=cumsum(sample)/1:n, type = "l",col=3)
  abline(h = mu, col = 2, lty = 2)
}
#設參數畫圖
n<-4000; mu<-10; sigma<-10
hw5(n,mu,sigme)

6

# read in data 
dt6 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/cstat.txt", header=T)

#modify the function
cstat <- function(data){
  n=length(data[,1])
  cden <- 1-(sum(diff(data[1:n,1])^2)/(2*(n-1)*var(data[1:n,1])))
  sc <- sqrt((n-2)/((n-1)*(n+1)))
  pval <- 1-pnorm(cden/sc)
  return(list(C = cden, Sc = sc, Zscore = cden/sc, p_value = pval))
}

#show the result
cstat(dt6)
## $C
## [1] 0.6451
## 
## $Sc
## [1] 0.1506
## 
## $Zscore
## [1] 4.283
## 
## $p_value
## [1] 9.239e-06

7

dt7 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)

#function in the lecture
ssq <- function(mu, sigma, y) { sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))

#set X,Y,Z
func <- function(data = NA){
  x <- seq(mean(data) - 5, mean(data) + 5, by = 0.1)
  y <- sd(data)
  z <- vssq(x, sd(data), data)
  rgl::plot3d(x, y, z,xlab = "mu", ylab = "sigma(control)", zlab = "ssq")}

func(dt7$math)

不知道為甚麼最後的圖片不會跑出來在Rmarkdown裡面…