In class exercise

1

lapply(lapply(search(), ls), length) 
#把search裡面有的object分類後計算每個有幾個object在裡面

2

#設定年、貸款金額、利率
year <- seq(10,30,5)
loan <- c(5000000, 10000000, 15000000)
rate <- c(0.02, 0.05, 0.07)
#用outer來計算array之間的相乘
bank <- function(y,l,r) {
  payment = outer(l, (r/(1-outer((1+r),(-y*12), "^"))), "*")
  return(payment)
}
#給出不同年底下不同貸款金額跟利率的paymeny
bank(year,loan,rate)

## , , 1
## 
##        [,1]   [,2]    [,3]
## [1,] 110240 250719  350104
## [2,] 220481 501437  700209
## [3,] 330721 752156 1050313
## 
## , , 2
## 
##        [,1]   [,2]    [,3]
## [1,] 102914 250038  350002
## [2,] 205827 500077  700004
## [3,] 308741 750115 1050005
## 
## , , 3
## 
##        [,1]   [,2]    [,3]
## [1,] 100870 250002  350000
## [2,] 201741 500004  700000
## [3,] 302611 750006 1050000
## 
## , , 4
## 
##        [,1]   [,2]    [,3]
## [1,] 100264 250000  350000
## [2,] 200527 500000  700000
## [3,] 300791 750000 1050000
## 
## , , 5
## 
##        [,1]   [,2]    [,3]
## [1,] 100080 250000  350000
## [2,] 200160 500000  700000
## [3,] 300241 750000 1050000

3

#先讀進資料看一下資料形式
dt3 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)
knitr::kable(head(dt3))

id	female	race	ses	schtyp	prog	read	write	math	science	socst
70	male	white	low	public	general	57	52	41	47	57
121	female	white	middle	public	vocation	68	59	53	63	61
86	male	white	high	public	general	44	33	54	58	31
141	male	white	high	public	vocation	63	44	47	53	56
172	male	white	middle	public	academic	47	52	57	53	61
113	male	white	middle	public	academic	44	52	51	63	61

#3a 看兩兩之間的t.test結果
outer(7:11, 7:11, 
      Vectorize(
        function (i,j) t.test(dt3[,i], dt3[,j])$p.value
      ) 
)

##        [,1]   [,2]   [,3]   [,4]   [,5]
## [1,] 1.0000 0.5813 0.6728 0.7572 0.8677
## [2,] 0.5813 1.0000 0.8903 0.3776 0.7150
## [3,] 0.6728 0.8903 1.0000 0.4516 0.8118
## [4,] 0.7572 0.3776 0.4516 1.0000 0.6378
## [5,] 0.8677 0.7150 0.8118 0.6378 1.0000

#由結果知道read、write、math、science、socst倆倆之間沒有顯著差異

#3b
#先看這五個變項在race上有沒有差異
manova(cbind(read, write, math, science, socst) ~ race - 1, data = dt3) %>% summary(., test = "Wilks")

##            Df  Wilks approx F num Df den Df Pr(>F)
## race        4 0.0174     74.4     20    621 <2e-16
## Residuals 191

#有差異所以來畫圖看看差異在哪裡
dt3 %>%
  tidyr::gather(subject, score, 7:11) %>% 
  ggplot(., aes(race, score, color = subject, group = subject))+
  stat_summary(fun.data = mean_se,
               position = position_dodge(.5),
               na.rm = TRUE)+
  theme_bw()

#3c
#用mean_cl_boot來直接檢視Math score在不同SES底下的平均及其拔靴後的95%CI
ggplot(dt3, aes(ses, math))+
  stat_summary(fun.data = mean_cl_boot, na.rm = TRUE)+
  scale_x_discrete(limits = c("low", "middle", "high"))+
  scale_y_continuous(breaks = seq(40, 70, by = 2.5))+
  labs(x = "SES", y = "Average Math Score")+
  theme_bw()

4

#pi/4=0.7854
#把圓的方程式替換成四分之一圓的一元方程式
fcube <- function(x) {sqrt(1-x^2)}
N <- 10000; 
x <- runif(N, 0, 1)
y <- runif(N, 0, 1)
#sum的結果接近pi/4
sum(fcube(runif(N, 0, 1)) > runif(N, 0, 1))/N*(1*1)

## [1] 0.7858

curve(fcube, 0, 1, ylim = c(0, 1), ylab = "f(x)")
points(x, y, col = ifelse(fcube(x) > y, 3, 2), pch = '.')

Homework

1

在R裡面可以讓使用者自行輸入的函數:

trans <- function( ) {
  NT <- readline(prompt="Enter NT dollars: ")
  NT <- NT %>% as.numeric()
  USD<-round(NT*0.034145,1)
  cat("USD:",USD,"\n")
}

這是寫好數值示範的版本:

trans_inputdefault <- function(NT) {
  NT <- NT %>% as.numeric()
  USD<-round(NT*0.034145,1)
  cat("USD:",USD,"\n")
}
trans_inputdefault(10000)

## USD: 341.5

2

#先讀進資料
dt2<- ChickWeight
knitr::kable(head(dt2))

weight	Time	Chick	Diet
42	0	1	1
51	2	1	1
59	4	1	1
64	6	1	1
76	8	1	1
93	10	1	1

#製造一個空的v等等用來放coef
v<-c()

chick<-seq(1,50,1)
getslope <- function(x){
  tmp<-dt2 %>% filter(Chick==x) %>% lm(weight~Time,data=.) %>% coef()
  v[x]<-tmp[2]
}
#把slope的係數抓出來
slope<-lapply(chick,getslope) %>% unlist() 
#x軸是chick 1-50，y軸是weight~time的斜率
plot(chick,slope)

3

#先畫出常態分配的曲線
z <- seq(-4, 4, .05)
plot(z, dnorm(z), type = 'l',lty=1,col=2, ylab = "Density",
     main = "Probability density function")
#設定t分配的function
tcurve<-function(x) {
  lines(z,dt(z,x), type = 'l',lty=3,col=3)
}
#輸入不同的自由度之後畫圖
df<-c(2,4,8,16,32)
lapply(df,tcurve)

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL

4 Cushings example

最簡潔的寫法，但以我自己的程度應該想不到

# method 1
aggregate( . ~ Type, data = Cushings, mean)

##   Type Tetrahydrocortisone Pregnanetriol
## 1    a               2.967          2.44
## 2    b               8.180          1.12
## 3    c              19.720          5.50
## 4    u              14.017          1.20

比較複雜，要熟悉apply家族的函數，也要知道資料裡面的結構

# method 2
sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))

##                         a    b     c     u
## Tetrahydrocortisone 2.967 8.18 19.72 14.02
## Pregnanetriol       2.440 1.12  5.50  1.20

比method2更長一點，但是比較可以理解這段程式的邏輯，但也滿長的

# method 3
do.call("rbind", as.list(
  by(Cushings, list(Cushings$Type), function(x) {
    y <- subset(x, select =  -Type)
    apply(y, 2, mean)
  }
  )))

##   Tetrahydrocortisone Pregnanetriol
## a               2.967          2.44
## b               8.180          1.12
## c              19.720          5.50
## u              14.017          1.20

需要去命名平均後的變數名稱，但我覺得比method2與3容易被想到，但同樣的也要熟悉dplyr的功能

# method 4
Cushings %>%
  group_by(Type) %>%
  summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))

## # A tibble: 4 x 3
##   Type    t_m   p_m
##   <fct> <dbl> <dbl>
## 1 a      2.97  2.44
## 2 b      8.18  1.12
## 3 c     19.7   5.50
## 4 u     14.0   1.20

除了平均出各Type的數據外，甚至還提供了資料屬性，但寫法看起來很複雜

# method 5
Cushings %>%
  tidyr::nest(-Type) %>%
  mutate(avg = map(data, ~ apply(., 2, mean)), 
         res_1 = map_dbl(avg, "Tetrahydrocortisone"), 
         res_2 = map_dbl(avg, "Pregnanetriol"))

## # A tibble: 4 x 5
##   Type  data                  avg       res_1 res_2
##   <fct> <list>                <list>    <dbl> <dbl>
## 1 a     <data.frame [6 x 2]>  <dbl [2]>  2.97  2.44
## 2 b     <data.frame [10 x 2]> <dbl [2]>  8.18  1.12
## 3 c     <data.frame [5 x 2]>  <dbl [2]> 19.7   5.50
## 4 u     <data.frame [6 x 2]>  <dbl [2]> 14.0   1.20

5

#設亂數種子
set.seed(832751)
#設定function
hw5<-function(n,mu,sigme){
  sample<-rnorm(n,mu,sigma)
  plot(x=1:n,y=cumsum(sample)/1:n, type = "l",col=3)
  abline(h = mu, col = 2, lty = 2)
}
#設參數畫圖
n<-4000; mu<-10; sigma<-10
hw5(n,mu,sigme)

6

# read in data 
dt6 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/cstat.txt", header=T)

#modify the function
cstat <- function(data){
  n=length(data[,1])
  cden <- 1-(sum(diff(data[1:n,1])^2)/(2*(n-1)*var(data[1:n,1])))
  sc <- sqrt((n-2)/((n-1)*(n+1)))
  pval <- 1-pnorm(cden/sc)
  return(list(C = cden, Sc = sc, Zscore = cden/sc, p_value = pval))
}

#show the result
cstat(dt6)

## $C
## [1] 0.6451
## 
## $Sc
## [1] 0.1506
## 
## $Zscore
## [1] 4.283
## 
## $p_value
## [1] 9.239e-06

7

dt7 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)

#function in the lecture
ssq <- function(mu, sigma, y) { sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))

#set X,Y,Z
func <- function(data = NA){
  x <- seq(mean(data) - 5, mean(data) + 5, by = 0.1)
  y <- sd(data)
  z <- vssq(x, sd(data), data)
  rgl::plot3d(x, y, z,xlab = "mu", ylab = "sigma(control)", zlab = "ssq")}

func(dt7$math)

不知道為甚麼最後的圖片不會跑出來在Rmarkdown裡面…

Data management 0409

Yifang Yu

2018-04-15

In class exercise

1

2

3

4

Homework

1

2

3

4 Cushings example

5

6

7