In class exercise
1
lapply(lapply(search(), ls), length)
#把search裡面有的object分類後計算每個有幾個object在裡面2
#設定年、貸款金額、利率
year <- seq(10,30,5)
loan <- c(5000000, 10000000, 15000000)
rate <- c(0.02, 0.05, 0.07)
#用outer來計算array之間的相乘
bank <- function(y,l,r) {
payment = outer(l, (r/(1-outer((1+r),(-y*12), "^"))), "*")
return(payment)
}
#給出不同年底下不同貸款金額跟利率的paymeny
bank(year,loan,rate)## , , 1
##
## [,1] [,2] [,3]
## [1,] 110240 250719 350104
## [2,] 220481 501437 700209
## [3,] 330721 752156 1050313
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 102914 250038 350002
## [2,] 205827 500077 700004
## [3,] 308741 750115 1050005
##
## , , 3
##
## [,1] [,2] [,3]
## [1,] 100870 250002 350000
## [2,] 201741 500004 700000
## [3,] 302611 750006 1050000
##
## , , 4
##
## [,1] [,2] [,3]
## [1,] 100264 250000 350000
## [2,] 200527 500000 700000
## [3,] 300791 750000 1050000
##
## , , 5
##
## [,1] [,2] [,3]
## [1,] 100080 250000 350000
## [2,] 200160 500000 700000
## [3,] 300241 750000 1050000
3
#先讀進資料看一下資料形式
dt3 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)
knitr::kable(head(dt3))| id | female | race | ses | schtyp | prog | read | write | math | science | socst |
|---|---|---|---|---|---|---|---|---|---|---|
| 70 | male | white | low | public | general | 57 | 52 | 41 | 47 | 57 |
| 121 | female | white | middle | public | vocation | 68 | 59 | 53 | 63 | 61 |
| 86 | male | white | high | public | general | 44 | 33 | 54 | 58 | 31 |
| 141 | male | white | high | public | vocation | 63 | 44 | 47 | 53 | 56 |
| 172 | male | white | middle | public | academic | 47 | 52 | 57 | 53 | 61 |
| 113 | male | white | middle | public | academic | 44 | 52 | 51 | 63 | 61 |
#3a 看兩兩之間的t.test結果
outer(7:11, 7:11,
Vectorize(
function (i,j) t.test(dt3[,i], dt3[,j])$p.value
)
)## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000 0.5813 0.6728 0.7572 0.8677
## [2,] 0.5813 1.0000 0.8903 0.3776 0.7150
## [3,] 0.6728 0.8903 1.0000 0.4516 0.8118
## [4,] 0.7572 0.3776 0.4516 1.0000 0.6378
## [5,] 0.8677 0.7150 0.8118 0.6378 1.0000
#由結果知道read、write、math、science、socst倆倆之間沒有顯著差異
#3b
#先看這五個變項在race上有沒有差異
manova(cbind(read, write, math, science, socst) ~ race - 1, data = dt3) %>% summary(., test = "Wilks")## Df Wilks approx F num Df den Df Pr(>F)
## race 4 0.0174 74.4 20 621 <2e-16
## Residuals 191
#有差異所以來畫圖看看差異在哪裡
dt3 %>%
tidyr::gather(subject, score, 7:11) %>%
ggplot(., aes(race, score, color = subject, group = subject))+
stat_summary(fun.data = mean_se,
position = position_dodge(.5),
na.rm = TRUE)+
theme_bw()#3c
#用mean_cl_boot來直接檢視Math score在不同SES底下的平均及其拔靴後的95%CI
ggplot(dt3, aes(ses, math))+
stat_summary(fun.data = mean_cl_boot, na.rm = TRUE)+
scale_x_discrete(limits = c("low", "middle", "high"))+
scale_y_continuous(breaks = seq(40, 70, by = 2.5))+
labs(x = "SES", y = "Average Math Score")+
theme_bw()4
#pi/4=0.7854
#把圓的方程式替換成四分之一圓的一元方程式
fcube <- function(x) {sqrt(1-x^2)}
N <- 10000;
x <- runif(N, 0, 1)
y <- runif(N, 0, 1)
#sum的結果接近pi/4
sum(fcube(runif(N, 0, 1)) > runif(N, 0, 1))/N*(1*1)## [1] 0.7858
curve(fcube, 0, 1, ylim = c(0, 1), ylab = "f(x)")
points(x, y, col = ifelse(fcube(x) > y, 3, 2), pch = '.')Homework
1
在R裡面可以讓使用者自行輸入的函數:
trans <- function( ) {
NT <- readline(prompt="Enter NT dollars: ")
NT <- NT %>% as.numeric()
USD<-round(NT*0.034145,1)
cat("USD:",USD,"\n")
}這是寫好數值示範的版本:
trans_inputdefault <- function(NT) {
NT <- NT %>% as.numeric()
USD<-round(NT*0.034145,1)
cat("USD:",USD,"\n")
}
trans_inputdefault(10000)## USD: 341.5
2
#先讀進資料
dt2<- ChickWeight
knitr::kable(head(dt2))| weight | Time | Chick | Diet |
|---|---|---|---|
| 42 | 0 | 1 | 1 |
| 51 | 2 | 1 | 1 |
| 59 | 4 | 1 | 1 |
| 64 | 6 | 1 | 1 |
| 76 | 8 | 1 | 1 |
| 93 | 10 | 1 | 1 |
#製造一個空的v等等用來放coef
v<-c()
chick<-seq(1,50,1)
getslope <- function(x){
tmp<-dt2 %>% filter(Chick==x) %>% lm(weight~Time,data=.) %>% coef()
v[x]<-tmp[2]
}
#把slope的係數抓出來
slope<-lapply(chick,getslope) %>% unlist()
#x軸是chick 1-50,y軸是weight~time的斜率
plot(chick,slope)3
#先畫出常態分配的曲線
z <- seq(-4, 4, .05)
plot(z, dnorm(z), type = 'l',lty=1,col=2, ylab = "Density",
main = "Probability density function")
#設定t分配的function
tcurve<-function(x) {
lines(z,dt(z,x), type = 'l',lty=3,col=3)
}
#輸入不同的自由度之後畫圖
df<-c(2,4,8,16,32)
lapply(df,tcurve)## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
4 Cushings example
最簡潔的寫法,但以我自己的程度應該想不到
# method 1
aggregate( . ~ Type, data = Cushings, mean)## Type Tetrahydrocortisone Pregnanetriol
## 1 a 2.967 2.44
## 2 b 8.180 1.12
## 3 c 19.720 5.50
## 4 u 14.017 1.20
比較複雜,要熟悉apply家族的函數,也要知道資料裡面的結構
# method 2
sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))## a b c u
## Tetrahydrocortisone 2.967 8.18 19.72 14.02
## Pregnanetriol 2.440 1.12 5.50 1.20
比method2更長一點,但是比較可以理解這段程式的邏輯,但也滿長的
# method 3
do.call("rbind", as.list(
by(Cushings, list(Cushings$Type), function(x) {
y <- subset(x, select = -Type)
apply(y, 2, mean)
}
)))## Tetrahydrocortisone Pregnanetriol
## a 2.967 2.44
## b 8.180 1.12
## c 19.720 5.50
## u 14.017 1.20
需要去命名平均後的變數名稱,但我覺得比method2與3容易被想到,但同樣的也要熟悉dplyr的功能
# method 4
Cushings %>%
group_by(Type) %>%
summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))## # A tibble: 4 x 3
## Type t_m p_m
## <fct> <dbl> <dbl>
## 1 a 2.97 2.44
## 2 b 8.18 1.12
## 3 c 19.7 5.50
## 4 u 14.0 1.20
除了平均出各Type的數據外,甚至還提供了資料屬性,但寫法看起來很複雜
# method 5
Cushings %>%
tidyr::nest(-Type) %>%
mutate(avg = map(data, ~ apply(., 2, mean)),
res_1 = map_dbl(avg, "Tetrahydrocortisone"),
res_2 = map_dbl(avg, "Pregnanetriol")) ## # A tibble: 4 x 5
## Type data avg res_1 res_2
## <fct> <list> <list> <dbl> <dbl>
## 1 a <data.frame [6 x 2]> <dbl [2]> 2.97 2.44
## 2 b <data.frame [10 x 2]> <dbl [2]> 8.18 1.12
## 3 c <data.frame [5 x 2]> <dbl [2]> 19.7 5.50
## 4 u <data.frame [6 x 2]> <dbl [2]> 14.0 1.20
5
#設亂數種子
set.seed(832751)
#設定function
hw5<-function(n,mu,sigme){
sample<-rnorm(n,mu,sigma)
plot(x=1:n,y=cumsum(sample)/1:n, type = "l",col=3)
abline(h = mu, col = 2, lty = 2)
}
#設參數畫圖
n<-4000; mu<-10; sigma<-10
hw5(n,mu,sigme)6
# read in data
dt6 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/cstat.txt", header=T)
#modify the function
cstat <- function(data){
n=length(data[,1])
cden <- 1-(sum(diff(data[1:n,1])^2)/(2*(n-1)*var(data[1:n,1])))
sc <- sqrt((n-2)/((n-1)*(n+1)))
pval <- 1-pnorm(cden/sc)
return(list(C = cden, Sc = sc, Zscore = cden/sc, p_value = pval))
}
#show the result
cstat(dt6)## $C
## [1] 0.6451
##
## $Sc
## [1] 0.1506
##
## $Zscore
## [1] 4.283
##
## $p_value
## [1] 9.239e-06
7
dt7 <- read.table("C:/Users/user/Dropbox/1062-Data_manage/0409/inclass/hs0.txt", header=T)
#function in the lecture
ssq <- function(mu, sigma, y) { sum(((y - mu) / sigma)^2)}
vssq <- Vectorize(ssq, c("mu", "sigma"))
#set X,Y,Z
func <- function(data = NA){
x <- seq(mean(data) - 5, mean(data) + 5, by = 0.1)
y <- sd(data)
z <- vssq(x, sd(data), data)
rgl::plot3d(x, y, z,xlab = "mu", ylab = "sigma(control)", zlab = "ssq")}
func(dt7$math)不知道為甚麼最後的圖片不會跑出來在Rmarkdown裡面…