R 的控制流程,也就是條件判斷與迴圈,讓電腦可以按照給定的邏輯反覆執行指令,一直到滿足條件為止,這對於運算以及擷取資料非常有幫助。例如我們隨機找到10位民眾,想依據民眾的年齡是否在18歲以上,判斷是否應該訪問他們,並且將結果顯示為一個資料框,可寫語法運算如下:
age<-c(34, 12, 19, 21, 22, 30, 16, 18, 17, 39)
# Pair every age with an interview decision.
#   x: numeric vector of ages.
# Returns a data.frame with columns Age and Interview, where Interview is
# "Yes" for ages of 18 or more and "No" otherwise.
f <- function(x) {
  decision <- ifelse(x >= 18, "Yes", "No")
  result <- data.frame(Age = x, Interview = decision, row.names = NULL)
  result
}
f(age)
## Age Interview
## 1 34 Yes
## 2 12 No
## 3 19 Yes
## 4 21 Yes
## 5 22 Yes
## 6 30 Yes
## 7 16 No
## 8 18 Yes
## 9 17 No
## 10 39 Yes
x=c(20, 50, 16, 18)
interview<-ifelse(x>=18, "Yes", "No")
print(interview)
## [1] "Yes" "Yes" "No" "Yes"
# Same classification as ifelse(), done with logical indexing.
# Size the result from x itself so the placeholder vector always has one
# slot per respondent instead of a hard-coded length.
vote <- rep(NA, length(x))
vote[x >= 18] <- "Yes"
vote[x < 18] <- "No"
vote
## [1] "Yes" "Yes" "No" "Yes"
x=c(20, 50, 16, 18, 15)
reg<-c('T', 'F', 'F', 'T', 'T')#T:設籍
y<-ifelse(x>=18 & reg=='T', 1, 0)
y
## [1] 1 0 0 1 0
S<-c("2018-01-01", "2018-02-01", "2018-03-01", "2018-04-01")
S <- as.Date(S, format='%Y-%m-%d')
day<-as.Date("2018/02/28", format='%Y/%m/%d')
new.S<-ifelse(difftime(day, S)>=0, "Earlier", "Later")
new.S
## [1] "Earlier" "Earlier" "Later" "Later"
ds<-as.POSIXct("2018-11-05 00:00:00",
format = "%Y-%m-%d %H:%M:%S", tz="Asia/Taipei")
teatime<-as.POSIXct(c("2018-07-01 13:50:00", "2018-12-01 14:10:00",
"2019-02-02 14:15:00"), format = "%Y-%m-%d %H:%M:%S", tz="Asia/Taipei")
# Convert a number of hours into the equivalent number of seconds.
#   u: numeric vector of hours.
# Returns u multiplied by 3600.
hrs <- function(u) {
  seconds_per_hour <- 3600
  u * seconds_per_hour
}
ifelse(difftime(teatime, ds)>=0, paste(teatime+hrs(1)), paste(teatime))
## [1] "2018-07-01 13:50:00" "2018-12-01 15:10:00" "2019-02-02 15:15:00"
上面的例子使用 as.POSIXct() 建立日期時間物件。另外,paste() 函數可貼上計算結果。
temperature<-c(31)
if (temperature>28){
cat ("Turn on air condition")
}else {
cat ("Turn off air condition")
}
## Turn on air condition
speed<-50
if (speed>=70){
print(speed)
}else {
print(speed*1.6)
}
## [1] 80
# Show the rows of mtcars whose mpg is at least 50; when no row qualifies,
# print the string 'NA' instead.
mpg <- mtcars$mpg
if (any(mpg >= 50)) {
  mtcars[which(mpg >= 50), ]
} else {
  print('NA')
}
## [1] "NA"
A<-c(-1, 1.5); B<-c(2); C<-c('OK')
any (A>0)
## [1] TRUE
any (is.numeric(A) & is.numeric(B) & is.numeric(C))
## [1] FALSE
any (is.numeric(A) | is.numeric(B) | is.numeric(C))
## [1] TRUE
☛請練習如果「所有」車輛超過時速110,警察就會開罰單,顯示最快的車速,否則都不會開單。車速是130, 115, 120。
movie<-c(176)
if(movie>=180){
cat('Very long')
}else if(movie>=165){
cat('Long')
}else{
cat('Short')
}
## Long
注意:右大括號 } 應該與 else 或是 else if 連在一起(寫在同一行)。
booking<-as.Date(Sys.Date(), format='%Y-%m-%d')
booking
## [1] "2022-03-18"
checkin<-as.Date(c("2020-05-20"), format='%Y-%m-%d')
if (difftime(checkin, booking)>90){
print (3000*0.85)
}else if (difftime(checkin, booking)>=60){
print (3000*0.9)
}else if (difftime(checkin, booking)>=7){
print (3000)
}else{
print (3000*1.2)
}
## [1] 3600
booking <-booking + 14
if (difftime(checkin, booking)>90){
print (3000*0.85)
}else if (difftime(checkin, booking)>=60){
print (3000*0.9)
}else if (difftime(checkin, booking)>=30){
print (3000)
}else{
print (3000*1.2)
}
## [1] 3600
for (U in 1:5){
cat("All work and no play","\n")
}
## All work and no play
## All work and no play
## All work and no play
## All work and no play
## All work and no play
for (i in 1:5){
cat("All work and no play", paste(i), "times \n")
}
## All work and no play 1 times
## All work and no play 2 times
## All work and no play 3 times
## All work and no play 4 times
## All work and no play 5 times
在這個迴圈中,我們用paste()
這個函數貼上\(i\)這個變數的值。
又例如從1加到10:
# Add the integers 1 through 10 with an explicit loop.
# The accumulator is called `total` so it does not reuse the name of the
# base function sum().
total <- 0
for (i in 1:10) {
  total <- total + i
}
print(total)
## [1] 55
# Accumulate 0 + 1 + ... + 9 step by step, storing each partial sum in y and
# printing the addition that produces the next partial sum.
total <- 0        # not named `sum`, so the base function sum() stays unambiguous
x <- 0:10
y <- numeric(10)  # pre-allocated rather than grown inside the loop
for (i in 1:10) {
  total <- total + x[i]
  y[i] <- total
  cat(y[i], '+', x[i + 1], '=', paste(y[i] + x[i + 1]), "\n")
}
## 0 + 1 = 1
## 1 + 2 = 3
## 3 + 3 = 6
## 6 + 4 = 10
## 10 + 5 = 15
## 15 + 6 = 21
## 21 + 7 = 28
## 28 + 8 = 36
## 36 + 9 = 45
## 45 + 10 = 55
print(y)
## [1] 0 1 3 6 10 15 21 28 36 45
# Label each price: 100 or more gets the price with 10% off, 90 up to (but
# not including) 100 gets 'ok', anything lower gets 'sale'.
price <- c(76, 98, 100, 120, 65)
# Size the placeholder column and the loop from price, so adding or removing
# prices does not require touching hard-coded lengths.
tmp <- data.frame(price, fix = rep(NA, length(price)))
for (i in seq_along(price)) {
  if (price[i] >= 100) {
    tmp[i, 2] <- paste(price[i] * 0.9)
  } else if (price[i] >= 90) {
    tmp[i, 2] <- 'ok'
  } else {
    tmp[i, 2] <- 'sale'
  }
}
tmp
## price fix
## 1 76 sale
## 2 98 ok
## 3 100 90
## 4 120 108
## 5 65 sale
# Simulate 1000 throws of three dice and record the total of each throw.
set.seed(02138)  # numeric literal; the leading zero has no effect
dice <- 1:6
x <- integer(1000)  # pre-allocate one slot per throw instead of growing x
for (i in seq_along(x)) {
  # Three separate draws, in the same order as before, keep the random
  # stream (and therefore the simulated totals) unchanged.
  x[i] <- sum(sample(dice, 1), sample(dice, 1), sample(dice, 1))
}
# graphic
df<-data.frame(Dice=x)
library(ggplot2)
g <- ggplot(aes(Dice), data=df) +
geom_histogram(binwidth = 0.8,
fill='lightgreen', aes(y=..density..),
position="identity")
g
Figure 3.1: 三顆骰子點數直方圖
可以看出點數的總和近似常態分佈,集中在10點附近。
這個迴圈運用到「索引」的概念,紀錄每一次抽樣並且加總的結果,但是不需要顯示在螢幕上,而是成為一個向量,作為後續統計的資料。
# Draw three cards (values 1-13) using x as the random seed, then print
# either all three cards or only the first two, followed by their sum.
#   x: value passed to set.seed().
# Called for its printed output.
card <- function(x) {
  set.seed(x)
  # Keep the drawn cards in their own vector instead of overwriting the
  # seed argument.
  cards <- numeric(3)
  for (i in 1:3) {
    cards[i] <- sample(1:13, 1)
  }
  # `if` needs a single TRUE/FALSE, so use the scalar operators && and ||.
  # The parentheses spell out the grouping the original expression already
  # had, because & binds tighter than |.
  # NOTE(review): if the intended rule is
  #   sum < 17 && (first < 10 || second < 10)
  # the grouping must change, and card(100) would then show two cards. Confirm.
  keep_third <- (cards[1] + cards[2] < 17 && cards[1] < 10) || cards[2] < 10
  if (keep_third) {
    print(cards[1:3])
    cat(sum(cards[1:3]), "is sum of three cards \n")
  } else {
    print(cards[1:2])
    cat(sum(cards[1:2]), "is sum of the first 2 cards \n")
  }
}
card(100); card(5003); card(02138)
## [1] 10 7 6
## 23 is sum of three cards
## [1] 3 2 6
## 11 is sum of three cards
## [1] 11 5 1
## 17 is sum of three cards
today<-as.Date(Sys.Date(), format='%Y-%m-%d')
# Print the room rate for each check-in date, based on how many days ahead
# of `today` the stay is.
#   checkin: Date vector of check-in dates.
#   today:   booking date; defaults to the current date.
# Pricing on a base price of 3000: more than 90 days ahead 15% off, at least
# 60 days 10% off, at least 30 days the base price, otherwise a 20% surcharge.
# Called for its printed output; returns NULL invisibly.
hotel <- function(checkin, today = Sys.Date()) {
  price <- 3000
  # Force the unit to days. With the default units = "auto", difftime()
  # switches to seconds as soon as one gap is 0 days, and the thresholds
  # below would then be compared against seconds.
  lead_days <- as.numeric(difftime(checkin, today, units = "days"))
  # seq_along() makes an empty checkin vector a no-op.
  for (i in seq_along(checkin)) {
    print(checkin[i])
    if (lead_days[i] > 90) {
      cat(round(lead_days[i] / 30, 1), "months:", price * 0.85, "\n")
    } else if (lead_days[i] >= 60) {
      cat(round(lead_days[i] / 30, 1), "months:", price * 0.9, "\n")
    } else if (lead_days[i] >= 30) {
      cat(round(lead_days[i] / 30, 1), "months:", price, "\n")
    } else {
      cat(lead_days[i], "days:", price * 1.2, "\n")
    }
  }
  invisible(NULL)
}
checkin<-as.Date(c("2020-12-31", "2020-04-20","2020-05-20",
"2020-06-01"), format='%Y-%m-%d')
hotel(checkin)
## [1] "2020-12-31"
## -442 days: 3600
## [1] "2020-04-20"
## -697 days: 3600
## [1] "2020-05-20"
## -667 days: 3600
## [1] "2020-06-01"
## -655 days: 3600
# Multiplication table for 11 through 20, filled cell by cell with a nested
# loop: the cell in row r, column k holds (r + 10) * (k + 10).
multiplication <- matrix(nrow = 10, ncol = 10)
for (r in seq_len(nrow(multiplication))) {
  for (k in seq_len(ncol(multiplication))) {
    multiplication[r, k] <- (r + 10) * (k + 10)
  }
}
table_labels <- 11:20
rownames(multiplication) <- table_labels
colnames(multiplication) <- table_labels
multiplication
## 11 12 13 14 15 16 17 18 19 20
## 11 121 132 143 154 165 176 187 198 209 220
## 12 132 144 156 168 180 192 204 216 228 240
## 13 143 156 169 182 195 208 221 234 247 260
## 14 154 168 182 196 210 224 238 252 266 280
## 15 165 180 195 210 225 240 255 270 285 300
## 16 176 192 208 224 240 256 272 288 304 320
## 17 187 204 221 238 255 272 289 306 323 340
## 18 198 216 234 252 270 288 306 324 342 360
## 19 209 228 247 266 285 304 323 342 361 380
## 20 220 240 260 280 300 320 340 360 380 400
# Compare sample means across sample sizes and population sizes.
# sampleresult[i, j] is the mean of R[i] values drawn with replacement from
# the j-th simulated standard-normal population in L.
set.seed(02138)
sampleresult <- matrix(nrow=6, ncol=5)
# Sample sizes, one per row of sampleresult.
R<-c(1, 10, 100, 200, 500, 1000)
# Five populations of 1e4 to 1e8 draws from N(0, 1), one per column.
# NOTE: the largest holds 1e8 doubles (roughly 800 MB), so building L is
# slow and memory-hungry.
L<-list(rnorm(1e+04,0,1), rnorm(1e+05,0,1),
rnorm(1e+06,0,1), rnorm(1e+07,0,1), rnorm(1e+08,0,1))
for (i in 1:length(R)){
for (j in 1:5){
sampleresult[i,j] <- mean(sample (L[[j]], size=R[i], replace=T))
}
}
sampleresult
# Replication: for each sample size R[i] and population L[[j]], repeat the
# sampling S[s] times and store the average of the resulting sample means in
# sampleresult2[i, j, s].
# NOTE(review): assumes S holds the number of replications. Confirm; without
# this use, S would only contribute its length.
S <- c(10, 100, 1000)
sampleresult2 <- array(dim = c(length(R), length(L), length(S)))
for (i in seq_along(R)) {
  for (j in seq_along(L)) {
    for (s in seq_along(S)) {
      replicated_means <- replicate(
        S[s],
        mean(sample(L[[j]], size = R[i], replace = TRUE))
      )
      sampleresult2[i, j, s] <- mean(replicated_means)
    }
  }
}
sampleresult2
for 迴圈可以幫助我們清理資料,例如讀取一筆23個縣市的統計資料:
cs<-here::here('data','CS3171D1A.csv')
stat.dat<-read.csv(cs,
header=TRUE,sep=";",dec=".",
fileEncoding="BIG5")
head(stat.dat)
## X 臺北縣 宜蘭縣 桃園縣 新竹縣 苗栗縣 臺中縣 彰化縣
## 1 老年人口比率(65歲以上) NA NA NA NA NA NA NA
## 2 2000 6.37 10.20 7.46 9.69 10.98 7.16 9.42
## 3 2001 6.44 10.49 7.49 9.91 11.21 7.32 9.73
## 4 2002 6.55 10.82 7.51 10.17 11.57 7.50 10.03
## 5 2003 6.67 11.17 7.56 10.39 11.87 7.68 10.31
## 6 2004 6.86 11.54 7.62 10.58 12.19 7.90 10.65
## 南投縣 雲林縣 嘉義縣 臺南縣 高雄縣 屏東縣 臺東縣 花蓮縣 澎湖縣 基隆市 新竹市
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 10.60 11.61 12.41 10.75 8.35 10.00 11.27 10.73 14.40 8.81 8.46
## 3 10.90 11.99 12.75 11.04 8.52 10.25 11.40 10.83 14.29 9.06 8.50
## 4 11.23 12.41 13.14 11.31 8.75 10.54 11.55 11.00 14.42 9.28 8.59
## 5 11.56 12.82 13.58 11.56 8.95 10.84 11.76 11.19 14.58 9.47 8.69
## 6 11.96 13.26 13.98 11.82 9.16 11.13 12.01 11.41 14.78 9.71 8.81
## 臺中市 嘉義市 臺南市 臺北市 高雄市
## 1 NA NA NA NA NA
## 2 6.49 8.67 7.69 9.67 7.16
## 3 6.60 8.85 7.85 9.94 7.41
## 4 6.79 9.15 8.06 10.25 7.63
## 5 6.94 9.46 8.24 10.58 7.93
## 6 7.15 9.70 8.46 10.92 8.24
這筆資料的最左邊一欄有一個變數名稱,但是不是位在第一列,而是在第二列,我們如何正確地讀取每一列的資料?
首先創造一個有23個元素的向量
對某一個變數進行23次的迴圈
第一個元素應該來自於資料的第二列、第二行
# Collect the values for the year 2000 (row 2 of stat.dat) into a vector
# with one element per city column.
old.2000<-rep(NA, 23) # elderly-population ratio for 2000, one slot per city
for (u in 1:23){
# Column 1 holds the row label, so the u-th city is in column u + 1.
old.2000[u]<-stat.dat[2,u+1]
}
old.2000
## [1] 6.37 10.20 7.46 9.69 10.98 7.16 9.42 10.60 11.61 12.41 10.75 8.35
## [13] 10.00 11.27 10.73 14.40 8.81 8.46 6.49 8.67 7.69 9.67 7.16
# Collect the values for the year 2001 (row 3 of stat.dat), one per city.
# Pre-allocate all 23 slots, as done for old.2000, rather than growing the
# vector on every iteration.
old.2001 <- rep(NA, 23)
for (u in 1:23) {
  # Column 1 holds the row label, so the u-th city is in column u + 1.
  old.2001[u] <- stat.dat[3, u + 1]
}
city <- colnames(stat.dat)
data.frame(city=city[-1],old.2000, old.2001)
## city old.2000 old.2001
## 1 臺北縣 6.37 6.44
## 2 宜蘭縣 10.20 10.49
## 3 桃園縣 7.46 7.49
## 4 新竹縣 9.69 9.91
## 5 苗栗縣 10.98 11.21
## 6 臺中縣 7.16 7.32
## 7 彰化縣 9.42 9.73
## 8 南投縣 10.60 10.90
## 9 雲林縣 11.61 11.99
## 10 嘉義縣 12.41 12.75
## 11 臺南縣 10.75 11.04
## 12 高雄縣 8.35 8.52
## 13 屏東縣 10.00 10.25
## 14 臺東縣 11.27 11.40
## 15 花蓮縣 10.73 10.83
## 16 澎湖縣 14.40 14.29
## 17 基隆市 8.81 9.06
## 18 新竹市 8.46 8.50
## 19 臺中市 6.49 6.60
## 20 嘉義市 8.67 8.85
## 21 臺南市 7.69 7.85
## 22 臺北市 9.67 9.94
## 23 高雄市 7.16 7.41
power<-0
while (power <= 12) {
if (2^power<1000){
cat(2^power, "\n")
}else{
cat("Stop")
}
power <- power +1
}
## 1
## 2
## 4
## 8
## 16
## 32
## 64
## 128
## 256
## 512
## StopStopStop
for 迴圈可以輸出\(2^{0}\)到\(2^{12}\),但是無法像 while 中斷迴圈:
for (a in -1:11){
a <- a +1
print(2^a)
}
## [1] 1
## [1] 2
## [1] 4
## [1] 8
## [1] 16
## [1] 32
## [1] 64
## [1] 128
## [1] 256
## [1] 512
## [1] 1024
## [1] 2048
## [1] 4096
power<-0
while (power <= 12) {
if (2^power<1000){
cat(2^power, "\n")
}else{
cat("Stop")
break
}
power <- power +1
}
## 1
## 2
## 4
## 8
## 16
## 32
## 64
## 128
## 256
## 512
## Stop
today<-as.Date(Sys.Date(), format='%Y-%m-%d')
# Print the room rate for each check-in date, and stop at the first date
# that is fewer than 30 days ahead of `today`.
#   checkin: Date vector of check-in dates.
#   today:   booking date; defaults to the current date.
# Pricing on a base price of 3000: more than 90 days ahead 15% off, at least
# 60 days 10% off, at least 30 days the base price. A shorter lead time
# prints "Over the budget" and ends the loop.
# Called for its printed output; returns NULL invisibly.
hotel <- function(checkin, today = Sys.Date()) {
  price <- 3000
  # Force the unit to days. With the default units = "auto", difftime()
  # switches to seconds as soon as one gap is 0 days, and the thresholds
  # below would then be compared against seconds.
  lead_days <- as.numeric(difftime(checkin, today, units = "days"))
  # seq_along() makes an empty checkin vector a no-op.
  for (i in seq_along(checkin)) {
    print(checkin[i])
    if (lead_days[i] > 90) {
      cat(round(lead_days[i] / 30, 1), "months:", price * 0.85, "\n")
    } else if (lead_days[i] >= 60) {
      cat(round(lead_days[i] / 30, 1), "months:", price * 0.9, "\n")
    } else if (lead_days[i] >= 30) {
      cat(round(lead_days[i] / 30, 1), "months:", price, "\n")
    } else {
      cat("Over the budget")
      break
    }
  }
  invisible(NULL)
}
checkin<-as.Date(c("2018-12-31", "2018-04-20","2018-05-20",
"2018-06-01","2018-06-30"), format='%Y-%m-%d')
checkin<-c(checkin, today+7)
hotel(checkin)
## [1] "2018-12-31"
## Over the budget
for (i in 1:10){
if (i<= 4){
next
}
#a <- a +1
cat(i, "squared is ", i^2, "\n")
}
## 5 squared is 25
## 6 squared is 36
## 7 squared is 49
## 8 squared is 64
## 9 squared is 81
## 10 squared is 100
# Compute n! with a loop, reporting each step through message().
#   n: non-negative whole number.
# Returns the factorial of n; fac(0) is 1.
fac <- function(n) {
  s <- 1
  # seq_len(n) is empty for n = 0, whereas 1:0 would iterate over c(1, 0)
  # and multiply the result by 0.
  for (i in seq_len(n)) {
    s <- s * i
    message(paste(i, s, sep = "\t"))
  }
  return(s)
}
fac(5)
## [1] 120
# Compute x! recursively.
#   x: a single non-negative whole number.
# Returns the factorial of x; stops with an error for any other input,
# which would otherwise never reach the base case x == 0.
recursive.factorial <- function(x) {
  if (!is.numeric(x) || length(x) != 1 || !is.finite(x) ||
      x < 0 || x != floor(x)) {
    stop("x must be a single non-negative whole number", call. = FALSE)
  }
  if (x == 0) {
    return(1)
  }
  x * recursive.factorial(x - 1)
}
recursive.factorial(1)
## [1] 1
recursive.factorial(5)
## [1] 120
\[0,1,1,2,3,5,8,13,21, \ldots\]。用數學式可表示為 \[ x_{n}=x_{n-1}+x_{n-2}, \quad x_{0}=0,\ x_{1}=1 \]
# Return the n-th Fibonacci number (counting from 0) by direct recursion.
#   n: index into the sequence; values of 1 or less are returned unchanged.
recurse_fibonacci <- function(n) {
  if (n > 1) {
    previous <- recurse_fibonacci(n - 1)
    before_previous <- recurse_fibonacci(n - 2)
    return(previous + before_previous)
  }
  n
}
recurse_fibonacci(9)
## [1] 34
recurse_fibonacci(10)
## [1] 55
recurse_fibonacci(11)
## [1] 89
weight<-c(31.5, 27.8, 39.2, 34.3, 28.8, 29.1, 31.1,
31.6, 29.1, 29.8, 27.7, 28.5, 27.9, 30.3,
29.8, 30.2, 30.4, 28.6, 29.9, 29.9)
最後更新日期 03/18/2022