R
can repeat and terminate commands according to conditions that we specify. This is very useful for data generation and data cleaning. For example, suppose we randomly draw ten people and want to decide whether they are eligible for our survey based on their ages. We can write the code as follows:
age <- c(34, 12, 19, 21, 22, 30, 16, 18, 17, 39)

# Flag survey eligibility by age.
#
# x: numeric vector of ages.
# Returns a data frame with one row per element of `x`: a running `id`,
# the age (`Age`), and `Interview` ("Yes" when the age is at least 18,
# otherwise "No").
f <- function(x) {
  interview <- ifelse(x >= 18, "Yes", "No")
  # Derive the ids from the input so vectors of any length work; a fixed
  # 1:10 would recycle or fail for any other length.
  data.frame(
    id = seq_along(x), Age = x, Interview = interview,
    row.names = NULL
  )
}
f(age)
## id Age Interview
## 1 1 34 Yes
## 2 2 12 No
## 3 3 19 Yes
## 4 4 21 Yes
## 5 5 22 Yes
## 6 6 30 Yes
## 7 7 16 No
## 8 8 18 Yes
## 9 9 17 No
## 10 10 39 Yes
#knitr::kable(f(age))
# First full state name.
head(state.name, 1)
## [1] "Alabama"
# First four state abbreviations.
head(state.abb, 4)
## [1] "AL" "AK" "AZ" "AR"
head(sleep, n = 6)
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
## 4 -1.2 1 4
## 5 -0.1 1 5
## 6 3.4 1 6
# Last value of the `extra` column.
tail(sleep$extra, 1)
## [1] 3.4
head(state.abb, 10)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA"
# Leading letter of every abbreviation.
state.abb.abb <- substring(state.abb, 1, 1)
# Abbreviations that start with "B" (there are none) and with "C".
state.abb[state.abb.abb == "B"]
## character(0)
state.abb[state.abb.abb == "C"]
## [1] "CA" "CO" "CT"
# A list may mix elements of different types and lengths.
ListA <- list(
  height = 90,
  width = 120,
  string = head(state.abb, 2),
  data = state.area
)
ListA
## $height
## [1] 90
##
## $width
## [1] 120
##
## $string
## [1] "AL" "AK"
##
## $data
## [1] 51609 589757 113909 53104 158693 104247 5009 2057 58560 58876
## [11] 6450 83557 56400 36291 56290 82264 40395 48523 33215 10577
## [21] 8257 58216 84068 47716 69686 147138 77227 110540 9304 7836
## [31] 121666 49576 52586 70665 41222 69919 96981 45333 1214 31055
## [41] 77047 42244 267339 84916 9609 40815 68192 24181 56154 97914
# Single brackets keep the list wrapper (a one-element list is returned).
ListA[1]
## $height
## [1] 90
ListA["string"]
## $string
## [1] "AL" "AK"
# Double brackets extract the element itself.
ListA[["string"]]
## [1] "AL" "AK"
ListA[4]
## $data
## [1] 51609 589757 113909 53104 158693 104247 5009 2057 58560 58876
## [11] 6450 83557 56400 36291 56290 82264 40395 48523 33215 10577
## [21] 8257 58216 84068 47716 69686 147138 77227 110540 9304 7836
## [31] 121666 49576 52586 70665 41222 69919 96981 45333 1214 31055
## [41] 77047 42244 267339 84916 9609 40815 68192 24181 56154 97914
# 3 x 3 matrix holding 1..9, filled column by column.
m1 <- matrix(1:9, nrow = 3, ncol = 3)
m1
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
print(m1[2, 2]) # 1: one cell by row and column
## [1] 5
print(m1[1:2]) # 2: a single index walks the matrix column-wise
## [1] 1 2
print(m1[1:2, 1:2]) # 3: top-left 2 x 2 corner
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
print(m1[-2, -2]) # 4: rows and columns 1 and 3
## [,1] [,2]
## [1,] 1 7
## [2,] 3 9
print(m1[, 1]) # 5: whole first column
## [1] 1 2 3
# 6: storing a string turns every cell into character.
m1[3, 3] <- "Hello"
m1
## [,1] [,2] [,3]
## [1,] "1" "4" "7"
## [2,] "2" "5" "8"
## [3,] "3" "6" "Hello"
# NOTE: the array keeps its name `T`, which masks R's shorthand for TRUE
# from here on, so logical arguments below are written out in full.
T <- array(1:20, dim = c(4, 5))
T
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 5 9 13 17
## [2,] 2 6 10 14 18
## [3,] 3 7 11 15 19
## [4,] 4 8 12 16 20
# arr.ind = TRUE returns (row, col) pairs instead of linear positions.
# Writing `arr.ind = T` here would pass the 4 x 5 array itself as the flag.
ok <- which(T >= 17, arr.ind = TRUE)
ok
## row col
## [1,] 1 5
## [2,] 2 5
## [3,] 3 5
## [4,] 4 5
# A two-column index matrix selects one cell per row of `ok`.
T[ok]
## [1] 17 18 19 20
data(sleep)
colnames(sleep)
## [1] "extra" "group" "ID"
# First three rows, all columns.
head(sleep, 3)
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
# One column, returned as a plain vector.
sleep[["extra"]]
## [1] 0.7 -1.6 -0.2 -1.2 -0.1 3.4 3.7 0.8 0.0 2.0 1.9 0.8 1.1 0.1
## [15] -0.1 4.4 5.5 1.6 4.6 3.4
head(sleep, n = 6)
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
## 4 -1.2 1 4
## 5 -0.1 1 5
## 6 3.4 1 6
# select by ID
sleep[sleep$ID %in% 1:3, ]
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
## 11 1.9 2 1
## 12 0.8 2 2
## 13 1.1 2 3
# two conditions
sleep[sleep$group %in% 1 & sleep$ID %in% 1:3, ]
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
A <- 4000:6000
B <- 3000:5000
C <- 3000:10000
# How many elements of A also occur in B?
table(A %in% B)
##
## FALSE TRUE
## 1000 1001
# `%in%` chains are evaluated left to right, so this asks whether the
# TRUE/FALSE values produced by A %in% B occur in C, which none of them do.
table((A %in% B) %in% C)
##
## FALSE
## 2001
# Row positions of group-1 subjects whose `extra` value exceeds 0.5.
cond <- which(sleep$group == 1 & sleep$extra > 0.5)
sleep[cond, ]
## extra group ID
## 1 0.7 1 1
## 6 3.4 1 6
## 7 3.7 1 7
## 8 0.8 1 8
## 10 2.0 1 10
# Four-cylinder cars with at least 90 horsepower; first six columns only.
mtcars[mtcars$cyl %in% 4 & mtcars$hp >= 90, 1:6]
## mpg cyl disp hp drat wt
## Datsun 710 22.8 4 108.0 93 3.85 2.32
## Merc 230 22.8 4 140.8 95 3.92 3.15
## Toyota Corona 21.5 4 120.1 97 3.70 2.46
## Porsche 914-2 26.0 4 120.3 91 4.43 2.14
## Lotus Europa 30.4 4 95.1 113 3.77 1.51
## Volvo 142E 21.4 4 121.0 109 4.11 2.78
# The same rows selected with subset() and a strict "more than 90" test.
subset(mtcars, cyl == 4 & hp > 90, select = 1:6)
## mpg cyl disp hp drat wt
## Datsun 710 22.8 4 108.0 93 3.85 2.32
## Merc 230 22.8 4 140.8 95 3.92 3.15
## Toyota Corona 21.5 4 120.1 97 3.70 2.46
## Porsche 914-2 26.0 4 120.3 91 4.43 2.14
## Lotus Europa 30.4 4 95.1 113 3.77 1.51
## Volvo 142E 21.4 4 121.0 109 4.11 2.78
x <- c(20, 50, 16, 78)
# Vectorised test: one label per element of x.
interview <- ifelse(x < 18, "No", "Yes")
print(interview)
## [1] "Yes" "Yes" "No" "Yes"
# The same labels built with logical subsetting.
survey <- rep(NA_character_, length(x))
survey[x < 18] <- "No"
survey[x >= 18] <- "Yes"
survey
## [1] "Yes" "Yes" "No" "Yes"
interview
## [1] "Yes" "Yes" "No" "Yes"
\(\blacksquare\) Suppose we want to decide whether these dates (Jan. 1st, Feb. 1st, Nov. 1st, and Dec. 20th) fall before or after July 12, 2018. Apply
temperature <- 30
# Choose the message from the reading, then print it once.
cat(
  if (temperature > 28) {
    "Turn on air condition"
  } else {
    "Turn off air condition"
  }
)
## Turn on air condition
scores <- c(30, 50, 90, 20)
# `if` accepts a single TRUE/FALSE only: a length-4 condition is an error
# in R >= 4.2, and older versions used just the first element (with a
# warning). ifelse() applies the rule to every score instead: values below
# 36 become sqrt(score) * 10, all others are kept unchanged.
adjusted <- ifelse(scores < 36, sqrt(scores) * 10, scores)
adjusted
## [1] 54.8 50.0 90.0 44.7
score <- 30
# A single value is a valid `if` condition.
if (score >= 36) {
  score
} else {
  sqrt(score) * 10
}
## [1] 54.8
movie <- 170
# Classify the running time, shortest category first.
if (movie < 165) {
  cat("Short")
} else if (movie < 180) {
  cat("Long")
} else {
  cat("Very long")
}
## Long
price <- 3000
booking <- Sys.Date()
booking
## [1] "2018-07-05"
checkin <- as.Date("2018-07-30")
# Price by lead time in days: more than 90 -> 15% off, 60 to 90 -> 10% off,
# 7 to 59 -> list price, less than a week -> 20% surcharge.
print(
  price * if (checkin - booking > 90) {
    0.85
  } else if (checkin - booking >= 60) {
    0.9
  } else if (checkin - booking >= 7) {
    1
  } else {
    1.2
  }
)
## [1] 3000
# Same rule when the booking is made two weeks later.
booking2 <- booking + 14
print(
  price * if (checkin - booking2 > 90) {
    0.85
  } else if (checkin - booking2 >= 60) {
    0.9
  } else if (checkin - booking2 >= 7) {
    1
  } else {
    1.2
  }
)
## [1] 3000
We can repeat an action \(n\) times with a `for` loop:
# The loop variable is not used in the body; it only counts five passes.
for (U in seq_len(5)) {
  cat("All work and no play", "\n")
}
## All work and no play
## All work and no play
## All work and no play
## All work and no play
## All work and no play
# Looping over a stored vector works the same way.
a <- 1:5
for (i in a) {
  cat("Busy", " ")
}
## Busy Busy Busy Busy Busy
R
will execute the code inside the braces once for each value, from the starting value to the ending value.
# cat() converts the counter to text itself, so it is passed directly.
for (i in seq_len(4)) {
  cat("Hello World", i, "times \n")
}
## Hello World 1 times
## Hello World 2 times
## Hello World 3 times
## Hello World 4 times
# Accumulate 1 + 2 + ... + 10. The accumulator is named `total` rather than
# `sum` so that it does not shadow base::sum(), which later code calls.
total <- 0
for (i in 1:10) {
  total <- total + i
}
print(total)
## [1] 55
set.seed(11605)
dice <- 1:6
# One slot per experiment; each holds the total of three separate rolls.
x <- integer(1000)
for (i in seq_along(x)) {
  x[i] <- sample(dice, 1) + sample(dice, 1) + sample(dice, 1)
}
# graphic
library(ggplot2)
# Density-scaled histogram of the simulated totals stored in `x`.
df <- data.frame(Dice = x)
g <- ggplot(df, aes(x = Dice)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.9,
    fill = "lightgreen",
    position = "identity"
  ) +
  labs(x = "Sum of Three Dices", y = "Density")
g
set.seed(02138)

# Draw three card values (1-13) using `x` as the random seed.
#
# x: seed passed to set.seed(), so each call is reproducible.
# Prints all three cards and their total when the first two add up to
# less than 16; otherwise prints only the first two and their total.
card <- function(x) {
  set.seed(x)
  cards <- numeric(3)
  for (k in seq_along(cards)) {
    cards[k] <- sample(1:13, 1)
  }
  if (cards[1] + cards[2] >= 16) {
    print(cards[1:2])
    cat(sum(cards[1:2]), "is sum of the first 2 cards \n")
  } else {
    print(cards)
    cat(sum(cards), "is sum of three cards \n")
  }
}
card(100)
card(1001)
card(11605)
## [1] 5 4 8
## 17 is sum of three cards
## [1] 13 6
## 19 is sum of the first 2 cards
## [1] 11 2 2
## 15 is sum of three cards
today <- Sys.Date()

# Print the quoted room price for each check-in date.
#
# checkin: Date vector of check-in dates.
# Uses the global `today` as the booking date. For every element the date
# is printed, followed by the lead time and the price: more than 90 days
# ahead gets 15% off, 60 to 90 days 10% off, 30 to 59 days the list price
# (3000), and anything closer a 20% surcharge.
hotel <- function(checkin) {
  price <- 3000
  # Force day units. With the default units = "auto", difftime() picks the
  # largest unit in which every absolute difference exceeds one, so a
  # same-day check-in would switch the whole vector to seconds and break
  # the day thresholds below.
  days_ahead <- as.numeric(difftime(checkin, today, units = "days"))
  # seq_along() makes an empty `checkin` a no-op instead of looping 1:0.
  for (i in seq_along(checkin)) {
    print(checkin[i])
    months_ahead <- round(days_ahead[i] / 30, 1)
    if (days_ahead[i] > 90) {
      cat(months_ahead, "months:", price * 0.85, "\n")
    } else if (days_ahead[i] >= 60) {
      cat(months_ahead, "months:", price * 0.9, "\n")
    } else if (days_ahead[i] >= 30) {
      cat(months_ahead, "months:", price, "\n")
    } else {
      cat(days_ahead[i], "days:", price * 1.2, "\n")
    }
  }
}
checkin <- as.Date(c("2018-12-31", "2018-11-20", "2018-09-20"))
checkin <- c(checkin, today + 7)
hotel(checkin)
## [1] "2018-12-31"
## 6 months: 2550
## [1] "2018-11-20"
## 4.6 months: 2550
## [1] "2018-09-20"
## 2.6 months: 2700
## [1] "2018-07-12"
## 7 days: 3600
# 11..20 times table, filled cell by cell with nested loops.
multiplication <- matrix(
  nrow = 10, ncol = 10,
  dimnames = list(as.character(11:20), as.character(11:20))
)
for (i in seq_len(nrow(multiplication))) {
  for (j in seq_len(ncol(multiplication))) {
    multiplication[i, j] <- (10 + i) * (10 + j)
  }
}
multiplication
## 11 12 13 14 15 16 17 18 19 20
## 11 121 132 143 154 165 176 187 198 209 220
## 12 132 144 156 168 180 192 204 216 228 240
## 13 143 156 169 182 195 208 221 234 247 260
## 14 154 168 182 196 210 224 238 252 266 280
## 15 165 180 195 210 225 240 255 270 285 300
## 16 176 192 208 224 240 256 272 288 304 320
## 17 187 204 221 238 255 272 289 306 323 340
## 18 198 216 234 252 270 288 306 324 342 360
## 19 209 228 247 266 285 304 323 342 361 380
## 20 220 240 260 280 300 320 340 360 380 400
library(foreign)
# Semicolon-separated file with a header row, read with Big5 encoding.
stat.dat <- read.csv(
  file = "CS3171D1A.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fileEncoding = "BIG5"
)
stat.dat[seq_len(11), ]
## X 臺北縣 宜蘭縣 桃園縣 新竹縣 苗栗縣 臺中縣 彰化縣
## 1 老年人口比率(65歲以上) NA NA NA NA NA NA NA
## 2 2000 6.37 10.2 7.46 9.69 11.0 7.16 9.42
## 3 2001 6.44 10.5 7.49 9.91 11.2 7.32 9.73
## 4 2002 6.55 10.8 7.51 10.17 11.6 7.50 10.03
## 5 2003 6.67 11.2 7.56 10.39 11.9 7.68 10.31
## 6 2004 6.86 11.5 7.62 10.58 12.2 7.90 10.65
## 7 2005 7.08 11.9 7.72 10.85 12.5 8.12 10.97
## 8 2006 7.32 12.3 7.84 11.02 12.8 8.31 11.28
## 9 2007 7.52 12.6 7.92 11.12 13.0 8.50 11.55
## 10 2008 7.76 12.8 8.05 11.20 13.2 8.68 11.79
## 11 2009 8.04 13.0 8.18 11.28 13.3 8.86 11.98
## 南投縣 雲林縣 嘉義縣 臺南縣 高雄縣 屏東縣 臺東縣 花蓮縣 澎湖縣 基隆市
## 1 NA NA NA NA NA NA NA NA NA NA
## 2 10.6 11.6 12.4 10.8 8.35 10.0 11.3 10.7 14.4 8.81
## 3 10.9 12.0 12.8 11.0 8.52 10.2 11.4 10.8 14.3 9.06
## 4 11.2 12.4 13.1 11.3 8.75 10.5 11.6 11.0 14.4 9.28
## 5 11.6 12.8 13.6 11.6 8.95 10.8 11.8 11.2 14.6 9.47
## 6 12.0 13.3 14.0 11.8 9.16 11.1 12.0 11.4 14.8 9.71
## 7 12.3 13.7 14.3 12.1 9.39 11.4 12.2 11.6 14.8 10.03
## 8 12.7 14.1 14.8 12.4 9.65 11.8 12.5 11.9 15.0 10.31
## 9 13.0 14.4 15.1 12.6 9.88 12.0 12.7 12.1 15.0 10.54
## 10 13.2 14.7 15.3 12.8 10.11 12.3 12.9 12.3 14.9 10.77
## 11 13.4 14.9 15.6 12.9 10.33 12.5 13.0 12.4 14.6 10.96
## 新竹市 臺中市 嘉義市 臺南市 臺北市 高雄市
## 1 NA NA NA NA NA NA
## 2 8.46 6.49 8.67 7.69 9.67 7.16
## 3 8.50 6.60 8.85 7.85 9.94 7.41
## 4 8.59 6.79 9.15 8.06 10.25 7.63
## 5 8.69 6.94 9.46 8.24 10.58 7.93
## 6 8.81 7.15 9.70 8.46 10.92 8.24
## 7 8.95 7.35 10.00 8.69 11.29 8.59
## 8 9.12 7.59 10.25 8.90 11.64 8.94
## 9 9.21 7.75 10.45 9.09 11.96 9.23
## 10 9.29 7.92 10.64 9.33 12.31 9.57
## 11 9.37 8.13 10.86 9.53 12.60 9.93
# Reshape the raw table (years in rows, cities in columns) into one row per
# city with one column per year.
dt <- data.frame(city = colnames(stat.dat)[-1])
# Rows 2 to 12 hold the yearly values (row 1 is the subtitle row, column 1
# the year label). Transposing that block replaces the cell-by-cell copy
# and no longer hard-codes the number of cities.
old <- as.data.frame(unname(t(as.matrix(stat.dat[2:12, -1]))))
dt <- data.frame(dt, old)
colnames(dt) <- c("city", 2000:2010)
head(dt)
## city 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
## 1 臺北縣 6.37 6.44 6.55 6.67 6.86 7.08 7.32 7.52 7.76 8.04 8.27
## 2 宜蘭縣 10.20 10.49 10.82 11.17 11.54 11.95 12.30 12.61 12.83 13.01 13.10
## 3 桃園縣 7.46 7.49 7.51 7.56 7.62 7.72 7.84 7.92 8.05 8.18 8.24
## 4 新竹縣 9.69 9.91 10.17 10.39 10.58 10.85 11.02 11.12 11.20 11.28 11.15
## 5 苗栗縣 10.98 11.21 11.57 11.87 12.19 12.50 12.79 13.01 13.21 13.33 13.40
## 6 臺中縣 7.16 7.32 7.50 7.68 7.90 8.12 8.31 8.50 8.68 8.86 8.99
library(reshape2)
library(ggplot2)
# Long format: one row per city-year pair.
DT <- melt(dt, id.vars = "city", variable.name = "years")
# Anchor every year at 1 January. Parsing with format = "%Y" alone fills
# the missing month and day from the current date, so the x positions
# would depend on the day the script is run.
DT$years <- as.Date(paste0(DT$years, "-01-01"))
ggplot(DT, aes(x = years, y = value, col = city)) +
  geom_line(size = 1) +
  geom_point(shape = 16, size = 3) +
  labs(x = "Years", y = "Percent") +
  scale_x_date(date_labels = "%Y") +
  theme(text = element_text(family = "STFangsong"))
power <- -1
# There is no break here: once 2^power reaches 1000 the loop keeps running
# and prints "Stop" on every remaining pass until the counter exceeds 12.
while (power <= 12) {
  power <- power + 1
  if (2^power >= 1000) {
    cat("Stop")
  } else {
    cat(2^power, "\n")
  }
}
## 1
## 2
## 4
## 8
## 16
## 32
## 64
## 128
## 256
## 512
## StopStopStopStop
# Exponents 0..12: the range is shifted up front instead of adding one to
# the loop variable inside the body.
for (a in 0:12) {
  print(2^a)
}
## [1] 1
## [1] 2
## [1] 4
## [1] 8
## [1] 16
## [1] 32
## [1] 64
## [1] 128
## [1] 256
## [1] 512
## [1] 1024
## [1] 2048
## [1] 4096
power <- 0
# Print powers of two below 1000; `break` leaves the loop at the first
# value that is too large, so "Stop" appears only once.
repeat {
  if (power > 12) {
    break
  }
  if (2^power >= 1000) {
    cat("Stop")
    break
  }
  cat(2^power, "\n")
  power <- power + 1
}
## 1
## 2
## 4
## 8
## 16
## 32
## 64
## 128
## 256
## 512
## Stop
today <- Sys.Date()

# Print the quoted room price for each check-in date, stopping at the
# first date that is less than 30 days away.
#
# checkin: Date vector of check-in dates.
# Uses the global `today` as the booking date. More than 90 days ahead gets
# 15% off, 60 to 90 days 10% off, 30 to 59 days the list price (3000). A
# closer date prints "Over the budget" and ends the loop, so later dates
# are not processed.
hotel <- function(checkin) {
  price <- 3000
  # Force day units. With the default units = "auto", difftime() picks the
  # largest unit in which every absolute difference exceeds one, so a
  # same-day check-in would switch the whole vector to seconds and break
  # the day thresholds below.
  days_ahead <- as.numeric(difftime(checkin, today, units = "days"))
  # seq_along() makes an empty `checkin` a no-op instead of looping 1:0.
  for (i in seq_along(checkin)) {
    print(checkin[i])
    months_ahead <- round(days_ahead[i] / 30, 1)
    if (days_ahead[i] > 90) {
      cat(months_ahead, "months:", price * 0.85, "\n")
    } else if (days_ahead[i] >= 60) {
      cat(months_ahead, "months:", price * 0.9, "\n")
    } else if (days_ahead[i] >= 30) {
      cat(months_ahead, "months:", price, "\n")
    } else {
      cat("Over the budget")
      break
    }
  }
}
checkin <- as.Date(c("2018-10-31", "2018-08-10", "2018-07-20"))
checkin <- c(checkin, today + 3)
hotel(checkin)
## [1] "2018-10-31"
## 3.9 months: 2550
## [1] "2018-08-10"
## 1.2 months: 3000
## [1] "2018-07-20"
## Over the budget
# Report powers of 5 for exponents 1..8, skipping 4 and 6 with `next`.
for (i in 1:8) {
  if (i %in% c(4, 6)) {
    next
  }
  cat(i, "power of 5 is", 5^i, "\n")
}
## 1 power of 5 is 5
## 2 power of 5 is 25
## 3 power of 5 is 125
## 5 power of 5 is 3125
## 7 power of 5 is 78125
## 8 power of 5 is 390625