Apply Function
x <- list(c(1,2,3,4), c(5,6,7,8))
x[[1]]
## [1] 1 2 3 4
# For Loop
res <- c()
for (i in x){
res <- c(res, sum(i))
}
res
## [1] 10 26
# Apply Function
lapply(x, sum)
## [[1]]
## [1] 10
##
## [[2]]
## [1] 26
grades <- list(c(90,84,88), c(50,60,70,55))
# For Loop
res <- c()
for( g in grades){
res <- c(res,mean(g))
}
res
## [1] 87.33333 58.75000
# Apply Method
lapply(grades, mean)
## [[1]]
## [1] 87.33333
##
## [[2]]
## [1] 58.75
m1 <- matrix(1:4, nrow= 2, byrow = TRUE)
m2 <- matrix(5:8, nrow= 2, byrow = TRUE)
li <- list(m1,m2)
mean(m1)
## [1] 2.5
ret <- lapply(li, mean)
ret[[1]]
## [1] 2.5
unlist(ret)
## [1] 2.5 6.5
f <- function(e){
e[1,]
}
for(mat in li){
#print(mat[1,])
print(f(mat))
}
## [1] 1 2
## [1] 5 6
lapply(li, function(mat) mat[1,] )
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 5 6
lapply(li, f )
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 5 6
grades <- list(c(90,84,88), c(50,60,70,55))
lapply(grades, sum)
## [[1]]
## [1] 262
##
## [[2]]
## [1] 235
unlist(lapply(grades, sum))
## [1] 262 235
sapply(grades, sum)
## [1] 262 235
m1 <- matrix(1:4, nrow= 2, byrow = TRUE)
m2 <- matrix(5:8, nrow= 2, byrow = TRUE)
li <- list(m1,m2)
lapply(li, mean)
## [[1]]
## [1] 2.5
##
## [[2]]
## [1] 6.5
sapply(li, mean)
## [1] 2.5 6.5
lapply(li, function(e) e[1,])
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 5 6
sapply(li, function(e) e[1,])
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
?apply
apply(m1,1, sum )
## [1] 3 7
apply(m1,2, sum )
## [1] 4 6
x <- c(80,70,59,88,72,57)
t <- c(1,1,2,1,1,2)
tapply(x,t, mean)
## 1 2
## 77.5 58.0
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
tapply(iris$Petal.Length, iris$Species, mean)
## setosa versicolor virginica
## 1.462 4.260 5.552
Data Visualization
#data(anscombe)
#View(anscombe)
plot(y1 ~ x1, data = anscombe)
fit <- lm(y1 ~ x1, data = anscombe)
abline(fit, col="red")

Line Chart
x <- 1:6
y <- x
# method 1
plot(x, y, type = 'o')

# method 2
plot(x, y, type = 'n')
points(x,y, type = 'o')

# method 3
plot(x, y, type = 'n')
lines(x,y, type = 'o')

# method 1
par(mfrow=c(2,4))
types <- c('p','l', 'o', 'b', 'c', 's', 'h', 'n')
for(t in types){
#print(t)
title <- paste('type:', t)
plot(x,y, type = t, main = title)
}

# method 2
par(mfrow=c(2,4))
types <- c('p','l', 'o', 'b', 'c', 's', 'h', 'n')
for(t in types){
#print(t)
title <- paste('type:', t)
plot( x,y, type = 'n', main = title)
lines(x,y, type = t)
}

barplot(c(80,82,84,88), ylim = c(80,90))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 , 37.3, 84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, lty = 2, col = 'red')
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 , 37.3, 84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlim=c(1,6), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, col = 'red')
text(5.5, 220, 'Taipei')
text(5.5, 180, 'Tainan')

Bar Chart
housePrice <- read.csv('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv')
head(housePrice)
## Home Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
## 4 4 94700 1980 3 2 3 No East
## 5 5 119800 2130 3 3 3 No East
## 6 6 114600 1780 3 2 2 No North
bedroomsTable <- table(housePrice$Bedrooms)
barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= "blue")

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("blue", 'orange', 'red', 'green') )

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= 1:4 )

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("blue", 'orange', 'red', 'green') )

## Color Scheme:
## https://color.adobe.com/explore/?filter=most-popular&time=month
barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("#17A598", '#D5D8C8', '#EF551F', '#F3A346') )

Histogram
getwd()
## [1] "/Users/davidchiu"
load('course/fuboni/data/cdc.Rdata')
head(cdc)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
hist(cdc$weight)

#barplot(cdc$weight)
hist(cdc$weight, breaks = 50)

hist(cdc$weight, breaks = 500)

sort(table(cdc$weight), decreasing = TRUE)
##
## 160 150 180 170 200 140 190 165 130 175 145 135 185 155 125 120 210 195
## 992 970 933 922 805 794 715 692 627 626 615 589 577 527 473 440 431 393
## 220 230 115 110 205 215 240 250 225 138 235 128 168 105 148 132 142 178
## 376 268 244 235 230 206 204 202 196 144 137 125 122 112 111 110 110 106
## 260 118 158 162 172 100 152 122 127 134 198 300 112 245 123 182 192 137
## 104 102 102 96 95 94 80 74 71 71 70 70 69 69 65 64 64 62
## 143 147 136 163 126 124 280 270 108 174 173 156 188 117 157 153 154 187
## 62 62 60 60 59 58 57 56 55 53 50 49 49 48 48 47 47 47
## 133 164 167 183 212 146 186 144 149 176 184 275 290 114 129 179 204 265
## 46 46 45 45 45 43 42 41 40 40 40 40 40 39 37 37 36 36
## 113 197 218 107 116 189 208 119 203 193 194 103 139 141 196 159 169 255
## 34 34 34 33 33 33 33 32 32 31 31 30 30 30 29 28 28 27
## 131 166 171 98 151 106 121 202 95 177 228 207 285 350 161 206 102 199
## 26 26 26 25 25 24 24 24 22 22 22 21 21 21 19 19 18 18
## 216 222 248 104 109 214 90 238 310 320 181 217 219 236 295 209 227 242
## 18 18 18 17 14 14 12 12 12 12 11 11 11 11 11 10 10 10
## 191 213 232 252 201 224 226 234 257 92 111 211 223 315 93 97 101 246
## 9 9 9 9 8 8 8 8 8 7 7 7 7 7 6 6 6 6
## 262 278 330 340 99 237 258 400 85 88 233 239 241 243 253 256 263 267
## 6 6 6 6 5 5 5 5 4 4 4 4 4 4 4 4 4 4
## 268 305 380 84 94 96 272 274 286 298 325 78 80 82 231 247 249 254
## 4 4 4 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2
## 276 279 282 283 287 292 360 362 385 68 70 79 83 86 221 229 244 271
## 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1
## 273 294 296 297 308 309 313 318 319 324 327 328 344 348 364 370 371 390
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 405 495 500
## 1 1 1
table(cdc$weight %% 10)
##
## 0 1 2 3 4 5 6 7 8 9
## 9421 207 919 545 525 5865 481 543 1159 335
par(mfrow = c(2,1))
hist(cdc$weight, breaks = 50, xlim = c(70, 300))
barplot(table(cdc$weight))

Pie Chart
bedroomsTable <- table(housePrice$Bedrooms)
bedroomsTable
##
## 2 3 4 5
## 30 67 29 2
pie(bedroomsTable, labels = c('2 Units', '3 Units', '4 Units', '5 Units'),col = 1:4, main = 'Bedrooms Pie Chart')

bedroomsTable <- sort(bedroomsTable, decreasing = TRUE)
bedroomsTable
##
## 3 2 4 5
## 67 30 29 2
pie(bedroomsTable, labels = c('3 Units', '2 Units', '4 Units', '5 Units'), col = 1:4, main = 'Bedrooms Pie Chart', init.angle = 90, clockwise = TRUE)

Scatter Chart
plot(cdc$weight, cdc$wtdesire)

plot(anscombe$x1, anscombe$y3)

plot(y3 ~ x1, data = anscombe)
fit <- lm(y3 ~ x1, data = anscombe)
abline(fit, col = "red")

data(iris)
versicolor <- iris[iris$Species == 'versicolor', ]
virginica <- iris[iris$Species == 'virginica', ]
plot(versicolor$Petal.Length, versicolor$Sepal.Length, col ='red', xlim = c(0,8), ylim = c(0,8))
points(virginica$Petal.Length, virginica$Sepal.Length, col ='blue')
abline(v = 4.7, col='orange')

plot(versicolor$Petal.Length, versicolor$Sepal.Length, type = 'n', xlim = c(0,8), ylim = c(0,8))
points(versicolor$Petal.Length, versicolor$Sepal.Length, col ='red')
points(virginica$Petal.Length, virginica$Sepal.Length, col ='blue')
abline(v = 4.7, col='orange')

plot(wtdesire ~ weight, data = cdc)
fit <- lm(wtdesire ~ weight, data = cdc)
abline(fit, col='red')

Mosaic Plot
table(cdc$smoke100)
##
## 0 1
## 10559 9441
table(cdc$gender)
##
## m f
## 9569 10431
smoker_genders <- table(cdc$gender , cdc$smoke100)
colnames(smoker_genders) <- c('no', 'yes')
mosaicplot(smoker_genders, col =c('green','red'))

Box Chart
boxplot(cdc$height, ylab="Height", main="Box Plot of Height")

set.seed(42)
temp <- sample.int(30, size=100, replace = TRUE)
mean(temp)
## [1] 16.3
temp <- c(temp, 999,999,999)
mean(temp)
## [1] 44.92233
sort(temp)[52]
## [1] 17
median(temp)
## [1] 17
a <- c(1,2,3,4,5,6,7,8,90)
mean(a)
## [1] 14
median(a)
## [1] 5
quantile(a, 0.25)
## 25%
## 3
quantile(a, 0.75)
## 75%
## 7
IQR(a)
## [1] 4
max(median(a) - 1.5 * IQR(a), min(a))
## [1] 1
min(median(a) + 1.5 * IQR(a), max(a))
## [1] 11
boxplot(a)

a2 <- a[a < 20]
boxplot(a2)

boxplot(cdc$height ~ cdc$gender)

par(mfrow=c(2,1))
boxplot(cdc$height, ylab="Height", main="Box Plot of Height")
hist(cdc$height)

Legend
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 , 37.3, 84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlim=c(1,6), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, col = 'red')
#legend('topright',c('taipei', 'tainan'), col=c('blue', 'red'), lwd = 2, title = 'rainfall', pch=c(19,22))
legend(1,200,c('taipei', 'tainan'), col=c('blue', 'red'), lwd = 2, title = 'rainfall', pch=c(19,22))

Par
?par
showLayout <- function(n){
for(i in 1:n){
plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
text(1, 1, labels=i, cex=10)
}
}
par(mfrow=c(3,2), mar= c(1,1,1,1))
showLayout(6)
par(mfcol=c(3,2), mar= c(3,3,3,3))
showLayout(6)

Export Picture
jpeg('t1.jpg')
x <- pie(bedroomsTable, labels = c('3 Units', '2 Units', '4 Units', '5 Units'), col = 1:4, main = 'Bedrooms Pie Chart', init.angle = 90, clockwise = TRUE)
dev.off()
## quartz_off_screen
## 2
getwd()
## [1] "/Users/davidchiu"
安裝 Dplyr
#install.packages('dplyr')
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(Titanic)
titanic <- data.frame(Titanic)
head(titanic)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
titanic[ titanic$Sex == 'Male' & titanic$Age =='Adult', ]
## Class Sex Age Survived Freq
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
filter(titanic, Sex == 'Male' & Age == 'Adult')
## Class Sex Age Survived Freq
## 1 1st Male Adult No 118
## 2 2nd Male Adult No 154
## 3 3rd Male Adult No 387
## 4 Crew Male Adult No 670
## 5 1st Male Adult Yes 57
## 6 2nd Male Adult Yes 14
## 7 3rd Male Adult Yes 75
## 8 Crew Male Adult Yes 192
filter(titanic, Sex == 'Male' & Class == 'Crew')
## Class Sex Age Survived Freq
## 1 Crew Male Child No 0
## 2 Crew Male Adult No 670
## 3 Crew Male Child Yes 0
## 4 Crew Male Adult Yes 192
filter(titanic, Sex == 'Male' | Class == 'Crew')
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 Crew Female Child No 0
## 6 1st Male Adult No 118
## 7 2nd Male Adult No 154
## 8 3rd Male Adult No 387
## 9 Crew Male Adult No 670
## 10 Crew Female Adult No 3
## 11 1st Male Child Yes 5
## 12 2nd Male Child Yes 11
## 13 3rd Male Child Yes 13
## 14 Crew Male Child Yes 0
## 15 Crew Female Child Yes 0
## 16 1st Male Adult Yes 57
## 17 2nd Male Adult Yes 14
## 18 3rd Male Adult Yes 75
## 19 Crew Male Adult Yes 192
## 20 Crew Female Adult Yes 20
filter(titanic, Class %in% c('Crew', '1st') )
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 Crew Male Child No 0
## 3 1st Female Child No 0
## 4 Crew Female Child No 0
## 5 1st Male Adult No 118
## 6 Crew Male Adult No 670
## 7 1st Female Adult No 4
## 8 Crew Female Adult No 3
## 9 1st Male Child Yes 5
## 10 Crew Male Child Yes 0
## 11 1st Female Child Yes 1
## 12 Crew Female Child Yes 0
## 13 1st Male Adult Yes 57
## 14 Crew Male Adult Yes 192
## 15 1st Female Adult Yes 140
## 16 Crew Female Adult Yes 20
titanic[ , c('Sex', 'Age')]
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Adult
## 10 Male Adult
## 11 Male Adult
## 12 Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17 Male Child
## 18 Male Child
## 19 Male Child
## 20 Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25 Male Adult
## 26 Male Adult
## 27 Male Adult
## 28 Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
select(titanic, Sex, Age)
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Adult
## 10 Male Adult
## 11 Male Adult
## 12 Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17 Male Child
## 18 Male Child
## 19 Male Child
## 20 Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25 Male Adult
## 26 Male Adult
## 27 Male Adult
## 28 Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
sum(tail(head(iris),3 )$Sepal.Length)
## [1] 15
# magrittr
iris %>% head() %>% tail(3) %>% .$Sepal.Length %>% sum()
## [1] 15
# dplyr & Magrittr
### select age, sex from titanic where class in ('crew', '1st')
res <- titanic %>% filter(Class %in% c('Crew', '1st')) %>% select(Sex, Age) %>% group_by(Age) %>% summarise(cnt = n())
barplot(res$cnt, names.arg = res$Age, col = c('blue', 'orange'))
