Apply Function

x <- list(c(1,2,3,4), c(5,6,7,8))
x[[1]]
## [1] 1 2 3 4
# For Loop 
res <- c()
for (i in x){
  res <- c(res, sum(i))
}
res
## [1] 10 26
# Apply Function
lapply(x, sum)
## [[1]]
## [1] 10
## 
## [[2]]
## [1] 26
grades <- list(c(90,84,88), c(50,60,70,55))

# For Loop
res <- c()
for( g in grades){
  res <- c(res,mean(g))
}
res
## [1] 87.33333 58.75000
# Apply Method
lapply(grades, mean)
## [[1]]
## [1] 87.33333
## 
## [[2]]
## [1] 58.75
m1 <- matrix(1:4, nrow= 2, byrow = TRUE)
m2 <- matrix(5:8, nrow= 2, byrow = TRUE)
li <- list(m1,m2)
mean(m1)
## [1] 2.5
ret <- lapply(li, mean)
ret[[1]]
## [1] 2.5
unlist(ret)
## [1] 2.5 6.5
f <- function(e){
  e[1,]
}

for(mat in li){
  #print(mat[1,])
  print(f(mat))
}
## [1] 1 2
## [1] 5 6
lapply(li, function(mat) mat[1,] )
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6
lapply(li, f )
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6
grades <- list(c(90,84,88), c(50,60,70,55))
lapply(grades, sum)
## [[1]]
## [1] 262
## 
## [[2]]
## [1] 235
unlist(lapply(grades, sum))
## [1] 262 235
sapply(grades, sum)
## [1] 262 235
m1 <- matrix(1:4, nrow= 2, byrow = TRUE)
m2 <- matrix(5:8, nrow= 2, byrow = TRUE)
li <- list(m1,m2)
lapply(li, mean)
## [[1]]
## [1] 2.5
## 
## [[2]]
## [1] 6.5
sapply(li, mean)
## [1] 2.5 6.5
lapply(li, function(e) e[1,])
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6
sapply(li, function(e) e[1,])
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
?apply
apply(m1,1, sum )
## [1] 3 7
apply(m1,2, sum )
## [1] 4 6
x <- c(80,70,59,88,72,57) 
t <- c(1,1,2,1,1,2) 
tapply(x,t, mean)
##    1    2 
## 77.5 58.0
data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
tapply(iris$Petal.Length, iris$Species, mean)
##     setosa versicolor  virginica 
##      1.462      4.260      5.552

Data Visualization

#data(anscombe)
#View(anscombe)
plot(y1 ~ x1, data = anscombe)
fit <- lm(y1 ~ x1, data = anscombe)
abline(fit, col="red")

Stock Visualization

Line Chart

x <- 1:6
y <- x

# method 1
plot(x, y, type = 'o')

# method 2
plot(x, y, type = 'n')
points(x,y, type = 'o')

# method 3
plot(x, y, type = 'n')
lines(x,y, type = 'o')

# method 1
par(mfrow=c(2,4))
types <- c('p','l', 'o', 'b', 'c', 's', 'h', 'n')
for(t in types){
  #print(t)
  title <- paste('type:', t)
  plot(x,y, type = t, main = title)
}

# method 2
par(mfrow=c(2,4))
types <- c('p','l', 'o', 'b', 'c', 's', 'h', 'n')
for(t in types){
  #print(t)
  title <- paste('type:', t)
  plot( x,y, type = 'n', main = title)
  lines(x,y, type = t)
}

barplot(c(80,82,84,88), ylim = c(80,90))


taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 ,  37.3,  84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, lty = 2, col = 'red')


taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 ,  37.3,  84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlim=c(1,6), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, col = 'red')
text(5.5, 220, 'Taipei')
text(5.5, 180, 'Tainan')

Bar Chart

housePrice <- read.csv('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv')
head(housePrice)
##   Home  Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1    1 114300 1790        2         2      2    No         East
## 2    2 114200 2030        4         2      3    No         East
## 3    3 114800 1740        3         2      1    No         East
## 4    4  94700 1980        3         2      3    No         East
## 5    5 119800 2130        3         3      3    No         East
## 6    6 114600 1780        3         2      2    No        North
bedroomsTable <- table(housePrice$Bedrooms)
barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= "blue")

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("blue", 'orange', 'red', 'green') )

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= 1:4 )

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("blue", 'orange', 'red', 'green') )

## Color Scheme:
## https://color.adobe.com/explore/?filter=most-popular&time=month

barplot(bedroomsTable, xlab = 'Bedrooms Type', ylab = 'Count', main = 'Bedroom Type Calculation', col= c("#17A598", '#D5D8C8', '#EF551F', '#F3A346') )

Histogram

getwd()
## [1] "/Users/davidchiu"
load('course/fuboni/data/cdc.Rdata')
head(cdc)
##     genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1      good       0        1        0     70    175      175  77      m
## 2      good       0        1        1     64    125      115  33      f
## 3      good       1        1        1     60    105      105  49      f
## 4      good       1        1        0     66    132      124  42      f
## 5 very good       0        1        0     61    150      130  55      f
## 6 very good       1        1        0     64    114      114  55      f
hist(cdc$weight)

#barplot(cdc$weight)
hist(cdc$weight, breaks = 50)

hist(cdc$weight, breaks = 500)

sort(table(cdc$weight), decreasing = TRUE)
## 
## 160 150 180 170 200 140 190 165 130 175 145 135 185 155 125 120 210 195 
## 992 970 933 922 805 794 715 692 627 626 615 589 577 527 473 440 431 393 
## 220 230 115 110 205 215 240 250 225 138 235 128 168 105 148 132 142 178 
## 376 268 244 235 230 206 204 202 196 144 137 125 122 112 111 110 110 106 
## 260 118 158 162 172 100 152 122 127 134 198 300 112 245 123 182 192 137 
## 104 102 102  96  95  94  80  74  71  71  70  70  69  69  65  64  64  62 
## 143 147 136 163 126 124 280 270 108 174 173 156 188 117 157 153 154 187 
##  62  62  60  60  59  58  57  56  55  53  50  49  49  48  48  47  47  47 
## 133 164 167 183 212 146 186 144 149 176 184 275 290 114 129 179 204 265 
##  46  46  45  45  45  43  42  41  40  40  40  40  40  39  37  37  36  36 
## 113 197 218 107 116 189 208 119 203 193 194 103 139 141 196 159 169 255 
##  34  34  34  33  33  33  33  32  32  31  31  30  30  30  29  28  28  27 
## 131 166 171  98 151 106 121 202  95 177 228 207 285 350 161 206 102 199 
##  26  26  26  25  25  24  24  24  22  22  22  21  21  21  19  19  18  18 
## 216 222 248 104 109 214  90 238 310 320 181 217 219 236 295 209 227 242 
##  18  18  18  17  14  14  12  12  12  12  11  11  11  11  11  10  10  10 
## 191 213 232 252 201 224 226 234 257  92 111 211 223 315  93  97 101 246 
##   9   9   9   9   8   8   8   8   8   7   7   7   7   7   6   6   6   6 
## 262 278 330 340  99 237 258 400  85  88 233 239 241 243 253 256 263 267 
##   6   6   6   6   5   5   5   5   4   4   4   4   4   4   4   4   4   4 
## 268 305 380  84  94  96 272 274 286 298 325  78  80  82 231 247 249 254 
##   4   4   4   3   3   3   3   3   3   3   3   2   2   2   2   2   2   2 
## 276 279 282 283 287 292 360 362 385  68  70  79  83  86 221 229 244 271 
##   2   2   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1   1 
## 273 294 296 297 308 309 313 318 319 324 327 328 344 348 364 370 371 390 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 405 495 500 
##   1   1   1
table(cdc$weight %% 10)
## 
##    0    1    2    3    4    5    6    7    8    9 
## 9421  207  919  545  525 5865  481  543 1159  335
par(mfrow = c(2,1))
hist(cdc$weight, breaks = 50, xlim = c(70, 300))
barplot(table(cdc$weight))

Pie Chart

bedroomsTable <- table(housePrice$Bedrooms)

bedroomsTable
## 
##  2  3  4  5 
## 30 67 29  2
pie(bedroomsTable, labels = c('2 Units', '3 Units', '4 Units', '5 Units'),col = 1:4, main = 'Bedrooms Pie Chart')

bedroomsTable <- sort(bedroomsTable, decreasing = TRUE)
bedroomsTable
## 
##  3  2  4  5 
## 67 30 29  2
pie(bedroomsTable, labels = c('3 Units', '2 Units', '4 Units', '5 Units'), col = 1:4, main = 'Bedrooms Pie Chart', init.angle = 90, clockwise = TRUE)

Scatter Chart

plot(cdc$weight, cdc$wtdesire)

plot(anscombe$x1, anscombe$y3)

plot(y3 ~ x1, data = anscombe)
fit <- lm(y3 ~ x1, data = anscombe)
abline(fit, col = "red")

data(iris)
versicolor <- iris[iris$Species == 'versicolor', ]
virginica <- iris[iris$Species == 'virginica', ]

plot(versicolor$Petal.Length, versicolor$Sepal.Length, col ='red', xlim = c(0,8), ylim = c(0,8))
points(virginica$Petal.Length, virginica$Sepal.Length, col ='blue')
abline(v = 4.7, col='orange')

plot(versicolor$Petal.Length, versicolor$Sepal.Length, type = 'n', xlim = c(0,8), ylim = c(0,8))
points(versicolor$Petal.Length, versicolor$Sepal.Length, col ='red')
points(virginica$Petal.Length, virginica$Sepal.Length, col ='blue')
abline(v = 4.7, col='orange')

plot(wtdesire ~ weight, data = cdc)
fit <- lm(wtdesire ~ weight, data = cdc)
abline(fit, col='red')

Mosaic Plot

table(cdc$smoke100)
## 
##     0     1 
## 10559  9441
table(cdc$gender)
## 
##     m     f 
##  9569 10431
smoker_genders <- table(cdc$gender , cdc$smoke100)
colnames(smoker_genders) <- c('no', 'yes')
mosaicplot(smoker_genders, col =c('green','red'))

Box Chart

boxplot(cdc$height, ylab="Height", main="Box Plot of Height")

set.seed(42)
temp <- sample.int(30, size=100, replace = TRUE)
mean(temp)
## [1] 16.3
temp <- c(temp, 999,999,999)
mean(temp)
## [1] 44.92233
sort(temp)[52]
## [1] 17
median(temp)
## [1] 17
a <- c(1,2,3,4,5,6,7,8,90)
mean(a)
## [1] 14
median(a)
## [1] 5
quantile(a, 0.25)
## 25% 
##   3
quantile(a, 0.75)
## 75% 
##   7
IQR(a)
## [1] 4
max(median(a) - 1.5 * IQR(a), min(a))
## [1] 1
min(median(a) + 1.5 * IQR(a), max(a))
## [1] 11
boxplot(a)

a2 <- a[a < 20]
boxplot(a2)

boxplot(cdc$height ~ cdc$gender)

par(mfrow=c(2,1))
boxplot(cdc$height, ylab="Height", main="Box Plot of Height")
hist(cdc$height)

Legend

taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6 ,  37.3,  84.6, 184.3)
plot(taipei, type='o', col='blue', ylim = c(0,220), xlim=c(1,6), xlab = 'Month', ylab = 'Rainfall', main = 'Rainfall Comparison')
lines(tainan, type = 'o', pch = 22, col = 'red')
#legend('topright',c('taipei', 'tainan'), col=c('blue', 'red'), lwd = 2, title = 'rainfall', pch=c(19,22))
legend(1,200,c('taipei', 'tainan'), col=c('blue', 'red'), lwd = 2, title = 'rainfall', pch=c(19,22))

Par

?par


showLayout <- function(n){
for(i in 1:n){ 
  plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
  text(1, 1, labels=i, cex=10)
} 
  }

par(mfrow=c(3,2), mar= c(1,1,1,1))
showLayout(6)

par(mfcol=c(3,2), mar= c(3,3,3,3))
showLayout(6)

Export Picture

jpeg('t1.jpg')
x <- pie(bedroomsTable, labels = c('3 Units', '2 Units', '4 Units', '5 Units'), col = 1:4, main = 'Bedrooms Pie Chart', init.angle = 90, clockwise = TRUE)

dev.off()
## quartz_off_screen 
##                 2
getwd()
## [1] "/Users/davidchiu"

安裝 Dplyr

#install.packages('dplyr')
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data(Titanic)
titanic <- data.frame(Titanic)
head(titanic)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0
titanic[  titanic$Sex == 'Male' & titanic$Age =='Adult',   ]
##    Class  Sex   Age Survived Freq
## 9    1st Male Adult       No  118
## 10   2nd Male Adult       No  154
## 11   3rd Male Adult       No  387
## 12  Crew Male Adult       No  670
## 25   1st Male Adult      Yes   57
## 26   2nd Male Adult      Yes   14
## 27   3rd Male Adult      Yes   75
## 28  Crew Male Adult      Yes  192
filter(titanic, Sex == 'Male' & Age == 'Adult')
##   Class  Sex   Age Survived Freq
## 1   1st Male Adult       No  118
## 2   2nd Male Adult       No  154
## 3   3rd Male Adult       No  387
## 4  Crew Male Adult       No  670
## 5   1st Male Adult      Yes   57
## 6   2nd Male Adult      Yes   14
## 7   3rd Male Adult      Yes   75
## 8  Crew Male Adult      Yes  192
filter(titanic, Sex == 'Male' & Class == 'Crew')
##   Class  Sex   Age Survived Freq
## 1  Crew Male Child       No    0
## 2  Crew Male Adult       No  670
## 3  Crew Male Child      Yes    0
## 4  Crew Male Adult      Yes  192
filter(titanic, Sex == 'Male' | Class == 'Crew')
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5   Crew Female Child       No    0
## 6    1st   Male Adult       No  118
## 7    2nd   Male Adult       No  154
## 8    3rd   Male Adult       No  387
## 9   Crew   Male Adult       No  670
## 10  Crew Female Adult       No    3
## 11   1st   Male Child      Yes    5
## 12   2nd   Male Child      Yes   11
## 13   3rd   Male Child      Yes   13
## 14  Crew   Male Child      Yes    0
## 15  Crew Female Child      Yes    0
## 16   1st   Male Adult      Yes   57
## 17   2nd   Male Adult      Yes   14
## 18   3rd   Male Adult      Yes   75
## 19  Crew   Male Adult      Yes  192
## 20  Crew Female Adult      Yes   20
filter(titanic, Class %in% c('Crew', '1st') )
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2   Crew   Male Child       No    0
## 3    1st Female Child       No    0
## 4   Crew Female Child       No    0
## 5    1st   Male Adult       No  118
## 6   Crew   Male Adult       No  670
## 7    1st Female Adult       No    4
## 8   Crew Female Adult       No    3
## 9    1st   Male Child      Yes    5
## 10  Crew   Male Child      Yes    0
## 11   1st Female Child      Yes    1
## 12  Crew Female Child      Yes    0
## 13   1st   Male Adult      Yes   57
## 14  Crew   Male Adult      Yes  192
## 15   1st Female Adult      Yes  140
## 16  Crew Female Adult      Yes   20
titanic[   ,  c('Sex', 'Age')]
##       Sex   Age
## 1    Male Child
## 2    Male Child
## 3    Male Child
## 4    Male Child
## 5  Female Child
## 6  Female Child
## 7  Female Child
## 8  Female Child
## 9    Male Adult
## 10   Male Adult
## 11   Male Adult
## 12   Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17   Male Child
## 18   Male Child
## 19   Male Child
## 20   Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25   Male Adult
## 26   Male Adult
## 27   Male Adult
## 28   Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
select(titanic, Sex, Age)
##       Sex   Age
## 1    Male Child
## 2    Male Child
## 3    Male Child
## 4    Male Child
## 5  Female Child
## 6  Female Child
## 7  Female Child
## 8  Female Child
## 9    Male Adult
## 10   Male Adult
## 11   Male Adult
## 12   Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17   Male Child
## 18   Male Child
## 19   Male Child
## 20   Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25   Male Adult
## 26   Male Adult
## 27   Male Adult
## 28   Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
sum(tail(head(iris),3 )$Sepal.Length)
## [1] 15
# magrittr
iris %>% head() %>% tail(3) %>% .$Sepal.Length %>% sum()
## [1] 15
# dplyr & Magrittr
### select age, sex from titanic where class in ('crew', '1st')

res <- titanic %>% filter(Class %in% c('Crew', '1st')) %>% select(Sex, Age) %>% group_by(Age) %>% summarise(cnt = n())

barplot(res$cnt, names.arg = res$Age, col = c('blue', 'orange'))