# Daily profile views over one week, one vector per network.
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Stack both vectors row-wise: row 1 holds linkedin, row 2 holds facebook.
views <- matrix(
  data = c(linkedin, facebook),
  nrow = 2,
  byrow = TRUE
)

# Relational operators are applied element-wise to a matrix ...
views > 15
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [2,] TRUE FALSE FALSE TRUE FALSE FALSE FALSE

# ... and to two vectors of the same length.
linkedin > facebook
## [1] FALSE TRUE TRUE FALSE FALSE TRUE FALSE

# `|` and `&` combine logical vectors element by element.
linkedin > 15 | linkedin <= 5
## [1] TRUE FALSE FALSE TRUE TRUE TRUE FALSE
linkedin > 10 & facebook > 10
## [1] TRUE FALSE FALSE FALSE FALSE TRUE TRUE

# Second row of the matrix (the facebook counts).
views[2, ]
## [1] 17 7 5 16 8 13 14
views[2, ] > 15
## [1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE

# Summing a logical vector counts its TRUE entries.
sum(views[2, ] > 15)
## [1] 2
f <- views[2, ] > 15
# Two equivalent ways to open the documentation page for mean().
help(mean)
?mean

# Show the formal arguments of the generic.
args(mean)
## function (x, ...)
## NULL

# A week of view counts with one missing observation.
linkedin <- c(16, 9, 13, 5, NA, 17, 14)

# A single NA makes the plain mean NA ...
mean(linkedin)
## [1] NA

# ... unless missing values are dropped first.
mean(linkedin, na.rm = TRUE)
## [1] 12.33333

# Additionally trim 20% of the sorted observations from each end.
mean(linkedin, trim = 0.2, na.rm = TRUE)
## [1] 13
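(cf) mean() argument `trim`: trim drops a fixed fraction of the observations from each end of the sorted data before the mean is computed. With trim = 0.3, 30% of the observations are removed from each end. For example, a vector of 10 values loses 3 values at each end: from the sorted vector (−21, −5, 2, 3, 4.2, 7, 8, 12, 18, 54), the values (−21, −5, 2) and (12, 18, 54) are discarded, and the mean of the remaining (3, 4.2, 7, 8) is computed, i.e. 5.55.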
# paste() joins its arguments with a single space by default.
speed <- 31
print(paste("Your speed is", speed))
## [1] "Your speed is 31"

# "NAME:YEAR" records for four mathematicians.
pioneers <- c(
  "GAUSS:1777",
  "BAYES:1702",
  "PASCAL:1623",
  "PEARSON:1857"
)

# Split every record at the colon; the result is a list of character vectors.
split_math <- strsplit(x = pioneers, split = ":")

# Lower-case each list element; lapply() always returns a list.
split_low <- lapply(X = split_math, FUN = tolower)
split_low
## [[1]]
## [1] "gauss" "1777"
##
## [[2]]
## [1] "bayes" "1702"
##
## [[3]]
## [1] "pascal" "1623"
##
## [[4]]
## [1] "pearson" "1857"
#' Return the first element of `x`, using single-bracket subsetting.
#'
#' @param x A vector (or list).
#' @return `x[1]`: a length-one object of the same type as `x`.
select_first <- function(x) x[1L]
#' Return the second element of `x`, using single-bracket subsetting.
#'
#' @param x A vector (or list).
#' @return `x[2]`: a length-one object of the same type as `x`.
select_second <- function(x) x[2L]
# Extract the name and the year from every split record with the helpers.
# NOTE: the variable `names` shares its name with base::names(); calls to
# names() still resolve to the function.
names <- lapply(X = split_low, FUN = select_first)
years <- lapply(X = split_low, FUN = select_second)
names
## [[1]]
## [1] "gauss"
##
## [[2]]
## [1] "bayes"
##
## [[3]]
## [1] "pascal"
##
## [[4]]
## [1] "pearson"
years
## [[1]]
## [1] "1777"
##
## [[2]]
## [1] "1702"
##
## [[3]]
## [1] "1623"
##
## [[4]]
## [1] "1857"

# The same extraction with anonymous functions instead of named helpers.
names <- lapply(split_low, function(x) x[1])
years <- lapply(split_low, function(x) x[2])
#' Return the element(s) of `x` at position `index`.
#'
#' @param x A vector (or list).
#' @param index Index passed to single-bracket subsetting.
#' @return `x[index]`.
select_el <- function(x, index) x[index]
# Extra arguments after FUN are forwarded to it on every call.
names <- lapply(X = split_low, FUN = select_el, index = 1)
years <- lapply(X = split_low, FUN = select_el, index = 2)
cities <- c("New York", "Paris", "London")

# lapply() returns a list, so flatten it to get a plain vector.
unlist(lapply(X = cities, FUN = nchar))
## [1] 8 5 6

# sapply() simplifies for us and names the result after the inputs ...
sapply(X = cities, FUN = nchar)
## New York Paris London
## 8 5 6

# ... unless the names are switched off.
sapply(X = cities, FUN = nchar, USE.NAMES = FALSE)
## [1] 8 5 6

# Three vectors of temperature readings.
temp <- list(
  c(3, 7, 9, 6, -1),
  c(6, 9, 12, 13, 5),
  c(4, 8, 3, -1, -3)
)

# Minimum per vector, returned as a list.
lapply(X = temp, FUN = min)
## [[1]]
## [1] -1
##
## [[2]]
## [1] 5
##
## [[3]]
## [1] -3

# Maximum per vector, simplified to a numeric vector.
sapply(X = temp, FUN = max)
## [1] 9 13 8
#' Average of the smallest and the largest value of `x`.
#'
#' @param x A numeric vector.
#' @return A single number: `(min(x) + max(x)) / 2`.
extremes_avg <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (lowest + highest) / 2
}
# One number per input vector, so sapply() simplifies to a numeric vector.
sapply(X = temp, FUN = extremes_avg)
## [1] 4.0 9.0 2.5
#' Smallest and largest value of `x` as a named vector.
#'
#' @param x A numeric vector.
#' @return A length-two vector with elements named `min` and `max`.
extremes <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}
# Every call returns a named vector of length two, so sapply() builds a
# matrix with one column per input vector.
sapply(X = temp, FUN = extremes)
## [,1] [,2] [,3]
## min -1 5 -3
## max 9 13 8

# lapply() keeps the per-vector results as separate list elements.
lapply(X = temp, FUN = extremes)
## [[1]]
## min max
## -1 9
##
## [[2]]
## min max
## 5 13
##
## [[3]]
## min max
## -3 8
#' Keep only the negative elements of `x`.
#'
#' @param x A numeric vector.
#' @return The elements of `x` that are below zero; the result may be empty.
below_zero <- function(x) {
  is_negative <- x < 0
  x[is_negative]
}
# Apply below_zero() with both sapply() and lapply() and compare the results.
freezing_s <- sapply(X = temp, FUN = below_zero)
freezing_l <- lapply(X = temp, FUN = below_zero)
identical(freezing_s, freezing_l)
## [1] TRUE
freezing_s
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
freezing_l
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
Because the length of below_zero()'s output varies from one input vector to the next, sapply() cannot simplify the result of lapply() into a vector or matrix. It therefore returns exactly the same list that lapply() does, as shown by the TRUE output of identical().
sapply() behaves the same way when it is used to apply a function that returns NULL over a vector or a list: there is nothing to simplify, so it returns a list of NULLs, just as lapply() would.
# Summarise two random samples; each call returns three named numbers, so
# the result is a 3 x 2 matrix. The values differ on every run because
# runif() draws random numbers.
sapply(
  X = list(runif(10), runif(10)),
  FUN = function(x) c(min = min(x), mean = mean(x), max = max(x))
)
## [,1] [,2]
## min 0.3291673 0.06498558
## mean 0.6606805 0.50216997
## max 0.8661332 0.77227106
#' Minimum, mean and maximum of `x` as a named vector.
#'
#' @param x A numeric vector.
#' @return A length-three vector with elements named `min`, `mean`, `max`.
basics <- function(x) {
  c(
    min = min(x),
    mean = mean(x),
    max = max(x)
  )
}
# vapply() checks that every result is a numeric vector of length three.
vapply(X = temp, FUN = basics, FUN.VALUE = numeric(3))
## [,1] [,2] [,3]
## min -1.0 5 -3.0
## mean 4.8 9 2.2
## max 9.0 13 8.0
#' Minimum, mean, median and maximum of `x` as a named vector.
#'
#' @param x A numeric vector.
#' @return A length-four vector with elements named `min`, `mean`,
#'   `median`, `max`.
basics <- function(x) {
  c(
    min = min(x),
    mean = mean(x),
    median = median(x),
    max = max(x)
  )
}
# The template must match the new result length: four numbers per vector.
vapply(X = temp, FUN = basics, FUN.VALUE = numeric(4))
## [,1] [,2] [,3]
## min -1.0 5 -3.0
## mean 4.8 9 2.2
## median 6.0 9 3.0
## max 9.0 13 8.0
#' Smallest and largest character of a name under string comparison.
#'
#' Spaces are removed first; the remaining characters are compared with
#' min()/max(), so the ordering follows the session's collation rules.
#'
#' @param name A single character string.
#' @return A character vector with elements named `first` and `last`.
first_and_last <- function(name) {
  squeezed <- gsub(" ", "", name)
  # Splitting on "" yields one element per character.
  chars <- strsplit(squeezed, split = "")[[1]]
  c(first = min(chars), last = max(chars))
}
# Both calls build the same 2 x 3 character matrix; vapply() additionally
# checks that every result is a character vector of length two.
sapply(X = cities, FUN = first_and_last)
## New York Paris London
## first "e" "a" "d"
## last "Y" "s" "o"
vapply(X = cities, FUN = first_and_last, FUN.VALUE = character(2))
## New York Paris London
## first "e" "a" "d"
## last "Y" "s" "o"
# Round each error to a whole number, drop the sign, and add everything up.
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)
sum(abs(round(x = errors, digits = 0)))
## [1] 29

# A vector and its reverse hold the same values, so the mean of both
# combined equals the mean of the original.
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(x = vec1)
mean(x = c(vec1, vec2))
## [1] 4.48
# View counts stored as lists this time (one list element per day).
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Flatten each list into a numeric vector. unlist() is required here:
# as.vector() returns a list unchanged, so it would not convert anything.
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Append fb_vec to li_vec: social_vec
social_vec <- append(li_vec, fb_vec)

# Sort social_vec from the highest count to the lowest.
sort(social_vec, decreasing = TRUE)
## [1] 17 17 16 16 14 14 13 13 9 8 7 5 5 2

# Repeat the sequence 1, 3, 5, 7 seven times.
rep(seq(1, 7, by = 2), times = 7)
## [1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7
regex : Regular Expressions
animals <- c("cat", "moose", "impala", "ant", "kiwi")

# grepl() returns a logical per element; grep() returns the matching indices.
grepl("a", animals) # [1] TRUE FALSE TRUE TRUE FALSE
grep("a", animals) # [1] 1 3 4

# "^" anchors the match at the start of the string, "$" at the end.
grepl("^a", animals) # [1] FALSE FALSE FALSE TRUE FALSE
grep("^a", animals) # [1] 4
grepl("a$", animals) # [1] FALSE FALSE TRUE FALSE FALSE

# which() on the logical result gives the same indices as grep().
which(grepl("a", animals)) # [1] 1 3 4

# sub() replaces only the first match per string, gsub() replaces all.
sub("a", "o", animals) # [1] "cot" "moose" "impola" "ont" "kiwi"
gsub("a", "o", animals) # [1] "cot" "moose" "impolo" "ont" "kiwi"

# "|" is alternation: the first "a" or "i" in each string is replaced.
sub("a|i", "_", animals) # [1] "c_t" "moose" "_mpala" "_nt" "k_wi"
emails <- c(
  "john.doe@ivyleague.edu",
  "education@world.gov",
  "dalai.lama@peace.org",
  "invalid.edu",
  "quant@bigdatacollege.edu",
  "cookie.monster@sesame.tv"
)

# Find addresses with an "@" followed by a domain that ENDS in ".edu".
# The "$" anchor keeps ".edu" from matching in the middle of a domain and
# makes this pattern agree with the one used by sub() below.
hits <- grep("@.*\\.edu$", x = emails)
emails[hits]
## [1] "john.doe@ivyleague.edu" "quant@bigdatacollege.edu"

# Replace everything from "@" to the final ".edu" with "@datacamp.edu";
# strings that do not match are returned unchanged.
sub(pattern = "@.*\\.edu$", replacement = "@datacamp.edu", x = emails)
## [1] "john.doe@datacamp.edu" "education@world.gov"
## [3] "dalai.lama@peace.org" "invalid.edu"
## [5] "quant@datacamp.edu" "cookie.monster@sesame.tv"
# Free-text award summaries.
awards <- c(
  "Won 1 Oscar.",
  "Won 1 Oscar. Another 9 wins & 24 nominations.",
  "1 win and 2 nominations.",
  "2 wins & 3 nominations.",
  "Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
  "4 wins & 1 nomination."
)

# Replace each whole string by the number captured in front of "nomination".
sub(
  pattern = ".*\\s([0-9]+)\\snomination.*$",
  replacement = "\\1",
  x = awards
)
## [1] "Won 1 Oscar." "24" "2" "3"
## [5] "2" "1"
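The ([0-9]+) group captures the entire number that comes before the word “nomination” in the string, and the entire match is replaced by this number because the \1 in the replacement refers to the content matched inside the parentheses. Strings that do not match the pattern, such as "Won 1 Oscar.", are returned unchanged.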
# Today's date and the current timestamp; the recorded outputs below come
# from the session in which this was originally run.
my_date <- Sys.Date()
class(my_date)
## [1] "Date"

my_time <- Sys.time()
class(my_time)
## [1] "POSIXct" "POSIXt"

# ISO "YYYY-MM-DD" strings parse without an explicit format.
my_date2 <- as.Date("1971-05-14") # as.Date("1971-14-05", format = "%Y-%d-%m")

# Date arithmetic is done in days.
my_date + 1
## [1] "2017-05-31"
my_date2 <- as.Date("1998-08-29")
my_date2 - my_date
## Time difference of -6849 days

# POSIXct arithmetic is done in seconds.
my_time + 1 # seconds incremented by 1
## [1] "2017-05-30 17:17:30 EDT"

# NOTE(review): the trailing " CET" does not appear to be parsed as a time
# zone; the recorded difference below matches a local-time interpretation.
# Pass tz = "CET" explicitly if that zone is intended - confirm.
my_time2 <- as.POSIXct("1974-07-14 21:11:55 CET")
my_time2 - my_time
## Time difference of -15660.84 days

# Strip the class to see the underlying number of the Date object.
unclass(my_date)
## [1] 17316
# The same kind of information written in three different layouts.
str1 <- "May 23, '96"
str2 <- "2012-03-15"
str3 <- "30/January/2006"

# Parse each string with a format that mirrors its layout:
# %b abbreviated month, %B full month, %d day, %y 2-digit / %Y 4-digit year.
date1 <- as.Date(x = str1, format = "%b %d, '%y")
date2 <- as.Date(x = str2, format = "%Y-%m-%d")
date3 <- as.Date(x = str3, format = "%d/%B/%Y")

# format() turns a Date back into text using the same conversion codes.
format(x = date1, "%A")
## [1] "Thursday"
format(x = date2, "%d")
## [1] "15"
format(x = date3, "%b %Y")
## [1] "Jan 2006"
# Timestamps in two different layouts.
str1 <- "May 23, '96 hours:23 minutes:01 seconds:45"
str2 <- "2012-3-12 14:23:08"

# Convert the strings to POSIXct objects: time1, time2.
# Literal text in the format ("hours:", "minutes:", ...) must match the input.
time1 <- as.POSIXct(str1, format = "%B %d, '%y hours:%H minutes:%M seconds:%S")
# Pass the layout through the `format` argument so it is actually applied.
time2 <- as.POSIXct(str2, format = "%Y-%m-%d %H:%M:%S")

# Convert times to formatted strings
format(time1, "%M")
## [1] "01"
format(time2, "%I:%M %p")
## [1] "02:23 PM"
# Five dates, parsed straight into a Date vector.
pizza <- as.Date(c(
  "2017-05-12",
  "2017-05-14",
  "2017-05-19",
  "2017-05-25",
  "2017-05-30"
))

# Gaps between consecutive dates, in days.
day_diff <- diff(pizza)
day_diff
## Time differences in days
## [1] 2 5 6 5

# Average gap between two consecutive dates.
mean(day_diff)
## Time difference of 4.5 days