# Relational operators: ==, !=, <, >, <= and >=
#logical variables
TRUE == TRUE
## [1] TRUE
#logical variables
TRUE == FALSE
## [1] FALSE
#strings
"hello" == "goodbye"
## [1] FALSE
#numbers
3 == 2
## [1] FALSE
#logical variables
TRUE != TRUE
## [1] FALSE
#logical variables
TRUE != FALSE
## [1] TRUE
#strings
"hello" != "goodbye"
## [1] TRUE
#numbers
3 != 2
## [1] TRUE
#numerical
3 > 5
## [1] FALSE
3 < 5
## [1] TRUE
#strings (compared alphabetically)
"hello" > "goodbye"
## [1] TRUE
#logical variables (TRUE is treated as 1 and FALSE as 0)
TRUE < FALSE
## [1] FALSE
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
linkedin
## [1] 16 9 13 5 2 17 14
linkedin > 10
## [1] TRUE FALSE TRUE FALSE FALSE TRUE TRUE
facebook <- c(17, 7, 5, 16, 8, 13, 14)
facebook
## [1] 17 7 5 16 8 13 14
facebook <= linkedin
## [1] FALSE TRUE TRUE FALSE FALSE TRUE TRUE
# Logical operators:
#   &  ~ TRUE if and only if both are TRUE
#   |  ~ TRUE if at least one is TRUE
#   !  ~ negates the value
TRUE & TRUE
## [1] TRUE
TRUE & FALSE
## [1] FALSE
TRUE | FALSE
## [1] TRUE
FALSE | FALSE
## [1] FALSE
!TRUE
## [1] FALSE
!FALSE
## [1] TRUE
#Logical Operators & Vectors
a <- c(TRUE, TRUE, FALSE)
b <- c(TRUE, FALSE, FALSE)
a & b
## [1] TRUE FALSE FALSE
a | b
## [1] TRUE TRUE FALSE
a
## [1] TRUE TRUE FALSE
!a
## [1] FALSE FALSE TRUE
b
## [1] TRUE FALSE FALSE
!b
## [1] FALSE TRUE TRUE
# & vs && and | vs ||
# & and | are vectorised: they compare their operands element by element.
# && and || work on single (length-one) values only. Older versions of R
# silently looked at just the first element of each vector; since R 4.3.0
# passing a longer vector is an error, so the first elements are selected
# explicitly here.
c(TRUE, TRUE, FALSE) & c(TRUE, FALSE, FALSE)
## [1] TRUE FALSE FALSE
c(TRUE, TRUE, FALSE)[1] && c(TRUE, FALSE, FALSE)[1]
## [1] TRUE
c(TRUE, TRUE, FALSE) | c(TRUE, FALSE, FALSE)
## [1] TRUE TRUE FALSE
c(TRUE, TRUE, FALSE)[1] || c(TRUE, FALSE, FALSE)[1]
## [1] TRUE
#Note: Only the branch of the first condition that evaluates to TRUE is run; the conditions after it are not checked, even if they would also be TRUE.
# Classify x as negative, zero or positive. The conditions are tested from top
# to bottom and exactly one branch is executed.
x <- -3
if (x < 0) {
  print("x is a negative number")
} else if (x == 0) {
  print("x is zero")
} else {
  print("x is a positive number")
}
## [1] "x is a negative number"

# Same chain, now with x equal to zero: the second branch is taken.
x <- 0
if (x < 0) {
  print("x is a negative number")
} else if (x == 0) {
  print("x is zero")
} else {
  print("x is a positive number")
}
## [1] "x is zero"

# Same chain, now with a positive x: neither condition holds, so else runs.
x <- 5
if (x < 0) {
  print("x is a negative number")
} else if (x == 0) {
  print("x is zero")
} else {
  print("x is a positive number")
}
## [1] "x is a positive number"
# ctr is a counter variable; the loop body repeats for as long as ctr <= 7
ctr <- 1
while (ctr <= 7) {
  print(paste("ctr is set to", ctr))
  ctr <- ctr + 1  # without this increment the condition would never change
}
## [1] "ctr is set to 1"
## [1] "ctr is set to 2"
## [1] "ctr is set to 3"
## [1] "ctr is set to 4"
## [1] "ctr is set to 5"
## [1] "ctr is set to 6"
## [1] "ctr is set to 7"
# Same counter loop, but it is abandoned early with break
ctr <- 1
while (ctr <= 7) {
  # Leave the loop as soon as ctr is a multiple of 5
  if (ctr %% 5 == 0) {
    break
  }
  print(paste("ctr is set to", ctr))
  ctr <- ctr + 1
}
## [1] "ctr is set to 1"
## [1] "ctr is set to 2"
## [1] "ctr is set to 3"
## [1] "ctr is set to 4"
#While loop stops if ctr is 5, no more printouts
Cities <- c(
  "New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town"
)
# Visit the elements of Cities one after another and print each of them
for (City in Cities) {
  print(City)
}
## [1] "New York"
## [1] "Paris"
## [1] "London"
## [1] "Tokyo"
## [1] "Rio de Janeiro"
## [1] "Cape Town"
#On each iteration, the loop assigns the next value in the vector Cities to the variable City, then prints it.
Cities <- c(
  "New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town"
)
for (City in Cities) {
  # nchar() returns the number of characters in a string
  if (nchar(City) == 6) {
    break  # stop the whole loop at the first six-character city
  }
  print(City)
}
## [1] "New York"
## [1] "Paris"
# Same loop, but six-character cities are skipped instead of ending the loop
for (City in Cities) {
  if (nchar(City) == 6) {
    next  # jump straight to the next element without printing
  }
  print(City)
}
## [1] "New York"
## [1] "Paris"
## [1] "Tokyo"
## [1] "Rio de Janeiro"
## [1] "Cape Town"
#What if we also want to know the position of each element in the vector?
#We'll let i progress from 1 to the length of the Cities vector (which is 6) in steps of 1.
#seq_along(Cities) is used rather than 1:length(Cities): for an empty vector
#1:0 counts down (1, 0) and would run the body twice, seq_along() runs it zero times.
for (i in seq_along(Cities)) {
  print(Cities[i]) #notice change in notation. we now gain access to the index
}
## [1] "New York"
## [1] "Paris"
## [1] "London"
## [1] "Tokyo"
## [1] "Rio de Janeiro"
## [1] "Cape Town"
# Print each city together with its position in the vector.
# seq_along() yields 1, 2, ..., length(Cities) and, unlike 1:length(Cities),
# yields nothing at all when the vector is empty.
for (i in seq_along(Cities)) {
  print(paste(Cities[i], "is on position", i, "in the Cities vector."))
}
## [1] "New York is on position 1 in the Cities vector."
## [1] "Paris is on position 2 in the Cities vector."
## [1] "London is on position 3 in the Cities vector."
## [1] "Tokyo is on position 4 in the Cities vector."
## [1] "Rio de Janeiro is on position 5 in the Cities vector."
## [1] "Cape Town is on position 6 in the Cities vector."
# sd()
# ?sd and help(sd) give more details on this function
# Usage: sd(x, na.rm = FALSE)
values <- c(1, 5, 6, NA)
sd(x = values, na.rm = FALSE)
## [1] NA
sd(x = values, na.rm = TRUE)
## [1] 2.645751
args(sd)
## function (x, na.rm = FALSE)
## NULL
ls() #shows what variables are in your workspace
## [1] "a" "b" "Cities" "City" "ctr" "facebook"
## [7] "i" "linkedin" "values" "x"
# General template: my_fun <- function(arg1, arg2){ body }
#creating triple() function
triple <- function(x) {
  # Three times the input; a function returns the value of its last expression,
  # so no explicit return() is needed.
  3 * x
}
triple(6)
## [1] 18
#creating math_majic() function
# Adds the product of a and b to their quotient (a * b + a / b).
math_majic <- function(a, b) {
  product <- a * b
  quotient <- a / b
  product + quotient
}
math_majic(4, 2)
## [1] 10
#What if we supply only one argument when the function requires two? We can make the second argument optional by giving it a default value.
# With b = 1 as the default, math_majic(a) evaluates to a * 1 + a / 1.
math_majic <- function(a, b = 1) {
  product <- a * b
  quotient <- a / b
  product + quotient
}
math_majic(4)
## [1] 8
# install.packages("ggvis")
# search() function to see what packages are preloaded
# library() function
# require() function
library("ggvis")
search()
## [1] ".GlobalEnv" "package:ggvis" "package:ggplot2"
## [4] "package:dplyr" "package:stats" "package:graphics"
## [7] "package:grDevices" "package:utils" "package:datasets"
## [10] "package:methods" "Autoloads" "package:base"
ggvis(mtcars, ~wt, ~hp)
# A list is necessary to store heterogeneous content
# lapply()!!! lapply() goes over each element in nyc and applies the function class()
#Let's look at some information on NYC:
nyc <- list(
  pop = 8405837,
  boroughs = c("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island"),
  capital = FALSE
)
#Suppose you want to find out the class of each element of this list
#One way is a for loop that visits the elements one at a time
for (info in nyc) {
  print(class(info))
}
## [1] "numeric"
## [1] "character"
## [1] "logical"
lapply(nyc, class)
## $pop
## [1] "numeric"
##
## $boroughs
## [1] "character"
##
## $capital
## [1] "logical"
cities <- c(
  "New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town"
)
# Count the characters of every city name with an explicit loop.
# The result vector is allocated once at its final length instead of being
# grown from c() on every iteration, and seq_along() makes the loop run zero
# times (rather than over 1:0) should cities ever be empty.
num_chars <- integer(length(cities))
for (i in seq_along(cities)) {
  num_chars[i] <- nchar(cities[i])
}
cities <- c("New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town")
result <- lapply(cities, nchar)
str(result)
## List of 6
## $ : int 8
## $ : int 5
## $ : int 6
## $ : int 5
## $ : int 14
## $ : int 9
unlist(result)
## [1] 8 5 6 5 14 9
# The output of the lapply() function is a list, even when the input is a vector!
# unlist() function
unlist(lapply(cities, nchar))
## [1] 8 5 6 5 14 9
# Example: Assume you have a list of oil prices in $ per gallon
oil_prices <- list(2.37, 2.49, 2.18, 2.22, 2.47, 2.32)
#We want to create a function that triples each value
triple <- function(x) {
  # The value of the last expression is what the function returns
  tripled <- 3 * x
  tripled
}
result <- lapply(oil_prices, triple)
str(result)
## List of 6
## $ : num 7.11
## $ : num 7.47
## $ : num 6.54
## $ : num 6.66
## $ : num 7.41
## $ : num 6.96
unlist(result)
## [1] 7.11 7.47 6.54 6.66 7.41 6.96
#Now you want a function that can multiply by any number, not just 3
#Add a second argument, called factor, that holds the multiplier
multiply <- function(x, factor) {
  scaled <- x * factor
  scaled
}
times3 <- lapply(oil_prices, multiply, factor = 3)
unlist(times3)
## [1] 7.11 7.47 6.54 6.66 7.41 6.96
times4 <- lapply(oil_prices, multiply, factor = 4)
unlist(times4)
## [1] 9.48 9.96 8.72 8.88 9.88 9.28
# Optional argument: USE.NAMES = FALSE
# unique_letters is a function that generates an output of unique letters in each word
cities <- c("New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town")
sapply(cities, nchar)
## New York Paris London Tokyo Rio de Janeiro
## 8 5 6 5 14
## Cape Town
## 9
# lapply(X, FUN, ...)
# sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
# vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
# Example: where sapply() and vapply() act very similarly
cities <- c("New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town")
sapply(cities, nchar)
## New York Paris London Tokyo Rio de Janeiro
## 8 5 6 5 14
## Cape Town
## 9
vapply(cities, nchar, numeric(1))
## New York Paris London Tokyo Rio de Janeiro
## 8 5 6 5 14
## Cape Town
## 9
# Consider another example we saw prior:
cities <- c(
  "New York", "Paris", "London", "Tokyo", "Rio de Janeiro", "Cape Town"
)

# Alphabetically first and last letter of a name.
#   name: a single character string.
#   Returns a named character vector c(first = ..., last = ...).
# Spaces are stripped first so that a blank is never reported as a letter.
# min() and max() on characters use the collation order of the current locale.
first_and_last <- function(name) {
  name <- gsub(" ", "", name)
  chars <- strsplit(name, split = "")[[1]]
  c(first = min(chars), last = max(chars))
}

# Distinct letters of a name, in order of first appearance.
#   name: a single character string.
#   Returns a character vector whose length depends on the name.
unique_letters <- function(name) {
  name <- gsub(" ", "", name)
  # [[1]] extracts the character vector itself; [1] would keep a one-element
  # list and unique() would then have no duplicate letters to remove.
  chars <- strsplit(name, split = "")[[1]]
  unique(chars)
}

# Every call returns exactly two values, so sapply() simplifies to a matrix
# (output below as printed under an English collation order)
sapply(cities, first_and_last)
## New York Paris London Tokyo Rio de Janeiro Cape Town
## first "e" "a" "d" "k" "a" "a"
## last "Y" "s" "o" "y" "R" "w"
vapply(cities, first_and_last, character(2))
## New York Paris London Tokyo Rio de Janeiro Cape Town
## first "e" "a" "d" "k" "a" "a"
## last "Y" "s" "o" "y" "R" "w"
# The results have different lengths, so sapply() cannot simplify them and
# returns a list
sapply(cities, unique_letters)
## $`New York`
## [1] "N" "e" "w" "Y" "o" "r" "k"
##
## $Paris
## [1] "P" "a" "r" "i" "s"
##
## $London
## [1] "L" "o" "n" "d"
##
## $Tokyo
## [1] "T" "o" "k" "y"
##
## $`Rio de Janeiro`
## [1] "R" "i" "o" "d" "e" "J" "a" "n" "r"
##
## $`Cape Town`
## [1] "C" "a" "p" "e" "T" "o" "w" "n"
#Note: vapply() is safer than sapply()
li <- list(log = TRUE,
ch = "hello",
int_vec = sort(rep(seq(8, 2, by = -2), times = 2)))
#lets take a closer look inside int_vec
sort(rep(seq(8, 2, by = -2), times = 2))
## [1] 2 2 4 4 6 6 8 8
#innermost function generates a sequence going from 8 to 2 by steps of -2
seq(8, 2, by = -2)
## [1] 8 6 4 2
# rep() replicates its input
# times = 2 repeats the whole vector twice, doubling its length
# each = 2 would instead repeat each element twice before moving to the next
rep(c(8, 6, 4, 2), times = 2)
## [1] 8 6 4 2 8 6 4 2
# next inspect the sort() function
# sorts the elements of the vector in increasing order (default decreasing = FALSE)
# can have elements listed in descending order by setting decreasing = TRUE
sort(c(8, 6, 4, 2, 8, 6, 4, 2), decreasing = FALSE)
## [1] 2 2 4 4 6 6 8 8
# Using the str function to inspect the contents of the data structure
str(li)
## List of 3
## $ log : logi TRUE
## $ ch : chr "hello"
## $ int_vec: num [1:8] 2 2 4 4 6 6 8 8
# is.list tells if data structure is a list, returns T or F value
is.list(li) #list
## [1] TRUE
is.list(c(1, 2, 3)) #vector
## [1] FALSE
# as.list lets you convert to a list if need be
li2 <- as.list(c(1, 2, 3))
# inspect unlist() of li
unlist(li)
## log ch int_vec1 int_vec2 int_vec3 int_vec4 int_vec5 int_vec6
## "TRUE" "hello" "2" "2" "4" "4" "6" "6"
## int_vec7 int_vec8
## "8" "8"
# notice the difference in structure when using append() and rev()
str(rev(li))
## List of 3
## $ int_vec: num [1:8] 2 2 4 4 6 6 8 8
## $ ch : chr "hello"
## $ log : logi TRUE
str(append(li, rev(li)))
## List of 6
## $ log : logi TRUE
## $ ch : chr "hello"
## $ int_vec: num [1:8] 2 2 4 4 6 6 8 8
## $ int_vec: num [1:8] 2 2 4 4 6 6 8 8
## $ ch : chr "hello"
## $ log : logi TRUE
# ?regex
# grep() and grepl() functions
# grepl(pattern = <regex>, x = <string>)
# grepl() output is logical
# grep() returns the indices of the elements in the vector that yield a match
# sub() and gsub() functions
# sub(pattern = <regex>, replacement = <str>, x = <str>)
animals <- c("cat", "moose", "impala", "ant", "kiwi")
# to determine which animals have an 'a' in their name, we can use the grepl() function
grepl(pattern = "a", x = animals)
## [1] TRUE FALSE TRUE TRUE FALSE
# what if only want elements that START with the letter 'a'? use "^a".
grepl(pattern = "^a", x = animals)
## [1] FALSE FALSE FALSE TRUE FALSE
# what if we only want elements that END with the letter 'a'? use "a$".
grepl(pattern = "a$", x = animals)
## [1] FALSE FALSE TRUE FALSE FALSE
# See how the grep() function differs from grepl()
grep(pattern = "a", x = animals)
## [1] 1 3 4
# you can match this output using the grepl() function by using the which() function
which(grepl(pattern = "a", x = animals))
## [1] 1 3 4
# what if only want elements that START with the letter 'a'? use "^a".
grep(pattern = "^a", x = animals)
## [1] 4
# what if we only want elements that END with the letter 'a'? use "a$".
grep(pattern = "a$", x = animals)
## [1] 3
# sub() function allows you to take a certain pattern and replace it with something else
# Note: in "impala", only the first 'a' was replaced. This is because sub() replaces only the first match it finds in each element, then moves on to the next element.
sub(pattern = "a", replacement = "o", x = animals)
## [1] "cot" "moose" "impola" "ont" "kiwi"
# What if you want to be able to replace every pattern with a replacement? use gsub() function
gsub(pattern = "a", replacement = "o", x = animals)
## [1] "cot" "moose" "impolo" "ont" "kiwi"
# Notice how you can include the OR metacharacter (OR symbol: |)
# We want to replace the letters a, i, and o with an underscore
gsub(pattern = "a|i|o", replacement = "_", x = animals)
## [1] "c_t" "m__se" "_mp_l_" "_nt" "k_w_"
# Today, right now!
today <- Sys.Date()
today
## [1] "2017-10-19"
# Special type of variable in R called Date
class(today)
## [1] "Date"
# Current time & date
now <- Sys.time()
now
## [1] "2017-10-19 08:32:39 EDT"
# Not a simple string, class allows for date and time to be compatible over different operating systems
class(now)
## [1] "POSIXct" "POSIXt"
# Convert a character string to a Date
# Notice date format "YYYY-MM-DD"
# default format is "%Y-%m-%d"
my_date <- as.Date("1994-01-15")
my_date
## [1] "1994-01-15"
class(my_date)
## [1] "Date"
# What if the date is entered in a different format? We can specify the format explicitly
my_date <- as.Date("1994-15-01", format = "%Y-%d-%m")
my_date
## [1] "1994-01-15"
# To convert a string denoting an exact time, we can use as.POSIXct()
my_time <- as.POSIXct("1994-01-15 11:25:15")
my_time
## [1] "1994-01-15 11:25:15 EST"
my_date <- as.Date("1994-01-15")
my_date
## [1] "1994-01-15"
# day incremented by 1
my_date + 1
## [1] "1994-01-16"
# You can calculate the day difference between dates
my_date2 <- as.Date("1995-11-16")
my_date2 - my_date
## Time difference of 670 days
my_time <- as.POSIXct("1994-01-15 11:25:15")
my_time
## [1] "1994-01-15 11:25:15 EST"
# second incremented by 1
my_time + 1
## [1] "1994-01-15 11:25:16 EST"
# Calculate the difference between times
my_time2 <- as.POSIXct("1995-11-16 21:15:55")
my_time2 - my_time
## Time difference of 670.4102 days
# because the time difference is so large, R replaces the time difference with days
my_date <- as.Date("1994-01-15")
my_date3 <- as.Date("1970-01-15")
my_time <- as.POSIXct("1994-01-15 11:25:15")
my_time3 <- as.POSIXct("1970-01-15 01:00:00")
# output of unclass() will show how many days away from January 1, 1970
my_date
## [1] "1994-01-15"
unclass(my_date)
## [1] 8780
my_date3
## [1] "1970-01-15"
unclass(my_date3)
## [1] 14
# output of unclass() on POSIXct objects shows how many seconds away from midnight UTC on January 1, 1970
my_time
## [1] "1994-01-15 11:25:15 EST"
unclass(my_time)
## [1] 758651115
## attr(,"tzone")
## [1] ""
# approx 758MM seconds from January 1, 1970 00:00:00
my_time3
## [1] "1970-01-15 01:00:00 EST"
unclass(my_time3)
## [1] 1231200
## attr(,"tzone")
## [1] ""
# approx 1MM seconds from January 1, 1970 00:00:00