You can type Sys.Date() and R will return the current date. To get the current date and time, type Sys.time().
# Ask R for the current date
Sys.Date()
## [1] "2017-06-26"
# Ask R for the current date and time
Sys.time()
## [1] "2017-06-26 16:29:28 UTC"
# Store the current date in today
today <- Sys.Date()
# Check which class today belongs to
class(today)
## [1] "Date"
You can use the as.Date() function to create dates from character strings.
# Turn the character string into a Date
crash <- as.Date("2008-09-29")
# Show crash
crash
## [1] "2008-09-29"
# The number R stores for crash
as.numeric(crash)
## [1] 14151
# The number R stores for the current time
as.numeric(Sys.time())
## [1] 1498494568
# Incorrect date format
# as.Date("09/29/2008")
# You get an error stating that the character string is not in a standard unambiguous format
You can convert multiple dates from character to date format using vectors.
# Create dates from "2017-02-05" to "2017-02-08" inclusive.
# as.Date() accepts a whole character vector, so every element is converted
# to a Date in one call (without it, dates would stay a character vector).
dates <- as.Date(c("2017-02-05", "2017-02-06", "2017-02-07", "2017-02-08"))
# Add names to dates
names(dates) <- c("Sunday", "Monday", "Tuesday", "Wednesday")
# Subset dates to only return the date for Monday
dates["Monday"]
## Monday
## "2017-02-06"
You can use the format argument of as.Date() to tell R how to read dates that are not written in the standard format.
# "08,30,1930": month (%m), day (%d) and four-digit year (%Y), comma separated
# NOTE(review): the format strings below contain a space after each comma that
# the input strings lack; the printed results show the parse still succeeds.
as.Date("08,30,1930", format = "%m, %d, %Y")
## [1] "1930-08-30"
# "Aug 30,1930": abbreviated month name (%b), then day and four-digit year
as.Date("Aug 30,1930", format = "%b %d, %Y")
## [1] "1930-08-30"
# "30aug1930": day, abbreviated month name and year with no separators
as.Date("30aug1930", format = "%d%b%Y")
## [1] "1930-08-30"
You can convert objects that are already dates to differently formatted dates using format().
# Five strings of the form <day>jan17
char_dates <- paste0(1:5, "jan17")
# Parse them: day, abbreviated month name, two-digit year
dates <- as.Date(char_dates, format = "%d%b%y")
# Render "2017-01-04" as "Jan 04, 17"
format(dates, format = "%b %d, %y")
## [1] "Jan 01, 17" "Jan 02, 17" "Jan 03, 17" "Jan 04, 17" "Jan 05, 17"
# Render "2017-01-04" as "01,04,2017"
format(dates, format = "%m,%d,%Y")
## [1] "01,01,2017" "01,02,2017" "01,03,2017" "01,04,2017" "01,05,2017"
You can find the difference between two dates, in days, by using subtraction. Alternatively, you can use the difftime() function to find the time interval between them.
# Three consecutive days starting on 2017-01-01
dates <- as.Date("2017-01-01") + 0:2
# The reference date to measure from
origin <- as.Date("1970-01-01")
# The numbers stored for dates
as.numeric(dates)
## [1] 17167 17168 17169
# The interval between each date and origin
difftime(dates, origin)
## Time differences in days
## [1] 17167 17168 17169
You can extract date components by using the functions: months(), weekdays(), and quarters().
# One date in each quarter of 2017
dates <- as.Date(c("2017-01-02", "2017-05-03", "2017-08-04", "2017-10-17"))
# Month name of each date
months(dates)
## [1] "January" "May" "August" "October"
# Quarter of each date
quarters(dates)
## [1] "Q1" "Q2" "Q3" "Q4"
# Four consecutive days starting on 2017-01-02
dates2 <- as.Date("2017-01-02") + 0:3
# Label each element of dates2 with its weekday
names(dates2) <- weekdays(dates2)
# Show dates2
dates2
## Monday Tuesday Wednesday Thursday
## "2017-01-02" "2017-01-03" "2017-01-04" "2017-01-05"
You can compare data elements using <, <=, >, >=, == and !=.
# Stock prices
apple <- 48.99
micr <- 77.93
# Apple vs Microsoft
apple > micr
## [1] FALSE
# Not equals
apple != micr
## [1] TRUE
# Dates - today and tomorrow
# Sys.Date() already returns a Date, so no as.Date() wrapper is needed.
# tomorrow is derived from today so the two are always exactly one day apart,
# even if the script happens to run across midnight.
today <- Sys.Date()
tomorrow <- today + 1
# Today vs Tomorrow
tomorrow < today
## [1] FALSE
You can also compare vectors against each other or a single number against each element in a vector.
# Build the stocks data frame.
# The dates are parsed with as.Date() so the date column holds real Date
# values; the comparisons against as.Date(...) further down rely on that.
date <- as.Date(c("2017-01-20", "2017-01-23", "2017-01-24", "2017-01-25"))
ibm <- c(170.55, 171.03, 175.90, 178.29)
panera <- c(216.65, 216.06, 213.55, 212.22)
stocks <- data.frame(date, ibm, panera)
# Print stocks
stocks
## date ibm panera
## 1 2017-01-20 170.55 216.65
## 2 2017-01-23 171.03 216.06
## 3 2017-01-24 175.90 213.55
## 4 2017-01-25 178.29 212.22
# IBM range
stocks$ibm_buy <- stocks$ibm < 175
# Panera range
stocks$panera_sell <- stocks$panera > 213
# IBM vs Panera: compare the two price columns row by row
# (the string literals "ibm" and "panera" would only compare the two names)
stocks$ibm_vs_panera <- stocks$ibm > stocks$panera
# Print stocks
stocks
## date ibm panera ibm_buy panera_sell ibm_vs_panera
## 1 2017-01-20 170.55 216.65 TRUE TRUE FALSE
## 2 2017-01-23 171.03 216.06 TRUE TRUE FALSE
## 3 2017-01-24 175.90 213.55 FALSE TRUE FALSE
## 4 2017-01-25 178.29 212.22 FALSE FALSE FALSE
To apply multiple conditions to data you can use the And operator &, and the Or operator |.
# IBM buy range
stocks$ibm_buy_range <- (stocks$ibm > 171) & (stocks$ibm < 176)
# Panera spikes
stocks$panera_spike <- (stocks$panera < 213.20) | (stocks$panera > 216.50)
# Date range: strictly after 2017-01-21 and strictly before 2017-01-25
stocks$good_dates <- (stocks$date > as.Date("2017-01-21")) & (stocks$date < as.Date("2017-01-25"))
# Print stocks
stocks
## date ibm panera ibm_buy panera_sell ibm_vs_panera ibm_buy_range
## 1 2017-01-20 170.55 216.65 TRUE TRUE FALSE FALSE
## 2 2017-01-23 171.03 216.06 TRUE TRUE FALSE TRUE
## 3 2017-01-24 175.90 213.55 FALSE TRUE FALSE TRUE
## 4 2017-01-25 178.29 212.22 FALSE FALSE FALSE FALSE
## panera_spike good_dates
## 1 TRUE FALSE
## 2 FALSE TRUE
## 3 FALSE TRUE
## 4 TRUE FALSE
Adding ! in front of a logical expression will change that expression from TRUE to FALSE (and vice versa).
# IBM range
!(stocks$ibm > 176)
## [1] TRUE TRUE TRUE FALSE
# Missing data
missing <- c(24.5, 25.7, NA, 28, 28.6, NA)
# Is missing?
is.na(missing)
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# Not missing?
!is.na(missing)
## [1] TRUE TRUE FALSE TRUE TRUE FALSE
Use subset() on a data frame, vector or matrix to return only the rows (or elements) that meet a logical condition.
# Panera range
subset(stocks, panera > 216)
## date ibm panera ibm_buy panera_sell ibm_vs_panera ibm_buy_range
## 1 2017-01-20 170.55 216.65 TRUE TRUE FALSE FALSE
## 2 2017-01-23 171.03 216.06 TRUE TRUE FALSE TRUE
## panera_spike good_dates
## 1 TRUE FALSE
## 2 FALSE TRUE
# Specific date
subset(stocks, date == as.Date("2017-01-23"))
## date ibm panera ibm_buy panera_sell ibm_vs_panera ibm_buy_range
## 2 2017-01-23 171.03 216.06 TRUE TRUE FALSE TRUE
## panera_spike good_dates
## 2 FALSE TRUE
# IBM and Panera joint range
subset(stocks, ibm < 175 & panera < 216.5)
## date ibm panera ibm_buy panera_sell ibm_vs_panera ibm_buy_range
## 2 2017-01-23 171.03 216.06 TRUE TRUE FALSE TRUE
## panera_spike good_dates
## 2 FALSE TRUE
Do an exercise that combines a number of operators together.
# Define stocks
# Every calendar day from 2016-12-01 to 2016-12-30 ...
date <- seq(from = as.Date("2016-12-01"), to = as.Date("2016-12-30"), by = "days")
# ... minus the 26th element (2016-12-26), leaving 29 dates to match the
# 29 prices in each vector below
date <- date[-26]
# Apple prices; NA marks days without a price
apple <- c(109.49, 109.90, NA, NA, 109.11, 109.95, 111.03, 112.12, 113.95, NA, NA, 113.30,
115.19, 115.19, 115.82, 115.97, NA, NA, 116.64, 116.95, 117.06, 116.29, 116.52,
NA, NA, 117.26, 116.76, 116.73, 115.82)
# Microsoft prices; NA falls on the same positions as in apple
micr <- c(59.20, 59.25, NA, NA, 60.22, 59.95, 61.37, 61.01, 61.97, NA, NA, 62.17, 62.98,
62.68, 62.58, 62.30, NA, NA, 63.62, 63.54, 63.54, 63.55, 63.24, NA, NA, 63.28,
62.99, 62.90, 62.14)
# One row per date with both price series as columns
stocks <- data.frame(date = date, apple = apple, micr = micr)
# View stocks
stocks
## date apple micr
## 1 2016-12-01 109.49 59.20
## 2 2016-12-02 109.90 59.25
## 3 2016-12-03 NA NA
## 4 2016-12-04 NA NA
## 5 2016-12-05 109.11 60.22
## 6 2016-12-06 109.95 59.95
## 7 2016-12-07 111.03 61.37
## 8 2016-12-08 112.12 61.01
## 9 2016-12-09 113.95 61.97
## 10 2016-12-10 NA NA
## 11 2016-12-11 NA NA
## 12 2016-12-12 113.30 62.17
## 13 2016-12-13 115.19 62.98
## 14 2016-12-14 115.19 62.68
## 15 2016-12-15 115.82 62.58
## 16 2016-12-16 115.97 62.30
## 17 2016-12-17 NA NA
## 18 2016-12-18 NA NA
## 19 2016-12-19 116.64 63.62
## 20 2016-12-20 116.95 63.54
## 21 2016-12-21 117.06 63.54
## 22 2016-12-22 116.29 63.55
## 23 2016-12-23 116.52 63.24
## 24 2016-12-24 NA NA
## 25 2016-12-25 NA NA
## 26 2016-12-27 117.26 63.28
## 27 2016-12-28 116.76 62.99
## 28 2016-12-29 116.73 62.90
## 29 2016-12-30 115.82 62.14
# Weekday investigation
stocks$weekday <- weekdays(stocks$date)
# View stocks again
stocks
## date apple micr weekday
## 1 2016-12-01 109.49 59.20 Thursday
## 2 2016-12-02 109.90 59.25 Friday
## 3 2016-12-03 NA NA Saturday
## 4 2016-12-04 NA NA Sunday
## 5 2016-12-05 109.11 60.22 Monday
## 6 2016-12-06 109.95 59.95 Tuesday
## 7 2016-12-07 111.03 61.37 Wednesday
## 8 2016-12-08 112.12 61.01 Thursday
## 9 2016-12-09 113.95 61.97 Friday
## 10 2016-12-10 NA NA Saturday
## 11 2016-12-11 NA NA Sunday
## 12 2016-12-12 113.30 62.17 Monday
## 13 2016-12-13 115.19 62.98 Tuesday
## 14 2016-12-14 115.19 62.68 Wednesday
## 15 2016-12-15 115.82 62.58 Thursday
## 16 2016-12-16 115.97 62.30 Friday
## 17 2016-12-17 NA NA Saturday
## 18 2016-12-18 NA NA Sunday
## 19 2016-12-19 116.64 63.62 Monday
## 20 2016-12-20 116.95 63.54 Tuesday
## 21 2016-12-21 117.06 63.54 Wednesday
## 22 2016-12-22 116.29 63.55 Thursday
## 23 2016-12-23 116.52 63.24 Friday
## 24 2016-12-24 NA NA Saturday
## 25 2016-12-25 NA NA Sunday
## 26 2016-12-27 117.26 63.28 Tuesday
## 27 2016-12-28 116.76 62.99 Wednesday
## 28 2016-12-29 116.73 62.90 Thursday
## 29 2016-12-30 115.82 62.14 Friday
# Keep only the rows that have an Apple price
has_apple_price <- !is.na(stocks$apple)
stocks_no_NA <- stocks[has_apple_price, ]
# Rows where Apple is above 117 or Microsoft is above 63
subset(stocks_no_NA, apple > 117 | micr > 63)
## date apple micr weekday
## 19 2016-12-19 116.64 63.62 Monday
## 20 2016-12-20 116.95 63.54 Tuesday
## 21 2016-12-21 117.06 63.54 Wednesday
## 22 2016-12-22 116.29 63.55 Thursday
## 23 2016-12-23 116.52 63.24 Friday
## 26 2016-12-27 117.26 63.28 Tuesday
If statements add logical flow to your code. The basic structure of an if statement is: if(condition) { code } The condition is anything that returns a single TRUE or FALSE. If the condition is TRUE, then the code inside gets executed. Otherwise, the code gets skipped and the program continues.
# Microsoft price
micr <- 48.55
# Print a buy signal when the price is under 55
if (micr < 55) {
  print("Buy!")
}
## [1] "Buy!"
If you add else after an if statement you can perform a different action if the condition is false.
# Microsoft price
micr <- 57.44
# Buy under 55, otherwise do nothing
if (micr < 55) {
  print("Buy!")
} else {
  print("Do nothing!")
}
## [1] "Do nothing!"
You can add even more logic by using: if, else if, else. You can add as many else if’s as you need.
# micr
micr <- 105.67
# Buy under 55, hold from 55 up to (but not including) 75, sell otherwise.
# if() needs a single TRUE/FALSE, so the two tests are combined with the
# scalar, short-circuiting && rather than the elementwise &.
if (micr < 55) {
  print("Buy!")
} else if (micr >= 55 && micr < 75) {
  print("Do nothing!")
} else {
  print("Sell!")
}
## [1] "Sell!"
You can create nested if statements by using the following structure:
if (condition1) {
  if (condition2) {
    code if both pass
  } else {
    code if 1 passes, 2 fails
  }
} else {
  code if 1 fails
}
# micr
micr <- 105.67
shares <- 1
# Buy under 55, hold from 55 up to (but not including) 75; at 75 or above,
# sell only if at least one share is held.
# if() needs a single TRUE/FALSE, so the two tests are combined with the
# scalar, short-circuiting && rather than the elementwise &.
if (micr < 55) {
  print("Buy!")
} else if (micr >= 55 && micr < 75) {
  print("Do nothing!")
} else {
  if (shares >= 1) {
    print("Sell!")
  } else {
    print("Not enough shares to sell!")
  }
}
## [1] "Sell!"
ifelse() creates an if statement in 1 line of code and it works on entire vectors.
# Define stocks
date <- seq(from = as.Date("2016-12-01"), to = as.Date("2016-12-30"), by = "days")
date <- date[-c(3,4,10,11,17,18,24,25,26)]
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95, 113.30,
115.19, 115.19, 115.82, 115.97, 116.64, 116.95, 117.06, 116.29, 116.52,
117.26, 116.76, 116.73, 115.82)
micr <- c(59.20, 59.25, 60.22, 59.95, 61.37, 61.01, 61.97, 62.17, 62.98,
62.68, 62.58, 62.30, 63.62, 63.54, 63.54, 63.55, 63.24, 63.28,
62.99, 62.90, 62.14)
stocks <- data.frame(date = date, apple = apple, micr = micr)
# Microsoft test: 1 where micr is strictly between 60 and 62, otherwise 0
stocks$micr_buy <- ifelse(test = stocks$micr > 60 & stocks$micr < 62, yes = 1, no = 0)
# Apple test: keep the date where apple is above 117, otherwise NA.
# The printed result below shows the kept dates as plain numbers (e.g. 17156),
# which is why the class of apple_date is set back to "Date" afterwards.
stocks$apple_date <- ifelse(test = stocks$apple >117, yes = stocks$date, no = NA)
# Print stocks
stocks
## date apple micr micr_buy apple_date
## 1 2016-12-01 109.49 59.20 0 NA
## 2 2016-12-02 109.90 59.25 0 NA
## 3 2016-12-05 109.11 60.22 1 NA
## 4 2016-12-06 109.95 59.95 0 NA
## 5 2016-12-07 111.03 61.37 1 NA
## 6 2016-12-08 112.12 61.01 1 NA
## 7 2016-12-09 113.95 61.97 1 NA
## 8 2016-12-12 113.30 62.17 0 NA
## 9 2016-12-13 115.19 62.98 0 NA
## 10 2016-12-14 115.19 62.68 0 NA
## 11 2016-12-15 115.82 62.58 0 NA
## 12 2016-12-16 115.97 62.30 0 NA
## 13 2016-12-19 116.64 63.62 0 NA
## 14 2016-12-20 116.95 63.54 0 NA
## 15 2016-12-21 117.06 63.54 0 17156
## 16 2016-12-22 116.29 63.55 0 NA
## 17 2016-12-23 116.52 63.24 0 NA
## 18 2016-12-27 117.26 63.28 0 17162
## 19 2016-12-28 116.76 62.99 0 NA
## 20 2016-12-29 116.73 62.90 0 NA
## 21 2016-12-30 115.82 62.14 0 NA
# Change the class() of apple_date.
class(stocks$apple_date) <- "Date"
# Print stocks again
stocks
## date apple micr micr_buy apple_date
## 1 2016-12-01 109.49 59.20 0 <NA>
## 2 2016-12-02 109.90 59.25 0 <NA>
## 3 2016-12-05 109.11 60.22 1 <NA>
## 4 2016-12-06 109.95 59.95 0 <NA>
## 5 2016-12-07 111.03 61.37 1 <NA>
## 6 2016-12-08 112.12 61.01 1 <NA>
## 7 2016-12-09 113.95 61.97 1 <NA>
## 8 2016-12-12 113.30 62.17 0 <NA>
## 9 2016-12-13 115.19 62.98 0 <NA>
## 10 2016-12-14 115.19 62.68 0 <NA>
## 11 2016-12-15 115.82 62.58 0 <NA>
## 12 2016-12-16 115.97 62.30 0 <NA>
## 13 2016-12-19 116.64 63.62 0 <NA>
## 14 2016-12-20 116.95 63.54 0 <NA>
## 15 2016-12-21 117.06 63.54 0 2016-12-21
## 16 2016-12-22 116.29 63.55 0 <NA>
## 17 2016-12-23 116.52 63.24 0 <NA>
## 18 2016-12-27 117.26 63.28 0 2016-12-27
## 19 2016-12-28 116.76 62.99 0 <NA>
## 20 2016-12-29 116.73 62.90 0 <NA>
## 21 2016-12-30 115.82 62.14 0 <NA>
To run a block of code over and over, use repeat and put the action inside the curly braces. You must specify when you want to break out of the loop by using an if statement and the break command, or it will run forever.
# Stock price
stock_price <- 126.34
repeat {
  # New stock price: multiply by a random factor drawn between 0.985 and 1.01
  stock_price <- stock_price * runif(1, .985, 1.01)
  print(stock_price)
  # Check: leave the loop once the price drops under 125.
  # The message quotes the same threshold that the condition tests.
  if (stock_price < 125) {
    print("Stock price is below 125! Buy it while it's cheap!")
    break
  }
}
## [1] 126.6079
## [1] 126.4361
## [1] 125.1533
## [1] 125.9051
## [1] 126.9133
## [1] 126.0457
## [1] 125.0674
## [1] 123.9623
## [1] "Stock price is below 125! Buy it while it's cheap!"
The order in which you execute your code inside the loop and check when you should break is important.
# Starting price
stock_price <- 67.55
repeat {
  # Lower the price by 0.5%, then show it
  stock_price <- stock_price * .995
  print(stock_price)
  # Stop as soon as the freshly printed price is under 66
  if (stock_price < 66) {
    print("Stock price is below 66! Buy it while it's cheap!")
    break
  }
}
## [1] 67.21225
## [1] 66.87619
## [1] 66.54181
## [1] 66.2091
## [1] 65.87805
## [1] "Stock price is below 66! Buy it while it's cheap!"
While loops are slightly different from repeat loops. Like if statements, you specify the condition for them to run at the very beginning. There is no need for a break statement because the condition is checked at each iteration.
# Amount owed at the start
debt <- 5000
# Pay 500 per pass until nothing is owed
while (debt > 0) {
  debt <- debt - 500
  print(paste("Debt remaining", debt))
}
## [1] "Debt remaining 4500"
## [1] "Debt remaining 4000"
## [1] "Debt remaining 3500"
## [1] "Debt remaining 3000"
## [1] "Debt remaining 2500"
## [1] "Debt remaining 2000"
## [1] "Debt remaining 1500"
## [1] "Debt remaining 1000"
## [1] "Debt remaining 500"
## [1] "Debt remaining 0"
You can visualize a loop with a plot.
debt <- 5000      # initial debt
i <- 0            # x axis counter
x_axis <- i       # x values plotted so far
y_axis <- debt    # y values plotted so far
# Draw the starting point
plot(x_axis, y_axis, xlim = c(0, 10), ylim = c(0, 5000))
# Redraw the plot after every payment
while (debt > 0) {
  # Advance the counter and make a payment
  i <- i + 1
  debt <- debt - 500
  # Append the new point to the history
  x_axis <- c(x_axis, i)
  y_axis <- c(y_axis, debt)
  # Plot all points recorded so far
  plot(x_axis, y_axis, xlim = c(0, 10), ylim = c(0, 5000))
}
Sometimes, you have to end your while loop early. If you add an if statement and a break, the while loop will completely stop, and all lines after it will be run, if the breaking condition is met.
# Amount owed and money available
debt <- 5000
cash <- 4000
# Keep paying 500 at a time while debt remains; stop early if cash hits zero
while (debt > 0) {
  debt <- debt - 500
  cash <- cash - 500
  print(paste("Debt remaining:", debt, "and Cash remaining:", cash))
  if (cash == 0) {
    print("You ran out of cash!")
    break
  }
}
## [1] "Debt remaining: 4500 and Cash remaining: 3500"
## [1] "Debt remaining: 4000 and Cash remaining: 3000"
## [1] "Debt remaining: 3500 and Cash remaining: 2500"
## [1] "Debt remaining: 3000 and Cash remaining: 2000"
## [1] "Debt remaining: 2500 and Cash remaining: 1500"
## [1] "Debt remaining: 2000 and Cash remaining: 1000"
## [1] "Debt remaining: 1500 and Cash remaining: 500"
## [1] "Debt remaining: 1000 and Cash remaining: 0"
## [1] "You ran out of cash!"
When you know how many times you want to repeat an action, a for loop is a good option. The idea of the for loop is that you are stepping through a sequence, one at a time, and performing an action at each step along the way.
# Sequence
# Stored as `values` so that the base function seq() is not masked
values <- 1:10
# Print loop
for (value in values) {
  print(value)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# A running total
# Stored as `total` so that the base function sum() is not masked
total <- 0
# Sum loop: add each value to the running total and show it
for (value in values) {
  total <- value + total
  print(total)
}
## [1] 1
## [1] 3
## [1] 6
## [1] 10
## [1] 15
## [1] 21
## [1] 28
## [1] 36
## [1] 45
## [1] 55
You can run a loop over the rows of a data frame. Before you do so, note that you can get the number of rows in your data frame using nrow(stock). Then, you can create a sequence to loop over from 1:nrow(stock).
# Define stock
date <- seq(from = as.Date("2016-12-01"), to = as.Date("2016-12-30"), by = "days")
date <- date[-c(3, 4, 10, 11, 17, 18, 24, 25, 26)]
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95, 113.30,
           115.19, 115.19, 115.82, 115.97, 116.64, 116.95, 117.06, 116.29, 116.52,
           117.26, 116.76, 116.73, 115.82)
stock <- data.frame(date = date, apple = apple)
# Loop over stock rows
# seq_len(nrow(stock)) is the same as 1:nrow(stock) here, but gives an empty
# sequence (instead of c(1, 0)) if the data frame ever has no rows.
for (row in seq_len(nrow(stock))) {
  price <- stock[row, "apple"]
  date <- stock[row, "date"]
  # Report the price on days above 116; flag every other day as unimportant
  if (price > 116) {
    print(paste("On", date,
                "the stock price was", price))
  } else {
    print(paste("The date:", date,
                "is not an important day!"))
  }
}
## [1] "The date: 2016-12-01 is not an important day!"
## [1] "The date: 2016-12-02 is not an important day!"
## [1] "The date: 2016-12-05 is not an important day!"
## [1] "The date: 2016-12-06 is not an important day!"
## [1] "The date: 2016-12-07 is not an important day!"
## [1] "The date: 2016-12-08 is not an important day!"
## [1] "The date: 2016-12-09 is not an important day!"
## [1] "The date: 2016-12-12 is not an important day!"
## [1] "The date: 2016-12-13 is not an important day!"
## [1] "The date: 2016-12-14 is not an important day!"
## [1] "The date: 2016-12-15 is not an important day!"
## [1] "The date: 2016-12-16 is not an important day!"
## [1] "On 2016-12-19 the stock price was 116.64"
## [1] "On 2016-12-20 the stock price was 116.95"
## [1] "On 2016-12-21 the stock price was 117.06"
## [1] "On 2016-12-22 the stock price was 116.29"
## [1] "On 2016-12-23 the stock price was 116.52"
## [1] "On 2016-12-27 the stock price was 117.26"
## [1] "On 2016-12-28 the stock price was 116.76"
## [1] "On 2016-12-29 the stock price was 116.73"
## [1] "The date: 2016-12-30 is not an important day!"
You can loop over elements in a matrix (columns and rows) by using nested loops.
# Define corr
corr <- matrix(c(1.00, 0.96, 0.88, 0.96, 1.00, 0.74, 0.88, 0.74, 1.00), 3, 3)
# rownames()/colnames() are used for both setting and reading the labels
rownames(corr) <- c("apple", "ibm", "micr")
colnames(corr) <- c("apple", "ibm", "micr")
# Print out corr
corr
## apple ibm micr
## apple 1.00 0.96 0.88
## ibm 0.96 1.00 0.74
## micr 0.88 0.74 1.00
# Create a nested loop
# The outer loop walks the rows and the inner loop walks the columns.
# seq_len() gives an empty sequence for a zero-extent matrix, where 1:n
# would give c(1, 0).
for (row in seq_len(nrow(corr))) {
  for (col in seq_len(ncol(corr))) {
    print(paste(colnames(corr)[col], "and", rownames(corr)[row],
                "have a correlation of", corr[row, col]))
  }
}
## [1] "apple and apple have a correlation of 1"
## [1] "ibm and apple have a correlation of 0.96"
## [1] "micr and apple have a correlation of 0.88"
## [1] "apple and ibm have a correlation of 0.96"
## [1] "ibm and ibm have a correlation of 1"
## [1] "micr and ibm have a correlation of 0.74"
## [1] "apple and micr have a correlation of 0.88"
## [1] "ibm and micr have a correlation of 0.74"
## [1] "micr and micr have a correlation of 1"
If you want to skip the current iteration, and continue the loop, you can use the next statement. This can be useful if your loop encounters an error, but you don’t want it to break everything.
# Apple prices, with NA on days without a price
apple <- c(
  109.49, 109.90, NA, NA, 109.11, 109.95, 111.03, 112.12, 113.95, NA, NA,
  113.30, 115.19, 115.19, 115.82, 115.97, NA, NA, 116.64, 116.95, 117.06,
  116.29, 116.52, NA, NA, 117.26, 116.76, 116.73, 115.82
)
# Show apple
apple
## [1] 109.49 109.90 NA NA 109.11 109.95 111.03 112.12 113.95 NA
## [11] NA 113.30 115.19 115.19 115.82 115.97 NA NA 116.64 116.95
## [21] 117.06 116.29 116.52 NA NA 117.26 116.76 116.73 115.82
# Walk through apple: skip NA values, stop at the first price above 117
for (value in apple) {
  # Missing price: report it and move on to the next value
  if (is.na(value)) {
    print("Skipping NA")
    next
  }
  # First price above 117: report it and leave the loop
  if (value > 117) {
    print("Time to sell!")
    break
  }
  # Any other price
  print("Nothing to do here!")
}
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Skipping NA"
## [1] "Skipping NA"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Skipping NA"
## [1] "Skipping NA"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Skipping NA"
## [1] "Skipping NA"
## [1] "Nothing to do here!"
## [1] "Nothing to do here!"
## [1] "Time to sell!"
You can type a ? and the name of a function and R will take you to the help site for that function.
?subset
?Sys.time
Optional arguments are ones that don’t have to be set by the user, either because they are given a default value, or because the function can infer them from the other data you have given it. Even though they don’t have to be set, they often provide extra flexibility.
# Round 5.4
round(5.4)
## [1] 5
# Round 5.4 with 1 decimal place
round(5.4, digits = 1)
## [1] 5.4
# numbers
numbers <- c(.002623, pi, 812.33345)
# Round numbers to 3 decimal places
round(numbers, digits = 3)
## [1] 0.003 3.142 812.333
You can use functions inside of other functions. This lets you use the result of one function directly in another one, without having to create an intermediate variable.
# cbind() the stocks: bind apple, ibm and micr together as the columns of one matrix
# NOTE(review): the most recent definitions of apple, ibm and micr visible
# earlier in this file have 29, 4 and 21 elements respectively, so the output
# below was presumably produced from other data - confirm which vectors are
# meant to be used here.
stocks <- cbind(apple, ibm, micr)
# cor() to create the correlation matrix
cor(stocks)
## apple ibm micr
## apple 1 NA NA
## ibm NA 1.00000000 0.03846558
## micr NA 0.03846558 1.00000000
# All at once! Nest cbind() inside of cor(), skipping the intermediate variable
cor(cbind(apple, ibm, micr))
## apple ibm micr
## apple 1 NA NA
## ibm NA 1.00000000 0.03846558
## micr NA 0.03846558 1.00000000
To create your own functions, use the following basic structure:
func_name <- function(arguments) { body } Arguments are user inputs that the function works on. They can be the data that the function manipulates, or options that affect the calculation. The body of the function is the code that actually performs the manipulation. The value that a function returns is simply the last executed line of the function body.
# Percent to decimal function
# Takes a percentage (e.g. 6 for 6%) and returns it divided by 100 (0.06).
# The division is the last expression evaluated, so its value is what the
# function returns.
percent_to_decimal <- function(percent) {
percent / 100
}
# Use percent_to_decimal() on 6
percent_to_decimal(6)
## [1] 0.06
# Example percentage
pct <- 8
# Use percent_to_decimal() on pct
percent_to_decimal(pct)
## [1] 0.08
Functions can have multiple arguments. The arguments are separated by a comma, and the default value is set using an equals sign.
# Percent to decimal function
# Converts percentages to decimals and rounds the result.
#   percent: numeric vector of percentages (e.g. 6 for 6%)
#   digits:  number of decimal places to round to (default 2)
# Returns a numeric vector the same length as percent.
percent_to_decimal <- function(percent, digits = 2) {
  decimal <- percent / 100
  # Pass the caller's digits on to round(); a hard-coded 2 here would
  # silently ignore the argument
  round(decimal, digits = digits)
}
# percents
percents <- c(25.88, 9.045, 6.23)
# percent_to_decimal() with default digits
percent_to_decimal(percents)
## [1] 0.26 0.09 0.06
# percent_to_decimal() with digits = 4
percent_to_decimal(percents, digits = 4)
## [1] 0.2588 0.0905 0.0623
You can use the same concept to create more complex functions.
# Present value function
# Discounts a future cash flow back to today.
#   cash_flow: amount received in the future
#   i:         interest rate in percent, converted with percent_to_decimal()
#   year:      number of years to discount over
pv <- function(cash_flow, i, year) {
  # Growth factor for one year
  growth <- 1 + percent_to_decimal(i)
  # Discount factor for the whole period
  discount <- growth ^ -year
  cash_flow * discount
}
# Calculate a present value
pv(1200, 7, 3)
## [1] 979.5575
Scoping is the process of how R looks up a variable’s value when given a name. For example, given x <- 5, scoping is how R knows where to look to find that the value of x is 5.
# decimal is assigned inside the function body
percent_to_decimal <- function(percent) {
decimal <- percent / 100
# Last expression evaluated: this is the value the function returns
decimal
}
percent_to_decimal(6)
## [1] 0.06
# decimal was defined to live only inside the percent_to_decimal() function. If you try to access decimal outside of the scope of that function, you will get an error because it does not exist!
# hundred is assigned outside the function, at the top level of the script
hundred <- 100
percent_to_decimal <- function(percent) {
# hundred is neither an argument nor assigned in this body; it is the
# variable assigned just above the function
percent / hundred
}
percent_to_decimal(6)
## [1] 0.06
hundred was defined outside of the percent_to_decimal() function. When the percent_to_decimal function came across hundred, it first looked inside the scope of the function for hundred, and when it couldn’t find it, it looked up one level to find where it was defined in the global scope.
Packages are collections of functions that someone else has written; they are available on CRAN. The tidyquant package is focused on retrieving, manipulating, and scaling financial data analysis in the easiest way possible. To get the tidyquant package and start working with it, you first have to install it.
install.packages("tidyquant") This places it on your local computer. You then have to load it into your current R session. This gives you access to all of the functions in the package.
library(tidyquant) These steps of installing and librarying packages are necessary for any CRAN package you want to use.
# Library tidyquant
library(tidyquant)
# Pull Apple stock data
apple <- tq_get("AAPL", get = "stock.prices",
from = "2007-01-03", to = "2017-06-05")
# Take a look at what it returned
head(apple)
## # A tibble: 6 x 7
## date open high low close volume adjusted
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2007-01-03 95.539 95.860 90.679 83.80000 309579900 10.81246
## 2 2007-01-04 93.059 95.163 92.804 85.66000 211815100 11.05245
## 3 2007-01-05 94.964 95.440 93.447 85.04999 208685400 10.97374
## 4 2007-01-08 95.174 95.805 94.421 85.47000 199276700 11.02793
## 5 2007-01-09 95.716 102.946 94.277 92.57000 837324600 11.94403
## 6 2007-01-10 104.906 108.283 103.467 97.00000 738220000 12.51562
# Plot the adjusted stock price over time as a line chart
plot(apple$date, apple$adjusted, type = "l")
# Calculate daily stock returns for the adjusted price.
# `select` replaces the deprecated `ohlc_fun` argument of tq_mutate();
# the new column is the `daily.returns` column that is sorted below.
apple <- tq_mutate(
  data = apple,
  select = adjusted,
  mutate_fun = dailyReturn
)
# Sort the returns from least to greatest
sorted_returns <- sort(apple$daily.returns)
# Plot them
plot(sorted_returns)
You can apply the same function to each element of a list, by using lapply() to return another list.
# Twenty returns (in percent) for each of the three stocks
apple <- c(
  0.37446342, -0.71883530, 0.76986527, 0.98226467, 0.98171665,
  1.63217981, -0.57042563, 1.66813769, 0.00000000, 0.54692248,
  0.12951131, 0.57773562, 0.26577503, 0.09405729, -0.65778233,
  0.19778141, 0.63508411, -0.42640287, -0.02569373, -0.77957680
)
ibm <- c(
  0.1251408, -0.1124859, 0.3190691, 2.7689429, 0.3458948,
  0.7014998, -0.6125390, 1.6858006, 0.1307267, -0.2907839,
  -0.7677657, -0.0299886, 0.5519558, -0.1610979, -0.1613578,
  -0.2095056, 0.2579329, -0.5683858, 0.2467056, -0.3661465
)
micr <- c(
  0.08445946, 1.63713080, -0.44835603, 2.36864053, -0.58660583,
  1.57351254, 0.32273681, 1.30287920, -0.47634170, -0.15954052,
  -0.44742729, 2.11878010, -0.12574662, 0.00000000, 0.01573812,
  -0.48780488, 0.06325111, -0.45828066, -0.14287982, -1.20826709
)
# Collect the three vectors in an unnamed list
stock_return <- list(apple, ibm, micr)
# Print stock_return
stock_return
## [[1]]
## [1] 0.37446342 -0.71883530 0.76986527 0.98226467 0.98171665
## [6] 1.63217981 -0.57042563 1.66813769 0.00000000 0.54692248
## [11] 0.12951131 0.57773562 0.26577503 0.09405729 -0.65778233
## [16] 0.19778141 0.63508411 -0.42640287 -0.02569373 -0.77957680
##
## [[2]]
## [1] 0.1251408 -0.1124859 0.3190691 2.7689429 0.3458948 0.7014998
## [7] -0.6125390 1.6858006 0.1307267 -0.2907839 -0.7677657 -0.0299886
## [13] 0.5519558 -0.1610979 -0.1613578 -0.2095056 0.2579329 -0.5683858
## [19] 0.2467056 -0.3661465
##
## [[3]]
## [1] 0.08445946 1.63713080 -0.44835603 2.36864053 -0.58660583
## [6] 1.57351254 0.32273681 1.30287920 -0.47634170 -0.15954052
## [11] -0.44742729 2.11878010 -0.12574662 0.00000000 0.01573812
## [16] -0.48780488 0.06325111 -0.45828066 -0.14287982 -1.20826709
# Use lapply() to convert every vector of percents in stock_return to decimals
lapply(stock_return, FUN = percent_to_decimal)
## [[1]]
## [1] 0.0037446342 -0.0071883530 0.0076986527 0.0098226467 0.0098171665
## [6] 0.0163217981 -0.0057042563 0.0166813769 0.0000000000 0.0054692248
## [11] 0.0012951131 0.0057773562 0.0026577503 0.0009405729 -0.0065778233
## [16] 0.0019778141 0.0063508411 -0.0042640287 -0.0002569373 -0.0077957680
##
## [[2]]
## [1] 0.001251408 -0.001124859 0.003190691 0.027689429 0.003458948
## [6] 0.007014998 -0.006125390 0.016858006 0.001307267 -0.002907839
## [11] -0.007677657 -0.000299886 0.005519558 -0.001610979 -0.001613578
## [16] -0.002095056 0.002579329 -0.005683858 0.002467056 -0.003661465
##
## [[3]]
## [1] 0.0008445946 0.0163713080 -0.0044835603 0.0236864053 -0.0058660583
## [6] 0.0157351254 0.0032273681 0.0130287920 -0.0047634170 -0.0015954052
## [11] -0.0044742729 0.0211878010 -0.0012574662 0.0000000000 0.0001573812
## [16] -0.0048780488 0.0006325111 -0.0045828066 -0.0014287982 -0.0120826709
lapply() can also be used on a data frame; the output will be in the form of a list.
# Twenty returns (in percent) for each of the three stocks
apple <- c(
  0.37446342, -0.71883530, 0.76986527, 0.98226467, 0.98171665,
  1.63217981, -0.57042563, 1.66813769, 0.00000000, 0.54692248,
  0.12951131, 0.57773562, 0.26577503, 0.09405729, -0.65778233,
  0.19778141, 0.63508411, -0.42640287, -0.02569373, -0.77957680
)
ibm <- c(
  0.1251408, -0.1124859, 0.3190691, 2.7689429, 0.3458948,
  0.7014998, -0.6125390, 1.6858006, 0.1307267, -0.2907839,
  -0.7677657, -0.0299886, 0.5519558, -0.1610979, -0.1613578,
  -0.2095056, 0.2579329, -0.5683858, 0.2467056, -0.3661465
)
micr <- c(
  0.08445946, 1.63713080, -0.44835603, 2.36864053, -0.58660583,
  1.57351254, 0.32273681, 1.30287920, -0.47634170, -0.15954052,
  -0.44742729, 2.11878010, -0.12574662, 0.00000000, 0.01573812,
  -0.48780488, 0.06325111, -0.45828066, -0.14287982, -1.20826709
)
# Combine the vectors into a data frame with one column per stock
stock_return <- data.frame(apple, ibm, micr)
# Print stock_return
stock_return
## apple ibm micr
## 1 0.37446342 0.1251408 0.08445946
## 2 -0.71883530 -0.1124859 1.63713080
## 3 0.76986527 0.3190691 -0.44835603
## 4 0.98226467 2.7689429 2.36864053
## 5 0.98171665 0.3458948 -0.58660583
## 6 1.63217981 0.7014998 1.57351254
## 7 -0.57042563 -0.6125390 0.32273681
## 8 1.66813769 1.6858006 1.30287920
## 9 0.00000000 0.1307267 -0.47634170
## 10 0.54692248 -0.2907839 -0.15954052
## 11 0.12951131 -0.7677657 -0.44742729
## 12 0.57773562 -0.0299886 2.11878010
## 13 0.26577503 0.5519558 -0.12574662
## 14 0.09405729 -0.1610979 0.00000000
## 15 -0.65778233 -0.1613578 0.01573812
## 16 0.19778141 -0.2095056 -0.48780488
## 17 0.63508411 0.2579329 0.06325111
## 18 -0.42640287 -0.5683858 -0.45828066
## 19 -0.02569373 0.2467056 -0.14287982
## 20 -0.77957680 -0.3661465 -1.20826709
# Use lapply() to get the average return of each column; the result is a named list
lapply(stock_return, FUN = mean)
## $apple
## [1] 0.2838389
##
## $ibm
## [1] 0.1926806
##
## $micr
## [1] 0.2472939
# Sharpe ratio
# Mean of `returns` in excess of a fixed benchmark of .0003, divided by the
# standard deviation of `returns`.
# NOTE(review): .0003 is presumably the risk-free rate (it is named `rf` in
# the later version of this function) - confirm it uses the same units as
# the returns it is applied to.
sharpe <- function(returns) {
  excess_return <- mean(returns) - .0003
  excess_return / sd(returns)
}
# Use lapply() to compute the Sharpe ratio of each column of stock_return
lapply(stock_return, FUN = sharpe)
## $apple
## [1] 0.3961448
##
## $ibm
## [1] 0.2366101
##
## $micr
## [1] 0.2483864
In the call to lapply() you can specify the named optional arguments after the FUN argument, and they will get passed to the function that you are applying.
# sharpe
# Mean of `returns` in excess of `rf`, divided by the standard deviation of
# `returns`. `rf` defaults to .0003 and can be overridden by the caller.
sharpe <- function(returns, rf = .0003) {
  excess_return <- mean(returns) - rf
  excess_return / sd(returns)
}
# First lapply(): the extra argument rf = .0004 is passed on to sharpe()
lapply(stock_return, FUN = sharpe, rf = .0004)
## $apple
## [1] 0.3960051
##
## $ibm
## [1] 0.2364871
##
## $micr
## [1] 0.2482859
# Second lapply(): same call with rf = .0009 passed on to sharpe()
lapply(stock_return, FUN = sharpe, rf = .0009)
## $apple
## [1] 0.3953065
##
## $ibm
## [1] 0.2358721
##
## $micr
## [1] 0.247783
sapply(), short for "simplify apply", works just like lapply(), but it will attempt to simplify the output if it can.
# lapply() on stock_return: returns a list with one element per column
lapply(stock_return, FUN = sharpe)
## $apple
## [1] 0.3961448
##
## $ibm
## [1] 0.2366101
##
## $micr
## [1] 0.2483864
# sapply() on stock_return: the same values, simplified to a named vector
sapply(stock_return, FUN = sharpe)
## apple ibm micr
## 0.3961448 0.2366101 0.2483864
# sapply() on stock_return with optional arguments:
# simplify = FALSE keeps the result as a list, so it prints like lapply()
sapply(stock_return, FUN = sharpe, simplify = FALSE, USE.NAMES = FALSE)
## $apple
## [1] 0.3961448
##
## $ibm
## [1] 0.2366101
##
## $micr
## [1] 0.2483864
sapply() is not a safe option to be used when writing functions. If sapply() cannot simplify your output, then it will default to returning a list just like lapply(). This can be dangerous and break custom functions if you wrote them expecting sapply() to return a simplified vector.
# Market crash with as.Date()
# The date is 2008-09-29, the same day as `crash` defined earlier in these notes
market_crash <- list(
  dow_jones_drop = 777.68,
  date = as.Date("2008-09-29")
)
# Find the classes with sapply(); each element has one class, so the
# result simplifies to a named character vector
sapply(market_crash, FUN = class)
## dow_jones_drop date
## "numeric" "Date"
# Market crash with as.POSIXct()
# The date is 2008-09-29, the same day as `crash` defined earlier in these notes
market_crash2 <- list(
  dow_jones_drop = 777.68,
  date = as.POSIXct("2008-09-29")
)
# Find the classes with lapply()
lapply(market_crash2, FUN = class)
## $dow_jones_drop
## [1] "numeric"
##
## $date
## [1] "POSIXct" "POSIXt"
# Find the classes with sapply(); the date has two classes, so the result
# cannot be simplified and stays a list
sapply(market_crash2, FUN = class)
## $dow_jones_drop
## [1] "numeric"
##
## $date
## [1] "POSIXct" "POSIXt"
If sapply() cannot simplify the output, it does not signal an error, which can be confusing. There is a stricter apply function called vapply(), which takes an extra argument, FUN.VALUE, where you specify the type and length of the output that should be returned each time your applied function is called.
# Market crash with as.POSIXct()
# The date is 2008-09-29, the same day as `crash` defined earlier in these notes
market_crash2 <- list(
  dow_jones_drop = 777.68,
  date = as.POSIXct("2008-09-29")
)
# Find the classes with sapply(); the date has two classes, so the
# result stays a list
sapply(market_crash2, FUN = class)
## $dow_jones_drop
## [1] "numeric"
##
## $date
## [1] "POSIXct" "POSIXt"
# Find the classes with vapply()
vapply(market_crash2, FUN = class, FUN.VALUE = character(1))
## Error: values must be length 1, but FUN(X[[2]]) result is length 2
When there are no errors, vapply() returns a simplified result according to the FUN.VALUE argument.
# Sharpe ratio for all stocks; FUN.VALUE = numeric(1) requires sharpe() to
# return a single number for each column
vapply(stock_return, FUN = sharpe, FUN.VALUE = numeric(1))
## apple ibm micr
## 0.3961448 0.2366101 0.2483864
# Summarize the Apple returns (six summary statistics)
summary(stock_return$apple)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.7796 -0.1259 0.2318 0.2838 0.6688 1.6680
# Summarize all stocks; each summary must be a numeric vector of length 6,
# and the results are combined into a matrix with one column per stock
vapply(stock_return, FUN = summary, FUN.VALUE = numeric(6))
## apple ibm micr
## Min. -0.7796 -0.76780 -1.20800
## 1st Qu. -0.1259 -0.22980 -0.45080
## Median 0.2318 0.04758 -0.06287
## Mean 0.2838 0.19270 0.24730
## 3rd Qu. 0.6688 0.32580 0.56780
## Max. 1.6680 2.76900 2.36900
Anonymous functions are functions that aren’t assigned a name.
# Max and min
# The anonymous function returns c(max, min) for a column, so each result
# has length 2 and FUN.VALUE is numeric(2)
vapply(
  stock_return,
  FUN = function(x) c(max(x), min(x)),
  FUN.VALUE = numeric(2)
)
## apple ibm micr
## [1,] 1.6681377 2.7689429 2.368641
## [2,] -0.7795768 -0.7677657 -1.208267
Complete the tasks below and include them at the end of your R Markdown note. Then publish it on RPubs.com and email me the link for grading.
# iPhone
iPhone <- 500
# Buy if less than $600, do nothing if $600 or more
print(
  if (iPhone < 600) {
    "Buy!"
  } else {
    "Do nothing!"
  }
)
## [1] "Buy!"
# iPhone
iPhone <- 700
# Buy if less than $600, do nothing if $600 or more
print(
  if (iPhone < 600) {
    "Buy!"
  } else {
    "Do nothing!"
  }
)
## [1] "Do nothing!"
Loops are used to run the same operation over each element of a data set. While loops and repeat loops can do the same thing, but a while loop uses less code. A repeat loop will run forever unless you specify a break. A while loop stops as soon as its condition is no longer true. While loops are useful when you don’t know how many times you want the loop to run. A for loop repeats an operation a specific number of times. For loops are useful when you know how many times you want to repeat an action.
# Raise x to the power `value` (cubes x by default, since value = 3)
new_value <- function(x, value = 3) {
  x^value
}
new_value(4)
## [1] 64
# Apply new_value() to each column of stock_return; with the default
# value = 3 every return is cubed
lapply(stock_return, new_value)
## $apple
## [1] 5.250833e-02 -3.714396e-01 4.562934e-01 9.477321e-01 9.461467e-01
## [6] 4.348145e+00 -1.856082e-01 4.641899e+00 0.000000e+00 1.635977e-01
## [11] 2.172316e-03 1.928357e-01 1.877338e-02 8.321036e-04 -2.846077e-01
## [16] 7.736712e-03 2.561496e-01 -7.752832e-02 -1.696217e-05 -4.737800e-01
##
## $ibm
## [1] 1.959732e-03 -1.423293e-03 3.248286e-02 2.122961e+01 4.138397e-02
## [6] 3.452094e-01 -2.298271e-01 4.790917e+00 2.234050e-03 -2.458731e-02
## [11] -4.525704e-01 -2.696923e-05 1.681562e-01 -4.180899e-03 -4.201166e-03
## [16] -9.195745e-03 1.716012e-02 -1.836241e-01 1.501540e-02 -4.908679e-02
##
## $micr
## [1] 6.024831e-04 4.387833e+00 -9.012993e-02 1.328916e+01 -2.018548e-01
## [6] 3.895925e+00 3.361596e-02 2.211630e+00 -1.080826e-01 -4.060813e-03
## [11] -8.957100e-02 9.511689e+00 -1.988332e-03 0.000000e+00 3.898150e-06
## [16] -1.160749e-01 2.530489e-04 -9.624864e-02 -2.916841e-03 -1.763960e+00