Chapter 1: The Basics

First R script

Simple way to execute script in R


# Addition: R evaluates the expression and echoes the result
3 + 5
## [1] 8

# Subtraction, written with spaces around the infix operator
6 - 4
## [1] 2

Assignment and variables (1)

To store a value in an object, assign it to a variable with the <- operator:


# Assign 200 to savings (<- binds the value to the name)
savings <- 200

# Print the value of savings to the console by evaluating the bare name
savings
## [1] 200

Assignment and variables (2)

Values stored in variables can be combined with ordinary arithmetic, for example by adding two variables together.


# Assign 100 to my_money
my_money <- 100

# Assign 200 to dans_money
dans_money <- 200


# Add my_money and dans_money (the sum is printed, not stored)
my_money + dans_money
## [1] 300

Financial returns (1)

Calculating return based on fixed interest rates



# Opening balance and January's return, expressed in percent
starting_cash <- 200
jan_ret <- 5

# Turn the percentage return into a growth multiplier (5% -> 1.05)
jan_mult <- jan_ret / 100 + 1

# Balance once January's return has been applied
post_jan_cash <- jan_mult * starting_cash

# Show the end-of-January balance
post_jan_cash
## [1] 210

Financial returns (2)

Calculating return based on fluctuating interest rates

# Starting cash and the monthly returns, in percent
starting_cash <- 200
jan_ret <- 4
feb_ret <- 5

# Multipliers, derived from the return variables instead of repeating the
# literals, so that changing jan_ret / feb_ret actually changes the result
jan_mult <- 1 + jan_ret / 100
feb_mult <- 1 + feb_ret / 100

# Total cash at the end of the two months (the returns compound)
total_cash <- starting_cash * jan_mult * feb_mult


# Print total_cash
total_cash
## [1] 218.4

Data type exploration

R has several basic data types: numerics are decimal numbers like 4.5, logicals are the boolean values TRUE / FALSE, and characters are text values.


# Apple's stock price is a numeric
apple_stock <- 150.45 

# Bond credit ratings are characters (text goes inside quotes)
credit_rating <- "AAA"

# You like the stock market. TRUE or FALSE? (a logical value)
my_answer <- FALSE

# Print my_answer
my_answer
## [1] FALSE

Chapter 2: Vectors and Matrices

c()ombine

A vector can be created using the combine function, c(). A vector can only hold one data type, e.g. all numerics or all characters.

# A numeric vector, built with c()
ibm_stock <- c(159.82, 160.02, 159.84)

# A character vector
finance <- c("stocks", "bonds", "investments")

# A logical vector
logic <- c(TRUE, FALSE, TRUE)

Vector names()

The names() function attaches a name to each element of a vector, so that the values and their text labels are displayed together.


# Twelve monthly returns, January through December
ret <- c(5, 2, 3, 7, 8, 3, 5, 9, 1, 4, 6, 3)

# Three-letter month labels; month.abb is R's built-in constant holding
# exactly "Jan" through "Dec"
months <- month.abb

# Label each return with its month
names(ret) <- months

# Display the labelled vector
ret
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##   5   2   3   7   8   3   5   9   1   4   6   3

Visualize your vector

The plot() function can be used to create graphs



# The price series to visualize
apple_stock <- c(
  109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95,
  113.30, 115.19, 115.19, 115.82, 115.97, 116.64, 116.95,
  117.06, 116.29, 116.52, 117.26, 116.76, 116.73, 115.82
)

# Default plot: one point per observation
plot(apple_stock)


# Same data drawn as a connected line (type = "l")
plot(apple_stock, type = "l")

Weighted average (2)

A weighted average can be computed more efficiently by using vectors.


# Weights, returns, and company names
# NOTE: this reuses the name ret, replacing the 12-month vector built earlier
ret <- c(7, 9)
weight <- c(0.2, 0.8)
companies <- c("Microsoft", "Sony")

# Assign company names to your vectors
names(ret) <- companies
names(weight) <- companies

# Multiply the returns and weights together (element by element)
ret_X_weight <- ret * weight


# Print ret_X_weight
ret_X_weight
## Microsoft      Sony 
##       1.4       7.2

# Sum to get the total portfolio return
portf_ret <- sum(ret_X_weight)

# Print portf_ret
portf_ret
## [1] 8.6

Weighted average (3)

Different techniques can be applied to compute weighted returns based on stock weights.

# Print ret
ret
## Microsoft      Sony 
##         7         9

# Assign 1/3 to weight (a single value, reused for every element of ret)
weight <- 1/3

# Create ret_X_weight
ret_X_weight <- ret * weight
ret_X_weight
## Microsoft      Sony 
##  2.333333  3.000000
 

# Calculate your portfolio return
portf_ret <- sum(ret_X_weight)
portf_ret
## [1] 5.333333

# Vector of length 2 * vector of length 2: ret holds only two elements here,
# so this is a plain element-wise product and no recycling takes place
ret * c(.2, .6)
## Microsoft      Sony 
##       1.4       5.4

Vector subsetting

Manipulating information by taking specific pieces of vectors


# ret was overwritten with the two-company returns in the weighted-average
# sections, so rebuild the 12 monthly returns before subsetting by month
ret <- c(5, 2, 3, 7, 8, 3, 5, 9, 1, 4, 6, 3)
names(ret) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

# First 6 months of returns (subset by position)
ret[1:6]
## Jan Feb Mar Apr May Jun 
##   5   2   3   7   8   3


# Just March and May (subset by name)
ret[c("Mar", "May")]
## Mar May 
##   3   8


# Omit the first month of returns (a negative index drops that position)
ret[-1]
## Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##   2   3   7   8   3   5   9   1   4   6   3

Create a matrix

Two-dimensional data can be built using the matrix() function.



# The nine values that will populate the matrix
my_vector <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)

# Shape them into 3 rows; with nine values that makes 3 columns, and
# matrix() fills column by column unless told otherwise
my_matrix <- matrix(my_vector, nrow = 3)

# Display the matrix
my_matrix
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

# byrow = TRUE fills across each row instead of down each column
matrix(c(2, 3, 4, 5), ncol = 2, byrow = TRUE)
##      [,1] [,2]
## [1,]    2    3
## [2,]    4    5

Matrix <- bind vectors

Matrices can be created by combining vectors together with the functions cbind() and rbind() (column bind and row bind, respectively).

# Three price series of equal length (21 observations each)
apple <- c(
  109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95,
  113.30, 115.19, 115.19, 115.82, 115.97, 116.64, 116.95,
  117.06, 116.29, 116.52, 117.26, 116.76, 116.73, 115.82
)
ibm <- c(
  159.82, 160.02, 159.84, 160.35, 164.79, 165.36, 166.52,
  165.50, 168.29, 168.51, 168.02, 166.73, 166.68, 167.60,
  167.33, 167.06, 166.71, 167.14, 166.19, 166.60, 165.99
)
micr <- c(
  59.20, 59.25, 60.22, 59.95, 61.37, 61.01, 61.97,
  62.17, 62.98, 62.68, 62.58, 62.30, 63.62, 63.54,
  63.54, 63.55, 63.24, 63.28, 62.99, 62.90, 62.14
)

# Column-bind: each vector becomes one named column of the matrix
cbind_stocks <- cbind(apple = apple, ibm = ibm, micr = micr)

# Display the column-bound matrix
cbind_stocks
##        apple    ibm  micr
##  [1,] 109.49 159.82 59.20
##  [2,] 109.90 160.02 59.25
##  [3,] 109.11 159.84 60.22
##  [4,] 109.95 160.35 59.95
##  [5,] 111.03 164.79 61.37
##  [6,] 112.12 165.36 61.01
##  [7,] 113.95 166.52 61.97
##  [8,] 113.30 165.50 62.17
##  [9,] 115.19 168.29 62.98
## [10,] 115.19 168.51 62.68
## [11,] 115.82 168.02 62.58
## [12,] 115.97 166.73 62.30
## [13,] 116.64 166.68 63.62
## [14,] 116.95 167.60 63.54
## [15,] 117.06 167.33 63.54
## [16,] 116.29 167.06 63.55
## [17,] 116.52 166.71 63.24
## [18,] 117.26 167.14 63.28
## [19,] 116.76 166.19 62.99
## [20,] 116.73 166.60 62.90
## [21,] 115.82 165.99 62.14

# rbind the vectors together: each vector becomes one named row
rbind_stocks <- rbind(apple, ibm, micr)

# Print rbind_stocks
rbind_stocks
##         [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]
## apple 109.49 109.90 109.11 109.95 111.03 112.12 113.95 113.30 115.19
## ibm   159.82 160.02 159.84 160.35 164.79 165.36 166.52 165.50 168.29
## micr   59.20  59.25  60.22  59.95  61.37  61.01  61.97  62.17  62.98
##        [,10]  [,11]  [,12]  [,13]  [,14]  [,15]  [,16]  [,17]  [,18]
## apple 115.19 115.82 115.97 116.64 116.95 117.06 116.29 116.52 117.26
## ibm   168.51 168.02 166.73 166.68 167.60 167.33 167.06 166.71 167.14
## micr   62.68  62.58  62.30  63.62  63.54  63.54  63.55  63.24  63.28
##        [,19]  [,20]  [,21]
## apple 116.76 116.73 115.82
## ibm   166.19 166.60 165.99
## micr   62.99  62.90  62.14

Visualize your matrix

Like vectors, matrices can also be visualized by creating graphs using plot() function


# Bind the Apple and Microsoft prices into a two-column matrix
apple_micr_matrix <- cbind(apple, micr)

# Scatter plot of Microsoft vs Apple: given a two-column matrix, plot()
# draws the first column on the x-axis and the second on the y-axis
plot(apple_micr_matrix)

cor()relation

The cor() function computes the correlation between two vectors; given a matrix, it returns the correlation matrix of its columns.


# Pairwise correlation between the Apple and IBM price series
cor(apple, ibm)
## [1] 0.8872467

# Collect the three price series as the named columns of one matrix
stocks <- cbind(apple = apple, micr = micr, ibm = ibm)

# Passing the whole matrix to cor() yields the full correlation matrix
cor(stocks)
##           apple      micr       ibm
## apple 1.0000000 0.9477010 0.8872467
## micr  0.9477010 1.0000000 0.9126597
## ibm   0.8872467 0.9126597 1.0000000

Matrix subsetting

A subset of a matrix can be extracted with matrix subsetting; the basic form is my_matrix[row, col].

# Third row (leaving the column index empty selects every column)
stocks[3, ]
##  apple   micr    ibm 
## 109.11  60.22 159.84


# Fourth and fifth row of the ibm column
stocks[4:5, "ibm"]
## [1] 160.35 164.79

# apple and micr columns (leaving the row index empty selects every row)
stocks[, c("apple", "micr")]
##        apple  micr
##  [1,] 109.49 59.20
##  [2,] 109.90 59.25
##  [3,] 109.11 60.22
##  [4,] 109.95 59.95
##  [5,] 111.03 61.37
##  [6,] 112.12 61.01
##  [7,] 113.95 61.97
##  [8,] 113.30 62.17
##  [9,] 115.19 62.98
## [10,] 115.19 62.68
## [11,] 115.82 62.58
## [12,] 115.97 62.30
## [13,] 116.64 63.62
## [14,] 116.95 63.54
## [15,] 117.06 63.54
## [16,] 116.29 63.55
## [17,] 116.52 63.24
## [18,] 117.26 63.28
## [19,] 116.76 62.99
## [20,] 116.73 62.90
## [21,] 115.82 62.14

Chapter 3: Data Frames

Create your first data.frame()

We can use the data.frame() function to create a data frame, which lets us store different kinds of data in columns.


# Raw columns: company label, cash-flow amount, and year
company <- rep(c("A", "B"), times = c(3, 4))
cash_flow <- c(1000, 4000, 550, 1500, 1100, 750, 6000)
year <- c(1, 3, 4, 1, 2, 4, 5)

# Assemble the vectors into a data frame, one column per vector
cash <- data.frame(company = company, cash_flow = cash_flow, year = year)

# Display the data frame
cash
##   company cash_flow year
## 1       A      1000    1
## 2       A      4000    3
## 3       A       550    4
## 4       B      1500    1
## 5       B      1100    2
## 6       B       750    4
## 7       B      6000    5

Making head()s and tail()s of your data with some str()ucture

Data frames can be inspected with the head(), tail() and str() functions. head() selects the first few rows, while tail() selects the last few rows. str(), on the other hand, displays the structure of an object.

# Call head() for the first 4 rows
head(cash, n = 4)
##   company cash_flow year
## 1       A      1000    1
## 2       A      4000    3
## 3       A       550    4
## 4       B      1500    1


# Call tail() for the last 3 rows
tail(cash, n = 3)
##   company cash_flow year
## 5       B      1100    2
## 6       B       750    4
## 7       B      6000    5


# Call str() to list each column with its type and first values
str(cash)
## 'data.frame':    7 obs. of  3 variables:
##  $ company  : Factor w/ 2 levels "A","B": 1 1 1 2 2 2 2
##  $ cash_flow: num  1000 4000 550 1500 1100 750 6000
##  $ year     : num  1 3 4 1 2 4 5

Naming your columns / rows

Rows and columns can be renamed with the help of the colnames() and rownames() functions.


# Fix your column names (these are the names cash was created with, so the
# assignment leaves them as they were)
colnames(cash) <- c("company", "cash_flow", "year")

# Print out the column names of cash
colnames(cash)
## [1] "company"   "cash_flow" "year"

Accessing and subsetting data frames (1)

Subsets of a data frame can be accessed with [ ], using the form data_frame[row, col].


# Third row, second column (columns can be selected by position)
cash[3, 2]
## [1] 550
# Fifth row of the "year" column (columns can also be selected by name)
cash[5, "year"]
## [1] 2

Accessing and subsetting data frames (2)

A specific column can be selected from a data frame with $.



# Select the year column
cash$year
## [1] 1 3 4 1 2 4 5


# Select the cash_flow column and multiply by 2 (cash itself is not changed)
cash$cash_flow * 2
## [1]  2000  8000  1100  3000  2200  1500 12000


# Delete the company column (assigning NULL removes it from the data frame)
cash$company <- NULL

# Print cash again
cash
##   cash_flow year
## 1      1000    1
## 2      4000    3
## 3       550    4
## 4      1500    1
## 5      1100    2
## 6       750    4
## 7      6000    5

Accessing and subsetting data frames (3)

The subset() function can be used to further refine the selection. For example, cash flows from a specific period can be selected with the help of subset() and ==.


# Rows about company B
# NOTE(review): the company column was removed from cash above, so `company`
# is not a column here; the output below is consistent with subset() falling
# back to the standalone `company` vector created alongside cash. That is
# fragile, so confirm it is intended.
subset(cash, company == "B")
##   cash_flow year
## 4      1500    1
## 5      1100    2
## 6       750    4
## 7      6000    5


# Rows with cash flows due in 1 year
subset(cash, year == 1)
##   cash_flow year
## 1      1000    1
## 4      1500    1

Adding new columns

New columns can be added to an existing data frame with the help of $, i.e. data_frame$new_column


# Quarter cash flow scenario: new column holding 25% of each cash flow
cash$quarter_cash <- cash$cash_flow * 0.25
cash
##   cash_flow year quarter_cash
## 1      1000    1        250.0
## 2      4000    3       1000.0
## 3       550    4        137.5
## 4      1500    1        375.0
## 5      1100    2        275.0
## 6       750    4        187.5
## 7      6000    5       1500.0
# Double year scenario: new column holding twice each year value
cash$double_year <- cash$year * 2

cash
##   cash_flow year quarter_cash double_year
## 1      1000    1        250.0           2
## 2      4000    3       1000.0           6
## 3       550    4        137.5           8
## 4      1500    1        375.0           2
## 5      1100    2        275.0           4
## 6       750    4        187.5           8
## 7      6000    5       1500.0          10

Present value of projected cash flows (1)

Present value can be computed with the simple PV formula: present_value = cash_flow * (1 + rate)^(-year).

# Present value of $4000, in 3 years, at 5%
present_value_4k <- 4000 * (1 + 0.05)^(-3)

# Present value of all cash flows, each discounted by its own year
cash$present_value <- cash$cash_flow * (1 + 0.05)^(-cash$year)


# Print out cash
cash
##   cash_flow year quarter_cash double_year present_value
## 1      1000    1        250.0           2      952.3810
## 2      4000    3       1000.0           6     3455.3504
## 3       550    4        137.5           8      452.4864
## 4      1500    1        375.0           2     1428.5714
## 5      1100    2        275.0           4      997.7324
## 6       750    4        187.5           8      617.0269
## 7      6000    5       1500.0          10     4701.1570

Present value of projected cash flows (2)

Various other functions can be used to work with the present values, e.g. they can be totaled with sum(), or a subset can be extracted with subset().


# Total present value of cash
total_pv <- sum(cash$present_value)
total_pv
## [1] 12604.71

# Company B information
# NOTE(review): cash no longer has a company column at this point, so this
# filter appears to rely on the standalone `company` vector; confirm
cash_B <- subset(cash, company == "B")
cash_B
##   cash_flow year quarter_cash double_year present_value
## 4      1500    1        375.0           2     1428.5714
## 5      1100    2        275.0           4      997.7324
## 6       750    4        187.5           8      617.0269
## 7      6000    5       1500.0          10     4701.1570

# Total present value of cash_B
total_pv_B <- sum(cash_B$present_value)
total_pv_B
## [1] 7744.488

Chapter 4: Factors

Create a factor

Data can be categorized with the help of the factor() function.


# credit_rating character vector
credit_rating <- c("BB", "AAA", "AA", "CCC", "AA", "AAA", "B", "BB")

# Create a factor from credit_rating
credit_factor <- factor(credit_rating)

# Print out your new factor (the values plus the set of distinct levels)
credit_factor
## [1] BB  AAA AA  CCC AA  AAA B   BB 
## Levels: AA AAA B BB CCC

# Call str() on credit_rating: a plain character vector
str(credit_rating)
##  chr [1:8] "BB" "AAA" "AA" "CCC" "AA" "AAA" "B" "BB"


# Call str() on credit_factor: shown as integer codes that index the levels
str(credit_factor)
##  Factor w/ 5 levels "AA","AAA","B",..: 4 2 1 5 1 2 3 4

Factor levels

Factor levels enable another layer to the data manipulation by using the levels() function. We can also rename our factors to further refine our data.


# Identify unique levels
levels(credit_factor)
## [1] "AA"  "AAA" "B"   "BB"  "CCC"


# Rename the levels of credit_factor. The new labels replace the existing
# levels position by position: AA -> 2A, AAA -> 3A, B -> 1B, BB -> 2B, CCC -> 3C
levels(credit_factor)
## [1] "AA"  "AAA" "B"   "BB"  "CCC"
levels(credit_factor) <- c("2A", "3A", "1B", "2B", "3C")

# Print credit_factor
credit_factor
## [1] 2B 3A 2A 3C 2A 3A 1B 2B
## Levels: 2A 3A 1B 2B 3C

Factor summary

summary() function can be used to summarize Factor levels and further manipulate the data.


# Summarize the character vector, credit_rating (only length and type)
summary(credit_rating)
##    Length     Class      Mode 
##         8 character character

# Summarize the factor, credit_factor (a count for each level)
summary(credit_factor)
## 2A 3A 1B 2B 3C 
##  2  2  1  2  1

Visualize your factor

The plot() function can again be used to create charts; for a factor it draws a bar chart of the level counts.

# Visualize your factor! plot() on a factor draws one bar per level
plot(credit_factor)

### Bucketing a numeric variable into a factor

The cut() function can be used to create a factor from a numeric vector.

# Fifty numeric ranks to be bucketed
AAA_rank <- c(
  31, 48, 100, 53, 85, 73, 62, 74, 42, 38,
  97, 61, 48, 86, 44, 9, 43, 18, 62, 38,
  23, 37, 54, 80, 78, 93, 47, 100, 22, 22,
  18, 26, 81, 17, 98, 4, 83, 5, 6, 52,
  29, 44, 50, 2, 25, 19, 15, 42, 30, 27
)

# Bucket the ranks into four intervals with break points every 25 units
AAA_factor <- cut(AAA_rank, breaks = seq(0, 100, by = 25))

# Inspect the interval labels, then swap them for descriptive names
levels(AAA_factor)
## [1] "(0,25]"   "(25,50]"  "(50,75]"  "(75,100]"
levels(AAA_factor) <- c("low", "medium", "high", "very_high")


# Display the bucketed factor
AAA_factor
##  [1] medium    medium    very_high high      very_high high      high     
##  [8] high      medium    medium    very_high high      medium    very_high
## [15] medium    low       medium    low       high      medium    low      
## [22] medium    high      very_high very_high very_high medium    very_high
## [29] low       low       low       medium    very_high low       very_high
## [36] low       very_high low       low       high      medium    medium   
## [43] medium    low       low       low       low       medium    medium   
## [50] medium   
## Levels: low medium high very_high


# Bar chart of how many ranks fall in each bucket
plot(AAA_factor)

Create an ordered factor

When creating a factor, simply adding ordered = TRUE (together with levels listed from least to greatest) makes the factor ordered.



# List the distinct ratings present in the data
unique(credit_rating)
## [1] "BB"  "AAA" "AA"  "CCC" "B"

# Build an ordered factor; the levels argument spells out the ordering
credit_factor_ordered <- factor(
  credit_rating,
  levels = c("AAA", "AA", "BB", "B", "CCC"),
  ordered = TRUE
)


# Bar chart of the ordered factor
plot(credit_factor_ordered)

Subsetting a factor

Again [ ] is the key to extract subsets from factors.


# Remove the bonds at positions 3 and 7. Subsetting with [ ] keeps every
# level, including any level left without observations.
keep_level <- credit_factor[-c(3, 7)]

# Plot keep_level
plot(keep_level)



# Same subset, but droplevels() discards the levels that are no longer used.
drop_level <- droplevels(keep_level)


# Plot drop_level
plot(drop_level)

stringsAsFactors

When creating data frames, R versions before 4.0 convert character columns to factors by default; to prevent this, pass stringsAsFactors = FALSE (which is the default since R 4.0).


# Variables
credit_rating <- c("AAA", "A", "BB")
bond_owners <- c("Dan", "Tom", "Joe")

# Create the data frame of character vectors, bonds
# (stringsAsFactors = FALSE keeps the text columns as character)
bonds <- data.frame(credit_rating, bond_owners, stringsAsFactors = FALSE)
bonds
##   credit_rating bond_owners
## 1           AAA         Dan
## 2             A         Tom
## 3            BB         Joe

# Use str() on bonds
str(bonds)
## 'data.frame':    3 obs. of  2 variables:
##  $ credit_rating: chr  "AAA" "A" "BB"
##  $ bond_owners  : chr  "Dan" "Tom" "Joe"


# Create a factor column in bonds called credit_factor from credit_rating;
# the levels argument fixes the order AAA < A < BB
bonds$credit_factor <- factor(
  bonds$credit_rating,
  ordered = TRUE,
  levels = c("AAA", "A", "BB")
)


# Use str() on bonds again
str(bonds)
## 'data.frame':    3 obs. of  3 variables:
##  $ credit_rating: chr  "AAA" "A" "BB"
##  $ bond_owners  : chr  "Dan" "Tom" "Joe"
##  $ credit_factor: Ord.factor w/ 3 levels "AAA"<"A"<"BB": 1 2 3

Chapter 5: Lists

Create a list

The list() function creates a list, a container whose elements can have different types and sizes (here a string, two vectors and a matrix).

# List components
# NOTE: apple and ibm are redefined here as shorter, five-value vectors
name <- "Apple and IBM"
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03)
ibm <- c(159.82, 160.02, 159.84, 160.35, 164.79)
cor_matrix <- cor(cbind(apple, ibm))

# Create a list (the elements are unnamed, so they print as [[1]], [[2]], ...)
portfolio <- list(name, apple, ibm, cor_matrix)


# View your first list
portfolio
## [[1]]
## [1] "Apple and IBM"
## 
## [[2]]
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## [[3]]
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## [[4]]
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000

Named lists

We can name the elements while creating the list, or add names with names() if the list already exists.


# Add names to your portfolio, one per element, in order
names(portfolio) <- c("portfolio_name", "apple", "ibm", "correlation")


# Print portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
## 
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## $correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000

Access elements in a list

Elements in a list can be accessed with the help of [ ] and further refined by the use of $.



# Second and third elements of portfolio ([ ] returns a list)
portfolio[c(2, 3)]
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79


# Use $ to get the correlation data (the element itself, not a list)

portfolio$correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000

Adding to a list

Elements can be added to a list with $, for example existingList$newElement <- value, or with c(existingList, newElement).


# Add weight: 20% Apple, 80% IBM (assigning to a new $name appends an element)
portfolio$weight <- c(apple = 0.2, ibm = 0.8)



# Print portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
## 
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## $correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000
## 
## $weight
## apple   ibm 
##   0.2   0.8

# Change the weight variable: 30% Apple, 70% IBM
# (assigning to an existing $name replaces that element)
portfolio$weight <- c(apple = 0.3, ibm = 0.7)


# Print portfolio to see the changes
portfolio
## $portfolio_name
## [1] "Apple and IBM"
## 
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## $correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000
## 
## $weight
## apple   ibm 
##   0.3   0.7

Removing from a list

Elements can be removed from a list by assigning NULL to them using $, [ ], or [[ ]].


# Take a look at portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
## 
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## $correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000
## 
## $weight
## apple   ibm 
##   0.3   0.7

# Remove the microsoft stock prices from your portfolio
# NOTE(review): portfolio as built above has no `microsoft` element (only
# portfolio_name, apple, ibm, correlation and weight), so this assignment
# leaves the list unchanged, as the identical printout below shows
portfolio$microsoft <- NULL
portfolio
## $portfolio_name
## [1] "Apple and IBM"
## 
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
## 
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
## 
## $correlation
##           apple       ibm
## apple 1.0000000 0.9131575
## ibm   0.9131575 1.0000000
## 
## $weight
## apple   ibm 
##   0.3   0.7

Split it

We can split the data within a data frame by using the split() function, and restore the data frame to its original state by using the unsplit() function.



# Define grouping from year: one group label per row of cash
grouping <- cash$year

# Split cash on your new grouping (the result is a list of data frames,
# one per distinct year)
split_cash <- split(cash, grouping)

# Look at your split_cash list
split_cash
## $`1`
##   cash_flow year quarter_cash double_year present_value
## 1      1000    1          250           2       952.381
## 4      1500    1          375           2      1428.571
## 
## $`2`
##   cash_flow year quarter_cash double_year present_value
## 5      1100    2          275           4      997.7324
## 
## $`3`
##   cash_flow year quarter_cash double_year present_value
## 2      4000    3         1000           6       3455.35
## 
## $`4`
##   cash_flow year quarter_cash double_year present_value
## 3       550    4        137.5           8      452.4864
## 6       750    4        187.5           8      617.0269
## 
## $`5`
##   cash_flow year quarter_cash double_year present_value
## 7      6000    5         1500          10      4701.157
str(split_cash)
## List of 5
##  $ 1:'data.frame':   2 obs. of  5 variables:
##   ..$ cash_flow    : num [1:2] 1000 1500
##   ..$ year         : num [1:2] 1 1
##   ..$ quarter_cash : num [1:2] 250 375
##   ..$ double_year  : num [1:2] 2 2
##   ..$ present_value: num [1:2] 952 1429
##  $ 2:'data.frame':   1 obs. of  5 variables:
##   ..$ cash_flow    : num 1100
##   ..$ year         : num 2
##   ..$ quarter_cash : num 275
##   ..$ double_year  : num 4
##   ..$ present_value: num 998
##  $ 3:'data.frame':   1 obs. of  5 variables:
##   ..$ cash_flow    : num 4000
##   ..$ year         : num 3
##   ..$ quarter_cash : num 1000
##   ..$ double_year  : num 6
##   ..$ present_value: num 3455
##  $ 4:'data.frame':   2 obs. of  5 variables:
##   ..$ cash_flow    : num [1:2] 550 750
##   ..$ year         : num [1:2] 4 4
##   ..$ quarter_cash : num [1:2] 138 188
##   ..$ double_year  : num [1:2] 8 8
##   ..$ present_value: num [1:2] 452 617
##  $ 5:'data.frame':   1 obs. of  5 variables:
##   ..$ cash_flow    : num 6000
##   ..$ year         : num 5
##   ..$ quarter_cash : num 1500
##   ..$ double_year  : num 10
##   ..$ present_value: num 4701

# Unsplit split_cash to get the original data back.
original_cash <- unsplit(split_cash, grouping)

# Print original_cash (the reassembled data frame, rather than cash itself)
original_cash
##   cash_flow year quarter_cash double_year present_value
## 1      1000    1        250.0           2      952.3810
## 2      4000    3       1000.0           6     3455.3504
## 3       550    4        137.5           8      452.4864
## 4      1500    1        375.0           2     1428.5714
## 5      1100    2        275.0           4      997.7324
## 6       750    4        187.5           8      617.0269
## 7      6000    5       1500.0          10     4701.1570

Split-Apply-Combine

A data frame can be split into groups, each group modified through $, and the pieces recombined into one data frame with the help of the split() and unsplit() functions.


# Regroup by company. The earlier split used year, which yields groups named
# 1 to 5 and no A or B groups. The company column was deleted from cash, so
# the standalone company vector (one label per row of cash) is the grouping.
grouping <- company
split_cash <- split(cash, grouping)

# Print split_cash
split_cash
## $A
##   cash_flow year quarter_cash double_year present_value
## 1      1000    1        250.0           2      952.3810
## 2      4000    3       1000.0           6     3455.3504
## 3       550    4        137.5           8      452.4864
## 
## $B
##   cash_flow year quarter_cash double_year present_value
## 4      1500    1        375.0           2     1428.5714
## 5      1100    2        275.0           4      997.7324
## 6       750    4        187.5           8      617.0269
## 7      6000    5       1500.0          10     4701.1570

# Print the cash_flow column of B in split_cash
split_cash$B$cash_flow
## [1] 1500 1100  750 6000


# Set the cash_flow column of company A in split_cash to 0
split_cash$A$cash_flow <- 0

# Use the grouping to unsplit split_cash
cash_no_A <- unsplit(split_cash, grouping)

# Print cash_no_A
cash_no_A
##   cash_flow year quarter_cash double_year present_value
## 1         0    1        250.0           2      952.3810
## 2         0    3       1000.0           6     3455.3504
## 3         0    4        137.5           8      452.4864
## 4      1500    1        375.0           2     1428.5714
## 5      1100    2        275.0           4      997.7324
## 6       750    4        187.5           8      617.0269
## 7      6000    5       1500.0          10     4701.1570

Attributes

attributes() function can be used to create a list of attributes about an object. On the other hand, the attr() function can be used to access a specific attribute.


# my_matrix and my_factor
my_matrix <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3)
rownames(my_matrix) <- c("Row1", "Row2")
colnames(my_matrix) <- c("Col1", "Col2", "Col3")

# TRUE is spelled out: T is an ordinary variable that can be reassigned
my_factor <- factor(c("A", "A", "B"), ordered = TRUE, levels = c("A", "B"))

# attributes of my_matrix (its dimensions and its row/column names)
attributes(my_matrix)
## $dim
## [1] 2 3
## 
## $dimnames
## $dimnames[[1]]
## [1] "Row1" "Row2"
## 
## $dimnames[[2]]
## [1] "Col1" "Col2" "Col3"


# Just the dim attribute of my_matrix
attr(my_matrix, which = "dim")
## [1] 2 3


# attributes of my_factor
attributes(my_factor)
## $levels
## [1] "A" "B"
## 
## $class
## [1] "ordered" "factor"