Summary: My first R script. For addition, type 3 + 5 and press Enter; the next line shows the result, 8. Subtraction works the same way.
# Addition!
3 + 5
## [1] 8
# Subtraction!
6-4
## [1] 2
Playing with the new calculator. The arithmetic operators are: addition (+), subtraction (-), multiplication (*), division (/), exponentiation (^ or **), and modulo (%%). The ^ operator raises the number to its left to the power of the number to its right. Clicking on a line of code in the script and then pressing Command + Enter executes just that line in the R Console.
# Addition
2 + 2
## [1] 4
# Subtraction
4 - 1
## [1] 3
# Multiplication
3 * 4
## [1] 12
# Division
4/2
## [1] 2
# Exponentiation
2^4
## [1] 16
# Modulo
7%%3
## [1] 1
The order of operations is PEMDAS: Parentheses, Exponentiation, Multiplication and Division, Addition and Subtraction.
I used a variable to store a value or an object in R.
# Assign 200 to savings
savings <- 200
# Print the value of savings to the console
savings
## [1] 200
I added two variables together to find the sum of my money and Dan’s money.
# Cash held by each person
dans_money <- 200
my_money <- 100
# Show the combined total on the console
sum(my_money, dans_money)
## [1] 300
# Compute the combined total again and keep it in our_money
our_money <- sum(my_money, dans_money)
I calculated the returns for my mock investment.
# Starting cash and the January return, expressed in percent
starting_cash <- 200
jan_ret <- 5
# Convert the percentage return into a growth multiplier (5% -> 1.05)
jan_mult <- 1 + (jan_ret / 100)
# How much money do you have at the end of January?
post_jan_cash <- starting_cash * jan_mult
# Print post_jan_cash
post_jan_cash
## [1] 210
# Alternative scenario: a 10% return during January.
# The multiplier is derived from jan_ret_10 instead of being typed in by
# hand, so changing the return automatically changes the multiplier.
jan_ret_10 <- 10
jan_mult_10 <- 1 + (jan_ret_10 / 100)
# How much money do you have at the end of January now?
post_jan_cash_10 <- starting_cash * jan_mult_10
# Print post_jan_cash_10
post_jan_cash_10
## [1] 220
I computed returns that compounded on top of my previous returns.
# Starting cash and the monthly returns, expressed in percent
starting_cash <- 200
jan_ret <- 4
feb_ret <- 5
# Growth multipliers, computed from the return variables so that the
# percentages are stated in exactly one place
jan_mult <- 1 + jan_ret / 100
feb_mult <- 1 + feb_ret / 100
# Total cash at the end of the two months: February's return compounds
# on top of the cash already grown by January's return
total_cash <- starting_cash * jan_mult * feb_mult
# Print total_cash
total_cash
## [1] 218.4
I practiced assigning values to Numerics, Logicals, Characters.
# Apple's stock price is a numeric
apple_stock <- 150.45
# Bond credit ratings are characters
credit_rating <- "AAA"
# You like the stock market. TRUE or FALSE?
my_answer <- TRUE
# Print my_answer
my_answer
## [1] TRUE
I learned how to find what data type a variable is: class(my_var)
a <- TRUE
class(a)
## [1] "logical"
b <- 5.5
class(b)
## [1] "numeric"
c <- "Hello World"
class(c)
## [1] "character"
I created my first vector.
# Another numeric vector
ibm_stock <- c(159.82, 160.02, 159.84)
# Another character vector
finance <- c("stocks", "bonds", "investments")
# A logical vector
logic <- c(TRUE, FALSE, TRUE)
A vector can only be composed of one data type. This means that you cannot have both a numeric and a character in the same vector.
The hierarchy for coercion is:
logical < integer < numeric < character
I assigned months to a vector.
# Vectors of 12 months of returns, and month names
ret <- c(5, 2, 3, 7, 8, 3, 5, 9, 1, 4, 6, 3)
months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
# Add names to ret
names(ret) <- months
# Print out ret to see the new names!
ret
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 5 2 3 7 8 3 5 9 1 4 6 3
I learned how to use plot() function.
# Look at the data
apple_stock <- c(109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95, 113.30, 115.19, 115.19, 115.82, 115.97, 116.64, 116.95, 117.06, 116.29, 116.52, 117.26, 116.76, 116.73, 115.82)
# Plot the data points
plot(apple_stock)
# Plot the data as a line graph
plot(apple_stock, type = "l")
I used a vector in order to figure out weighted average portfolio return.
# Weights and returns
micr_ret <- 7
sony_ret <- 9
micr_weight <- .2
sony_weight <- .8
# Portfolio return
portf_ret <- micr_ret * micr_weight + sony_ret * sony_weight
I figured out the weighted average return.
# Weights, returns, and company names
ret <- c(7, 9)
weight <- c(.2, .8)
companies <- c("Microsoft", "Sony")
# Assign company names to your vectors
names(ret) <- companies
names(weight) <- companies
# Multiply the returns and weights together
ret_X_weight <- ret * weight
# Print ret_X_weight
ret_X_weight
## Microsoft Sony
## 1.4 7.2
# Sum to get the total portfolio return
portf_ret <- sum(ret_X_weight)
# Print portf_ret
portf_ret
## [1] 8.6
## [1] 8.6
I calculated the weighted average return using equal weights.
# Print ret
ret
## Microsoft Sony
## 7 9
# Equal weights: each of the length(ret) holdings gets the same share.
# With two stocks that is 1/2 each. A fixed 1/3 would make the weights
# sum to 2/3 and push the "average" below both individual returns.
weight <- 1 / length(ret)
# Create ret_X_weight (the single weight is recycled across all of ret)
ret_X_weight <- ret * weight
ret_X_weight
## Microsoft Sony
## 3.5 4.5
# Calculate your portfolio return
portf_ret <- sum(ret_X_weight)
portf_ret
## [1] 8
# Element-wise product with an explicit weight per stock.
# Both vectors have length 2 here, so nothing is recycled and R gives no
# warning; R only warns when the longer length is not a multiple of the
# shorter one.
ret * c(.2, .6)
## Microsoft Sony
## 1.4 5.4
This time I used only specific parts of vectors.
# Define ret
ret <- c(5, 2, 3, 7, 8, 3, 5, 9, 1, 4, 6, 3)
names(ret) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
ret
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 5 2 3 7 8 3 5 9 1 4 6 3
# First 6 months of returns
ret[1:6]
## Jan Feb Mar Apr May Jun
## 5 2 3 7 8 3
# Just March and May
ret[c("Mar", "May")]
## Mar May
## 3 8
# Omit the first month of returns
ret[-1]
## Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2 3 7 8 3 5 9 1 4 6 3
I created my first matrix.
# The numbers 1 through 9, stored as a numeric (double) vector
my_vector <- as.numeric(1:9)
# Arrange them into three rows; R fills column by column and works out
# the column count (3) from the length of the data
my_matrix <- matrix(my_vector, nrow = 3)
# Show the resulting 3x3 matrix
my_matrix
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# byrow = TRUE fills row by row instead of column by column
matrix(c(2, 3, 4, 5), ncol = 2, byrow = TRUE)
## [,1] [,2]
## [1,] 2 3
## [2,] 4 5
I combined vectors.
# Define vectors
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03, 112.12, 113.95, 113.30, 115.19, 115.19,
115.82, 115.97, 116.64, 116.95, 117.06, 116.29, 116.52, 117.26, 116.76, 116.73,
115.82)
ibm <- c(159.82, 160.02, 159.84, 160.35, 164.79, 165.36, 166.52, 165.50, 168.29, 168.51,
168.02, 166.73, 166.68, 167.60, 167.33, 167.06, 166.71, 167.14, 166.19, 166.60,
165.99)
micr <- c(59.20, 59.25, 60.22, 59.95, 61.37, 61.01, 61.97, 62.17, 62.98, 62.68, 62.58,
62.30, 63.62, 63.54, 63.54, 63.55, 63.24, 63.28, 62.99, 62.90, 62.14)
# cbind the vectors together
cbind_stocks <- cbind(apple, ibm, micr)
# Print cbind_stocks
cbind_stocks
## apple ibm micr
## [1,] 109.49 159.82 59.20
## [2,] 109.90 160.02 59.25
## [3,] 109.11 159.84 60.22
## [4,] 109.95 160.35 59.95
## [5,] 111.03 164.79 61.37
## [6,] 112.12 165.36 61.01
## [7,] 113.95 166.52 61.97
## [8,] 113.30 165.50 62.17
## [9,] 115.19 168.29 62.98
## [10,] 115.19 168.51 62.68
## [11,] 115.82 168.02 62.58
## [12,] 115.97 166.73 62.30
## [13,] 116.64 166.68 63.62
## [14,] 116.95 167.60 63.54
## [15,] 117.06 167.33 63.54
## [16,] 116.29 167.06 63.55
## [17,] 116.52 166.71 63.24
## [18,] 117.26 167.14 63.28
## [19,] 116.76 166.19 62.99
## [20,] 116.73 166.60 62.90
## [21,] 115.82 165.99 62.14
# rbind the vectors together
rbind_stocks <- rbind(apple, ibm, micr)
# Print rbind_stocks
rbind_stocks
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## apple 109.49 109.90 109.11 109.95 111.03 112.12 113.95 113.30 115.19
## ibm 159.82 160.02 159.84 160.35 164.79 165.36 166.52 165.50 168.29
## micr 59.20 59.25 60.22 59.95 61.37 61.01 61.97 62.17 62.98
## [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18]
## apple 115.19 115.82 115.97 116.64 116.95 117.06 116.29 116.52 117.26
## ibm 168.51 168.02 166.73 166.68 167.60 167.33 167.06 166.71 167.14
## micr 62.68 62.58 62.30 63.62 63.54 63.54 63.55 63.24 63.28
## [,19] [,20] [,21]
## apple 116.76 116.73 115.82
## ibm 166.19 166.60 165.99
## micr 62.99 62.90 62.14
I plotted the matrix of Apple and Microsoft stock prices.
# Define matrix
apple_micr_matrix <- cbind(apple, micr)
# View the data
apple_micr_matrix
## apple micr
## [1,] 109.49 59.20
## [2,] 109.90 59.25
## [3,] 109.11 60.22
## [4,] 109.95 59.95
## [5,] 111.03 61.37
## [6,] 112.12 61.01
## [7,] 113.95 61.97
## [8,] 113.30 62.17
## [9,] 115.19 62.98
## [10,] 115.19 62.68
## [11,] 115.82 62.58
## [12,] 115.97 62.30
## [13,] 116.64 63.62
## [14,] 116.95 63.54
## [15,] 117.06 63.54
## [16,] 116.29 63.55
## [17,] 116.52 63.24
## [18,] 117.26 63.28
## [19,] 116.76 62.99
## [20,] 116.73 62.90
## [21,] 115.82 62.14
# Scatter plot of Microsoft vs Apple
plot(apple_micr_matrix)
I found the correlation of some stocks.
# Correlation of Apple and IBM
cor(apple, ibm)
## [1] 0.8872467
# stock matrix
stocks <- cbind(apple, micr, ibm)
# cor() of all three
cor(stocks)
## apple micr ibm
## apple 1.0000000 0.9477010 0.8872467
## micr 0.9477010 1.0000000 0.9126597
## ibm 0.8872467 0.9126597 1.0000000
# Note how it fails when using more than 2 vectors! Try to run the code for the correlation of all three stocks.
#cor(apple, micr, ibm)
I selected things within the matrix.
# Third row
stocks[3, ]
## apple micr ibm
## 109.11 60.22 159.84
# Fourth and fifth row of the ibm column
stocks[4:5, "ibm"]
## [1] 160.35 164.79
# apple and micr columns
stocks[, c("apple", "micr")]
## apple micr
## [1,] 109.49 59.20
## [2,] 109.90 59.25
## [3,] 109.11 60.22
## [4,] 109.95 59.95
## [5,] 111.03 61.37
## [6,] 112.12 61.01
## [7,] 113.95 61.97
## [8,] 113.30 62.17
## [9,] 115.19 62.98
## [10,] 115.19 62.68
## [11,] 115.82 62.58
## [12,] 115.97 62.30
## [13,] 116.64 63.62
## [14,] 116.95 63.54
## [15,] 117.06 63.54
## [16,] 116.29 63.55
## [17,] 116.52 63.24
## [18,] 117.26 63.28
## [19,] 116.76 62.99
## [20,] 116.73 62.90
## [21,] 115.82 62.14
I created my first data frame.
# Variables
company <- c("A", "A", "A", "B", "B", "B", "B")
cash_flow <- c(1000, 4000, 550, 1500, 1100, 750, 6000)
year <- c(1, 3, 4, 1, 2, 4, 5)
# Data frame
cash <- data.frame(company, cash_flow, year)
# Print cash
cash
## company cash_flow year
## 1 A 1000 1
## 2 A 4000 3
## 3 A 550 4
## 4 B 1500 1
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
I learned how to use head(), tail(), and str() functions.
# Call head() for the first 4 rows
head(cash, n=4)
## company cash_flow year
## 1 A 1000 1
## 2 A 4000 3
## 3 A 550 4
## 4 B 1500 1
# Call tail() for the last 3 rows
tail(cash, n=3)
## company cash_flow year
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
# Call str(cash)
str(cash)
## 'data.frame': 7 obs. of 3 variables:
## $ company : Factor w/ 2 levels "A","B": 1 1 1 2 2 2 2
## $ cash_flow: num 1000 4000 550 1500 1100 750 6000
## $ year : num 1 3 4 1 2 4 5
I learned how to use colnames() function.
# Fix your column names
colnames(cash) <- c("company", "cash_flow", "year")
# Print out the column names of cash
colnames(cash)
## [1] "company" "cash_flow" "year"
I selected subsets of data frames.
# Third row, second column
cash[3,2]
## [1] 550
# Fifth row of the "year" column
cash[5,"year"]
## [1] 2
I found a shortcut for selecting a column of a data frame: the $ operator.
# Select the year column
cash$year
## [1] 1 3 4 1 2 4 5
# Select the cash_flow column and multiply by 2
cash$cash_flow * 2
## [1] 2000 8000 1100 3000 2200 1500 12000
# Delete the company column
cash$company <- NULL
# Print cash again
cash
## cash_flow year
## 1 1000 1
## 2 4000 3
## 3 550 4
## 4 1500 1
## 5 1100 2
## 6 750 4
## 7 6000 5
I learned how to use the subset() function.
# Restore cash
company <- c("A", "A", "A", "B", "B", "B", "B")
cash_flow <- c(1000, 4000, 550, 1500, 1100, 750, 6000)
year <- c(1, 3, 4, 1, 2, 4, 5)
cash <- data.frame(company, cash_flow, year)
# Rows about company B
subset(cash, company == "B")
## company cash_flow year
## 4 B 1500 1
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
# Rows with cash flows due in 1 year
subset(cash, year == 1)
## company cash_flow year
## 1 A 1000 1
## 4 B 1500 1
I ran an analysis of a worst-case scenario where you only receive a quarter of your expected cash flow, and of a scenario where every payment takes twice as long to arrive.
# Quarter cash flow scenario
cash$quarter_cash <- cash$cash_flow * 0.25
cash
## company cash_flow year quarter_cash
## 1 A 1000 1 250.0
## 2 A 4000 3 1000.0
## 3 A 550 4 137.5
## 4 B 1500 1 375.0
## 5 B 1100 2 275.0
## 6 B 750 4 187.5
## 7 B 6000 5 1500.0
# Double year scenario
cash$double_year <- cash$year * 2
cash
## company cash_flow year quarter_cash double_year
## 1 A 1000 1 250.0 2
## 2 A 4000 3 1000.0 6
## 3 A 550 4 137.5 8
## 4 B 1500 1 375.0 2
## 5 B 1100 2 275.0 4
## 6 B 750 4 187.5 8
## 7 B 6000 5 1500.0 10
I calculated the present value of money received in the future.
# Restore cash
cash$quarter_cash <- NULL
cash$double_year <- NULL
# Present value of $4000, in 3 years, at 5%
present_value_4k <- 4000 * (1+0.05)^(-3)
# Present value of all cash flows
cash$present_value <- cash$cash_flow * (1+0.05)^(-cash$year)
# Print out cash
cash
## company cash_flow year present_value
## 1 A 1000 1 952.3810
## 2 A 4000 3 3455.3504
## 3 A 550 4 452.4864
## 4 B 1500 1 1428.5714
## 5 B 1100 2 997.7324
## 6 B 750 4 617.0269
## 7 B 6000 5 4701.1570
I computed present value of company A and B combined.
# Total present value of cash
total_pv <- sum(cash$present_value)
total_pv
## [1] 12604.71
# Company B information
cash_B <- subset(cash, company == "B")
cash_B
## company cash_flow year present_value
## 4 B 1500 1 1428.5714
## 5 B 1100 2 997.7324
## 6 B 750 4 617.0269
## 7 B 6000 5 4701.1570
# Total present value of cash_B
total_pv_B <- sum(cash_B$present_value)
total_pv_B
## [1] 7744.488
I learned how to use the factor() function.
# credit_rating character vector
credit_rating <- c("BB", "AAA", "AA", "CCC", "AA", "AAA", "B", "BB")
# Create a factor from credit_rating
credit_factor <- factor(credit_rating)
# Print out your new factor
credit_factor
## [1] BB AAA AA CCC AA AAA B BB
## Levels: AA AAA B BB CCC
# Call str() on credit_rating
str(credit_rating)
## chr [1:8] "BB" "AAA" "AA" "CCC" "AA" "AAA" "B" "BB"
# Call str() on credit_factor
str(credit_factor)
## Factor w/ 5 levels "AA","AAA","B",..: 4 2 1 5 1 2 3 4
I accessed the levels of a factor, which is simple enough using the levels() function.
# Identify unique levels
levels(credit_factor)
## [1] "AA" "AAA" "B" "BB" "CCC"
# Rename the levels of credit_factor
levels(credit_factor) <- c("2A", "3A", "1B", "2B", "3C")
# Print credit_factor
credit_factor
## [1] 2B 3A 2A 3C 2A 3A 1B 2B
## Levels: 2A 3A 1B 2B 3C
I learned how to use the summary() function.
# Restore credit_factor
levels(credit_factor)
## [1] "2A" "3A" "1B" "2B" "3C"
levels(credit_factor) <- c("AA", "AAA", "B", "BB", "CCC")
# Summarize the character vector, credit_rating
summary(credit_rating)
## Length Class Mode
## 8 character character
# Summarize the factor, credit_factor
summary(credit_factor)
## AA AAA B BB CCC
## 2 2 1 2 1
I learned how to use the plot() function.
# Visualize your factor!
plot(credit_factor)
I learned how to use the cut() function.
# Define AAA_rank.
AAA_rank <- c(31, 48, 100, 53, 85, 73, 62, 74, 42, 38, 97, 61, 48, 86, 44, 9, 43, 18, 62,
38, 23, 37, 54, 80, 78, 93, 47, 100, 22, 22, 18, 26, 81, 17, 98, 4, 83, 5,
6, 52, 29, 44, 50, 2, 25, 19, 15, 42, 30, 27)
# Create 4 buckets for AAA_rank using cut()
AAA_factor <- cut(x = AAA_rank, breaks = c(0, 25, 50, 75, 100))
# Rename the levels
levels(AAA_factor) <- c("low", "medium", "high", "very_high")
# Print AAA_factor
AAA_factor
## [1] medium medium very_high high very_high high high
## [8] high medium medium very_high high medium very_high
## [15] medium low medium low high medium low
## [22] medium high very_high very_high very_high medium very_high
## [29] low low low medium very_high low very_high
## [36] low very_high low low high medium medium
## [43] medium low low low low medium medium
## [50] medium
## Levels: low medium high very_high
# Plot AAA_factor
plot(AAA_factor)
I ordered the credit ratings from least risky (AAA) to most risky (CCC).
# Use unique() to find unique words
unique(credit_rating)
## [1] "BB" "AAA" "AA" "CCC" "B"
# Create an ordered factor
credit_factor_ordered <- factor(credit_rating, ordered = TRUE, levels = c("AAA", "AA", "BB", "B", "CCC"))
# Plot credit_factor_ordered
plot(credit_factor_ordered)
I removed the A credit ratings from the plot.
# Define credit_factor
credit_factor <- factor(c("AAA", "AA", "A", "BBB", "AA", "BBB", "A"),
ordered = TRUE,
levels = c("BBB", "A", "AA", "AAA"))
# Remove the A bonds at positions 3 and 7. Don't drop the A level.
keep_level <- credit_factor[-c(3, 7)]
# Plot keep_level
plot(keep_level)
Before R 4.0.0, R’s default behavior when creating data frames was to convert all characters into factors. You can turn off this behavior by adding stringsAsFactors = FALSE (since R 4.0.0 this is already the default).
# Variables
credit_rating <- c("AAA", "A", "BB")
bond_owners <- c("Dan", "Tom", "Joe")
# Create the data frame of character vectors, bonds
bonds <- data.frame(credit_rating, bond_owners, stringsAsFactors = FALSE)
bonds
## credit_rating bond_owners
## 1 AAA Dan
## 2 A Tom
## 3 BB Joe
# Use str() on bonds
str(bonds)
## 'data.frame': 3 obs. of 2 variables:
## $ credit_rating: chr "AAA" "A" "BB"
## $ bond_owners : chr "Dan" "Tom" "Joe"
# Create a factor column in bonds called credit_factor from credit_rating
bonds$credit_factor <- factor(bonds$credit_rating, ordered = TRUE, levels = c("AAA", "A", "BB"))
# Use str() on bonds again
str(bonds)
## 'data.frame': 3 obs. of 3 variables:
## $ credit_rating: chr "AAA" "A" "BB"
## $ bond_owners : chr "Dan" "Tom" "Joe"
## $ credit_factor: Ord.factor w/ 3 levels "AAA"<"A"<"BB": 1 2 3
I created my first list.
# List components
name <- "Apple and IBM"
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03)
ibm <- c(159.82, 160.02, 159.84, 160.35, 164.79)
cor_matrix <- cor(cbind(apple, ibm))
# Create a list
portfolio <- list(name, apple, ibm, cor_matrix)
# View your first list
portfolio
## [[1]]
## [1] "Apple and IBM"
##
## [[2]]
## [1] 109.49 109.90 109.11 109.95 111.03
##
## [[3]]
## [1] 159.82 160.02 159.84 160.35 164.79
##
## [[4]]
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
I added names to a list.
# Add names to your portfolio
names(portfolio) <- c("portfolio_name", "apple", "ibm", "correlation")
# Print portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
##
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
##
## $correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
I used [ ] in order to access the elements in the list.
# Second and third elements of portfolio
portfolio[c(2,3)]
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
# Use $ to get the correlation data
portfolio$correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
# Third item of the second element of portfolio
portfolio[[c(2,3)]]
## [1] 109.11
I added new elements to a list.
# Add weight: 20% Apple, 80% IBM
portfolio$weight <- c(apple = 0.2, ibm = 0.8)
# Print portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
##
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
##
## $correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
##
## $weight
## apple ibm
## 0.2 0.8
# Change the weight variable: 30% Apple, 70% IBM
portfolio$weight <- c(apple = 0.3, ibm = 0.7)
# Print portfolio to see the changes
portfolio
## $portfolio_name
## [1] "Apple and IBM"
##
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
##
## $correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
##
## $weight
## apple ibm
## 0.3 0.7
I learned how to remove elements from a list.
# Define portfolio
portfolio_name <- "Apple and IBM"
apple <- c(109.49, 109.90, 109.11, 109.95, 111.03)
ibm <- c(159.82, 160.02, 159.84, 160.35, 164.79)
microsoft <- c(150.0, 152.0, 154.0, 154.5)
correlation <- cor(cbind(apple, ibm))
portfolio <- list(portfolio_name = portfolio_name,
apple = apple,
ibm = ibm,
microsoft = microsoft,
correlation = correlation)
# Take a look at portfolio
portfolio
## $portfolio_name
## [1] "Apple and IBM"
##
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
##
## $microsoft
## [1] 150.0 152.0 154.0 154.5
##
## $correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
# Remove the microsoft stock prices from your portfolio
portfolio$microsoft <- NULL
portfolio
## $portfolio_name
## [1] "Apple and IBM"
##
## $apple
## [1] 109.49 109.90 109.11 109.95 111.03
##
## $ibm
## [1] 159.82 160.02 159.84 160.35 164.79
##
## $correlation
## apple ibm
## apple 1.0000000 0.9131575
## ibm 0.9131575 1.0000000
I learned how to use the split() function.
# Define cash
cash$present_value <- NULL
# Define grouping from year
grouping <- cash$year
# Split cash on your new grouping
split_cash <- split(cash, grouping)
# Look at your split_cash list
split_cash
## $`1`
## company cash_flow year
## 1 A 1000 1
## 4 B 1500 1
##
## $`2`
## company cash_flow year
## 5 B 1100 2
##
## $`3`
## company cash_flow year
## 2 A 4000 3
##
## $`4`
## company cash_flow year
## 3 A 550 4
## 6 B 750 4
##
## $`5`
## company cash_flow year
## 7 B 6000 5
str(split_cash)
## List of 5
## $ 1:'data.frame': 2 obs. of 3 variables:
## ..$ company : Factor w/ 2 levels "A","B": 1 2
## ..$ cash_flow: num [1:2] 1000 1500
## ..$ year : num [1:2] 1 1
## $ 2:'data.frame': 1 obs. of 3 variables:
## ..$ company : Factor w/ 2 levels "A","B": 2
## ..$ cash_flow: num 1100
## ..$ year : num 2
## $ 3:'data.frame': 1 obs. of 3 variables:
## ..$ company : Factor w/ 2 levels "A","B": 1
## ..$ cash_flow: num 4000
## ..$ year : num 3
## $ 4:'data.frame': 2 obs. of 3 variables:
## ..$ company : Factor w/ 2 levels "A","B": 1 2
## ..$ cash_flow: num [1:2] 550 750
## ..$ year : num [1:2] 4 4
## $ 5:'data.frame': 1 obs. of 3 variables:
## ..$ company : Factor w/ 2 levels "A","B": 2
## ..$ cash_flow: num 6000
## ..$ year : num 5
# Unsplit split_cash to get the original data back.
# unsplit() reverses split(): using the same grouping vector, it puts
# every row back in the position it had before the split.
original_cash <- unsplit(split_cash, grouping)
# Print original_cash (the reassembled data, not the untouched cash
# object) to confirm that the round trip restores the original rows
original_cash
## company cash_flow year
## 1 A 1000 1
## 2 A 4000 3
## 3 A 550 4
## 4 B 1500 1
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
Company A went out of business, so I set its cash flows to 0 in the split data and then unsplit it.
# Define split_cash and grouping
split_cash <- split(cash, company)
grouping <- company
# Print split_cash
split_cash
## $A
## company cash_flow year
## 1 A 1000 1
## 2 A 4000 3
## 3 A 550 4
##
## $B
## company cash_flow year
## 4 B 1500 1
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
# Print the cash_flow column of B in split_cash
split_cash$B$cash_flow
## [1] 1500 1100 750 6000
# Set the cash_flow column of company A in split_cash to 0
split_cash$A$cash_flow <- 0
# Use the grouping to unsplit split_cash
cash_no_A <- unsplit(split_cash, grouping)
# Print cash_no_A
cash_no_A
## company cash_flow year
## 1 A 0 1
## 2 A 0 3
## 3 A 0 4
## 4 B 1500 1
## 5 B 1100 2
## 6 B 750 4
## 7 B 6000 5
I learned how to use the attributes function.
# my_matrix: a 2x3 numeric matrix, filled column by column, with row and
# column names attached
my_matrix <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3)
rownames(my_matrix) <- c("Row1", "Row2")
colnames(my_matrix) <- c("Col1", "Col2", "Col3")
# my_factor: an ordered factor with levels A < B.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
my_factor <- factor(c("A", "A", "B"), ordered = TRUE, levels = c("A", "B"))
# attributes of my_matrix
attributes(my_matrix)
## $dim
## [1] 2 3
##
## $dimnames
## $dimnames[[1]]
## [1] "Row1" "Row2"
##
## $dimnames[[2]]
## [1] "Col1" "Col2" "Col3"
# Just the dim attribute of my_matrix
attr(my_matrix, which = "dim")
## [1] 2 3
# attributes of my_factor
attributes(my_factor)
## $levels
## [1] "A" "B"
##
## $class
## [1] "ordered" "factor"
A vector is a collection of data that is all the same type. Vectors are one-dimensional arrays that can hold numeric data, character data, or logical data. In other words, a vector is a simple tool to store data.
A matrix is a 2D vector that we can use to store data. Like a vector, it can only hold one type of data. In R, a matrix is a collection of elements of the same data type (numeric, character, or logical) arranged into a fixed number of rows and columns. Since you are only working with rows and columns, a matrix is called two-dimensional.
Matrices and data frames are different because every element of a matrix must be of the same data type, while a data frame can hold a different data type in each column (for example, a character column next to numeric columns).
vector
# Another numeric vector: three Samsung share prices.
# Numbers must be written without thousands separators. Inside c() every
# comma starts a new element, so c(2,275,000) would be the three values
# 2, 275 and 0 rather than the single price 2275000.
samsung_stock <- c(2275000, 2297000, 2273000)
# Another character vector
finance <- c("stocks", "bonds", "investments")
# A logical vector
logic <- c(TRUE, FALSE, TRUE)
matrix
my_vector <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
# 3x3 matrix
my_matrix <- matrix(data =my_vector, nrow = 3, ncol = 3)
# Print my_matrix
my_matrix
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Filling across using byrow = TRUE
matrix(data = c(2, 3, 4, 5), nrow = 2, ncol = 2, byrow = TRUE)
## [,1] [,2]
## [1,] 2 3
## [2,] 4 5
data frame
# Variables
company <- c("A", "A", "A", "B", "B", "B", "B")
cash_flow <- c(100500, 850000, 550600, 750000, 350000, 750000, 850000)
year <- c(1, 3, 4, 1, 2, 4, 5)
# Data frame
cash <- data.frame(company, cash_flow, year)
# Print cash
cash
## company cash_flow year
## 1 A 100500 1
## 2 A 850000 3
## 3 A 550600 4
## 4 B 750000 1
## 5 B 350000 2
## 6 B 750000 4
## 7 B 850000 5
factor
# grade_letters: the raw grades as a character vector.
# It is kept under its own name so it can still be compared with the
# factor built from it below.
grade_letters <- c("A", "B", "C", "D", "F")
# Create the factor my_grades from the character vector
my_grades <- factor(grade_letters)
# Print out your new factor
my_grades
## [1] A B C D F
## Levels: A B C D F
# Call str() on the character vector
str(grade_letters)
## chr [1:5] "A" "B" "C" "D" "F"
# Call str() on the factor: it stores integer codes plus the levels
str(my_grades)
## Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
list
# List components
name <- "Samsung and LG"
# Five prices per stock, written without thousands separators.
# Inside c() every comma starts a new element, so c(2,275,000, ...) would
# produce 15 small numbers for Samsung and 10 for LG. cbind() would then
# recycle the shorter vector and the correlation would be meaningless.
Samsung <- c(2275000, 2297000, 2273000, 2279000, 2275000)
LG <- c(87200, 87500, 85800, 86100, 86500)
# Correlation matrix of the two price series (the columns of the cbind)
cor_matrix <- cor(cbind(Samsung, LG))
# Create a list
portfolio <- list(name, Samsung, LG, cor_matrix)
# View your first list
portfolio
## [[1]]
## [1] "Samsung and LG"
##
## [[2]]
## [1] 2275000 2297000 2273000 2279000 2275000
##
## [[3]]
## [1] 87200 87500 85800 86100 86500
##
## [[4]]
## Samsung LG
## Samsung 1.000000 0.667241
## LG 0.667241 1.000000