#PART I - 1.1: Vector
# 1. Create 2 vectors, each containing 10 random numbers
set.seed(42)
vec1 <- runif(10, min = 0, max = 100)
vec2 <- runif(10, min = 0, max = 100)
cat("Vector 1:\n"); print(vec1)
## Vector 1:
## [1] 91.48060 93.70754 28.61395 83.04476 64.17455 51.90959 73.65883 13.46666
## [9] 65.69923 70.50648
cat("Vector 2:\n"); print(vec2)
## Vector 2:
## [1] 45.77418 71.91123 93.46722 25.54288 46.22928 94.00145 97.82264 11.74874
## [9] 47.49971 56.03327
# 2. Append the second vector to the first one
combined.vec <- c(vec1, vec2)
cat("Combined Vector:\n"); print(combined.vec)
## Combined Vector:
## [1] 91.48060 93.70754 28.61395 83.04476 64.17455 51.90959 73.65883 13.46666
## [9] 65.69923 70.50648 45.77418 71.91123 93.46722 25.54288 46.22928 94.00145
## [17] 97.82264 11.74874 47.49971 56.03327
# 3. Calculate the mean of the new combined vector
vec.mean <- mean(combined.vec)
cat("Mean of combined vector:", vec.mean, "\n")
## Mean of combined vector: 61.31464
# 4. Print 'True' if element > mean, else 'False'
comparison.result <- ifelse(combined.vec > vec.mean, "True", "False")
cat("Comparison result (element > mean):\n")
## Comparison result (element > mean):
print(comparison.result)
## [1] "True" "True" "False" "True" "True" "False" "True" "False" "True"
## [10] "True" "False" "True" "True" "False" "False" "True" "True" "False"
## [19] "False" "False"
# 1. Create a vector with 100 random numbers
set.seed(42)
vec.100 <- runif(100, min = 0, max = 100)
# 2. Transfer the vector into a 10 by 10 matrix M
M <- matrix(vec.100, nrow = 10, ncol = 10)
cat("\nMatrix M (10x10):\n"); print(M)
##
## Matrix M (10x10):
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 91.48060 45.77418 90.403139 73.7595618 37.955924 33.342721 67.56073
## [2,] 93.70754 71.91123 13.871017 81.1055141 43.577158 34.674825 98.28172
## [3,] 28.61395 93.46722 98.889173 38.8108283 3.743103 39.848541 75.95443
## [4,] 83.04476 25.54288 94.666823 68.5169729 97.353991 78.469278 56.64884
## [5,] 64.17455 46.22928 8.243756 0.3948339 43.175125 3.893649 84.96897
## [6,] 51.90959 94.00145 51.421178 83.2916080 95.757660 74.879539 18.94739
## [7,] 73.65883 97.82264 39.020347 0.7334147 88.775491 67.727683 27.12866
## [8,] 13.46666 11.74874 90.573813 20.7658973 63.997877 17.126433 82.81585
## [9,] 65.69923 47.49971 44.696963 90.6601408 97.096661 26.108796 69.32048
## [10,] 70.50648 56.03327 83.600426 61.1778643 61.883821 51.441293 24.05447
## [,8] [,9] [,10]
## [1,] 4.2988796 58.160400 66.74265147
## [2,] 14.0479094 15.790521 0.02388966
## [3,] 21.6385415 35.902831 20.85699569
## [4,] 47.9398564 64.563188 93.30341273
## [5,] 19.7410342 77.582336 92.56447486
## [6,] 71.9355838 56.364684 73.40943010
## [7,] 0.7884739 23.370340 33.30719834
## [8,] 37.5489965 8.998052 51.50633298
## [9,] 51.4407708 8.561206 74.39746463
## [10,] 0.1570554 30.521837 61.91592400
# 3. Find the transposed matrix MT and print element [2, 1]
MT <- t(M)
cat("\nTransposed Matrix MT:\n"); print(MT)
##
## Transposed Matrix MT:
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 91.48060 93.70754133 28.613953 83.04476 64.1745519 51.90959 73.6588315
## [2,] 45.77418 71.91122517 93.467225 25.54288 46.2292823 94.00145 97.8226428
## [3,] 90.40314 13.87101677 98.889173 94.66682 8.2437558 51.42118 39.0203467
## [4,] 73.75956 81.10551413 38.810828 68.51697 0.3948339 83.29161 0.7334147
## [5,] 37.95592 43.57715850 3.743103 97.35399 43.1751249 95.75766 88.7754906
## [6,] 33.34272 34.67482482 39.848541 78.46928 3.8936491 74.87954 67.7276830
## [7,] 67.56073 98.28171979 75.954427 56.64884 84.9689719 18.94739 27.1286615
## [8,] 4.29888 14.04790941 21.638542 47.93986 19.7410342 71.93558 0.7884739
## [9,] 58.16040 15.79052082 35.902831 64.56319 77.5823363 56.36468 23.3703399
## [10,] 66.74265 0.02388966 20.856996 93.30341 92.5644749 73.40943 33.3071983
## [,8] [,9] [,10]
## [1,] 13.466660 65.699229 70.5064784
## [2,] 11.748736 47.499708 56.0332746
## [3,] 90.573813 44.696963 83.6004260
## [4,] 20.765897 90.660141 61.1778643
## [5,] 63.997877 97.096661 61.8838207
## [6,] 17.126433 26.108796 51.4412935
## [7,] 82.815849 69.320482 24.0544740
## [8,] 37.548996 51.440771 0.1570554
## [9,] 8.998052 8.561206 30.5218369
## [10,] 51.506333 74.397465 61.9159240
cat("Element at row 2, column 1 of MT:", MT[2, 1], "\n")
## Element at row 2, column 1 of MT: 45.77418
# 4. Nested loop to calculate inner product N = MT %*% M
n <- nrow(MT)
p <- ncol(M)
inner.dim <- ncol(MT) # = nrow(M)
N.loop <- matrix(0, nrow = n, ncol = p)
for (i in 1:n) {
for (j in 1:p) {
total <- 0
for (k in 1:inner.dim) {
total <- total + MT[i, k] * M[k, j]
}
N.loop[i, j] <- total
}
}
cat("\nInner product N (via nested loop):\n"); print(N.loop)
##
## Inner product N (via nested loop):
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 46572.50 38003.18 36384.29 36100.65 41632.39 28654.68 38068.19 15265.43
## [2,] 38003.18 42731.73 33700.04 30484.17 36220.72 27916.49 32682.79 15099.17
## [3,] 36384.29 33700.04 48531.16 33479.77 37676.06 28408.24 35688.46 16867.47
## [4,] 36100.65 30484.17 33479.77 37550.83 35125.29 24352.48 30891.26 17033.93
## [5,] 41632.39 36220.72 37676.06 35125.29 49099.24 30730.51 34057.14 20741.67
## [6,] 28654.68 27916.49 28408.24 24352.48 30730.51 23889.75 21185.01 12765.50
## [7,] 38068.19 32682.79 35688.46 30891.26 34057.14 21185.01 43759.02 15771.45
## [8,] 15265.43 15099.17 16867.47 17033.93 20741.67 12765.50 15771.45 12603.44
## [9,] 25650.93 22196.32 23338.42 18960.43 23433.13 17036.83 22232.58 10731.55
## [10,] 36604.31 29435.82 35933.55 29904.90 40030.35 24501.46 32453.01 18116.65
## [,9] [,10]
## [1,] 25650.93 36604.31
## [2,] 22196.32 29435.82
## [3,] 23338.42 35933.55
## [4,] 18960.43 29904.90
## [5,] 23433.13 40030.35
## [6,] 17036.83 24501.46
## [7,] 22232.58 32453.01
## [8,] 10731.55 18116.65
## [9,] 19917.40 25742.59
## [10,] 25742.59 40683.09
# 5. Calculate inner product using %*% and compare
N.operator <- MT %*% M
cat("\nInner product N (via %*% operator):\n"); print(N.operator)
##
## Inner product N (via %*% operator):
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 46572.50 38003.18 36384.29 36100.65 41632.39 28654.68 38068.19 15265.43
## [2,] 38003.18 42731.73 33700.04 30484.17 36220.72 27916.49 32682.79 15099.17
## [3,] 36384.29 33700.04 48531.16 33479.77 37676.06 28408.24 35688.46 16867.47
## [4,] 36100.65 30484.17 33479.77 37550.83 35125.29 24352.48 30891.26 17033.93
## [5,] 41632.39 36220.72 37676.06 35125.29 49099.24 30730.51 34057.14 20741.67
## [6,] 28654.68 27916.49 28408.24 24352.48 30730.51 23889.75 21185.01 12765.50
## [7,] 38068.19 32682.79 35688.46 30891.26 34057.14 21185.01 43759.02 15771.45
## [8,] 15265.43 15099.17 16867.47 17033.93 20741.67 12765.50 15771.45 12603.44
## [9,] 25650.93 22196.32 23338.42 18960.43 23433.13 17036.83 22232.58 10731.55
## [10,] 36604.31 29435.82 35933.55 29904.90 40030.35 24501.46 32453.01 18116.65
## [,9] [,10]
## [1,] 25650.93 36604.31
## [2,] 22196.32 29435.82
## [3,] 23338.42 35933.55
## [4,] 18960.43 29904.90
## [5,] 23433.13 40030.35
## [6,] 17036.83 24501.46
## [7,] 22232.58 32453.01
## [8,] 10731.55 18116.65
## [9,] 19917.40 25742.59
## [10,] 25742.59 40683.09
cat("\nAre the two results equal?", all.equal(N.loop, N.operator), "\n")
##
## Are the two results equal? TRUE
# 1. Load the CSV file
stock.data <- read.csv("/Users/aasthasingh/Downloads/stock_data-1.csv", header = TRUE, stringsAsFactors = FALSE, row.names = 1)
cat("\nRaw data dimensions:", dim(stock.data), "\n")
##
## Raw data dimensions: 6041 29
cat("First 3 rows:\n"); print(head(stock.data, 3))
## First 3 rows:
## AAPL AMGN AXP BA CAT CRM CSCO CVX
## 1996-01-02 0.286830 14.56250 12.10832 39.9375 14.875 NA 4.243055 26.4375
## 1996-01-03 0.286830 14.40625 12.10832 39.5625 15.125 NA 4.076389 26.5000
## 1996-01-04 0.281808 13.78125 11.99890 38.5625 15.000 NA 3.923611 27.2500
## DIS DOW GS HD IBM INTC JNJ JPM
## 1996-01-02 20.01773 NA NA 10.52778 22.71875 7.328125 21.06250 19.58333
## 1996-01-03 20.14104 NA NA 10.33333 22.31250 7.218750 21.90625 19.58333
## 1996-01-04 19.89442 NA NA 10.30556 21.71875 7.187500 21.68750 18.75000
## KO MCD MMM MRK MSFT NKE PG TRV
## 1996-01-02 18.75000 22.750 33.8750 32.1250 5.609375 4.445313 20.78125 28.250
## 1996-01-03 18.90625 22.750 33.8125 31.6875 5.429688 4.312500 21.40625 28.625
## 1996-01-04 18.75000 22.875 33.6875 31.8750 5.460938 4.265625 21.75000 29.000
## UNH V VZ WBA WMT
## 1996-01-02 8.078125 NA 30.46456 7.53125 11.625
## 1996-01-03 8.109375 NA 31.42009 7.50000 11.750
## 1996-01-04 8.187500 NA 30.85801 7.40625 11.875
dates <- as.Date(rownames(stock.data))
# 2. Delete columns containing any NA values
na.cols <- sapply(stock.data, function(col) any(is.na(col)))
stock.clean <- stock.data[, !na.cols]
cat("\nColumns removed (contained NA):", names(stock.data)[na.cols], "\n")
##
## Columns removed (contained NA): CRM DOW GS V
cat("Remaining stocks:", ncol(stock.clean), "\n")
## Remaining stocks: 25
cat("Clean data (first 3 rows):\n"); print(head(stock.clean, 3))
## Clean data (first 3 rows):
## AAPL AMGN AXP BA CAT CSCO CVX DIS
## 1996-01-02 0.286830 14.56250 12.10832 39.9375 14.875 4.243055 26.4375 20.01773
## 1996-01-03 0.286830 14.40625 12.10832 39.5625 15.125 4.076389 26.5000 20.14104
## 1996-01-04 0.281808 13.78125 11.99890 38.5625 15.000 3.923611 27.2500 19.89442
## HD IBM INTC JNJ JPM KO MCD MMM
## 1996-01-02 10.52778 22.71875 7.328125 21.06250 19.58333 18.75000 22.750 33.8750
## 1996-01-03 10.33333 22.31250 7.218750 21.90625 19.58333 18.90625 22.750 33.8125
## 1996-01-04 10.30556 21.71875 7.187500 21.68750 18.75000 18.75000 22.875 33.6875
## MRK MSFT NKE PG TRV UNH VZ WBA
## 1996-01-02 32.1250 5.609375 4.445313 20.78125 28.250 8.078125 30.46456 7.53125
## 1996-01-03 31.6875 5.429688 4.312500 21.40625 28.625 8.109375 31.42009 7.50000
## 1996-01-04 31.8750 5.460938 4.265625 21.75000 29.000 8.187500 30.85801 7.40625
## WMT
## 1996-01-02 11.625
## 1996-01-03 11.750
## 1996-01-04 11.875
# 3. Calculate daily log returns for each stock
CalcLogReturn <- function(prices) {
log(as.numeric(prices[-1]) / as.numeric(prices[-length(prices)]))
}
log.returns <- as.data.frame(apply(stock.clean, 2, CalcLogReturn))
log.return.dates <- dates[-1] # one fewer row after differencing
cat("\nLog returns (first 3 rows):\n"); print(head(log.returns, 3))
##
## Log returns (first 3 rows):
## AAPL AMGN AXP BA CAT CSCO
## 1 0.00000000 -0.01078759 0.000000000 -0.009434032 0.016667052 -0.04007198
## 2 -0.01766372 -0.04435317 -0.009077177 -0.025601398 -0.008298803 -0.03819914
## 3 0.08171839 0.02242246 -0.003044156 0.017671143 0.016529302 0.01231323
## CVX DIS HD IBM INTC JNJ
## 1 0.002361276 0.006141243 -0.018642404 -0.01804352 -0.015037877 0.03927778
## 2 0.027908788 -0.012320435 -0.002691813 -0.02697112 -0.004338402 -0.01003593
## 3 0.015927527 0.018424292 -0.024557852 0.01994368 0.000000000 0.00000000
## JPM KO MCD MMM MRK MSFT
## 1 0.000000000 0.008298803 0.000000000 -0.001846723 -0.013712262 -0.032557631
## 2 -0.043485146 -0.008298803 0.005479466 -0.003703708 0.005899722 0.005738896
## 3 -0.004454386 -0.005012542 -0.016529302 0.001853569 -0.011834458 -0.011510917
## NKE PG TRV UNH VZ WBA
## 1 -0.03033250 0.029631798 0.013187004 0.003861009 0.03088352 -0.00415801
## 2 -0.01092907 0.015930822 0.013015368 0.009587801 -0.01805110 -0.01257878
## 3 -0.03163042 0.004301082 0.002152853 -0.040901514 0.01086967 0.03727139
## WMT
## 1 0.01069529
## 2 0.01058211
## 3 -0.01591546
# 4. Calculate mean and SD; store as 2 x N data frame
ret.mean <- apply(log.returns, 2, mean)
ret.sd <- apply(log.returns, 2, sd)
stats.df <- as.data.frame(rbind(Mean = ret.mean, SD = ret.sd))
cat("\nStatistics (2 x N data frame):\n"); print(stats.df)
##
## Statistics (2 x N data frame):
## AAPL AMGN AXP BA CAT
## Mean 0.0009168344 0.0004641248 0.0003855638 0.0003478159 0.000379848
## SD 0.0284047796 0.0208557858 0.0219789482 0.0193786437 0.020493476
## CSCO CVX DIS HD IBM
## Mean 0.0004002217 0.0002502413 0.0003264233 0.0005012099 0.0002923392
## SD 0.0247623992 0.0159682591 0.0188428129 0.0195658648 0.0173720510
## INTC JNJ JPM KO MCD
## Mean 0.0003470648 0.0003197527 0.0003240281 0.0001789796 0.0003573148
## SD 0.0237095985 0.0129114075 0.0238587249 0.0138393263 0.0149395005
## MMM MRK MSFT NKE PG
## Mean 0.0002726557 0.0001724428 0.0005522446 0.0005167696 0.0002963599
## SD 0.0149365129 0.0173385301 0.0195438394 0.0199581843 0.0142370700
## TRV UNH VZ WBA WMT
## Mean 0.0002607877 0.0005950182 0.000115521 0.0003405546 0.0003856492
## SD 0.0177976490 0.0219630363 0.015967949 0.0181026340 0.0160858239
# 5. Two sub-plot graph
stock.names <- colnames(stock.clean)
colors.3 <- c("blue", "red", "darkgreen")
pdf("FE513_Homework1_Part1_Plots.pdf", width = 14, height = 10)
par(mfrow = c(2, 1), mar = c(5, 4, 3, 2))
# Sub-plot 1: First 3 stocks' daily prices
y.range <- range(stock.clean[, 1:3])
plot(dates, stock.clean[, 1],
type = "l",
col = colors.3[1],
ylim = y.range,
xlab = "Date",
ylab = "Stock Price (USD)",
main = "Daily Stock Prices: First 3 Stocks",
lwd = 1.5)
lines(dates, stock.clean[, 2], col = colors.3[2], lwd = 1.5)
lines(dates, stock.clean[, 3], col = colors.3[3], lwd = 1.5)
legend("topleft",
legend = stock.names[1:3],
col = colors.3,
lty = 1,
lwd = 1.5,
bty = "n")
# Sub-plot 2: Scatter plot of mean and SD for each stock
n.stocks <- length(stock.names)
x.pos <- 1:n.stocks
y.range2 <- range(c(ret.mean, ret.sd))
plot(x.pos, ret.mean,
pch = 16,
col = "blue",
xaxt = "n",
xlab = "Stock",
ylab = "Statistical Value",
main = "Log Return Statistics per Stock (Mean & SD)",
ylim = y.range2,
cex = 1.2)
points(x.pos, ret.sd, pch = 17, col = "red", cex = 1.2)
axis(1, at = x.pos, labels = stock.names, las = 2, cex.axis = 0.75)
legend("topright",
legend = c("Mean", "Standard Deviation"),
col = c("blue", "red"),
pch = c(16, 17),
bty = "n")
#1.3 using pipe
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
stats.pipe <- read.csv("/Users/aasthasingh/Downloads/stock_data-1.csv", header = TRUE,
stringsAsFactors = FALSE, row.names = 1) %>%
select(where(~ !any(is.na(.)))) %>% # 2. Remove NA columns
mutate(across(everything(),
~ log(. / lag(.)))) %>% # 3. Log returns
slice(-1) %>% # Drop first NA row
summarise(across(everything(),
list(Mean = mean, SD = sd),
.names = "{.col}_{.fn}")) # 4. Mean & SD
cat("\nStats via pipe (first 6 columns):\n")
##
## Stats via pipe (first 6 columns):
print(stats.pipe[, 1:6])
## AAPL_Mean AAPL_SD AMGN_Mean AMGN_SD AXP_Mean AXP_SD
## 1 0.0009168344 0.02840478 0.0004641248 0.02085579 0.0003855638 0.02197895
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# 1. Download Amazon daily stock price data 2021-01-01 to 2021-12-31
getSymbols("AMZN",
src = "yahoo",
from = "2021-01-01",
to = "2021-12-31",
auto.assign = TRUE)
## [1] "AMZN"
amzn.df <- as.data.frame(AMZN)
amzn.df$Date <- rownames(amzn.df)
write.csv(amzn.df, "AMZN_2021.csv", row.names = FALSE)
cat("\nAmazon 2021 data saved to AMZN_2021.csv\n")
##
## Amazon 2021 data saved to AMZN_2021.csv
cat("Rows downloaded:", nrow(amzn.df), "\n")
## Rows downloaded: 251
# 2. Calculate weekly log returns based on adjusted close price
amzn.weekly.adj <- Ad(to.weekly(AMZN))
weekly.log.ret <- na.omit(diff(log(amzn.weekly.adj)))
cat("\nWeekly log returns (first 5):\n"); print(head(weekly.log.ret, 5))
##
## Weekly log returns (first 5):
## AMZN.Adjusted
## 2021-01-15 -0.02495776
## 2021-01-22 0.05879302
## 2021-01-29 -0.02647870
## 2021-02-05 0.04451550
## 2021-02-12 -0.02245692
# 3. Calculate median, mean, and standard deviation
ret.median <- as.numeric(median(weekly.log.ret))
ret.mean <- as.numeric(mean(weekly.log.ret))
ret.sd <- as.numeric(sd(weekly.log.ret))
cat(sprintf("\nAMZN 2021 Weekly Log Return Statistics:\n Median : %.6f\n Mean : %.6f\n Std Dev: %.6f\n",
ret.median, ret.mean, ret.sd))
##
## AMZN 2021 Weekly Log Return Statistics:
## Median : -0.002562
## Mean : 0.001138
## Std Dev: 0.033926
# 4. Plot distribution of daily log returns
amzn.daily.ret <- na.omit(diff(log(Ad(AMZN))))
hist(as.numeric(amzn.daily.ret),
breaks = 40,
col = "steelblue",
border = "white",
main = "Amazon Daily Log Return Distribution (2021)",
xlab = "Daily Log Return",
ylab = "Frequency")
abline(v = mean(amzn.daily.ret), col = "red", lwd = 2, lty = 2)
abline(v = median(amzn.daily.ret), col = "orange", lwd = 2, lty = 2)
legend("topright",
legend = c("Mean", "Median"),
col = c("red", "orange"),
lty = 2,
lwd = 2,
bty = "n")
# 5. Count observations with log return between 0.01 and 0.015
count.in.range <- sum(amzn.daily.ret >= 0.01 & amzn.daily.ret <= 0.015)
cat(sprintf("\nNumber of days with log return in [0.01, 0.015]: %d\n", count.in.range))
##
## Number of days with log return in [0.01, 0.015]: 31