#PART I - 1.1: Vector

# 1. Create 2 vectors, each containing 10 random numbers
set.seed(42)
vec1 <- runif(10, min = 0, max = 100)
vec2 <- runif(10, min = 0, max = 100)

cat("Vector 1:\n"); print(vec1)
## Vector 1:
##  [1] 91.48060 93.70754 28.61395 83.04476 64.17455 51.90959 73.65883 13.46666
##  [9] 65.69923 70.50648
cat("Vector 2:\n"); print(vec2)
## Vector 2:
##  [1] 45.77418 71.91123 93.46722 25.54288 46.22928 94.00145 97.82264 11.74874
##  [9] 47.49971 56.03327
# 2. Append the second vector to the first one
combined.vec <- c(vec1, vec2)
cat("Combined Vector:\n"); print(combined.vec)
## Combined Vector:
##  [1] 91.48060 93.70754 28.61395 83.04476 64.17455 51.90959 73.65883 13.46666
##  [9] 65.69923 70.50648 45.77418 71.91123 93.46722 25.54288 46.22928 94.00145
## [17] 97.82264 11.74874 47.49971 56.03327
# 3. Calculate the mean of the new combined vector
vec.mean <- mean(combined.vec)
cat("Mean of combined vector:", vec.mean, "\n")
## Mean of combined vector: 61.31464
# 4. Print 'True' if element > mean, else 'False'
comparison.result <- ifelse(combined.vec > vec.mean, "True", "False")
cat("Comparison result (element > mean):\n")
## Comparison result (element > mean):
print(comparison.result)
##  [1] "True"  "True"  "False" "True"  "True"  "False" "True"  "False" "True" 
## [10] "True"  "False" "True"  "True"  "False" "False" "True"  "True"  "False"
## [19] "False" "False"

PART I - Section 1.2: Matrix

# 1. Create a vector with 100 random numbers
set.seed(42)
vec.100 <- runif(100, min = 0, max = 100)
# 2. Transfer the vector into a 10 by 10 matrix M
M <- matrix(vec.100, nrow = 10, ncol = 10)
cat("\nMatrix M (10x10):\n"); print(M)
## 
## Matrix M (10x10):
##           [,1]     [,2]      [,3]       [,4]      [,5]      [,6]     [,7]
##  [1,] 91.48060 45.77418 90.403139 73.7595618 37.955924 33.342721 67.56073
##  [2,] 93.70754 71.91123 13.871017 81.1055141 43.577158 34.674825 98.28172
##  [3,] 28.61395 93.46722 98.889173 38.8108283  3.743103 39.848541 75.95443
##  [4,] 83.04476 25.54288 94.666823 68.5169729 97.353991 78.469278 56.64884
##  [5,] 64.17455 46.22928  8.243756  0.3948339 43.175125  3.893649 84.96897
##  [6,] 51.90959 94.00145 51.421178 83.2916080 95.757660 74.879539 18.94739
##  [7,] 73.65883 97.82264 39.020347  0.7334147 88.775491 67.727683 27.12866
##  [8,] 13.46666 11.74874 90.573813 20.7658973 63.997877 17.126433 82.81585
##  [9,] 65.69923 47.49971 44.696963 90.6601408 97.096661 26.108796 69.32048
## [10,] 70.50648 56.03327 83.600426 61.1778643 61.883821 51.441293 24.05447
##             [,8]      [,9]       [,10]
##  [1,]  4.2988796 58.160400 66.74265147
##  [2,] 14.0479094 15.790521  0.02388966
##  [3,] 21.6385415 35.902831 20.85699569
##  [4,] 47.9398564 64.563188 93.30341273
##  [5,] 19.7410342 77.582336 92.56447486
##  [6,] 71.9355838 56.364684 73.40943010
##  [7,]  0.7884739 23.370340 33.30719834
##  [8,] 37.5489965  8.998052 51.50633298
##  [9,] 51.4407708  8.561206 74.39746463
## [10,]  0.1570554 30.521837 61.91592400
# 3. Find the transposed matrix MT and print element [2, 1]
MT <- t(M)
cat("\nTransposed Matrix MT:\n"); print(MT)
## 
## Transposed Matrix MT:
##           [,1]        [,2]      [,3]     [,4]       [,5]     [,6]       [,7]
##  [1,] 91.48060 93.70754133 28.613953 83.04476 64.1745519 51.90959 73.6588315
##  [2,] 45.77418 71.91122517 93.467225 25.54288 46.2292823 94.00145 97.8226428
##  [3,] 90.40314 13.87101677 98.889173 94.66682  8.2437558 51.42118 39.0203467
##  [4,] 73.75956 81.10551413 38.810828 68.51697  0.3948339 83.29161  0.7334147
##  [5,] 37.95592 43.57715850  3.743103 97.35399 43.1751249 95.75766 88.7754906
##  [6,] 33.34272 34.67482482 39.848541 78.46928  3.8936491 74.87954 67.7276830
##  [7,] 67.56073 98.28171979 75.954427 56.64884 84.9689719 18.94739 27.1286615
##  [8,]  4.29888 14.04790941 21.638542 47.93986 19.7410342 71.93558  0.7884739
##  [9,] 58.16040 15.79052082 35.902831 64.56319 77.5823363 56.36468 23.3703399
## [10,] 66.74265  0.02388966 20.856996 93.30341 92.5644749 73.40943 33.3071983
##            [,8]      [,9]      [,10]
##  [1,] 13.466660 65.699229 70.5064784
##  [2,] 11.748736 47.499708 56.0332746
##  [3,] 90.573813 44.696963 83.6004260
##  [4,] 20.765897 90.660141 61.1778643
##  [5,] 63.997877 97.096661 61.8838207
##  [6,] 17.126433 26.108796 51.4412935
##  [7,] 82.815849 69.320482 24.0544740
##  [8,] 37.548996 51.440771  0.1570554
##  [9,]  8.998052  8.561206 30.5218369
## [10,] 51.506333 74.397465 61.9159240
cat("Element at row 2, column 1 of MT:", MT[2, 1], "\n")
## Element at row 2, column 1 of MT: 45.77418
# 4. Nested loop to calculate inner product N = MT %*% M
n <- nrow(MT)
p <- ncol(M)
inner.dim <- ncol(MT)  # = nrow(M)

N.loop <- matrix(0, nrow = n, ncol = p)

for (i in 1:n) {
  for (j in 1:p) {
    total <- 0
    for (k in 1:inner.dim) {
      total <- total + MT[i, k] * M[k, j]
    }
    N.loop[i, j] <- total
  }
}
cat("\nInner product N (via nested loop):\n"); print(N.loop)
## 
## Inner product N (via nested loop):
##           [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
##  [1,] 46572.50 38003.18 36384.29 36100.65 41632.39 28654.68 38068.19 15265.43
##  [2,] 38003.18 42731.73 33700.04 30484.17 36220.72 27916.49 32682.79 15099.17
##  [3,] 36384.29 33700.04 48531.16 33479.77 37676.06 28408.24 35688.46 16867.47
##  [4,] 36100.65 30484.17 33479.77 37550.83 35125.29 24352.48 30891.26 17033.93
##  [5,] 41632.39 36220.72 37676.06 35125.29 49099.24 30730.51 34057.14 20741.67
##  [6,] 28654.68 27916.49 28408.24 24352.48 30730.51 23889.75 21185.01 12765.50
##  [7,] 38068.19 32682.79 35688.46 30891.26 34057.14 21185.01 43759.02 15771.45
##  [8,] 15265.43 15099.17 16867.47 17033.93 20741.67 12765.50 15771.45 12603.44
##  [9,] 25650.93 22196.32 23338.42 18960.43 23433.13 17036.83 22232.58 10731.55
## [10,] 36604.31 29435.82 35933.55 29904.90 40030.35 24501.46 32453.01 18116.65
##           [,9]    [,10]
##  [1,] 25650.93 36604.31
##  [2,] 22196.32 29435.82
##  [3,] 23338.42 35933.55
##  [4,] 18960.43 29904.90
##  [5,] 23433.13 40030.35
##  [6,] 17036.83 24501.46
##  [7,] 22232.58 32453.01
##  [8,] 10731.55 18116.65
##  [9,] 19917.40 25742.59
## [10,] 25742.59 40683.09
# 5. Calculate inner product using %*% and compare
N.operator <- MT %*% M
cat("\nInner product N (via %*% operator):\n"); print(N.operator)
## 
## Inner product N (via %*% operator):
##           [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
##  [1,] 46572.50 38003.18 36384.29 36100.65 41632.39 28654.68 38068.19 15265.43
##  [2,] 38003.18 42731.73 33700.04 30484.17 36220.72 27916.49 32682.79 15099.17
##  [3,] 36384.29 33700.04 48531.16 33479.77 37676.06 28408.24 35688.46 16867.47
##  [4,] 36100.65 30484.17 33479.77 37550.83 35125.29 24352.48 30891.26 17033.93
##  [5,] 41632.39 36220.72 37676.06 35125.29 49099.24 30730.51 34057.14 20741.67
##  [6,] 28654.68 27916.49 28408.24 24352.48 30730.51 23889.75 21185.01 12765.50
##  [7,] 38068.19 32682.79 35688.46 30891.26 34057.14 21185.01 43759.02 15771.45
##  [8,] 15265.43 15099.17 16867.47 17033.93 20741.67 12765.50 15771.45 12603.44
##  [9,] 25650.93 22196.32 23338.42 18960.43 23433.13 17036.83 22232.58 10731.55
## [10,] 36604.31 29435.82 35933.55 29904.90 40030.35 24501.46 32453.01 18116.65
##           [,9]    [,10]
##  [1,] 25650.93 36604.31
##  [2,] 22196.32 29435.82
##  [3,] 23338.42 35933.55
##  [4,] 18960.43 29904.90
##  [5,] 23433.13 40030.35
##  [6,] 17036.83 24501.46
##  [7,] 22232.58 32453.01
##  [8,] 10731.55 18116.65
##  [9,] 19917.40 25742.59
## [10,] 25742.59 40683.09
cat("\nAre the two results equal?", all.equal(N.loop, N.operator), "\n")
## 
## Are the two results equal? TRUE

PART I - 1.3: Function (using stock CSV file)

# 1. Load the CSV file
stock.data <- read.csv("/Users/aasthasingh/Downloads/stock_data-1.csv", header = TRUE, stringsAsFactors = FALSE, row.names = 1)
cat("\nRaw data dimensions:", dim(stock.data), "\n")
## 
## Raw data dimensions: 6041 29
cat("First 3 rows:\n"); print(head(stock.data, 3))
## First 3 rows:
##                AAPL     AMGN      AXP      BA    CAT CRM     CSCO     CVX
## 1996-01-02 0.286830 14.56250 12.10832 39.9375 14.875  NA 4.243055 26.4375
## 1996-01-03 0.286830 14.40625 12.10832 39.5625 15.125  NA 4.076389 26.5000
## 1996-01-04 0.281808 13.78125 11.99890 38.5625 15.000  NA 3.923611 27.2500
##                 DIS DOW GS       HD      IBM     INTC      JNJ      JPM
## 1996-01-02 20.01773  NA NA 10.52778 22.71875 7.328125 21.06250 19.58333
## 1996-01-03 20.14104  NA NA 10.33333 22.31250 7.218750 21.90625 19.58333
## 1996-01-04 19.89442  NA NA 10.30556 21.71875 7.187500 21.68750 18.75000
##                  KO    MCD     MMM     MRK     MSFT      NKE       PG    TRV
## 1996-01-02 18.75000 22.750 33.8750 32.1250 5.609375 4.445313 20.78125 28.250
## 1996-01-03 18.90625 22.750 33.8125 31.6875 5.429688 4.312500 21.40625 28.625
## 1996-01-04 18.75000 22.875 33.6875 31.8750 5.460938 4.265625 21.75000 29.000
##                 UNH  V       VZ     WBA    WMT
## 1996-01-02 8.078125 NA 30.46456 7.53125 11.625
## 1996-01-03 8.109375 NA 31.42009 7.50000 11.750
## 1996-01-04 8.187500 NA 30.85801 7.40625 11.875
dates <- as.Date(rownames(stock.data))
# 2. Delete columns containing any NA values
na.cols     <- sapply(stock.data, function(col) any(is.na(col)))
stock.clean <- stock.data[, !na.cols]
cat("\nColumns removed (contained NA):", names(stock.data)[na.cols], "\n")
## 
## Columns removed (contained NA): CRM DOW GS V
cat("Remaining stocks:", ncol(stock.clean), "\n")
## Remaining stocks: 25
cat("Clean data (first 3 rows):\n"); print(head(stock.clean, 3))
## Clean data (first 3 rows):
##                AAPL     AMGN      AXP      BA    CAT     CSCO     CVX      DIS
## 1996-01-02 0.286830 14.56250 12.10832 39.9375 14.875 4.243055 26.4375 20.01773
## 1996-01-03 0.286830 14.40625 12.10832 39.5625 15.125 4.076389 26.5000 20.14104
## 1996-01-04 0.281808 13.78125 11.99890 38.5625 15.000 3.923611 27.2500 19.89442
##                  HD      IBM     INTC      JNJ      JPM       KO    MCD     MMM
## 1996-01-02 10.52778 22.71875 7.328125 21.06250 19.58333 18.75000 22.750 33.8750
## 1996-01-03 10.33333 22.31250 7.218750 21.90625 19.58333 18.90625 22.750 33.8125
## 1996-01-04 10.30556 21.71875 7.187500 21.68750 18.75000 18.75000 22.875 33.6875
##                MRK     MSFT      NKE       PG    TRV      UNH       VZ     WBA
## 1996-01-02 32.1250 5.609375 4.445313 20.78125 28.250 8.078125 30.46456 7.53125
## 1996-01-03 31.6875 5.429688 4.312500 21.40625 28.625 8.109375 31.42009 7.50000
## 1996-01-04 31.8750 5.460938 4.265625 21.75000 29.000 8.187500 30.85801 7.40625
##               WMT
## 1996-01-02 11.625
## 1996-01-03 11.750
## 1996-01-04 11.875
# 3. Calculate daily log returns for each stock
CalcLogReturn <- function(prices) {
  log(as.numeric(prices[-1]) / as.numeric(prices[-length(prices)]))
}

log.returns      <- as.data.frame(apply(stock.clean, 2, CalcLogReturn))
log.return.dates <- dates[-1]  # one fewer row after differencing
cat("\nLog returns (first 3 rows):\n"); print(head(log.returns, 3))
## 
## Log returns (first 3 rows):
##          AAPL        AMGN          AXP           BA          CAT        CSCO
## 1  0.00000000 -0.01078759  0.000000000 -0.009434032  0.016667052 -0.04007198
## 2 -0.01766372 -0.04435317 -0.009077177 -0.025601398 -0.008298803 -0.03819914
## 3  0.08171839  0.02242246 -0.003044156  0.017671143  0.016529302  0.01231323
##           CVX          DIS           HD         IBM         INTC         JNJ
## 1 0.002361276  0.006141243 -0.018642404 -0.01804352 -0.015037877  0.03927778
## 2 0.027908788 -0.012320435 -0.002691813 -0.02697112 -0.004338402 -0.01003593
## 3 0.015927527  0.018424292 -0.024557852  0.01994368  0.000000000  0.00000000
##            JPM           KO          MCD          MMM          MRK         MSFT
## 1  0.000000000  0.008298803  0.000000000 -0.001846723 -0.013712262 -0.032557631
## 2 -0.043485146 -0.008298803  0.005479466 -0.003703708  0.005899722  0.005738896
## 3 -0.004454386 -0.005012542 -0.016529302  0.001853569 -0.011834458 -0.011510917
##           NKE          PG         TRV          UNH          VZ         WBA
## 1 -0.03033250 0.029631798 0.013187004  0.003861009  0.03088352 -0.00415801
## 2 -0.01092907 0.015930822 0.013015368  0.009587801 -0.01805110 -0.01257878
## 3 -0.03163042 0.004301082 0.002152853 -0.040901514  0.01086967  0.03727139
##           WMT
## 1  0.01069529
## 2  0.01058211
## 3 -0.01591546
# 4. Calculate mean and SD; store as 2 x N data frame
ret.mean <- apply(log.returns, 2, mean)
ret.sd   <- apply(log.returns, 2, sd)

stats.df <- as.data.frame(rbind(Mean = ret.mean, SD = ret.sd))
cat("\nStatistics (2 x N data frame):\n"); print(stats.df)
## 
## Statistics (2 x N data frame):
##              AAPL         AMGN          AXP           BA         CAT
## Mean 0.0009168344 0.0004641248 0.0003855638 0.0003478159 0.000379848
## SD   0.0284047796 0.0208557858 0.0219789482 0.0193786437 0.020493476
##              CSCO          CVX          DIS           HD          IBM
## Mean 0.0004002217 0.0002502413 0.0003264233 0.0005012099 0.0002923392
## SD   0.0247623992 0.0159682591 0.0188428129 0.0195658648 0.0173720510
##              INTC          JNJ          JPM           KO          MCD
## Mean 0.0003470648 0.0003197527 0.0003240281 0.0001789796 0.0003573148
## SD   0.0237095985 0.0129114075 0.0238587249 0.0138393263 0.0149395005
##               MMM          MRK         MSFT          NKE           PG
## Mean 0.0002726557 0.0001724428 0.0005522446 0.0005167696 0.0002963599
## SD   0.0149365129 0.0173385301 0.0195438394 0.0199581843 0.0142370700
##               TRV          UNH          VZ          WBA          WMT
## Mean 0.0002607877 0.0005950182 0.000115521 0.0003405546 0.0003856492
## SD   0.0177976490 0.0219630363 0.015967949 0.0181026340 0.0160858239
# 5. Two sub-plot graph
stock.names <- colnames(stock.clean)
colors.3    <- c("blue", "red", "darkgreen")

pdf("FE513_Homework1_Part1_Plots.pdf", width = 14, height = 10)
par(mfrow = c(2, 1), mar = c(5, 4, 3, 2))
# Sub-plot 1: First 3 stocks' daily prices
y.range <- range(stock.clean[, 1:3])
plot(dates, stock.clean[, 1],
     type  = "l",
     col   = colors.3[1],
     ylim  = y.range,
     xlab  = "Date",
     ylab  = "Stock Price (USD)",
     main  = "Daily Stock Prices: First 3 Stocks",
     lwd   = 1.5)
lines(dates, stock.clean[, 2], col = colors.3[2], lwd = 1.5)
lines(dates, stock.clean[, 3], col = colors.3[3], lwd = 1.5)
legend("topleft",
       legend = stock.names[1:3],
       col    = colors.3,
       lty    = 1,
       lwd    = 1.5,
       bty    = "n")

# Sub-plot 2: Scatter plot of mean and SD for each stock
n.stocks <- length(stock.names)
x.pos    <- 1:n.stocks
y.range2 <- range(c(ret.mean, ret.sd))

plot(x.pos, ret.mean,
     pch   = 16,
     col   = "blue",
     xaxt  = "n",
     xlab  = "Stock",
     ylab  = "Statistical Value",
     main  = "Log Return Statistics per Stock (Mean & SD)",
     ylim  = y.range2,
     cex   = 1.2)
points(x.pos, ret.sd, pch = 17, col = "red", cex = 1.2)
axis(1, at = x.pos, labels = stock.names, las = 2, cex.axis = 0.75)
legend("topright",
       legend = c("Mean", "Standard Deviation"),
       col    = c("blue", "red"),
       pch    = c(16, 17),
       bty    = "n")

#1.3 using pipe

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
stats.pipe <- read.csv("/Users/aasthasingh/Downloads/stock_data-1.csv", header = TRUE,
                       stringsAsFactors = FALSE, row.names = 1) %>%
  select(where(~ !any(is.na(.)))) %>%          # 2. Remove NA columns
  mutate(across(everything(),
                ~ log(. / lag(.)))) %>%         # 3. Log returns
  slice(-1) %>%                                 # Drop first NA row
  summarise(across(everything(),
                   list(Mean = mean, SD = sd),
                   .names = "{.col}_{.fn}"))    # 4. Mean & SD

cat("\nStats via pipe (first 6 columns):\n")
## 
## Stats via pipe (first 6 columns):
print(stats.pipe[, 1:6])
##      AAPL_Mean    AAPL_SD    AMGN_Mean    AMGN_SD     AXP_Mean     AXP_SD
## 1 0.0009168344 0.02840478 0.0004641248 0.02085579 0.0003855638 0.02197895

PART II: quantmod

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# 1. Download Amazon daily stock price data 2021-01-01 to 2021-12-31
getSymbols("AMZN",
           src         = "yahoo",
           from        = "2021-01-01",
           to          = "2021-12-31",
           auto.assign = TRUE)
## [1] "AMZN"
amzn.df      <- as.data.frame(AMZN)
amzn.df$Date <- rownames(amzn.df)
write.csv(amzn.df, "AMZN_2021.csv", row.names = FALSE)
cat("\nAmazon 2021 data saved to AMZN_2021.csv\n")
## 
## Amazon 2021 data saved to AMZN_2021.csv
cat("Rows downloaded:", nrow(amzn.df), "\n")
## Rows downloaded: 251
# 2. Calculate weekly log returns based on adjusted close price
amzn.weekly.adj <- Ad(to.weekly(AMZN))
weekly.log.ret  <- na.omit(diff(log(amzn.weekly.adj)))
cat("\nWeekly log returns (first 5):\n"); print(head(weekly.log.ret, 5))
## 
## Weekly log returns (first 5):
##            AMZN.Adjusted
## 2021-01-15   -0.02495776
## 2021-01-22    0.05879302
## 2021-01-29   -0.02647870
## 2021-02-05    0.04451550
## 2021-02-12   -0.02245692
# 3. Calculate median, mean, and standard deviation
ret.median <- as.numeric(median(weekly.log.ret))
ret.mean   <- as.numeric(mean(weekly.log.ret))
ret.sd     <- as.numeric(sd(weekly.log.ret))

cat(sprintf("\nAMZN 2021 Weekly Log Return Statistics:\n  Median : %.6f\n  Mean   : %.6f\n  Std Dev: %.6f\n",
ret.median, ret.mean, ret.sd))
## 
## AMZN 2021 Weekly Log Return Statistics:
##   Median : -0.002562
##   Mean   : 0.001138
##   Std Dev: 0.033926
# 4. Plot distribution of daily log returns
amzn.daily.ret <- na.omit(diff(log(Ad(AMZN))))


hist(as.numeric(amzn.daily.ret),
     breaks = 40,
     col    = "steelblue",
     border = "white",
     main   = "Amazon Daily Log Return Distribution (2021)",
     xlab   = "Daily Log Return",
     ylab   = "Frequency")
abline(v = mean(amzn.daily.ret),   col = "red",    lwd = 2, lty = 2)
abline(v = median(amzn.daily.ret), col = "orange", lwd = 2, lty = 2)
legend("topright",
       legend = c("Mean", "Median"),
       col    = c("red", "orange"),
       lty    = 2,
       lwd    = 2,
       bty    = "n")

# 5. Count observations with log return between 0.01 and 0.015
count.in.range <- sum(amzn.daily.ret >= 0.01 & amzn.daily.ret <= 0.015)
cat(sprintf("\nNumber of days with log return in [0.01, 0.015]: %d\n", count.in.range))
## 
## Number of days with log return in [0.01, 0.015]: 31