# packages
library(tidyquant) # for importing stock data
## Warning: package 'tidyquant' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.4.3
## Warning: package 'zoo' was built under R version 4.4.3
## Warning: package 'quantmod' was built under R version 4.4.3
## Warning: package 'TTR' was built under R version 4.4.3
## Warning: package 'PerformanceAnalytics' was built under R version 4.4.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyverse) # for working with data
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom) # for tidying output from various statistical procedures
## Warning: package 'broom' was built under R version 4.4.3
library(knitr) # for tables
## Warning: package 'knitr' was built under R version 4.4.3
library(kableExtra) # for improving the appearance of tables
## Warning: package 'kableExtra' was built under R version 4.4.3
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.4.3
library(quantmod)
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.4.3
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggplot2)
library(modeest)
## Warning: package 'modeest' was built under R version 4.4.3
## 
## Attaching package: 'modeest'
## 
## The following object is masked from 'package:PerformanceAnalytics':
## 
##     skewness
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:modeest':
## 
##     skewness
## 
## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness
library(dplyr)
library(bookdown)
## Warning: package 'bookdown' was built under R version 4.4.3
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.4.3
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:e1071':
## 
##     impute
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following object is masked from 'package:quantmod':
## 
##     Lag
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units

#Question 1

# Download the price history of CSCO, ELA and GE from 2000-01-01 onwards and
# keep only the symbol, date and adjusted price columns.
AllockStock <- select(
  tq_get(c("CSCO", "ELA", "GE"), get = "stock.prices", from = "2000-01-01"),
  symbol, date, adjusted
)
## Registered S3 method overwritten by 'future':
##   method               from      
##   all.equal.connection parallelly
# Show the first six rows of the price data as a captioned table.
kable(
  head(AllockStock, n = 6),
  caption = "first 6 rows of stock prices"
)
first 6 rows of stock prices
symbol date adjusted
CSCO 2000-01-03 35.57295
CSCO 2000-01-04 33.57724
CSCO 2000-01-05 33.47437
CSCO 2000-01-06 32.91886
CSCO 2000-01-07 34.85285
CSCO 2000-01-10 36.14904

#Question 2

#2.1 Plot prices over time (4 points) Plot the prices of each asset over time separately. Succinctly describe in words the evolution of each asset over time. (limit: 100 words for each time series).

CSCO- In 2000, Cisco’s share price started high, driven by tech sector momentum. However, it declined sharply following the burst of the dot-com bubble. The volatility reflected broader investor fears and rapid sell-offs in the tech industry. Despite brief recoveries, the downward trend persisted, with the price falling significantly by year-end.

ELA- ELA’s price history is marked by high volatility and low absolute price levels. The stock exhibited erratic movements with occasional sharp spikes, likely driven by speculative interest or low liquidity. Its micro-cap nature makes it prone to abrupt fluctuations, with no consistent trend emerging during the 2000–2001 period.

GE- GE, a diversified industrial giant, showed a downward trend through 2000. While it began with relative stability, the price gradually declined as broader market sentiment weakened. The tech-driven crash and weakening global growth impacted industrials, causing investor caution even in traditionally stable blue-chip stocks like GE.

# Keep only the CSCO rows of the price data.
CSCOPrices <- filter(AllockStock, symbol == "CSCO")
2
## [1] 2
# Plot the CSCO adjusted price as a time series.
# `linewidth` is used for the line thickness because the `size` aesthetic is
# deprecated for lines since ggplot2 3.4.0. The title and axis labels are set
# explicitly: `labs(time)` passed the bare symbol `time`, not any label text.
ggplot(data = CSCOPrices, aes(x = date, y = adjusted)) +
  geom_line(color = "red", linewidth = 0.2) +
  labs(
    title = "CSCO adjusted price over time",
    x = "Date",
    y = "Adjusted price"
  )

# Keep only the GE rows of the price data.
GEPrices <- filter(AllockStock, symbol == "GE")
2
## [1] 2
# Plot the GE adjusted price as a time series.
# `linewidth` is used for the line thickness because the `size` aesthetic is
# deprecated for lines since ggplot2 3.4.0. The title and axis labels are set
# explicitly: `labs(time)` passed the bare symbol `time`, not any label text.
ggplot(data = GEPrices, aes(x = date, y = adjusted)) +
  geom_line(color = "blue", linewidth = 0.2) +
  labs(
    title = "GE adjusted price over time",
    x = "Date",
    y = "Adjusted price"
  )

# Keep only the ELA rows of the price data.
ELAPrices <- filter(AllockStock, symbol == "ELA")
2
## [1] 2
# Plot the ELA adjusted price as a time series.
# `linewidth` is used for the line thickness because the `size` aesthetic is
# deprecated for lines since ggplot2 3.4.0. The title and axis labels are set
# explicitly: `labs(time)` passed the bare symbol `time`, not any label text.
ggplot(data = ELAPrices, aes(x = date, y = adjusted)) +
  geom_line(color = "purple", linewidth = 0.2) +
  labs(
    title = "ELA adjusted price over time",
    x = "Date",
    y = "Adjusted price"
  )

#Question 2.2 Calculate returns and plot returns over time (4 points)

# Ensure your packages are loaded
library(dplyr)
library(tidyquant)

# Download the price data for the three tickers from 2000-01-01 onwards
# (no column selection is applied here).
AllockStock <- c("CSCO", "ELA", "GE") %>%
  tq_get(get = "stock.prices", from = "2000-01-01")

# Calculate log returns in percent for each symbol.
# lag() is evaluated within each symbol on date-ordered rows, so every return
# compares a price with the previous row of the same symbol. The first row of
# each symbol has no previous price (NA return) and is removed.
# The result is ungrouped at the end so that later steps do not silently
# inherit the per-symbol grouping.
AllockReturns <- AllockStock %>%
  group_by(symbol) %>%
  arrange(date) %>%
  mutate(log_return = 100 * (log(adjusted) - log(lag(adjusted)))) %>%
  filter(!is.na(log_return)) %>%
  ungroup()
# Line chart of the log returns over time, one coloured line per symbol.
ggplot(
  data = AllockReturns,
  mapping = aes(x = date, y = log_return, colour = symbol)
) +
  geom_line() +
  ggtitle("Log Returns Over Time")

#Question 2.3 Histogram of returns (6 points)

Create a histogram for each of the returns series.

You have to explain your choice of bins. (Hint: Discuss the formula you use to calculate the bins)

# Load required libraries
library(tidyquant)
library(tidyverse)

# Step 1: Download prices since 2000-01-01 and keep symbol, date and adjusted
AllockStock <- select(
  tq_get(c("CSCO", "ELA", "GE"), from = "2000-01-01"),
  symbol, date, adjusted
)

# Step 2: Log return = log of the ratio of a price to the previous row's price
# within the same symbol (rows sorted by date first). The first row of each
# symbol has no previous price, so its return is NA.
AllockReturns <- ungroup(
  mutate(
    group_by(arrange(AllockStock, date), symbol),
    returns = log(adjusted / lag(adjusted))
  )
)

# Step 3: Rice Rule for the number of histogram bins.
# For n observations the bin count is twice the cube root of n, rounded up:
# k = ceiling(2 * n^(1/3)).
rice_rule <- function(n) {
  cube_root_n <- n^(1/3)
  ceiling(2 * cube_root_n)
}

# Step 4: Determine the number of bins.
# The histograms are drawn separately for each symbol, so the Rice Rule is
# applied to the number of observations in one return series, not to the
# pooled row count of all symbols (which would inflate the bin count).
# The largest per-symbol count is used so that one bin count can be shared by
# every histogram.
n_obs <- AllockReturns %>%
  drop_na(returns) %>%
  count(symbol) %>%
  pull(n) %>%
  max()
Bins <- rice_rule(n_obs)

# Step 5: Histogram of the non-missing returns, one panel per symbol, each
# panel with its own y-axis scale
AllockReturns %>%
  drop_na(returns) %>%
  ggplot(mapping = aes(x = returns)) +
  geom_histogram(bins = Bins, fill = "steelblue", colour = "white") +
  facet_wrap(vars(symbol), scales = "free_y") +
  labs(
    x = "Log Returns",
    y = "Frequency",
    title = "Histograms of Log Returns by Stock"
  )
Histogram for returns

Histogram for returns

#Question 2.4 Summary table of returns (5 points) Report the descriptive statistics in a single table which includes the mean, median, variance, standard deviation, skewness and kurtosis for each series. What conclusions can you draw from these descriptive statistics?

# Load required libraries
library(tidyquant)
library(tidyverse)
library(moments)  # for skewness and kurtosis
## 
## Attaching package: 'moments'
## The following objects are masked from 'package:e1071':
## 
##     kurtosis, moment, skewness
## The following object is masked from 'package:modeest':
## 
##     skewness
## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness
library(knitr)    # for nicely formatted table

# Recompute the log returns per symbol from the adjusted prices: rows are
# sorted by date and each price is compared with the previous row of the same
# symbol, leaving NA for the first row of every symbol.
AllockReturns <- AllockStock %>%
  arrange(date) %>%
  group_by(symbol) %>%
  mutate(returns = log(adjusted / lag(adjusted))) %>%
  ungroup()

# Compute descriptive statistics of the non-missing returns for each symbol.
# skewness() and kurtosis() are defined by more than one attached package
# (see the masking messages printed when `moments` is attached), so the
# `moments` versions are called explicitly instead of depending on which
# package happened to be attached last.
# NOTE(review): the interpretation compares kurtosis with 3 for a normal
# distribution, which presumes a non-excess kurtosis -- confirm against the
# `moments` documentation.
summary_stats <- AllockReturns %>%
  drop_na(returns) %>%
  group_by(symbol) %>%
  summarise(
    Mean = mean(returns),
    Median = median(returns),
    Variance = var(returns),
    `Standard Deviation` = sd(returns),
    Skewness = moments::skewness(returns),
    Kurtosis = moments::kurtosis(returns)
  )

# Render the statistics as a captioned table with four digits
summary_stats %>%
  kable(
    digits = 4,
    caption = "Descriptive Statistics of Log Returns by Stock"
  )
Descriptive Statistics of Log Returns by Stock
symbol Mean Median Variance Standard Deviation Skewness Kurtosis
CSCO 1e-04 5e-04 0.0005 0.0229 0.0407 13.2330
ELA 0e+00 0e+00 0.0024 0.0489 1.4130 36.7596
GE 1e-04 0e+00 0.0004 0.0210 -0.0594 10.3748

##Interpretation:

#Mean & Median: All stocks have average returns close to zero, which is typical for daily stock returns. Median values near zero indicate that returns are generally centered around no gain or loss.

#Variance & Standard Deviation: ELA shows substantially higher variance (0.0024) and volatility (std dev 0.0489) compared to CSCO and GE, indicating that ELA’s returns are much more volatile and risky.

#Skewness:

CSCO’s returns are nearly symmetric (skewness ≈ 0.04).

ELA’s returns are positively skewed (1.413), meaning there are more frequent large positive returns or outliers on the right tail.

GE’s returns are slightly negatively skewed (-0.059), with occasional larger negative returns.

#Kurtosis: All three stocks have very high kurtosis values (> 10), especially ELA (36.76), indicating heavy tails and a higher probability of extreme returns (both positive and negative) compared to a normal distribution (kurtosis = 3). This implies risk of rare but extreme events.

#Question 2.5 Are average returns significantly different from zero? (6 points)

step 1: state the hypothesis \[H_0 : \mu =0\] \[H_1 : \mu \neq 0\]

##step 2: set the significance Level \[\alpha = 0.01\]

##step 3: The Test Statistic \[t = \frac{\hat{\mu} - \mu_0}{\hat{\sigma}/\sqrt{n}} \sim t_{n-1}\] where \(\mu_0 = 0\) is the value of \(\mu\) under \(H_0\).

library(dplyr)

# Run a one-sample t-test of H0: mu = 0 on the returns of every symbol.
# t.test() is evaluated once per symbol and kept in a temporary list column;
# all reported quantities are read from that single result instead of
# re-running the identical test for each output column.
# NOTE(review): conf_low/conf_high come from t.test()'s default confidence
# level; pass conf.level = 0.99 if the interval should match alpha = 0.01.
t_test_results <- AllockReturns %>%
  group_by(symbol) %>%
  summarise(
    test = list(t.test(returns, mu = 0)),
    t_statistic = test[[1]]$statistic,
    mean_return = test[[1]]$estimate,
    p_value = test[[1]]$p.value,
    df = test[[1]]$parameter,
    conf_low = test[[1]]$conf.int[1],
    conf_high = test[[1]]$conf.int[2]
  ) %>%
  select(-test)

print(t_test_results)
## # A tibble: 3 × 7
##   symbol t_statistic mean_return p_value    df  conf_low conf_high
##   <chr>        <dbl>       <dbl>   <dbl> <dbl>     <dbl>     <dbl>
## 1 CSCO        0.314   0.0000898    0.754  6385 -0.000471  0.000651
## 2 ELA         0.0104  0.00000639   0.992  6385 -0.00119   0.00121 
## 3 GE          0.347   0.0000911    0.728  6385 -0.000423  0.000605
# Load required libraries
library(tidyquant)
library(dplyr)

# Step 1: Download the prices and add percent log returns
symbols <- c("CSCO", "ELA", "GE")

# Prices from 2000-01-01 are reduced to symbol, date and adjusted price and
# sorted by date; the return is then computed against the previous row of the
# same symbol. Rows containing NA (the first row of each symbol) are dropped.
# The result stays grouped by symbol.
AllockStock <- symbols %>%
  tq_get(get = "stock.prices", from = "2000-01-01") %>%
  select(symbol, date, adjusted) %>%
  arrange(date) %>%
  group_by(symbol) %>%
  mutate(returns = 100 * (log(adjusted) - log(lag(adjusted)))) %>%
  drop_na()

# Step 2: Perform one-sample t-tests (H0: mean return = 0) for each stock.
# Each stock's return vector is extracted once and reused for the test, the
# standard deviation and the sample size, instead of repeating the same
# subsetting expression for every statistic.
csco_returns <- AllockStock$returns[AllockStock$symbol == "CSCO"]
ela_returns <- AllockStock$returns[AllockStock$symbol == "ELA"]
ge_returns <- AllockStock$returns[AllockStock$symbol == "GE"]

csco_t <- t.test(csco_returns, mu = 0)
ela_t <- t.test(ela_returns, mu = 0)
ge_t <- t.test(ge_returns, mu = 0)

# 1% significance level used for the reject / fail-to-reject decision
alpha_level <- 0.01

# Step 3: Extract and summarise results (one row per stock)
results <- data.frame(
  Symbol = c("CSCO", "ELA", "GE"),
  Mean = c(csco_t$estimate, ela_t$estimate, ge_t$estimate),
  SD = c(sd(csco_returns), sd(ela_returns), sd(ge_returns)),
  N = c(length(csco_returns), length(ela_returns), length(ge_returns)),
  t_statistic = c(csco_t$statistic, ela_t$statistic, ge_t$statistic),
  p_value = c(csco_t$p.value, ela_t$p.value, ge_t$p.value),
  # ifelse() is vectorised, so all three decisions are made in one call
  Decision = ifelse(
    c(csco_t$p.value, ela_t$p.value, ge_t$p.value) < alpha_level,
    "Reject H0",
    "Fail to Reject H0"
  )
)

# Step 4: Print the summary table
print(results)
##   Symbol        Mean       SD    N t_statistic   p_value          Decision
## 1   CSCO 0.008977353 2.286972 6386  0.31369091 0.7537661 Fail to Reject H0
## 2    ELA 0.000639242 4.892291 6386  0.01044161 0.9916693 Fail to Reject H0
## 3     GE 0.009108299 2.096362 6386  0.34720465 0.7284490 Fail to Reject H0

ChatGPT Prompt: Can you perform a t-test to determine if the mean monthly returns of CSCO (0.008977353, SD = 2.286972), ELA (0.000639242, SD = 4.892291), and GE (0.009108306, SD = 2.096362) are significantly different from 0 using a sample size of 60 and 1% significance level? Evaluation: If you provide the necessary inputs (mean, standard deviation, sample size), ChatGPT will provide a similar result. However, accuracy is dependent on the quality of the prompt. If you ignore important information such as sample size or test type (one-tailed vs two-tailed), the results may be ambiguous or erroneous. You must instruct ChatGPT to utilize the appropriate t-distribution and rejection criteria. Note that this prompt describes the returns as monthly with a sample size of 60, whereas the R analysis above uses 6386 daily returns per stock, so the test statistics from the two approaches are not expected to match exactly.

#Question 2.6 Are average returns different from each other? (7 points)

# Preview the first rows of the returns data
AllockStock %>% head()
## # A tibble: 6 × 4
## # Groups:   symbol [3]
##   symbol date       adjusted returns
##   <chr>  <date>        <dbl>   <dbl>
## 1 CSCO   2000-01-04    33.6   -5.77 
## 2 ELA    2000-01-04     5.62  -6.45 
## 3 GE     2000-01-04   125.    -4.08 
## 4 CSCO   2000-01-05    33.5   -0.307
## 5 ELA    2000-01-05     6      6.45 
## 6 GE     2000-01-05   125.    -0.174
# Summary statistics of the returns for each symbol.
# `symbol` is selected explicitly: selecting only `returns` from data grouped
# by symbol makes dplyr re-add the grouping column with an
# "Adding missing grouping variables" message.
AllockStock %>%
  group_by(symbol) %>%
  select(symbol, returns) %>%
  rstatix::get_summary_stats()
## # A tibble: 3 × 14
##   symbol variable     n   min   max median     q1    q3   iqr   mad  mean    sd
##   <chr>  <fct>    <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CSCO   returns   6386 -17.7  21.8  0.047 -0.864 0.97   1.83  1.36 0.009  2.29
## 2 ELA    returns   6386 -46.4  88.1  0     -1.92  1.82   3.74  2.77 0.001  4.89
## 3 GE     returns   6386 -16.4  18.0  0     -0.873 0.932  1.80  1.33 0.009  2.10
## # ℹ 2 more variables: se <dbl>, ci <dbl>

Levene’s test

step 1: The null and alternative hypothesis

\[H_0: \sigma_1^2 = \sigma_2^2 = \sigma_3^2\]

\[ H_1: \text{At least one of the } \sigma_j^2 \text{ is not equal to the others} \]

Step 2: Significance Level

\[\alpha = 0.01\]

Step 3: Test Statistic Formula

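\[ W = \frac{(n - c)}{(c - 1)} \cdot \frac{\sum_{j=1}^{c} n_j (\bar{Z}_j - \bar{Z})^2}{\sum_{j=1}^{c} \sum_{i=1}^{n_j} (Z_{ij} - \bar{Z}_j)^2}, \qquad Z_{ij} = |X_{ij} - \tilde{X}_j| \]

where \(\tilde{X}_j\) is the median of group \(j\) (the test below uses center = median), \(\bar{Z}_j\) is the mean of the \(Z_{ij}\) in group \(j\), and \(\bar{Z}\) is the overall mean of the \(Z_{ij}\). Under \(H_0\), \(W\) approximately follows an \(F_{c-1,\,n-c}\) distribution.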

# step 4
library("car")
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Levene's test for equal variances of the returns across symbols.
# `symbol` is a character column, so it is converted with factor() inside the
# formula; otherwise leveneTest() warns that the group was coerced to factor.
levene_result <- leveneTest(returns ~ factor(symbol), data = AllockStock)
print(levene_result)
## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group     2  669.33 < 2.2e-16 ***
##       19155                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA of returns on symbol, followed by its summary table
anova_result <- aov(
  formula = returns ~ symbol,
  data = AllockStock
)
summary(anova_result)
##                Df Sum Sq Mean Sq F value Pr(>F)
## symbol          2      0    0.15   0.013  0.987
## Residuals   19155 214277   11.19
# Welch's ANOVA: one-way comparison of the mean returns across symbols
# without assuming equal variances
welch_result <- oneway.test(
  formula = returns ~ symbol,
  data = AllockStock,
  var.equal = FALSE
)
print(welch_result)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  returns and symbol
## F = 0.0085832, num df = 2, denom df = 11951, p-value = 0.9915
# Tukey multiple comparisons of the symbol means, based on the fitted ANOVA
TukeyHSD(x = anova_result)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = returns ~ symbol, data = AllockStock)
## 
## $symbol
##                   diff        lwr       upr     p adj
## ELA-CSCO -0.0083381111 -0.1470723 0.1303961 0.9891192
## GE-CSCO   0.0001309459 -0.1386033 0.1388652 0.9999973
## GE-ELA    0.0084690571 -0.1302652 0.1472033 0.9887768
# Step 5: state the Welch ANOVA decision by comparing its p-value with 0.01
print(
  if (welch_result$p.value >= 0.01) {
    "Step 5 Conclusion: Do not reject H0. There is insufficient evidence to say that the mean returns differ across stocks."
  } else {
    "Step 5 Conclusion: Reject H0. The mean returns are significantly different across stocks at the 1% significance level."
  }
)
## [1] "Step 5 Conclusion: Do not reject H0. There is insufficient evidence to say that the mean returns differ across stocks."

Now, compare your answer to Chat-GPT, why do you think your answer is different or similar? Please attach a picture of the screenshot of the answer you have got from Chat-GPT. What do you learn from this exercise? (1 points) My answer was different from that of ChatGPT’s answer. If the proper input is given, ChatGPT is likely to produce the same results. While ChatGPT is beneficial for automating statistical tests and speeding up analysis, it does not always validate essential assumptions, such as whether sample sizes are equal or whether the test should be one- or two-tailed, unless explicitly stated. I learned from this exercise that, while ChatGPT is a useful tool for quick verification and advice, relying only on it without comprehending the underlying logic can result in errors. Mastering the topics by hand ensures appropriate interpretation, especially in academic situations or exams.

#Question2.7 Correlation (2points)

library(tidyquant)
library(dplyr)
library(ggplot2)
library(knitr)
library(car)
library(tidyverse)
library(moments)
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
# Wide table of returns: one row per date and one column per symbol.
# `id_cols` is named explicitly because specifying it by position is
# deprecated since tidyr 1.3.0.
Matrixforstocks <- AllockStock %>%
  select(date, symbol, returns) %>%
  drop_na() %>%
  pivot_wider(id_cols = date, names_from = symbol, values_from = returns)
# Pairwise correlations of the return columns (date excluded), shown as a
# captioned table
kable(
  cor(select(Matrixforstocks, -date)),
  caption = "Correlation Matrix"
)
Correlation Matrix
CSCO ELA GE
CSCO 1.0000000 0.0887849 0.4192241
ELA 0.0887849 1.0000000 0.0632778
GE 0.4192241 0.0632778 1.0000000

The correlation matrix demonstrates that all three stocks—CSCO, ELA, and GE—have positive correlations with one another, indicating that their returns tend to move in the same direction. The strongest relationship is between CSCO and GE, with a correlation coefficient of 0.419, indicating a moderate positive relationship. This indicates that, while the two companies are driven by similar market pressures, they are not overly reliant on one another. Given that both are major, diverse enterprises operating in several industries, their moderate connection could indicate shared exposure to broader economic trends. In comparison, ELA has rather small correlations with the other two stocks—0.089 with CSCO and 0.063 with GE. This suggests that ELA’s returns are only loosely tied to those of CSCO and GE. As a micro-cap stock, ELA tends to act more independently, possibly influenced by various risk factors and market trends. These low correlations indicate that integrating ELA in a portfolio with CSCO and GE could provide considerable diversification benefits, lowering total portfolio risk without significantly reducing expected returns.

#QUESTION2.8 Testing the significance of correlations (2 points) Is the assumption of independence of stock returns realistic? Provide evidence (the hypothesis test including all 5 steps of the hypothesis test and the equation for the test statistic) and a rationale to support your conclusion.

## Step 1: State the hypotheses

\[ H_0 : \rho = 0 \\ H_1 : \rho \ne 0 \]

## Step 2: Specify level of significance and number of observations

\[\alpha = 0.05\]

\[\text{number of observations: } n_1=n_2=n_3=n\]

## Step 3: The test statistic

\[t = \hat{\rho} \cdot \sqrt{\frac{n-2}{1-\hat{\rho}^2}} \sim t_{n-2}\]

## Step 4

# Preview the first rows of the returns data
AllockStock %>% head()
## # A tibble: 6 × 4
## # Groups:   symbol [3]
##   symbol date       adjusted returns
##   <chr>  <date>        <dbl>   <dbl>
## 1 CSCO   2000-01-04    33.6   -5.77 
## 2 ELA    2000-01-04     5.62  -6.45 
## 3 GE     2000-01-04   125.    -4.08 
## 4 CSCO   2000-01-05    33.5   -0.307
## 5 ELA    2000-01-05     6      6.45 
## 6 GE     2000-01-05   125.    -0.174
library(dplyr)
library(tidyr)

# Filter for CSCO and GE, select returns, pivot to wide format (one row per
# date, one returns column per symbol).
# is.finite() is FALSE for NA, NaN, Inf and -Inf, so this single filter keeps
# only rows where both returns are usable; no separate drop_na() step is
# needed afterwards.
CSCO_GE <- AllockStock %>%
  filter(symbol %in% c("CSCO", "GE")) %>%
  select(date, symbol, returns) %>%
  pivot_wider(names_from = symbol, values_from = returns) %>%
  filter(is.finite(CSCO), is.finite(GE))

# Test H0: rho = 0 for the correlation between CSCO and GE returns
cor.test(x = CSCO_GE$CSCO, y = CSCO_GE$GE)
## 
##  Pearson's product-moment correlation
## 
## data:  CSCO_GE$CSCO and CSCO_GE$GE
## t = 36.895, df = 6384, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3987975 0.4392350
## sample estimates:
##       cor 
## 0.4192241

0.0887849 0.4192241

#Step 5: Conclusion

Since the p-value < 0.05, we reject the null hypothesis. There is statistically significant evidence of a correlation between CSCO and GE returns.

#question 2.9

# Reshape the returns to one row per date with a column per symbol, then
# preview the first rows
stock_returns <- pivot_wider(
  select(AllockStock, symbol, date, returns),
  names_from = symbol,
  values_from = returns
)

head(stock_returns)
## # A tibble: 6 × 4
##   date         CSCO   ELA      GE
##   <date>      <dbl> <dbl>   <dbl>
## 1 2000-01-04 -5.77  -6.45 -4.08  
## 2 2000-01-05 -0.307  6.45 -0.174 
## 3 2000-01-06 -1.67  -2.11  1.33  
## 4 2000-01-07  5.71  -3.24  3.80  
## 5 2000-01-10  3.65   3.24 -0.0413
## 6 2000-01-11 -3.06   0     0.165
# Covariance matrix of the three return columns, using complete rows only
stock_returns %>%
  select(CSCO, ELA, GE) %>%
  cov(use = "complete.obs")
##           CSCO        ELA        GE
## CSCO 5.2302406  0.9933723 2.0098947
## ELA  0.9933723 23.9345150 0.6489781
## GE   2.0098947  0.6489781 4.3947322
# Load necessary libraries
library(tidyverse)

# Step 1: Wide format -- one row per date, one returns column per stock
stock_returns <- pivot_wider(
  select(AllockStock, date, symbol, returns),
  names_from = symbol,
  values_from = returns
)

# Show the resulting column names to confirm each stock has its own column
print(names(stock_returns))
## [1] "date" "CSCO" "ELA"  "GE"
# Step 2: Calculate means, variances, and covariances of the three return
# columns. The columns are selected once and reused.
# The variances are computed column by column with vapply(), which keeps the
# column names and fixes the result type, rather than apply(), which first
# coerces the data frame to a matrix.
asset_returns <- stock_returns[, c("CSCO", "ELA", "GE")]
means <- colMeans(asset_returns, na.rm = TRUE)
variances <- vapply(asset_returns, var, numeric(1), na.rm = TRUE)
cov_matrix <- cov(asset_returns, use = "complete.obs")

# Step 3: Portfolio optimisation function
#
# Searches a grid of weights for the two-asset portfolio that maximises
# "happiness", defined as portfolio mean return minus `risk_aversion` times
# portfolio variance.
#
# Arguments:
#   r1, r2         Labels of the two assets (copied into the result).
#   mean1, mean2   Mean return of asset 1 and asset 2.
#   var1, var2     Return variance of asset 1 and asset 2.
#   cov12          Covariance between the two return series.
#   step           Spacing of the grid of w1 values on [0, 1] (default 0.01).
#   risk_aversion  Multiplier applied to the variance penalty (default 1,
#                  i.e. happiness = mean - variance).
#
# Returns a one-row data frame with columns Stock1, Stock2, w1, w2, mean_r,
# var_r and happiness describing the best grid point. On ties the first
# (smallest w1) maximum is returned, because which.max() picks the first one.
# Row names are reset, so the row is always labelled 1.
optimize_happiness <- function(r1, r2, mean1, mean2, var1, var2, cov12,
                               step = 0.01, risk_aversion = 1) {
  stopifnot(
    is.numeric(step), length(step) == 1, step > 0, step <= 1,
    is.numeric(risk_aversion), length(risk_aversion) == 1
  )

  # Evaluate the whole weight grid with vectorised arithmetic; the data frame
  # is built once instead of being grown with rbind() inside a loop.
  w1 <- seq(0, 1, by = step)
  w2 <- 1 - w1
  mean_r <- w1 * mean1 + w2 * mean2
  var_r <- w1^2 * var1 + w2^2 * var2 + 2 * w1 * w2 * cov12
  happiness <- mean_r - risk_aversion * var_r

  results <- data.frame(
    Stock1 = r1,
    Stock2 = r2,
    w1 = w1,
    w2 = w2,
    mean_r = mean_r,
    var_r = var_r,
    happiness = happiness
  )

  best <- results[which.max(results$happiness), ]
  row.names(best) <- NULL
  best
}

# Step 4: Optimise each of the three stock pairs.
# The helper looks up the means, variances and covariance of a pair by name
# and forwards them to optimize_happiness().
optimize_pair <- function(first, second) {
  optimize_happiness(
    first, second,
    means[first], means[second],
    variances[first], variances[second],
    cov_matrix[first, second]
  )
}

opt1 <- optimize_pair("CSCO", "ELA")
opt2 <- optimize_pair("CSCO", "GE")
opt3 <- optimize_pair("ELA", "GE")

# Step 5: Stack the three pairwise optima into one table and display it
Portfoliotable <- do.call(rbind, list(opt1, opt2, opt3))
print(Portfoliotable)
##        Stock1 Stock2   w1   w2      mean_r    var_r happiness
## CSCO84   CSCO    ELA 0.84 0.16 0.007643255 4.570200 -4.562557
## CSCO43   CSCO     GE 0.43 0.57 0.009051992 3.380170 -3.371118
## ELA14     ELA     GE 0.14 0.86 0.007922631 3.875734 -3.867812
# Select the row of the table with the largest happiness value, then announce
# and print it
best_portfolio <- Portfoliotable[which.max(Portfoliotable[["happiness"]]), ]
cat("✅ Best Portfolio Recommendation:\n")
## ✅ Best Portfolio Recommendation:
print(best_portfolio)
##        Stock1 Stock2   w1   w2      mean_r   var_r happiness
## CSCO43   CSCO     GE 0.43 0.57 0.009051992 3.38017 -3.371118