# importing data and getting started 
library(readr)
library(ggplot2)
library(knitr)
library(gridExtra)
# Read the return data from the CSV file into `returns_df`. The parse message
# below shows three numeric columns: IBMreturn, YhooReturn and Aaplreturn.
returns_df <- read_csv("IbmYhooAaplReturnData.csv")
## Rows: 1258 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): IBMreturn, YhooReturn, Aaplreturn
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens an interactive data viewer, so only call it when a person is
# driving the session; this keeps batch runs (Rscript, knitting) from failing
# or popping up a window.
if (interactive()) {
  View(returns_df)
}

# List the column names so the later plots and correlations can refer to them.
names(returns_df)
## [1] "IBMreturn"  "YhooReturn" "Aaplreturn"

Scatter Plot of Each Pair of Variables

# IBM return (x-axis) against Yahoo return (y-axis), drawn as blue points
ggplot(returns_df, aes(IBMreturn, YhooReturn)) +
  geom_point(color = "blue") +
  labs(x = "IBM Return", y = "Yahoo Return")

# Yahoo return (x-axis) against Apple return (y-axis), drawn as magenta points
ggplot(returns_df, aes(YhooReturn, Aaplreturn)) +
  geom_point(color = "magenta") +
  labs(x = "Yahoo Return", y = "Apple Return")

# IBM return (x-axis) against Apple return (y-axis), drawn as green points
ggplot(returns_df, aes(IBMreturn, Aaplreturn)) +
  geom_point(color = "green") +
  labs(x = "IBM Return", y = "Apple Return")

Correlating Each Pair

# Correlation coefficient (cor() default method) between IBM and Yahoo returns
with(returns_df, cor(IBMreturn, YhooReturn))
## [1] 0.3143849
# Correlation coefficient between Yahoo and Apple returns
with(returns_df, cor(YhooReturn, Aaplreturn))
## [1] 0.2754087
# Correlation coefficient between IBM and Apple returns
with(returns_df, cor(IBMreturn, Aaplreturn))
## [1] 0.3603621

Interpret Correlation Factor with Scatter Plots

All three scatter plots show a positive correlation and are in agreement with the correlation factors. However, based on the scatter plots alone, not much can be said about the magnitude of the correlation, as the data in all three scatter plots are concentrated in one area. Because the data are clustered mainly around one point, it may appear that there is a strong correlation, but the exact magnitude cannot be determined from the plots. This is why we need the exact correlation factors, expressed here as percentages. In our case these are:

The correlation between IBM and Yahoo returns is 31.44%

The correlation between Yahoo and Apple returns is 27.54%

The correlation between IBM and Apple returns is 36.04%

Scatter Plot Matrix

# Create a scatter plot matrix for the three return series.
returns_subset <- returns_df[c("IBMreturn", "YhooReturn", "Aaplreturn")]

# Every off-diagonal panel plots one stock against another, so a point cannot
# be coloured "by stock". A colour vector passed as `col` is recycled over the
# rows, which made the colours arbitrary and the old IBM/Yahoo/Apple legend
# misleading. Use one colour and name each stock in the diagonal panels.
pairs(
  returns_subset,
  labels = c("IBM", "Yahoo", "Apple"),
  pch = 19,
  col = "blue",
  main = "Correlation Scatter Plot"
)