#loading the libraries we will use for this exercise
library(readr)
library(ggplot2)
# Import the data.
# The full path to the CSV is built explicitly instead of calling setwd():
# changing the working directory is a session-wide side effect that silently
# affects every relative path used afterwards. path.expand() resolves the
# leading "~" to the user's home directory.
# readr is already attached at the top of the script, so it is not loaded again.
data_dir <- path.expand("~/NYU/classes/4. Statistical Modeling/Week 1")
data <- read_csv(file.path(data_dir, "IbmYhooAaplReturnData.csv"))
## Rows: 1258 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (3): IBMreturn, YhooReturn, Aaplreturn
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only when a user is at the console:
# View() relies on a GUI/display, so it is skipped when the script is run in
# batch mode (e.g. via Rscript or while knitting).
if (interactive()) {
  View(data)
}
# Printing the data:
# head() with n = 10 prints only the first 10 rows of the dataset.
head(data, n = 10)
## # A tibble: 10 x 3
## IBMreturn YhooReturn Aaplreturn
## <dbl> <dbl> <dbl>
## 1 0.00858 0.0135 0.0107
## 2 -0.00325 0.0248 0.00440
## 3 -0.00349 -0.00296 0.00443
## 4 -0.00677 0.0285 -0.00270
## 5 0.00901 0.00980 0.00767
## 6 -0.00311 0.00685 0.00207
## 7 0.00522 0.0306 0.000578
## 8 0.0172 0.0110 0.0134
## 9 -0.00274 0.0321 0.0131
## 10 -0.00199 -0.0422 -0.00611
#1) Make scatter plots of each pair of variables.
# Scatter plot of IBM vs. Yahoo returns.
# ggplot() sets up the plot and maps the two return columns to the axes,
# geom_point() draws the observations in blue, and labs() names the axes.
ggplot(data, aes(IBMreturn, YhooReturn)) +
  geom_point(colour = "blue") +
  labs(
    x = "IBM Returns",
    y = "Yahoo Returns"
  )
# Scatter plot of IBM vs. Apple returns.
# ggplot() sets up the plot and maps the two return columns to the axes,
# geom_point() draws the observations in red, and labs() names the axes.
ggplot(data, aes(IBMreturn, Aaplreturn)) +
  geom_point(colour = "red") +
  labs(
    x = "IBM Returns",
    y = "Apple Returns"
  )
# Scatter plot of Yahoo vs. Apple returns.
# ggplot() sets up the plot and maps the two return columns to the axes,
# geom_point() draws the observations in green, and labs() names the axes.
ggplot(data, aes(YhooReturn, Aaplreturn)) +
  geom_point(colour = "green") +
  labs(
    x = "Yahoo Returns",
    y = "Apple Returns"
  )
#2) Compute the correlation in the data for each pair of variables.
# Correlation between the IBM and Yahoo return columns
cor(
  data[["IBMreturn"]],
  data[["YhooReturn"]]
)
## [1] 0.3143849
# Correlation between the IBM and Apple return columns
cor(
  data[["IBMreturn"]],
  data[["Aaplreturn"]]
)
## [1] 0.3603622
# Correlation between the Yahoo and Apple return columns
cor(
  data[["YhooReturn"]],
  data[["Aaplreturn"]]
)
## [1] 0.2754087
#3) Are the correlation values that you get in accord with what you see in the scatter plots?
#3) Response: Yes. All of the correlations are positive and generally around 0.3 (0.31, 0.36 and 0.28).
# The scatter plots agree: each one shows an increasing trend (i.e., positive correlation), but the
# points do not appear to be clustered exactly around a line, which is consistent with correlations
# that are not that close to 1.