#loading the libraries we will use for this exercise
library(readr)
library(ggplot2)
# Import the data. The CSV is addressed by its full path instead of calling
# setwd(), so the script does not change the session's working directory.
# readr is already attached at the top of the script, so it is not loaded again.
data_dir <- path.expand("~/NYU/classes/4. Statistical Modeling/Week 1")
eps <- read_csv(file.path(data_dir, "Returns_VS_EPS.csv"))
## Rows: 51 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): Symbol, Company, 2004 Release Date, 2005 Release Date
## dbl (8): EPS Q4 2004, Stck Price Day Bfr Q4 04 Relse, Stck Prce 1 Mo Aftr Q4...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the columns to short, tidy names. The assignment is positional, so the
# vector must list all 12 columns in the order they appear in the file.
names(eps) <- c(
  "symbol", "company", "eps04", "releasedate04", "preprice04", "postprice04",
  "return04", "eps05", "releasedate05", "preprice05", "postprice05", "return05"
)
# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(eps)
}
# Print the first rows as a quick sanity check of the renamed columns.
head(eps)
## # A tibble: 6 x 12
##   symbol company       eps04 releasedate04 preprice04 postprice04 return04 eps05
##   <chr>  <chr>         <dbl> <chr>              <dbl>       <dbl>    <dbl> <dbl>
## 1 MMM    3M Company     0.91 18-Jan-05 BMO       81.2        81.8  0.00763  1.04
## 2 ABT    Abbott Labs    0.67 18-Jan-05 BMO       44.5        44.8  0.00517  0.76
## 3 AA     Alcoa Inc      0.39 10-Jan-05 AMC       29.4        28.5 -0.0299   0.35
## 4 ALL    Allstate Cor~  1.42 2-Feb-05 AMC        49.3        52.1  0.0555   1.49
## 5 AMGN   Amgen          0.58 27-Jan-05 BMO       63.6        61.6 -0.0307   0.75
## 6 T      AT&T Inc.      0.34 26-Jan-05 BMO       22.6        22.4 -0.00973  0.48
## # ... with 4 more variables: releasedate05 <chr>, preprice05 <dbl>,
## #   postprice05 <dbl>, return05 <dbl>

#1) The question of interest is: Is there evidence, based on the data, of a linear relationship between Y=the return on the stock price a month after the EPS announcement and X=the EPS of a company?

#2) Plot Y vs X to see visually whether you can detect a linear relationship, if any, between the two. Then, run a regression analysis and report the results, stating your conclusion regarding the question whether any predictability exists in the X variable to predict Y. Support your conclusion with relevant numbers.

# Simple linear regression for 2005, stored in "linearModel05".
# The response (Y) is the one-month return and the predictor (X) is EPS, as
# stated in question 1 and as drawn in the plots further down.
# NOTE: the printed summary that follows was recorded with the two variables
# swapped. In a one-predictor regression the slope's t value, its p-value,
# R-squared and the F-statistic are the same either way, but the coefficient
# estimates, standard errors and residuals need to be regenerated.
linearModel05 <- lm(return05 ~ eps05, data = eps)

# Print the coefficient table and fit statistics.
summary(linearModel05)
## 
## Call:
## lm(formula = eps05 ~ return05, data = eps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77463 -0.30159 -0.04405  0.22420  1.41698 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.68795    0.06982   9.853 3.29e-13 ***
## return05    -0.20212    1.16924  -0.173    0.863    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4537 on 49 degrees of freedom
## Multiple R-squared:  0.0006095,  Adjusted R-squared:  -0.01979 
## F-statistic: 0.02988 on 1 and 49 DF,  p-value: 0.8635
# Scatter plot for 2005: EPS on the x axis, one-month return on the y axis,
# points drawn in blue. labs() supplies the axis titles.
ggplot(data = eps, mapping = aes(x = eps05, y = return05)) +
  geom_point(color = "blue") +
  labs(
    x = "the EPS of a company",
    y = "return on stock price 1 month post EPS announcement"
  )

# Pearson correlation between 2005 EPS and 2005 return.
with(eps, cor(eps05, return05))
## [1] -0.02468796
# `eps` is already in memory from read_csv(); the former data(eps) call only
# raised a "data set 'eps' not found" warning, so it has been dropped.
# Map 2005 EPS to x and 2005 return to y, and draw the points in purple.
p1 <- ggplot(data = eps, aes(x = eps05, y = return05)) +
  geom_point(color = "purple")

# Overlay the fitted straight line (method = "lm") without a confidence band,
# and label the axes with labs().
p1 +
  geom_smooth(method = "lm", se = FALSE, color = "orange") +
  labs(x = "2005 EPS", y = "2005 Return")
## `geom_smooth()` using formula 'y ~ x'

## Answer to #1 and #2: NO.
## The t-statistic is only -0.173, which is not at least 2 in absolute value.
## The p-value is 0.863, which is above 5%.
## The R-squared value is 0.0006095, also indicating there is no linear relationship.
## Visually, no linear relationship can be detected either.

# =======================================

#Separately evaluate the same for 2004

# Simple linear regression for 2004, stored in "linearModel04".
# The response (Y) is the one-month return and the predictor (X) is EPS, which
# matches the stated question and the axes used in the 2004 plots.
# NOTE: the printed summary that follows was recorded with the two variables
# swapped. In a one-predictor regression the slope's t value, its p-value,
# R-squared and the F-statistic are the same either way, but the coefficient
# estimates, standard errors and residuals need to be regenerated.
linearModel04 <- lm(return04 ~ eps04, data = eps)

# Print the coefficient table and fit statistics.
summary(linearModel04)
## 
## Call:
## lm(formula = eps04 ~ return04, data = eps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63580 -0.29163 -0.05621  0.14755  1.37941 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.57689    0.06411   8.999 5.92e-12 ***
## return04     0.89401    0.88058   1.015    0.315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4099 on 49 degrees of freedom
## Multiple R-squared:  0.0206, Adjusted R-squared:  0.0006147 
## F-statistic: 1.031 on 1 and 49 DF,  p-value: 0.315
# Scatter plot for 2004: EPS on the x axis, one-month return on the y axis,
# points drawn in blue. labs() supplies the axis titles.
ggplot(data = eps, mapping = aes(x = eps04, y = return04)) +
  geom_point(color = "blue") +
  labs(
    x = "the EPS of a company",
    y = "return on stock price 1 month post EPS announcement"
  )

# Pearson correlation between 2004 EPS and 2004 return.
with(eps, cor(eps04, return04))
## [1] 0.1435355
# `eps` is already in memory from read_csv(); the former data(eps) call only
# raised a "data set 'eps' not found" warning, so it has been dropped.
# Map 2004 EPS to x and 2004 return to y, and draw the points in purple.
p1 <- ggplot(data = eps, aes(x = eps04, y = return04)) +
  geom_point(color = "purple")

# Overlay the fitted straight line (method = "lm") without a confidence band,
# and label the axes with labs().
p1 +
  geom_smooth(method = "lm", se = FALSE, color = "orange") +
  labs(x = "2004 EPS", y = "2004 Return")
## `geom_smooth()` using formula 'y ~ x'

## Answer to #1 and #2: NO.
## The t-statistic is only 1.015, which is not at least 2 in absolute value, but it is closer than in 2005.
## The p-value is 0.315, which is above 5%.
## The R-squared value is 0.0206, also indicating there is no linear relationship.
## Visually, no linear relationship can be detected either, although it is closer than in 2005.