#loading the libraries we will use for this exercise
library(readr)
library(ggplot2)
# Import the data.
# The CSV is addressed by its full path instead of calling setwd(), so the
# script does not change the session's working directory as a side effect.
# readr is already attached at the top of the script, so it is not loaded again.
data_dir <- path.expand("~/NYU/classes/4. Statistical Modeling/Week 1")
eps <- read_csv(file.path(data_dir, "Returns_VS_EPS.csv"))
## Rows: 51 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): Symbol, Company, 2004 Release Date, 2005 Release Date
## dbl (8): EPS Q4 2004, Stck Price Day Bfr Q4 04 Relse, Stck Prce 1 Mo Aftr Q4...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the column heads of the data to be more tidy.
# The new names are assigned purely by position, so first make sure the file
# has exactly as many columns as there are replacement names.
tidy_names <- c(
  "symbol", "company", "eps04", "releasedate04", "preprice04", "postprice04",
  "return04", "eps05", "releasedate05", "preprice05", "postprice05", "return05"
)
stopifnot(ncol(eps) == length(tidy_names))
names(eps) <- tidy_names
# View() opens the interactive data viewer; skip it when the script is run
# non-interactively (e.g. via Rscript or when knitting).
if (interactive()) {
  View(eps)
}
# Print the first rows as a quick sanity check of the renamed columns
head(eps)
## # A tibble: 6 x 12
## symbol company eps04 releasedate04 preprice04 postprice04 return04 eps05
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 MMM 3M Company 0.91 18-Jan-05 BMO 81.2 81.8 0.00763 1.04
## 2 ABT Abbott Labs 0.67 18-Jan-05 BMO 44.5 44.8 0.00517 0.76
## 3 AA Alcoa Inc 0.39 10-Jan-05 AMC 29.4 28.5 -0.0299 0.35
## 4 ALL Allstate Cor~ 1.42 2-Feb-05 AMC 49.3 52.1 0.0555 1.49
## 5 AMGN Amgen 0.58 27-Jan-05 BMO 63.6 61.6 -0.0307 0.75
## 6 T AT&T Inc. 0.34 26-Jan-05 BMO 22.6 22.4 -0.00973 0.48
## # ... with 4 more variables: releasedate05 <chr>, preprice05 <dbl>,
## # postprice05 <dbl>, return05 <dbl>
#1) The question of interest is: Is there evidence, based on the data, of a linear relationship between Y=the return on the stock price a month after the EPS announcement and X=the EPS of a company?
#2) Plot Y vs X to see visually whether you can detect a linear relationship, if any, between the two. Then, run a regression analysis and report the results, stating your conclusion regarding the question whether any predictability exists in the X variable to predict Y. Support your conclusion with relevant numbers.
# Simple linear regression for 2005, stored in the variable "linearModel05".
# Per question 1 the response is Y = return05 (stock return one month after the
# EPS announcement) and the predictor is X = eps05, so the formula must be
# return05 ~ eps05 (the earlier eps05 ~ return05 had the two roles reversed).
linearModel05 <- lm(return05 ~ eps05, data = eps)
# Coefficient table, R-squared and overall F-test for the fit
summary(linearModel05)
# NOTE: the output recorded below was produced by the earlier, reversed formula
# (see its "Call:" line) and should be regenerated. In a one-predictor
# regression the slope's t value and p-value, R-squared and the F-statistic are
# the same whichever variable is the response, so those figures still hold; the
# coefficient estimates, standard errors and residual summaries do not.
##
## Call:
## lm(formula = eps05 ~ return05, data = eps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77463 -0.30159 -0.04405 0.22420 1.41698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.68795 0.06982 9.853 3.29e-13 ***
## return05 -0.20212 1.16924 -0.173 0.863
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4537 on 49 degrees of freedom
## Multiple R-squared: 0.0006095, Adjusted R-squared: -0.01979
## F-statistic: 0.02988 on 1 and 49 DF, p-value: 0.8635
# Scatterplot for 2005: EPS on the x-axis, one-month post-announcement return
# on the y-axis, points drawn in blue. labs() supplies the axis labels.
ggplot(eps, aes(eps05, return05)) +
  geom_point(color = "blue") +
  labs(
    x = "the EPS of a company",
    y = "return on stock price 1 month post EPS announcement"
  )
# Pearson correlation between 2005 EPS and 2005 return
cor(eps[["eps05"]], eps[["return05"]])
## [1] -0.02468796
# Scatterplot of 2005 return (y) against 2005 EPS (x), points in purple.
# `eps` is the data frame read from the CSV, not a packaged dataset, so no
# data() call is needed; the old data(eps) only raised a "data set not found"
# warning and did nothing else.
p1 <- ggplot(data = eps, aes(x = eps05, y = return05)) +
  geom_point(color = "purple")
# Overlay the least-squares regression line in orange, without a confidence
# band (se = FALSE), and label the axes with labs().
p1 +
  geom_smooth(method = "lm", se = FALSE, col = "orange") +
  labs(x = "2005 EPS", y = "2005 Return")
## `geom_smooth()` using formula 'y ~ x'
## Answer to #1 and #2 is NO:
## - The t-statistic is only -0.173, which is not at least +/- 2.
## - The p-value is 0.863, which is over 5%.
## - The R-squared value is 0.0006095, also indicating there is not a linear relationship.
## - Visually, we are also unable to detect a linear relationship.
# =======================================
#Separately evaluate the same for 2004
# Simple linear regression for 2004, stored in the variable "linearModel04".
# Per question 1 the response is Y = return04 (stock return one month after the
# EPS announcement) and the predictor is X = eps04, so the formula must be
# return04 ~ eps04 (the earlier eps04 ~ return04 had the two roles reversed).
linearModel04 <- lm(return04 ~ eps04, data = eps)
# Coefficient table, R-squared and overall F-test for the fit
summary(linearModel04)
# NOTE: the output recorded below was produced by the earlier, reversed formula
# (see its "Call:" line) and should be regenerated. In a one-predictor
# regression the slope's t value and p-value, R-squared and the F-statistic are
# the same whichever variable is the response, so those figures still hold; the
# coefficient estimates, standard errors and residual summaries do not.
##
## Call:
## lm(formula = eps04 ~ return04, data = eps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63580 -0.29163 -0.05621 0.14755 1.37941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.57689 0.06411 8.999 5.92e-12 ***
## return04 0.89401 0.88058 1.015 0.315
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4099 on 49 degrees of freedom
## Multiple R-squared: 0.0206, Adjusted R-squared: 0.0006147
## F-statistic: 1.031 on 1 and 49 DF, p-value: 0.315
# Scatterplot for 2004: EPS on the x-axis, one-month post-announcement return
# on the y-axis, points drawn in blue. labs() supplies the axis labels.
ggplot(eps, aes(eps04, return04)) +
  geom_point(color = "blue") +
  labs(
    x = "the EPS of a company",
    y = "return on stock price 1 month post EPS announcement"
  )
# Pearson correlation between 2004 EPS and 2004 return
cor(eps[["eps04"]], eps[["return04"]])
## [1] 0.1435355
# Scatterplot of 2004 return (y) against 2004 EPS (x), points in purple.
# `eps` is the data frame read from the CSV, not a packaged dataset, so no
# data() call is needed; the old data(eps) only raised a "data set not found"
# warning and did nothing else.
p1 <- ggplot(data = eps, aes(x = eps04, y = return04)) +
  geom_point(color = "purple")
# Overlay the least-squares regression line in orange, without a confidence
# band (se = FALSE), and label the axes with labs().
p1 +
  geom_smooth(method = "lm", se = FALSE, col = "orange") +
  labs(x = "2004 EPS", y = "2004 Return")
## `geom_smooth()` using formula 'y ~ x'
## Answer to #1 and #2 is NO:
## - The t-statistic is only 1.015, which is not at least +/- 2, but it is closer than in 2005.
## - The p-value is 0.315, which is over 5%.
## - The R-squared value is 0.0206, also indicating there is not a linear relationship.
## - Visually, we are also unable to detect a linear relationship, but it is closer than in 2005.