r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)
install.packages("weatherData")
## Installing package into 'C:/Users/Madalina/Documents/R/win-library/4.0'
## (as 'lib' is unspecified)
## Warning: package 'weatherData' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
library(ggplot2)
library(formattable)
library(magrittr)
library(latexpdf)
library(TeachingDemos)
## 
## Attaching package: 'TeachingDemos'
## The following object is masked from 'package:formattable':
## 
##     digits
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(latexpdf)
library(latex2exp)
library(tinytex)

EXERCICE 16.1

*The annual bonuses ($1,000s) of six employees with different years of experience were recorded as follows.

We wish to determine the straight-line relationship between annual bonus and years of experience.

n=6
x=years=1:6
y=bonus=c(6,1,9,5,17,12)
bonusdata=data.frame(years,bonus)

Representer les donnees par un nuage de points (Graph)

PP<-ggplot(bonusdata, aes(x=years, y=bonus))+ geom_point()+ggtitle("Bonus Data")
PP

# Calculate Manually _ Sums, Covariance and Variance for b1 and b0 ## COVARIANCE (x,y)

n=6
sum(x)
## [1] 21
sum(y)
## [1] 50
sum(x*y)
## [1] 212
sum((x^2))
## [1] 91
S=(1/(n-1)*(sum(x*y)-((sum(x)*sum(y))/n)))
cov(x,y)
## [1] 7.4
A=1/(n-1)*(sum((x^2))-((sum(x)^2))/n)
var(x)
## [1] 3.5
b1=S/A
b1=cov(x,y)/var(x)
b1
## [1] 2.114286
xbar=sum(x)/n
xbar
## [1] 3.5
ybar=sum(y)/n
ybar
## [1] 8.333333
b0=ybar-b1*xbar
round(b0,3)
## [1] 0.933
ychap=b0+b1*x ## This is the LEAST SQUARED LINE or REGRESSION LINE

Errors/Residuals (e) = Points near Regression Line and SSE = Sum of Squared Deviations for Errors

b0=3
b1=1
sum((y-(b0+b1*x))^2)
## [1] 123
SS<- function(b0,b1,x,y) {
}
SS(0,2)
## NULL
curve(exp,xlin = c(0,10))
## Warning in plot.window(...): "xlin" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "xlin" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "xlin" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "xlin" is not a
## graphical parameter
## Warning in box(...): "xlin" is not a graphical parameter
## Warning in title(...): "xlin" is not a graphical parameter

# Representer la droite des moindres carres

PP+geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Calcul manual des coefficients de la Regression Lineare

b1=cov(years,bonus)/var(years)
b1=cov(x,y)/var(x)
round((b1),3)
## [1] 2.114

Calcul b0

b0=mean(bonus)-b1*mean(years)
b0
## [1] 0.9333333

Calcul de le residu ou Error ou Epsilon

e<-y-b0-b1*x
sum(e)
## [1] -3.108624e-15

Representation graphique de erreurs

PP+geom_segment(aes(x = years, y = bonus, xend = years, yend = b0+b1*years))+geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Sum of Squared of Errors (SSE) or le resultat de la fonction objectif

b0=mean(bonus)-b1*mean(years)
b1=cov(x,y)/var(x)
ychap=b0+b1*x
ychap
## [1]  3.047619  5.161905  7.276190  9.390476 11.504762 13.619048

Faire une Regression avec la Foction LM (Linear Models)

LM<-lm(bonus~years,data=bonusdata)

LM$coefficients ## Sortir seul les Coefficients
## (Intercept)       years 
##   0.9333333   2.1142857
LM$coefficients[1]## Sortir seul le Premier Coefficient Intercept = b0
## (Intercept) 
##   0.9333333
LM$coefficients[2]## Sortir seul le Premier Coefficient Slope = b1
##    years 
## 2.114286
LM$residuals ## Les residus = Epsion
##         1         2         3         4         5         6 
##  2.952381 -4.161905  1.723810 -4.390476  5.495238 -1.619048
LM$fitted.values ## Les valeurs ajustes (ychap_x = b0+b1*x) i.e. les ordonnees sur la droite.
##         1         2         3         4         5         6 
##  3.047619  5.161905  7.276190  9.390476 11.504762 13.619048

Une Bilan de la Regression avec Summary

summary(LM)
## 
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
## 
## Residuals:
##      1      2      3      4      5      6 
##  2.952 -4.162  1.724 -4.390  5.495 -1.619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.9333     4.1920   0.223    0.835
## years         2.1143     1.0764   1.964    0.121
## 
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared:  0.491,  Adjusted R-squared:  0.3637 
## F-statistic: 3.858 on 1 and 4 DF,  p-value: 0.121

Une presentation des resultats plus professionnelle avec Stargazer

stargazer(LM,type = "text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                bonus           
## -----------------------------------------------
## years                          2.114           
##                               (1.076)          
##                                                
## Constant                       0.933           
##                               (4.192)          
##                                                
## -----------------------------------------------
## Observations                     6             
## R2                             0.491           
## Adjusted R2                    0.364           
## Residual Std. Error       4.503 (df = 4)       
## F Statistic              3.858 (df = 1; 4)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
summary(LM)
## 
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
## 
## Residuals:
##      1      2      3      4      5      6 
##  2.952 -4.162  1.724 -4.390  5.495 -1.619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.9333     4.1920   0.223    0.835
## years         2.1143     1.0764   1.964    0.121
## 
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared:  0.491,  Adjusted R-squared:  0.3637 
## F-statistic: 3.858 on 1 and 4 DF,  p-value: 0.121

SSE

sum((LM$residuals)^2)
## [1] 81.10476
var(bonusdata$bonus)
## [1] 31.86667
cov(bonusdata$years, bonusdata$bonus)
## [1] 7.4
# n-1 =5

5*(var(bonusdata$bonus)-
    (cov(bonusdata$years,bonusdata$bonus))^2/var(bonusdata$years))
## [1] 81.10476
SSE=sum((LM$residuals)^2)
sqrt(SSE/(6-2))
## [1] 4.502909
summary(LM)
## 
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
## 
## Residuals:
##      1      2      3      4      5      6 
##  2.952 -4.162  1.724 -4.390  5.495 -1.619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.9333     4.1920   0.223    0.835
## years         2.1143     1.0764   1.964    0.121
## 
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared:  0.491,  Adjusted R-squared:  0.3637 
## F-statistic: 3.858 on 1 and 4 DF,  p-value: 0.121

EXERCICE 16.2

library(readxl)
Xm16_02 <- read_excel("E:/Univ FR/02_CNAM_Efab_Master 1 Finance/02_M1_Semestre 2/S2_04_Vendredi_ESD 109_Introduction a l'econometrie/Seance 13_14_Regression lineaire simple/13/Xm16-02.xlsx")

n=100
x<-Xm16_02$Odometer
sum(x)
## [1] 3601.1
y<-Xm16_02$Price
sum(y)
## [1] 1484.1
sum((x)^2)
## [1] 133986.6
mean(x)
## [1] 36.011
mean(y)
## [1] 14.841
cov(x,y)
## [1] -2.909041
var(x)
## [1] 43.50887
b1=cov(x,y)/var(x)
b1 ## This Slope coefficient indicates how much the price of the car is decreasing for each 1000 mile on odometer.
## [1] -0.06686089
xbar<-sum(x)/n
xbar
## [1] 36.011
ybar<-sum(y)/n
ybar
## [1] 14.841
b0=ybar-(b1*xbar)
b0 ## The Intercept represent where y axis is intersecting the Regression line, in x=0, in this case as cars were not driven (which is false). So this represent the price of a car to which the odometer would have shown 0
## [1] 17.24873
ychap=b0-b1*x ## This is the REGRESSION EQUATION
LM=lm(y~x,data=Xm16_02)
LM
## 
## Call:
## lm(formula = y ~ x, data = Xm16_02)
## 
## Coefficients:
## (Intercept)            x  
##    17.24873     -0.06686
stargazer(LM, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                  y             
## -----------------------------------------------
## x                            -0.067***         
##                               (0.005)          
##                                                
## Constant                     17.249***         
##                               (0.182)          
##                                                
## -----------------------------------------------
## Observations                    100            
## R2                             0.648           
## Adjusted R2                    0.645           
## Residual Std. Error       0.326 (df = 98)      
## F Statistic           180.643*** (df = 1; 98)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
sqrt(sum((LM$residuals)^2)/98)
## [1] 0.3264886
summary(LM)
## 
## Call:
## lm(formula = y ~ x, data = Xm16_02)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68679 -0.27263  0.00521  0.23210  0.70071 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.248727   0.182093   94.72   <2e-16 ***
## x           -0.066861   0.004975  -13.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3265 on 98 degrees of freedom
## Multiple R-squared:  0.6483, Adjusted R-squared:  0.6447 
## F-statistic: 180.6 on 1 and 98 DF,  p-value: < 2.2e-16
SLM=summary(LM) 
SLM$sigma
## [1] 0.3264886

Estimation de l’ecart type de b1 qui est une estimation de Beta 1

SLM$sigma/sqrt(99*var(Xm16_02$Odometer))
## [1] 0.004974639

Calcul de p-value du test de nullite de la pente. C’est une test bi-lateral (two-tail test)_ donc on multiplie par 2

2*pt(13.44,98,lower.tail = FALSE)
## [1] 5.760335e-24
2*pt(-13.44,98)
## [1] 5.760335e-24
LM$coefficients[2]-qt(0.025,98,lower.tail = FALSE)*SLM$sigma*(SLM$sigma/sqrt(99*var(Xm16_02$Odometer)))
##           x 
## -0.07008398