r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)
install.packages("weatherData")
## Installing package into 'C:/Users/Madalina/Documents/R/win-library/4.0'
## (as 'lib' is unspecified)
## Warning: package 'weatherData' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
library(ggplot2)
library(formattable)
library(magrittr)
library(latexpdf)
library(TeachingDemos)
##
## Attaching package: 'TeachingDemos'
## The following object is masked from 'package:formattable':
##
## digits
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(latexpdf)
library(latex2exp)
library(tinytex)
*The annual bonuses ($1,000s) of six employees with different years of experience were recorded as follows.
We wish to determine the straight-line relationship between annual bonus and years of experience.
n=6
x=years=1:6
y=bonus=c(6,1,9,5,17,12)
bonusdata=data.frame(years,bonus)
PP<-ggplot(bonusdata, aes(x=years, y=bonus))+ geom_point()+ggtitle("Bonus Data")
PP
# Calculate Manually _ Sums, Covariance and Variance for b1 and b0 ## COVARIANCE (x,y)
n=6
sum(x)
## [1] 21
sum(y)
## [1] 50
sum(x*y)
## [1] 212
sum((x^2))
## [1] 91
S=(1/(n-1)*(sum(x*y)-((sum(x)*sum(y))/n)))
cov(x,y)
## [1] 7.4
A=1/(n-1)*(sum((x^2))-((sum(x)^2))/n)
var(x)
## [1] 3.5
b1=S/A
b1=cov(x,y)/var(x)
b1
## [1] 2.114286
xbar=sum(x)/n
xbar
## [1] 3.5
ybar=sum(y)/n
ybar
## [1] 8.333333
b0=ybar-b1*xbar
round(b0,3)
## [1] 0.933
ychap=b0+b1*x ## This is the LEAST SQUARED LINE or REGRESSION LINE
b0=3
b1=1
sum((y-(b0+b1*x))^2)
## [1] 123
SS<- function(b0,b1,x,y) {
}
SS(0,2)
## NULL
curve(exp,xlin = c(0,10))
## Warning in plot.window(...): "xlin" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "xlin" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "xlin" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "xlin" is not a
## graphical parameter
## Warning in box(...): "xlin" is not a graphical parameter
## Warning in title(...): "xlin" is not a graphical parameter
# Representer la droite des moindres carres
PP+geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Calcul manual des coefficients de la Regression Lineare
b1=cov(years,bonus)/var(years)
b1=cov(x,y)/var(x)
round((b1),3)
## [1] 2.114
b0=mean(bonus)-b1*mean(years)
b0
## [1] 0.9333333
e<-y-b0-b1*x
sum(e)
## [1] -3.108624e-15
PP+geom_segment(aes(x = years, y = bonus, xend = years, yend = b0+b1*years))+geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Sum of Squared of Errors (SSE) or le resultat de la fonction objectif
b0=mean(bonus)-b1*mean(years)
b1=cov(x,y)/var(x)
ychap=b0+b1*x
ychap
## [1] 3.047619 5.161905 7.276190 9.390476 11.504762 13.619048
LM<-lm(bonus~years,data=bonusdata)
LM$coefficients ## Sortir seul les Coefficients
## (Intercept) years
## 0.9333333 2.1142857
LM$coefficients[1]## Sortir seul le Premier Coefficient Intercept = b0
## (Intercept)
## 0.9333333
LM$coefficients[2]## Sortir seul le Premier Coefficient Slope = b1
## years
## 2.114286
LM$residuals ## Les residus = Epsion
## 1 2 3 4 5 6
## 2.952381 -4.161905 1.723810 -4.390476 5.495238 -1.619048
LM$fitted.values ## Les valeurs ajustes (ychap_x = b0+b1*x) i.e. les ordonnees sur la droite.
## 1 2 3 4 5 6
## 3.047619 5.161905 7.276190 9.390476 11.504762 13.619048
summary(LM)
##
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
##
## Residuals:
## 1 2 3 4 5 6
## 2.952 -4.162 1.724 -4.390 5.495 -1.619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9333 4.1920 0.223 0.835
## years 2.1143 1.0764 1.964 0.121
##
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared: 0.491, Adjusted R-squared: 0.3637
## F-statistic: 3.858 on 1 and 4 DF, p-value: 0.121
stargazer(LM,type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## bonus
## -----------------------------------------------
## years 2.114
## (1.076)
##
## Constant 0.933
## (4.192)
##
## -----------------------------------------------
## Observations 6
## R2 0.491
## Adjusted R2 0.364
## Residual Std. Error 4.503 (df = 4)
## F Statistic 3.858 (df = 1; 4)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
summary(LM)
##
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
##
## Residuals:
## 1 2 3 4 5 6
## 2.952 -4.162 1.724 -4.390 5.495 -1.619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9333 4.1920 0.223 0.835
## years 2.1143 1.0764 1.964 0.121
##
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared: 0.491, Adjusted R-squared: 0.3637
## F-statistic: 3.858 on 1 and 4 DF, p-value: 0.121
sum((LM$residuals)^2)
## [1] 81.10476
var(bonusdata$bonus)
## [1] 31.86667
cov(bonusdata$years, bonusdata$bonus)
## [1] 7.4
# n-1 =5
5*(var(bonusdata$bonus)-
(cov(bonusdata$years,bonusdata$bonus))^2/var(bonusdata$years))
## [1] 81.10476
SSE=sum((LM$residuals)^2)
sqrt(SSE/(6-2))
## [1] 4.502909
summary(LM)
##
## Call:
## lm(formula = bonus ~ years, data = bonusdata)
##
## Residuals:
## 1 2 3 4 5 6
## 2.952 -4.162 1.724 -4.390 5.495 -1.619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9333 4.1920 0.223 0.835
## years 2.1143 1.0764 1.964 0.121
##
## Residual standard error: 4.503 on 4 degrees of freedom
## Multiple R-squared: 0.491, Adjusted R-squared: 0.3637
## F-statistic: 3.858 on 1 and 4 DF, p-value: 0.121
library(readxl)
Xm16_02 <- read_excel("E:/Univ FR/02_CNAM_Efab_Master 1 Finance/02_M1_Semestre 2/S2_04_Vendredi_ESD 109_Introduction a l'econometrie/Seance 13_14_Regression lineaire simple/13/Xm16-02.xlsx")
n=100
x<-Xm16_02$Odometer
sum(x)
## [1] 3601.1
y<-Xm16_02$Price
sum(y)
## [1] 1484.1
sum((x)^2)
## [1] 133986.6
mean(x)
## [1] 36.011
mean(y)
## [1] 14.841
cov(x,y)
## [1] -2.909041
var(x)
## [1] 43.50887
b1=cov(x,y)/var(x)
b1 ## This Slope coefficient indicates how much the price of the car is decreasing for each 1000 mile on odometer.
## [1] -0.06686089
xbar<-sum(x)/n
xbar
## [1] 36.011
ybar<-sum(y)/n
ybar
## [1] 14.841
b0=ybar-(b1*xbar)
b0 ## The Intercept represent where y axis is intersecting the Regression line, in x=0, in this case as cars were not driven (which is false). So this represent the price of a car to which the odometer would have shown 0
## [1] 17.24873
ychap=b0-b1*x ## This is the REGRESSION EQUATION
LM=lm(y~x,data=Xm16_02)
LM
##
## Call:
## lm(formula = y ~ x, data = Xm16_02)
##
## Coefficients:
## (Intercept) x
## 17.24873 -0.06686
stargazer(LM, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## y
## -----------------------------------------------
## x -0.067***
## (0.005)
##
## Constant 17.249***
## (0.182)
##
## -----------------------------------------------
## Observations 100
## R2 0.648
## Adjusted R2 0.645
## Residual Std. Error 0.326 (df = 98)
## F Statistic 180.643*** (df = 1; 98)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
sqrt(sum((LM$residuals)^2)/98)
## [1] 0.3264886
summary(LM)
##
## Call:
## lm(formula = y ~ x, data = Xm16_02)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68679 -0.27263 0.00521 0.23210 0.70071
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.248727 0.182093 94.72 <2e-16 ***
## x -0.066861 0.004975 -13.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3265 on 98 degrees of freedom
## Multiple R-squared: 0.6483, Adjusted R-squared: 0.6447
## F-statistic: 180.6 on 1 and 98 DF, p-value: < 2.2e-16
SLM=summary(LM)
SLM$sigma
## [1] 0.3264886
SLM$sigma/sqrt(99*var(Xm16_02$Odometer))
## [1] 0.004974639
2*pt(13.44,98,lower.tail = FALSE)
## [1] 5.760335e-24
2*pt(-13.44,98)
## [1] 5.760335e-24
LM$coefficients[2]-qt(0.025,98,lower.tail = FALSE)*SLM$sigma*(SLM$sigma/sqrt(99*var(Xm16_02$Odometer)))
## x
## -0.07008398