1 Regressão Linear Múltipla : Exercício 01

1.1 Dados do Exercício

#=============================================================
## Configuracoes iniciais
##-- Limpar a bancada de trabalho
rm(list = ls())
##-- Desligar notação cientifica, exceto para números grandes
options(scipen = 9)
#==============================================================

#====================================================================================
# DADOS
#====================================================================================

# definir um vetor para todas as observações (n)
observacoes <- c(1,2,3,4,5,6)

# definir uma matriz de valores da variavel 01 (X1),  variavel 02 (X2) e variavel dependente (Y)
Y <- c(1.5,6.5,10,11,11.5,16.5)
X1 <-c(0,1,1,2,2,3)
X2 <- c(0,2,4,2,4,6)
matrix <- cbind(Y, X1, X2)
rownames(matrix) <- observacoes
matrix

##      Y X1 X2
## 1  1.5  0  0
## 2  6.5  1  2
## 3 10.0  1  4
## 4 11.0  2  2
## 5 11.5  2  4
## 6 16.5  3  6

# transformando a matriz em um dataframe
matrix_df <- as.data.frame(matrix)
matrix_df

##      Y X1 X2
## 1  1.5  0  0
## 2  6.5  1  2
## 3 10.0  1  4
## 4 11.0  2  2
## 5 11.5  2  4
## 6 16.5  3  6

1.2 Analisando os dados

1.2.1 Fazendo a regressão linear multipla NA MÃO !

1.2.1.1 Obtendo os valores básicos da matriz de dados (n,p,k,somas e médias)

## Obter os dados básicos da matrix de dados

n <- length(matrix_df$Y)
p <- ncol(matrix)
k <- p-1
OBS <-c(n)
Parametros.p <- c(p)
Parametros.k <- c(k)
dados_basicos <- cbind(OBS,Parametros.p,Parametros.k)
dados_basicos_df <- as_data_frame(dados_basicos)
dados_basicos_df

## # A tibble: 1 x 3
##     OBS Parametros.p Parametros.k
##   <dbl>        <dbl>        <dbl>
## 1     6            3            2

##Obter as somas das variaveis

Y_sigma <- sum(matrix_df$Y)
X1_sigma <- sum(matrix_df$X1)
X2_sigma <- sum(matrix_df$X2)
Y_sigma

## [1] 57

X1_sigma

## [1] 9

X2_sigma

## [1] 18

##----- Obter as médias das variáveis

Y_barra <- Y_sigma/n
X1_barra <- X1_sigma/n
X2_barra <- X2_sigma/n
Y_barra

## [1] 9.5

X1_barra

## [1] 1.5

X2_barra

## [1] 3

1.2.1.2 Ampliando a matriz original com as variaveis centradas e o produto delas

##----- Adicionando as variaveis centradas na matrix

matrix_df$y <- matrix_df$Y - Y_barra
matrix_df$x1 <- matrix_df$X1 - X1_barra
matrix_df$x2 <- matrix_df$X2 - X2_barra
matrix_df$x1x2 <- matrix_df$x1 * matrix_df$x2
matrix_df

##      Y X1 X2    y   x1 x2 x1x2
## 1  1.5  0  0 -8.0 -1.5 -3  4.5
## 2  6.5  1  2 -3.0 -0.5 -1  0.5
## 3 10.0  1  4  0.5 -0.5  1 -0.5
## 4 11.0  2  2  1.5  0.5 -1 -0.5
## 5 11.5  2  4  2.0  0.5  1  0.5
## 6 16.5  3  6  7.0  1.5  3  4.5

##----- Adicionando as variaveis ao quadrado na matrix
matrix_df$y_quad <- matrix_df$y * matrix_df$y
matrix_df$x1_quad <- matrix_df$x1 * matrix_df$x1
matrix_df$x2_quad <- matrix_df$x2 * matrix_df$x2
matrix_df

##      Y X1 X2    y   x1 x2 x1x2 y_quad x1_quad x2_quad
## 1  1.5  0  0 -8.0 -1.5 -3  4.5  64.00    2.25       9
## 2  6.5  1  2 -3.0 -0.5 -1  0.5   9.00    0.25       1
## 3 10.0  1  4  0.5 -0.5  1 -0.5   0.25    0.25       1
## 4 11.0  2  2  1.5  0.5 -1 -0.5   2.25    0.25       1
## 5 11.5  2  4  2.0  0.5  1  0.5   4.00    0.25       1
## 6 16.5  3  6  7.0  1.5  3  4.5  49.00    2.25       9

##----- Adicionando o produto das variaveis centradas e Y
matrix_df$Y.x1 <- matrix_df$Y * matrix_df$x1
matrix_df$Y.x2 <- matrix_df$Y * matrix_df$x2
matrix_df

##      Y X1 X2    y   x1 x2 x1x2 y_quad x1_quad x2_quad  Y.x1  Y.x2
## 1  1.5  0  0 -8.0 -1.5 -3  4.5  64.00    2.25       9 -2.25  -4.5
## 2  6.5  1  2 -3.0 -0.5 -1  0.5   9.00    0.25       1 -3.25  -6.5
## 3 10.0  1  4  0.5 -0.5  1 -0.5   0.25    0.25       1 -5.00  10.0
## 4 11.0  2  2  1.5  0.5 -1 -0.5   2.25    0.25       1  5.50 -11.0
## 5 11.5  2  4  2.0  0.5  1  0.5   4.00    0.25       1  5.75  11.5
## 6 16.5  3  6  7.0  1.5  3  4.5  49.00    2.25       9 24.75  49.5

1.2.1.3 Calculando o produto da matrix transposta de X pela matrix X (X`X)

##----- Obtendo o produto da matrix X transposta (X') pela matrix de X

X_transp.X <- matrix(ncol=3,
              c(n,
                0,
                0,
                0,
                sum(matrix_df$x1_quad),
                sum(matrix_df$x1x2),
                0,
                sum(matrix_df$x1x2),
                sum(matrix_df$x2_quad))
              )
X_transp.X

##      [,1] [,2] [,3]
## [1,]    6  0.0    0
## [2,]    0  5.5    9
## [3,]    0  9.0   22

1.2.1.4 Calculando o produto de X`X pela matriz Y

##----- Obtendo o produto da matrix X transposta (X`) pela matrix Y

X_transp.Y <- matrix(ncol=1,
              c(sum(matrix_df$Y),
                sum(matrix_df$Y.x1),
                sum(matrix_df$Y.x2))
              )
X_transp.Y

##      [,1]
## [1,] 57.0
## [2,] 25.5
## [3,] 49.0

1.2.1.5 Calculando a matriz inversa de X`X

###-----  Encontrar o determinante

X_transposta.X_det <- n*(
                          (
                            sum(matrix_df$x1_quad)*sum(matrix_df$x2_quad)
                          )
                          -
                          (
                            sum(matrix_df$x1x2)*sum(matrix_df$x1x2)
                          )
                          )
X_transposta.X_det

## [1] 240

###-----  Criar a matriz de cofatores

a11 <- (X_transp.X[2,2]*X_transp.X[3,3]) - (X_transp.X[3,2]*X_transp.X[2,3])
a12 <- - (X_transp.X[2,1]*X_transp.X[3,3]) - (X_transp.X[3,1]*X_transp.X[2,3])
a13 <- (X_transp.X[2,1]*X_transp.X[3,2]) - (X_transp.X[3,1]*X_transp.X[2,2])
a21 <- - (X_transp.X[1,2]*X_transp.X[3,3]) - (X_transp.X[3,2]*X_transp.X[1,3])
a22 <- (X_transp.X[1,1]*X_transp.X[3,3]) - (X_transp.X[3,1]*X_transp.X[1,3])
a23 <- - (X_transp.X[1,1]*X_transp.X[3,2]) - (X_transp.X[3,1]*X_transp.X[1,2])
a31 <- (X_transp.X[1,2]*X_transp.X[2,3]) - (X_transp.X[2,2]*X_transp.X[1,3])
a32 <- - (X_transp.X[1,1]*X_transp.X[2,3]) - (X_transp.X[2,1]*X_transp.X[1,3])
a33 <- (X_transp.X[1,1]*X_transp.X[2,2]) - (X_transp.X[2,1]*X_transp.X[1,2])

matrix_cofatores <- matrix(
                    c(a11,a21,a31,a12,a22,a32,a13,a23,a33),
                    ncol= 3
                    )
matrix_cofatores

##      [,1] [,2] [,3]
## [1,]   40    0    0
## [2,]    0  132  -54
## [3,]    0  -54   33

# Produto da matrix de cofatores pelo determinante

X_transposta.X_inversa <- (1/X_transposta.X_det)*matrix_cofatores
X_transposta.X_inversa

##           [,1]   [,2]    [,3]
## [1,] 0.1666667  0.000  0.0000
## [2,] 0.0000000  0.550 -0.2250
## [3,] 0.0000000 -0.225  0.1375

# em frações

MASS::fractions(X_transposta.X_inversa)

##      [,1]  [,2]  [,3] 
## [1,]   1/6     0     0
## [2,]     0 11/20 -9/40
## [3,]     0 -9/40 11/80

1.2.1.6 Calculando a matriz dos BETAS ESTIMADOS

##----- Obtendo a matrix de BETAs estimados

matrix_BETAS <- (X_transposta.X_inversa) %*% (X_transp.Y)
matrix_BETAS

##      [,1]
## [1,]  9.5
## [2,]  3.0
## [3,]  1.0

###------ Corrigindo o BETA0 para variaveis nao-centradas

Beta0_estimado_namedia <- matrix_BETAS[1,1]
Beta0_corrijido <- matrix_BETAS[1,1] - ((matrix_BETAS[2,1]*X1_barra)+(matrix_BETAS[3,1]*X2_barra))


###------- BETAS ESTIMADOS
Beta0_estimado <- Beta0_corrijido
Beta1_estimado <- matrix_BETAS[2,1]
Beta2_estimado <- matrix_BETAS[3,1]
Beta0_estimado

## [1] 2

Beta1_estimado

## [1] 3

Beta2_estimado

## [1] 1

1.2.1.7 Testando os BETAS ESTIMADOS

1.2.1.7.1 Teste F

##----- Calculando a SOMA dos QUADRADOS

SQE <- matrix_BETAS[2,1]*(sum(matrix_df$Y.x1)) + matrix_BETAS[3,1]*(sum(matrix_df$Y.x2))
SQE

## [1] 125.5

SQR <- (sum(matrix_df$y_quad)) - SQE
SQR

## [1] 3

SQT <- (sum(matrix_df$y_quad))
SQT

## [1] 128.5

#----- Calculando os QUADRADOS MEDIOS

QME <- SQE/k
QME

## [1] 62.75

QMR <- SQR/(n-p)
QMR

## [1] 1

QMT <- SQT/(n-1)
QMT

## [1] 25.7

##----- Testando F e apresentando os resultados
CV <- c("Regressao","Residuo","Total")
GL <- c(k,(n-p),(n-1))
SQ <- c(SQE, SQR, SQT)
QM <- c(QME,QMR,0)
F_test <- c((QME/QMR),0,0)
F_test_nivel_significancia <- 0.010
F_critico <- c(qf((1-F_test_nivel_significancia), df1=k, df2=(n-p)),0,0)
resultados_df <- data.frame(CV,GL,SQ,QM,F_test,F_critico)
resultados_df

##          CV GL    SQ    QM F_test F_critico
## 1 Regressao  2 125.5 62.75  62.75  30.81652
## 2   Residuo  3   3.0  1.00   0.00   0.00000
## 3     Total  5 128.5  0.00   0.00   0.00000

##------ Grafico da distribuição F
F_test_df1 <- k
F_test_df2 <- (n-p)
F_observado <- QME/QMR
F_critico <- qf((1-F_test_nivel_significancia), df1 = F_test_df1, df2 = F_test_df2)
F_critico_d <- df(F_critico, df1 = F_test_df1, df2 = F_test_df2)
Limite_superior_X <- (F_observado*1.4)
Limite_inferior_X <- (0)
ggplot(data.frame(x = c(Limite_inferior_X, Limite_superior_X)), aes(x)) +
  stat_function(fun = df,
                geom = "line",
                args = list(df1 = F_test_df1,df2 = F_test_df2)) +
  geom_segment(aes(x = (F_critico), 
                   y = 0, 
                   xend = (F_critico), 
                   yend = 0.25), 
               color = "red", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F = ", round(F_critico, digits = 4)), 
           x = F_critico + 1, 
           y = -0.05, 
           color = "red") +
  geom_segment(aes(x = (F_observado), 
                   y = 0, 
                   xend = (F_observado), 
                   yend = 0.25), 
               color = "black", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F(obs) = ", round(F_observado, digits = 4)), 
           x = F_observado + 0.75, 
           y = 0.30, 
           color = "black")

1.2.1.7.2 Testando o R2

##----- Obtendo o R2 (quanto o modelo explica)

R2 <- SQE/SQT
R2

## [1] 0.9766537

R2_ajust <- R2 - (((p-1)/(n-p))*(1-R2))
R2_ajust

## [1] 0.9610895

1.2.1.7.3 Obtendo a matrix de variancia e covariancia dos BETAS ESTIMADOS

##----- Obtendo a matrix de variancia e covariancia

matrix_var.covar <- X_transposta.X_inversa * QMR
matrix_var.covar

##           [,1]   [,2]    [,3]
## [1,] 0.1666667  0.000  0.0000
## [2,] 0.0000000  0.550 -0.2250
## [3,] 0.0000000 -0.225  0.1375

MASS::fractions(matrix_var.covar)

##      [,1]  [,2]  [,3] 
## [1,]   1/6     0     0
## [2,]     0 11/20 -9/40
## [3,]     0 -9/40 11/80

Variancia_estimada_BETA.0centrada <- matrix_var.covar[1,1]
Variancia_estimada_BETA.0centrada

## [1] 0.1666667

Variancia_estimada_BETA.0 <- (matrix_var.covar[1,1] 
                              +
                              (X1_barra^2)*(matrix_var.covar[2,2])
                              +
                              (X2_barra^2)*(matrix_var.covar[3,3])
                              -
                              2*(X1_barra)*(matrix_var.covar[2,1])
                              -
                              2*(X2_barra)*(matrix_var.covar[3,1])
                              +
                              2*(X1_barra)*(X2_barra)*(matrix_var.covar[2,3])
                              )
Variancia_estimada_BETA.0

## [1] 0.6166667

MASS::fractions(Variancia_estimada_BETA.0)

## [1] 37/60

Variancia_estimada_BETA.1 <- matrix_var.covar[2,2]
Variancia_estimada_BETA.1

## [1] 0.55

Variancia_estimada_BETA.2 <- matrix_var.covar[3,3]
Variancia_estimada_BETA.2

## [1] 0.1375

Covarianca_estimada_BETA.0_BETA.1 <- matrix_var.covar[2,1]
Covarianca_estimada_BETA.0_BETA.1

## [1] 0

Covarianca_estimada_BETA.0_BETA.2 <- matrix_var.covar[3,1]
Covarianca_estimada_BETA.0_BETA.2

## [1] 0

Covarianca_estimada_BETA.1_BETA.2 <- matrix_var.covar[2,3]
Covarianca_estimada_BETA.1_BETA.2

## [1] -0.225

1.2.1.7.4 Testes de Hipóteses

1.2.1.7.4.1 Testando Hipotese A

## Item (a)
# Ho : Beta0 = 10 contra Ha : Beta0 < 10  (UNILATERAL)
Beta0 <- 10
T_test_a <- ( (Beta0_estimado) - (Beta0) ) / sqrt(Variancia_estimada_BETA.0)
T_test_a

## [1] -10.18743

T_critico <- qt(.99,df=(n-p), lower.tail = FALSE)
T_critico

## [1] -4.540703

Item_a_resultados_df <- data.frame(T_test_a,T_critico)
Item_a_resultados_df

##    T_test_a T_critico
## 1 -10.18743 -4.540703

##------ Grafico da distribuição T para teste UNILATERAL
T_test_nivel_significancia <- 0.01
T_test_df <- (n-p)
T_test_a_modulo <- abs(T_test_a)
T_critico <- qt((1-T_test_nivel_significancia), df = T_test_df)
T_critico_d <- dt(T_critico, df = T_test_df)
Limite_superior_X <- (T_test_a_modulo*1.4)
Limite_inferior_X <- (0)
ggplot(data.frame(x = c(Limite_inferior_X, Limite_superior_X)), aes(x)) +
  stat_function(fun = dt,
                geom = "line",
                args = list(df = T_test_df)) +
  geom_segment(aes(x = (T_critico), 
                   y = 0, 
                   xend = (T_critico), 
                   yend = 0.25), 
               color = "red", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F = ", round(T_critico, digits = 4)), 
           x = T_critico + 1, 
           y = -0.05, 
           color = "red") +
  geom_segment(aes(x = (T_test_a_modulo), 
                   y = 0, 
                   xend = (T_test_a_modulo), 
                   yend = 0.25), 
               color = "black", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F(obs) = ", round(T_test_a_modulo, digits = 4)), 
           x = T_test_a_modulo + 0.75, 
           y = 0.30, 
           color = "black")

# O t obtido é maior que o t critico, portanto, rejeita Ho em favor de Ha
## (em outras palavras, a probabilidade de Ho ser verdadeiro é menor do que 1%)

1.2.1.7.4.2 Testando Hipotese B

## Item (b)
# Ho : Beta1 = 0 contra Ha : Beta1 diferente de 0 (BILATERAL)
Beta1 <- 0
T_test_b <- ( (Beta1_estimado) - (Beta1) ) / sqrt(Variancia_estimada_BETA.1)
T_test_b

## [1] 4.045199

T_critico.010 <- qt(.995,df=(n-p), lower.tail = TRUE)
T_critico.010

## [1] 5.840909

Item_b_resultados_df <- data.frame(T_test_b,T_critico.010)
Item_b_resultados_df

##   T_test_b T_critico.010
## 1 4.045199      5.840909

##------ Grafico da distribuição T para teste BIILATERAL
T_test_nivel_significancia <- 0.01
T_test_df <- (n-p)
T_test_b_modulo <- abs(T_test_b)
T_critico <- qt((1-(T_test_nivel_significancia/2)), df = T_test_df)
T_critico_d <- dt(T_critico, df = T_test_df)
Limite_superior_X <- (T_test_b_modulo*1.4)
Limite_inferior_X <- (0)
ggplot(data.frame(x = c(Limite_inferior_X, Limite_superior_X)), aes(x)) +
  stat_function(fun = dt,
                geom = "line",
                args = list(df = T_test_df)) +
  geom_segment(aes(x = (T_critico), 
                   y = 0, 
                   xend = (T_critico), 
                   yend = 0.25), 
               color = "red", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F = ", round(T_critico, digits = 4)), 
           x = T_critico + 1, 
           y = -0.05, 
           color = "red") +
  geom_segment(aes(x = (T_test_b_modulo), 
                   y = 0, 
                   xend = (T_test_b_modulo), 
                   yend = 0.25), 
               color = "black", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F(obs) = ", round(T_test_b_modulo, digits = 4)), 
           x = T_test_b_modulo + 0.75, 
           y = 0.30, 
           color = "black")

# O t obtido é menor que o t critico, portanto não é possível rejeitar Ho.

1.2.1.7.4.3 Testando hipotese C

## Item (c)
# Ho : Beta2 = 0 contra Ha : Beta2 diferente de 0 (BILATERAL)
Beta2 <- 0
T_test_c <- ( (Beta2_estimado) - (Beta2) ) / sqrt(Variancia_estimada_BETA.2)
T_test_c

## [1] 2.696799

T_critico.010 <- qt(.995,df=(n-p), lower.tail = TRUE)
T_critico.010

## [1] 5.840909

Item_c_resultados_df <- data.frame(T_test_c,T_critico.010)
Item_c_resultados_df

##   T_test_c T_critico.010
## 1 2.696799      5.840909

##------ Grafico da distribuição T para teste BIILATERAL
T_test_nivel_significancia <- 0.01
T_test_df <- (n-p)
T_test_c_modulo <- abs(T_test_c)
T_critico <- qt((1-(T_test_nivel_significancia/2)), df = T_test_df)
T_critico_d <- dt(T_critico, df = T_test_df)
Limite_superior_X <- (T_test_c_modulo*1.4)
Limite_inferior_X <- (0)
ggplot(data.frame(x = c(Limite_inferior_X, Limite_superior_X)), aes(x)) +
  stat_function(fun = dt,
                geom = "line",
                args = list(df = T_test_df)) +
  geom_segment(aes(x = (T_critico), 
                   y = 0, 
                   xend = (T_critico), 
                   yend = 0.25), 
               color = "red", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F = ", round(T_critico, digits = 4)), 
           x = T_critico + 1, 
           y = -0.05, 
           color = "red") +
  geom_segment(aes(x = (T_test_c_modulo), 
                   y = 0, 
                   xend = (T_test_c_modulo), 
                   yend = 0.25), 
               color = "black", 
               size = 1, 
               alpha = 0.8) +
  annotate("text", 
           label = paste0("F(obs) = ", round(T_test_c_modulo, digits = 4)), 
           x = T_test_c_modulo + 0.75, 
           y = 0.30, 
           color = "black")

# O t obtido é menor que o t critico, portanto não é possível rejeitar Ho.

NOTA IMPORTANTE: No text F (o primeiro feito) foi possivel rejeitar Ho : Beta1 = Beta2 = 0 ao nível de significancia de 1% Contudo, neste mesmo nível, não foi possivel rejeitar Ho : Beta1 = 0 e nem Ho : Beta2 = 0.

1.2.2 Fazendo a regressão linear multipla pelas funções de matriz do R

#====================
# Regressao usando funcoes de matrizes do R
#======================

# definir um vetor para todas as observações (n)
observacoes <- c(1,2,3,4,5,6)
# definir uma matriz de valores da variavel dependente (Y)
Y <- c(
        matrix[1,1],
        matrix[2,1],
        matrix[3,1],
        matrix[4,1],
        matrix[5,1],
        matrix[6,1]
        )
y <- cbind(Y)
rownames(y) <- observacoes
y

##      Y
## 1  1.5
## 2  6.5
## 3 10.0
## 4 11.0
## 5 11.5
## 6 16.5

# definir um vetor para todas as observações (n)
observacoes <- c(1,2,3,4,5,6)
# definir uma matriz de valores CENTRADOS das variaveis independentes 01 (X1) e 02 (X2) e o intercepto X0
x0 <- c(1,1,1,1,1,1)
x1 <- c(
        matrix[1,2]-X1_barra,
        matrix[2,2]-X1_barra,
        matrix[3,2]-X1_barra,
        matrix[4,2]-X1_barra,
        matrix[5,2]-X1_barra,
        matrix[6,2]-X1_barra
        )
x2 <- c(
        matrix[1,3]-X2_barra,
        matrix[2,3]-X2_barra,
        matrix[3,3]-X2_barra,
        matrix[4,3]-X2_barra,
        matrix[5,3]-X2_barra,
        matrix[6,3]-X2_barra
        )
X <- cbind(x0, x1, x2)
rownames(X) <- observacoes
X

##   x0   x1 x2
## 1  1 -1.5 -3
## 2  1 -0.5 -1
## 3  1 -0.5  1
## 4  1  0.5 -1
## 5  1  0.5  1
## 6  1  1.5  3

# criando a transposta de X
X_transposta <- t(X)
X_transposta

##       1    2    3    4   5   6
## x0  1.0  1.0  1.0  1.0 1.0 1.0
## x1 -1.5 -0.5 -0.5  0.5 0.5 1.5
## x2 -3.0 -1.0  1.0 -1.0 1.0 3.0

# criando o produto da transposta de X por X
X_transposta.X <- X_transposta %*% X
X_transposta.X

##    x0  x1 x2
## x0  6 0.0  0
## x1  0 5.5  9
## x2  0 9.0 22

# obtendo o determinante de X
det.x <- det(X_transposta.X)
det.x

## [1] 240

# obtendo a inversa
inv_X_transposta.X <- solve(X_transposta.X)
inv_X_transposta.X

##           x0     x1      x2
## x0 0.1666667  0.000  0.0000
## x1 0.0000000  0.550 -0.2250
## x2 0.0000000 -0.225  0.1375

# 
X_transposta.y <- X_transposta %*% y
X_transposta.y

##       Y
## x0 57.0
## x1 25.5
## x2 49.0

# obtendo os BETAS estimados
beta_est <- inv_X_transposta.X %*% X_transposta.y
beta_est

##      Y
## x0 9.5
## x1 3.0
## x2 1.0

1.2.3 Regressão linear direto pelo R

#=============================================================
# REGRESSAO DIRETO PELO R : (lm, stargazer, ggplot)
#============================================================

## Calculando a regressão linear usando a função LM (linear model)
modelo1 <- lm(Y ~ X1+X2, data=matrix_df)
modelo1

## 
## Call:
## lm(formula = Y ~ X1 + X2, data = matrix_df)
## 
## Coefficients:
## (Intercept)           X1           X2  
##           2            3            1

## Apresentando os dados através do pacote Stargazer
stargazer(modelo1, type="text")

## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                  Y             
## -----------------------------------------------
## X1                            3.000**          
##                               (0.742)          
##                                                
## X2                            1.000*           
##                               (0.371)          
##                                                
## Constant                      2.000*           
##                               (0.785)          
##                                                
## -----------------------------------------------
## Observations                     6             
## R2                             0.977           
## Adjusted R2                    0.961           
## Residual Std. Error       1.000 (df = 3)       
## F Statistic            62.750*** (df = 2; 3)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

# Apresentando os dados em um grafico do ggplot
ggplot(data = matrix_df, aes(x = `X1`, y = `X2`)) + 
  geom_point(color='black') +
  geom_smooth(method = "lm", formula = y ~ x, linetype = "dotted", se = FALSE)

SE308 - RegLinMult- Exercicio 01

Alceu Nascimento

10/10/2021