###Proyecto de Econometria II

######Study case 2.4 Below are data on y = green liquor concentration (g/liter) and x = paper machine speed (ft/min) from a kraft paper machine (the data were read from a graph in an article in the Tappi Journal, March, 1986).

library(matlib)
y <- c(16.0, 15.8, 15.6, 15.5, 14.8, 14.0, 13.5, 13.0, 12.0, 11.0)
x <- c(1700, 1720, 1730, 1740, 1750, 1760, 1770, 1780, 1790, 1795)
plot(x,y, col = "red")

######(a) Fit the model y = 𝛽0 + 𝛽1x + 𝛽2x2 + 𝜀 using least squares.

#We create the variable x^2
x_2 <- x^2
x_2
##  [1] 2890000 2958400 2992900 3027600 3062500 3097600 3132900 3168400 3204100
## [10] 3222025

Now we have 3 columns in our \(X\) matrix:

#Creation of the X matrix
X <- cbind(rep(1,length(x)),x,x_2)
X
##            x     x_2
##  [1,] 1 1700 2890000
##  [2,] 1 1720 2958400
##  [3,] 1 1730 2992900
##  [4,] 1 1740 3027600
##  [5,] 1 1750 3062500
##  [6,] 1 1760 3097600
##  [7,] 1 1770 3132900
##  [8,] 1 1780 3168400
##  [9,] 1 1790 3204100
## [10,] 1 1795 3222025

\[\begin{equation*} X = \begin{bmatrix}{} 1 & 1700 & 2890000 \\ 1 & 1720 & 2958400 \\ 1 & 1730 & 2992900 \\ 1 & 1740 & 3027600 \\ 1 & 1750 & 3062500 \\ 1 & 1760 & 3097600 \\ 1 & 1770 & 3132900 \\ 1 & 1780 & 3168400 \\ 1 & 1790 & 3204100 \\ 1 & 1796 & 3222025 \\ \end{bmatrix} Y = \begin{bmatrix}{} 16.0 \\ 15.8 \\ 15.6 \\ 15.5 \\ 14.8 \\ 14.0 \\ 13.5 \\ 13.0 \\ 12.0 \\ 11.0 \\ \end{bmatrix} \end{equation*}\]

#Calculate the matrix X transpose times X
xT <- t(X)
xTx<- xT%*%X
xTx
##                        x          x_2
##           10       17535 3.075642e+07
## x      17535    30756425 5.396220e+10
## x_2 30756425 53962198875 9.470360e+13

\[\begin{equation*} X^T X = \begin{bmatrix}{} 10 & 17535 & 3.075642e+07 \\ 17535 & 30756425 & 5.396220e+10 \\ 30756425 & 53962198875 & 9.470360e+13 \\ \end{bmatrix} \end{equation*}\]

#Calculate X transposed times Y
xTy <- xT%*%y
xTy
##            [,1]
##           141.2
## x      247134.0
## x_2 432665985.0
#Estimates in order B0, B1 for X and B2 for X^2
b <- inv(xTx)%*%xTy
b
##               [,1]
## [1,] -1707.2484977
## [2,]     0.5111515
## [3,]    -2.0887368
curve( -1707.2484977 + 0.5111515*x - 2.0887368*x^2)

\(\hat{y} = -1707.25 + 0.51*x - 2.09*x^2\)

######(b) Test for significance of regression using 𝛼 = 0.05. What are your conclusions? In order to do so, we need to calculate p < alfa.

#Regression sum of squares
SSr <- t(b)%*%t(X)%*%y - (sum(y))^2/(length(y))
SSr
##            [,1]
## [1,] -903842120
#Total sum of squares
SSt <- t(y)%*%y - (sum(y))^2/(length(y))
SSt
##        [,1]
## [1,] 26.796
#Error sum of squares
SSe <- (SSt - SSr)
SSe
##           [,1]
## [1,] 903842147

######Mean square total and error

#Mean square regression
MSr <- SSr/2
MSr
##            [,1]
## [1,] -451921060
#Mean square error
MSe <- (SSt - SSr)/7
MSe
##           [,1]
## [1,] 129120307

#####F statistic

F_stat <- MSr/MSe
F_stat
##      [,1]
## [1,] -3.5

###Case study 2.6 An engineer at a semiconductor company wants to model the relationship between the device gain hFE(y) and three parameters: emitter RS (x1), base RS (x2), and emitter-to-base RS (x3).

x1 <- c(14.620, 15.630, 14.620, 15.000, 14.500, 15.250, 16.120, 15.130, 15.500, 15.130, 15.500, 16.120, 15.130, 15.630, 15.380, 15.500, 14.250, 14.500, 14.620)
x2 <- c(226.00, 220.00, 217.40, 220.00, 226.50, 224.10, 220.50, 223.50, 217.60, 228.50, 230.20, 226.50, 226.60, 225.60, 234.00, 230.00, 224.30, 240.50, 223.70)
x3 <- c(7.000, 3.375, 6.375, 6.000, 7.625, 6.000, 3.375, 6.125, 5.000, 6.625, 5.750, 3.750, 6.125, 5.375, 8.875, 4.000, 8.000, 10.870, 7.375)
y <- c(128.40, 52.62, 113.90, 98.01, 139.90, 102.60, 48.14, 109.60, 82.68, 112.60, 97.52, 59.06, 111.80, 89.09, 171.90, 66.80, 157.10, 208.40, 133.40)
x0 <- rep(1, length(x1))

First we add the column of ones and the rest of the columns. In this graphs we can see the relationship between the variables.

bX <- cbind(x0,x1,x2,x3)
bX
##       x0    x1    x2     x3
##  [1,]  1 14.62 226.0  7.000
##  [2,]  1 15.63 220.0  3.375
##  [3,]  1 14.62 217.4  6.375
##  [4,]  1 15.00 220.0  6.000
##  [5,]  1 14.50 226.5  7.625
##  [6,]  1 15.25 224.1  6.000
##  [7,]  1 16.12 220.5  3.375
##  [8,]  1 15.13 223.5  6.125
##  [9,]  1 15.50 217.6  5.000
## [10,]  1 15.13 228.5  6.625
## [11,]  1 15.50 230.2  5.750
## [12,]  1 16.12 226.5  3.750
## [13,]  1 15.13 226.6  6.125
## [14,]  1 15.63 225.6  5.375
## [15,]  1 15.38 234.0  8.875
## [16,]  1 15.50 230.0  4.000
## [17,]  1 14.25 224.3  8.000
## [18,]  1 14.50 240.5 10.870
## [19,]  1 14.62 223.7  7.375
plot(x1,y, col = "red")

plot(x2, y, col = "blue")

plot(x3, y, col = "green")

plot(x1*x2, y, col = "purple")

We can calculate B in order to find the coefficientes of our linear regression

b <- inv(t(bX)%*%bX)%*%t(bX)%*%y
b
##             [,1]
## [1,] -21.4693001
## [2,]  -3.3222273
## [3,]   0.2470019
## [4,]  20.3450599

The equation is: \(\hat{y} = -21.47 - 3.32*x_1 + 0.25*x_2 + 20.35*x_3\)

Estimating for the value, we get:

yHatOne <- -21.47 - 3.32*14.5 + 0.25*220 + 20.35*5
yHatOne
## [1] 87.14

Trying to improve the linear regression we get:

cX <- cbind(x0,x1,x2,x3,x2x3 = x2*x3)
cX
##       x0    x1    x2     x3      x2x3
##  [1,]  1 14.62 226.0  7.000 1582.0000
##  [2,]  1 15.63 220.0  3.375  742.5000
##  [3,]  1 14.62 217.4  6.375 1385.9250
##  [4,]  1 15.00 220.0  6.000 1320.0000
##  [5,]  1 14.50 226.5  7.625 1727.0625
##  [6,]  1 15.25 224.1  6.000 1344.6000
##  [7,]  1 16.12 220.5  3.375  744.1875
##  [8,]  1 15.13 223.5  6.125 1368.9375
##  [9,]  1 15.50 217.6  5.000 1088.0000
## [10,]  1 15.13 228.5  6.625 1513.8125
## [11,]  1 15.50 230.2  5.750 1323.6500
## [12,]  1 16.12 226.5  3.750  849.3750
## [13,]  1 15.13 226.6  6.125 1387.9250
## [14,]  1 15.63 225.6  5.375 1212.6000
## [15,]  1 15.38 234.0  8.875 2076.7500
## [16,]  1 15.50 230.0  4.000  920.0000
## [17,]  1 14.25 224.3  8.000 1794.4000
## [18,]  1 14.50 240.5 10.870 2614.2350
## [19,]  1 14.62 223.7  7.375 1649.7875
c <- inv(t(cX)%*%cX)%*%t(cX)%*%y
c
##               [,1]
## [1,] -31.995138693
## [2,]  -3.210249489
## [3,]   0.300021826
## [4,]  21.717448916
## [5,]  -0.007237622
yHatTwo <- -31.995138693 - 3.210249489*14.5 + 0.300021826*220 + 21.717448916*5  -0.007237622*220*5
yHatTwo
## [1] 88.08691