Datos del propelente.

# Propellant data set: 20 paired observations of the predictor X and the
# response Y, kept in their original order.
X <- c(
  15.50, 23.75, 8.00, 17.00, 5.50, 19.00, 24.00, 2.50, 7.50, 11.00,
  13.00, 3.75, 25.00, 9.75, 22.00, 18.00, 6.00, 12.50, 2.00, 21.50
)

Y <- c(
  2158.70, 1678.15, 2316.00, 2061.30, 2207.50, 1708.30, 1784.70, 2575.00,
  2357.90, 2256.70, 2165.20, 2399.55, 1779.80, 2336.75, 1765.30, 2053.50,
  2414.40, 2200.50, 2654.20, 1753.70
)

# Two-column data frame (columns X and Y) that the later chunks read from.
datos <- data.frame(X = X, Y = Y)

Diagrama de dispersión

# Scatter plot of Y against X, titled "Propelente" with subtitle "Y ~ X".
ggplot(data = datos, mapping = aes(x = X, y = Y)) +
  geom_point() +
  ggtitle(label = "Propelente", subtitle = "Y ~ X")

Observando el diagrama, las variables cumplen con el supuesto de linealidad.

Obtención de los parámetros estimados de la regresión lineal.

Recordemos que el modelo lineal ajustado está dado por la siguiente ecuación:

\[\widehat Y = \widehat\beta_0 + \widehat\beta_1 X\] donde:

\[\widehat\beta_0 = \bar y - \widehat\beta_1 \bar x\] \[ \widehat \beta_1 = \frac{S_{xy}}{S_{xx}}\]

# Sample means of the predictor and the response.
Mx <- mean(datos$X)
My <- mean(datos$Y)

# Corrected sum of squares of X and corrected sum of cross products of X and Y.
# NOTE(review): both sums are rounded to 2 decimals *before* the division
# below, so B_1 is computed from the rounded values. Drop the round() calls
# here (and round only when printing) if full precision is wanted.
Sxx <- round(with(datos, sum((X - Mx)^2)), digits = 2)
Sxy <- round(with(datos, sum((X - Mx) * (Y - My))), digits = 2)

# Least-squares slope estimate: Sxy / Sxx.
B_1 <- Sxy / Sxx

print(paste("Valores obtenidos: Sxx = ", Sxx, " Sxy = ", Sxy))
## [1] "Valores obtenidos: Sxx =  1106.56  Sxy =  -41112.65"
print(paste("B1 = ", B_1))
## [1] "B1 =  -37.1535660063621"

Reemplazando los valores obtenidos:

\[ \widehat \beta_1 = \frac{-41112.65}{1106.56} = -37.15\]

# Least-squares intercept: the response mean minus the slope times the
# predictor mean. It does not depend on the two prints below, so it is
# computed first; the printed order is unchanged.
bo <- My - B_1 * Mx

print(paste("MediaX =", Mx))
## [1] "MediaX = 13.3625"
print(paste("MediaY =", My))
## [1] "MediaY = 2131.3575"
print(paste("Bo = ", bo))
## [1] "Bo =  2627.82202576001"

\[\widehat\beta_0 = 2131.3575 - (-37.1536)(13.3625) = 2627.82\] Resultado:

\[\widehat Y = 2627.82 - 37.15X\]

Estimación de \(\sigma^2\)

# Fitted values from the rounded equation 2627.82 - 37.15 * X and the
# residuals (observed minus fitted), then a table with centred columns and
# 2 decimals.
# NOTE(review): the coefficients are typed in by hand rather than taken from
# bo and B_1, so the residuals carry the 2-decimal rounding of the
# coefficients.
res <- datos %>%
  mutate(
    Yestimado = 2627.82 - 37.15 * X,
    residual = Y - Yestimado
  )
knitr::kable(res, digits = 2, align = "c")
X Y Yestimado residual
15.50 2158.70 2052.00 106.70
23.75 1678.15 1745.51 -67.36
8.00 2316.00 2330.62 -14.62
17.00 2061.30 1996.27 65.03
5.50 2207.50 2423.50 -216.00
19.00 1708.30 1921.97 -213.67
24.00 1784.70 1736.22 48.48
2.50 2575.00 2534.95 40.05
7.50 2357.90 2349.20 8.70
11.00 2256.70 2219.17 37.53
13.00 2165.20 2144.87 20.33
3.75 2399.55 2488.51 -88.96
25.00 1779.80 1699.07 80.73
9.75 2336.75 2265.61 71.14
22.00 1765.30 1810.52 -45.22
18.00 2053.50 1959.12 94.38
6.00 2414.40 2404.92 9.48
12.50 2200.50 2163.45 37.05
2.00 2654.20 2553.52 100.68
21.50 1753.70 1829.10 -75.40
# Residual sum of squares taken directly from the residual column of `res`.
sumaCuadradosRes <- sum(res$residual^2)

print(sumaCuadradosRes)
## [1] 166254.9

# Total corrected sum of squares via the shortcut sum(y^2) - (sum(y))^2 / n.
# The sample size is read from the data instead of a literal 20, so the
# chunk stays correct if observations are added or removed.
SST <- sum(res$Y^2) - sum(res$Y)^2 / nrow(res)

# Residual sum of squares through the identity SSres = SST - B_1 * Sxy.
# NOTE(review): this value (166256.05) does not match sumaCuadradosRes
# (166254.9) exactly; presumably a consequence of rounding applied upstream
# to Sxx/Sxy and to the fitted coefficients -- verify.
SSres <- SST - (B_1 * Sxy)
print(paste("SSres = ", SSres))
## [1] "SSres =  166256.045903523"

El estimador de \(\sigma^2\) está dado por: \[\widehat\sigma^2 = \frac{SS_{Res}}{n-2}\]

# Estimate of the error variance sigma^2. Despite its name, `sigma` holds the
# variance, not the standard deviation. The residual degrees of freedom are
# n - 2 (two estimated coefficients); n is read from the data instead of
# being hard-coded as 20.
sigma <- SSres / (nrow(datos) - 2)

ANOVA de la regresión lineal simple

# ANOVA decomposition for the simple linear regression.
# These three quantities are sums of squares, not variances.
VNE <- sum(res$residual^2)   # residual (unexplained) sum of squares
VT <- sum((res$Y - My)^2)    # total corrected sum of squares
VR <- VT - VNE               # regression sum of squares

# The printed label is "VNE" so that it matches the variable name.
print(paste("VNE= ", VNE, " VT= ", VT, "VR = ", VR))
## [1] "VNE=  166254.91396875  VT=  1693737.601375 VR =  1527482.68740625"

# Mean squares: each sum of squares divided by its degrees of freedom.
# Residual df = n - 2, derived from the data rather than a literal 18;
# regression df = 1 because there is a single predictor.
SMRes <- VNE / (nrow(res) - 2)
SMReg <- VR / 1

print(paste("SMres= ", SMRes, " SMReg= ", SMReg))
## [1] "SMres=  9236.384109375  SMReg=  1527482.68740625"

# F statistic: regression mean square over residual mean square.
print(paste("Fo = ", SMReg / SMRes))
## [1] "Fo =  165.376696044488"
Fuente de variabilidad Suma de cuadrados Grados de libertad Cuadrados Medios \(F_0\)
Total \(\sum_{i=1}^{20}(Y_i - \bar Y)^2 = 1693737.60\) \(19\)
Residuales \(\sum_{i=1}^{20}e_i^2 = 166254.91\) \(18\) \(9236.3841\)
Regresión \(Vt - Vres = 1527482.69\) \(1\) \(1527482.69\) \(165.38\)
# Fit Y on X with lm() and print the model summary, so the output can be
# compared with the values computed by hand.
summary(lm(formula = Y ~ X, data = datos))
## 
## Call:
## lm(formula = Y ~ X, data = datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -215.98  -50.68   28.74   66.61  106.76 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2627.822     44.184   59.48  < 2e-16 ***
## X            -37.154      2.889  -12.86 1.64e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.11 on 18 degrees of freedom
## Multiple R-squared:  0.9018, Adjusted R-squared:  0.8964 
## F-statistic: 165.4 on 1 and 18 DF,  p-value: 1.643e-10