Dados
X <- c(1, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 9, 1, 2, 3, 4, 3, 2, 5, 6, 7)
Y <- c(25, 21, 31, 30, 38, 31, 43, 39, 44, 48, 53, 59, 49, 55, 19, 25, 35, 44, 43, 25, 40, 45, 50)
mean(X)
## [1] 4.608696
mean(Y)
## [1] 38.78261
train <- data.frame(X,Y)
s1 <- sum((X-mean(X))*(Y-mean(Y)))
s1
## [1] 593.0435
\[\sum_{i=1}^n (X_i-\bar{X})(Y_i-\bar{Y})=593.0435\]
s2<- sum((X-mean(X))**2)
s2
## [1] 147.4783
\[\sum_{i=1}^n (X^2_i-\bar{X})=147.4783\]
mean(Y)-((s1/s2)*mean(X))
## [1] 20.25
\[\hat{b}_1 = \frac{593.0435}{147.4783} = 4.021225\]
\[\hat{b}_0 = 38.78261 - 4.021225*4.608696 = 20.25001\]
\[y = 20.25001 + 4.021225x_i\]
Y_hat <- (X*4.021225) + 20.25001
SQRes <- sum((Y-Y_hat)^2)
SQRes
## [1] 425.1509
\[SQRes = \sum_{i=1}^n (Y_i-\hat{Y}_i)^2 = 425.1509\]
SQReg <- sum((mean(Y)-Y_hat)^2)
SQReg
## [1] 2384.76
\[SQReg = \sum_{i=1}^n (\bar{Y}-\hat{Y}_i)^2 = 2384.76\]
SQT <- SQReg+SQRes
SQT
## [1] 2809.911
\[SQT = SQRes + SQReg = 2809.911\] \[MSE = Bias + Variance = 2809.911\]
R_squared <- SQReg/SQT
R_squared
## [1] 0.848696
regressao <- lm(train$Y ~ train$X)
summary(regressao)
##
## Call:
## lm(formula = train$Y ~ train$X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4410 -3.2818 -0.3561 2.1545 10.6863
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.2500 1.9483 10.39 9.81e-10 ***
## train$X 4.0212 0.3705 10.85 4.53e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.499 on 21 degrees of freedom
## Multiple R-squared: 0.8487, Adjusted R-squared: 0.8415
## F-statistic: 117.8 on 1 and 21 DF, p-value: 4.533e-10
X <- c(1, 1, 2, 2, 4, 4, 5, 7, 6, 7, 8, 8, 9, 10, 1, 3, 3, 4, 2, 2, 6, 6, 7)
Y <- c(23, 25, 27, 35, 38, 31, 43, 35, 44, 48, 52, 59, 42, 55, 23, 25, 35, 44, 43, 25, 40, 45, 50)
test = data.frame(X,Y)
predictions <- predict(regressao,test)
axisRange = extendrange(c(test$Y,predictions))
plot(test$Y,predictions,main="Predição")
abline(0,1,col="blue",lty=2,lwd=2)
Quanto maior o RMSE, melhor sera o modelo
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
lm_prediction <- data.frame(pred = predictions, obs = test$Y)
round(defaultSummary(lm_prediction), digits = 3)
## RMSE Rsquared
## 5.100 0.771